gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

id.go (11357B)


      1 // Package xid is a globally unique id generator suited for web scale
      2 //
      3 // Xid is using Mongo Object ID algorithm to generate globally unique ids:
      4 // https://docs.mongodb.org/manual/reference/object-id/
      5 //
      6 //   - 4-byte value representing the seconds since the Unix epoch,
      7 //   - 3-byte machine identifier,
      8 //   - 2-byte process id, and
      9 //   - 3-byte counter, starting with a random value.
     10 //
     11 // The binary representation of the id is compatible with Mongo 12 bytes Object IDs.
     12 // The string representation is using base32 hex (w/o padding) for better space efficiency
     13 // when stored in that form (20 bytes). The hex variant of base32 is used to retain the
     14 // sortable property of the id.
     15 //
     16 // Xid doesn't use base64 because case sensitivity and the 2 non alphanum chars may be an
     17 // issue when transported as a string between various systems. Base36 wasn't retained either
     18 // because 1/ it's not standard 2/ the resulting size is not predictable (not bit aligned)
     19 // and 3/ it would not remain sortable. To validate a base32 `xid`, expect a 20 chars long,
     20 // all lowercase sequence of `a` to `v` letters and `0` to `9` numbers (`[0-9a-v]{20}`).
     21 //
     22 // UUID is 16 bytes (128 bits), snowflake is 8 bytes (64 bits), xid stands in between
     23 // with 12 bytes with a more compact string representation ready for the web and no
     24 // required configuration or central generation server.
     25 //
     26 // Features:
     27 //
     28 //   - Size: 12 bytes (96 bits), smaller than UUID, larger than snowflake
     29 //   - Base32 hex encoded by default (16 bytes storage when transported as printable string)
     30 //   - Non configured, you don't need set a unique machine and/or data center id
     31 //   - K-ordered
     32 //   - Embedded time with 1 second precision
     33 //   - Unicity guaranteed for 16,777,216 (24 bits) unique ids per second and per host/process
     34 //
     35 // Best used with xlog's RequestIDHandler (https://godoc.org/github.com/rs/xlog#RequestIDHandler).
     36 //
     37 // References:
     38 //
     39 //   - http://www.slideshare.net/davegardnerisme/unique-id-generation-in-distributed-systems
     40 //   - https://en.wikipedia.org/wiki/Universally_unique_identifier
     41 //   - https://blog.twitter.com/2010/announcing-snowflake
     42 package xid
     43 
     44 import (
     45 	"bytes"
     46 	"crypto/sha256"
     47 	"crypto/rand"
     48 	"database/sql/driver"
     49 	"encoding/binary"
     50 	"fmt"
     51 	"hash/crc32"
     52 	"io/ioutil"
     53 	"os"
     54 	"sort"
     55 	"sync/atomic"
     56 	"time"
     57 	"unsafe"
     58 )
     59 
     60 // Code inspired from mgo/bson ObjectId
     61 
     62 // ID represents a unique request id
     63 type ID [rawLen]byte
     64 
     65 const (
     66 	encodedLen = 20 // string encoded len
     67 	rawLen     = 12 // binary raw len
     68 
     69 	// encoding stores a custom version of the base32 encoding with lower case
     70 	// letters.
     71 	encoding = "0123456789abcdefghijklmnopqrstuv"
     72 )
     73 
     74 var (
     75 	// objectIDCounter is atomically incremented when generating a new ObjectId. It's
     76 	// used as the counter part of an id. This id is initialized with a random value.
     77 	objectIDCounter = randInt()
     78 
     79 	// machineID is generated once and used in subsequent calls to the New* functions.
     80 	machineID = readMachineID()
     81 
     82 	// pid stores the current process id
     83 	pid = os.Getpid()
     84 
     85 	nilID ID
     86 
     87 	// dec is the decoding map for base32 encoding
     88 	dec [256]byte
     89 )
     90 
     91 func init() {
     92 	for i := 0; i < len(dec); i++ {
     93 		dec[i] = 0xFF
     94 	}
     95 	for i := 0; i < len(encoding); i++ {
     96 		dec[encoding[i]] = byte(i)
     97 	}
     98 
     99 	// If /proc/self/cpuset exists and is not /, we can assume that we are in a
    100 	// form of container and use the content of cpuset xor-ed with the PID in
    101 	// order get a reasonable machine global unique PID.
    102 	b, err := ioutil.ReadFile("/proc/self/cpuset")
    103 	if err == nil && len(b) > 1 {
    104 		pid ^= int(crc32.ChecksumIEEE(b))
    105 	}
    106 }
    107 
    108 // readMachineID generates a machine ID, derived from a platform-specific machine ID
    109 // value, or else the machine's hostname, or else a randomly-generated number.
    110 // It panics if all of these methods fail.
    111 func readMachineID() []byte {
    112 	id := make([]byte, 3)
    113 	hid, err := readPlatformMachineID()
    114 	if err != nil || len(hid) == 0 {
    115 		hid, err = os.Hostname()
    116 	}
    117 	if err == nil && len(hid) != 0 {
    118 		hw := sha256.New()
    119 		hw.Write([]byte(hid))
    120 		copy(id, hw.Sum(nil))
    121 	} else {
    122 		// Fallback to rand number if machine id can't be gathered
    123 		if _, randErr := rand.Reader.Read(id); randErr != nil {
    124 			panic(fmt.Errorf("xid: cannot get hostname nor generate a random number: %v; %v", err, randErr))
    125 		}
    126 	}
    127 	return id
    128 }
    129 
    130 // randInt generates a random uint32
    131 func randInt() uint32 {
    132 	b := make([]byte, 3)
    133 	if _, err := rand.Reader.Read(b); err != nil {
    134 		panic(fmt.Errorf("xid: cannot generate random number: %v;", err))
    135 	}
    136 	return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])
    137 }
    138 
    139 // New generates a globally unique ID
    140 func New() ID {
    141 	return NewWithTime(time.Now())
    142 }
    143 
    144 // NewWithTime generates a globally unique ID with the passed in time
    145 func NewWithTime(t time.Time) ID {
    146 	var id ID
    147 	// Timestamp, 4 bytes, big endian
    148 	binary.BigEndian.PutUint32(id[:], uint32(t.Unix()))
    149 	// Machine ID, 3 bytes
    150 	id[4] = machineID[0]
    151 	id[5] = machineID[1]
    152 	id[6] = machineID[2]
    153 	// Pid, 2 bytes, specs don't specify endianness, but we use big endian.
    154 	id[7] = byte(pid >> 8)
    155 	id[8] = byte(pid)
    156 	// Increment, 3 bytes, big endian
    157 	i := atomic.AddUint32(&objectIDCounter, 1)
    158 	id[9] = byte(i >> 16)
    159 	id[10] = byte(i >> 8)
    160 	id[11] = byte(i)
    161 	return id
    162 }
    163 
    164 // FromString reads an ID from its string representation
    165 func FromString(id string) (ID, error) {
    166 	i := &ID{}
    167 	err := i.UnmarshalText([]byte(id))
    168 	return *i, err
    169 }
    170 
    171 // String returns a base32 hex lowercased with no padding representation of the id (char set is 0-9, a-v).
    172 func (id ID) String() string {
    173 	text := make([]byte, encodedLen)
    174 	encode(text, id[:])
    175 	return *(*string)(unsafe.Pointer(&text))
    176 }
    177 
    178 // Encode encodes the id using base32 encoding, writing 20 bytes to dst and return it.
    179 func (id ID) Encode(dst []byte) []byte {
    180 	encode(dst, id[:])
    181 	return dst
    182 }
    183 
    184 // MarshalText implements encoding/text TextMarshaler interface
    185 func (id ID) MarshalText() ([]byte, error) {
    186 	text := make([]byte, encodedLen)
    187 	encode(text, id[:])
    188 	return text, nil
    189 }
    190 
    191 // MarshalJSON implements encoding/json Marshaler interface
    192 func (id ID) MarshalJSON() ([]byte, error) {
    193 	if id.IsNil() {
    194 		return []byte("null"), nil
    195 	}
    196 	text := make([]byte, encodedLen+2)
    197 	encode(text[1:encodedLen+1], id[:])
    198 	text[0], text[encodedLen+1] = '"', '"'
    199 	return text, nil
    200 }
    201 
    202 // encode by unrolling the stdlib base32 algorithm + removing all safe checks
    203 func encode(dst, id []byte) {
    204 	_ = dst[19]
    205 	_ = id[11]
    206 
    207 	dst[19] = encoding[(id[11]<<4)&0x1F]
    208 	dst[18] = encoding[(id[11]>>1)&0x1F]
    209 	dst[17] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
    210 	dst[16] = encoding[id[10]>>3]
    211 	dst[15] = encoding[id[9]&0x1F]
    212 	dst[14] = encoding[(id[9]>>5)|(id[8]<<3)&0x1F]
    213 	dst[13] = encoding[(id[8]>>2)&0x1F]
    214 	dst[12] = encoding[id[8]>>7|(id[7]<<1)&0x1F]
    215 	dst[11] = encoding[(id[7]>>4)&0x1F|(id[6]<<4)&0x1F]
    216 	dst[10] = encoding[(id[6]>>1)&0x1F]
    217 	dst[9] = encoding[(id[6]>>6)&0x1F|(id[5]<<2)&0x1F]
    218 	dst[8] = encoding[id[5]>>3]
    219 	dst[7] = encoding[id[4]&0x1F]
    220 	dst[6] = encoding[id[4]>>5|(id[3]<<3)&0x1F]
    221 	dst[5] = encoding[(id[3]>>2)&0x1F]
    222 	dst[4] = encoding[id[3]>>7|(id[2]<<1)&0x1F]
    223 	dst[3] = encoding[(id[2]>>4)&0x1F|(id[1]<<4)&0x1F]
    224 	dst[2] = encoding[(id[1]>>1)&0x1F]
    225 	dst[1] = encoding[(id[1]>>6)&0x1F|(id[0]<<2)&0x1F]
    226 	dst[0] = encoding[id[0]>>3]
    227 }
    228 
    229 // UnmarshalText implements encoding/text TextUnmarshaler interface
    230 func (id *ID) UnmarshalText(text []byte) error {
    231 	if len(text) != encodedLen {
    232 		return ErrInvalidID
    233 	}
    234 	for _, c := range text {
    235 		if dec[c] == 0xFF {
    236 			return ErrInvalidID
    237 		}
    238 	}
    239 	if !decode(id, text) {
    240 		*id = nilID
    241 		return ErrInvalidID
    242 	}
    243 	return nil
    244 }
    245 
    246 // UnmarshalJSON implements encoding/json Unmarshaler interface
    247 func (id *ID) UnmarshalJSON(b []byte) error {
    248 	s := string(b)
    249 	if s == "null" {
    250 		*id = nilID
    251 		return nil
    252 	}
    253 	// Check the slice length to prevent panic on passing it to UnmarshalText()
    254 	if len(b) < 2 {
    255 		return ErrInvalidID
    256 	}
    257 	return id.UnmarshalText(b[1 : len(b)-1])
    258 }
    259 
    260 // decode by unrolling the stdlib base32 algorithm + customized safe check.
    261 func decode(id *ID, src []byte) bool {
    262 	_ = src[19]
    263 	_ = id[11]
    264 
    265 	id[11] = dec[src[17]]<<6 | dec[src[18]]<<1 | dec[src[19]]>>4
    266 	// check the last byte
    267 	if encoding[(id[11]<<4)&0x1F] != src[19] {
    268 		return false
    269 	}
    270 	id[10] = dec[src[16]]<<3 | dec[src[17]]>>2
    271 	id[9] = dec[src[14]]<<5 | dec[src[15]]
    272 	id[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3
    273 	id[7] = dec[src[11]]<<4 | dec[src[12]]>>1
    274 	id[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4
    275 	id[5] = dec[src[8]]<<3 | dec[src[9]]>>2
    276 	id[4] = dec[src[6]]<<5 | dec[src[7]]
    277 	id[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3
    278 	id[2] = dec[src[3]]<<4 | dec[src[4]]>>1
    279 	id[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4
    280 	id[0] = dec[src[0]]<<3 | dec[src[1]]>>2
    281 	return true
    282 }
    283 
    284 // Time returns the timestamp part of the id.
    285 // It's a runtime error to call this method with an invalid id.
    286 func (id ID) Time() time.Time {
    287 	// First 4 bytes of ObjectId is 32-bit big-endian seconds from epoch.
    288 	secs := int64(binary.BigEndian.Uint32(id[0:4]))
    289 	return time.Unix(secs, 0)
    290 }
    291 
    292 // Machine returns the 3-byte machine id part of the id.
    293 // It's a runtime error to call this method with an invalid id.
    294 func (id ID) Machine() []byte {
    295 	return id[4:7]
    296 }
    297 
    298 // Pid returns the process id part of the id.
    299 // It's a runtime error to call this method with an invalid id.
    300 func (id ID) Pid() uint16 {
    301 	return binary.BigEndian.Uint16(id[7:9])
    302 }
    303 
    304 // Counter returns the incrementing value part of the id.
    305 // It's a runtime error to call this method with an invalid id.
    306 func (id ID) Counter() int32 {
    307 	b := id[9:12]
    308 	// Counter is stored as big-endian 3-byte value
    309 	return int32(uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2]))
    310 }
    311 
    312 // Value implements the driver.Valuer interface.
    313 func (id ID) Value() (driver.Value, error) {
    314 	if id.IsNil() {
    315 		return nil, nil
    316 	}
    317 	b, err := id.MarshalText()
    318 	return string(b), err
    319 }
    320 
    321 // Scan implements the sql.Scanner interface.
    322 func (id *ID) Scan(value interface{}) (err error) {
    323 	switch val := value.(type) {
    324 	case string:
    325 		return id.UnmarshalText([]byte(val))
    326 	case []byte:
    327 		return id.UnmarshalText(val)
    328 	case nil:
    329 		*id = nilID
    330 		return nil
    331 	default:
    332 		return fmt.Errorf("xid: scanning unsupported type: %T", value)
    333 	}
    334 }
    335 
    336 // IsNil Returns true if this is a "nil" ID
    337 func (id ID) IsNil() bool {
    338 	return id == nilID
    339 }
    340 
    341 // Alias of IsNil
    342 func (id ID) IsZero() bool {
    343 	return id.IsNil()
    344 }
    345 
    346 // NilID returns a zero value for `xid.ID`.
    347 func NilID() ID {
    348 	return nilID
    349 }
    350 
    351 // Bytes returns the byte array representation of `ID`
    352 func (id ID) Bytes() []byte {
    353 	return id[:]
    354 }
    355 
    356 // FromBytes convert the byte array representation of `ID` back to `ID`
    357 func FromBytes(b []byte) (ID, error) {
    358 	var id ID
    359 	if len(b) != rawLen {
    360 		return id, ErrInvalidID
    361 	}
    362 	copy(id[:], b)
    363 	return id, nil
    364 }
    365 
    366 // Compare returns an integer comparing two IDs. It behaves just like `bytes.Compare`.
    367 // The result will be 0 if two IDs are identical, -1 if current id is less than the other one,
    368 // and 1 if current id is greater than the other.
    369 func (id ID) Compare(other ID) int {
    370 	return bytes.Compare(id[:], other[:])
    371 }
    372 
    373 type sorter []ID
    374 
    375 func (s sorter) Len() int {
    376 	return len(s)
    377 }
    378 
    379 func (s sorter) Less(i, j int) bool {
    380 	return s[i].Compare(s[j]) < 0
    381 }
    382 
    383 func (s sorter) Swap(i, j int) {
    384 	s[i], s[j] = s[j], s[i]
    385 }
    386 
    387 // Sort sorts an array of IDs inplace.
    388 // It works by wrapping `[]ID` and use `sort.Sort`.
    389 func Sort(ids []ID) {
    390 	sort.Sort(sorter(ids))
    391 }