gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

decimal.go (11350B)


      1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
      2 // Use of this source code is governed by a MIT license found in the LICENSE file.
      3 
      4 package codec
      5 
      6 import (
      7 	"math"
      8 	"strconv"
      9 )
     10 
     11 // Per go spec, floats are represented in memory as
     12 // IEEE single or double precision floating point values.
     13 //
     14 // We also looked at the source for stdlib math/modf.go,
     15 // reviewed https://github.com/chewxy/math32
     16 // and read wikipedia documents describing the formats.
     17 //
     18 // It became clear that we could easily look at the bits to determine
     19 // whether any fraction exists.
     20 
     21 func parseFloat32(b []byte) (f float32, err error) {
     22 	return parseFloat32_custom(b)
     23 }
     24 
     25 func parseFloat64(b []byte) (f float64, err error) {
     26 	return parseFloat64_custom(b)
     27 }
     28 
     29 func parseFloat32_strconv(b []byte) (f float32, err error) {
     30 	f64, err := strconv.ParseFloat(stringView(b), 32)
     31 	f = float32(f64)
     32 	return
     33 }
     34 
     35 func parseFloat64_strconv(b []byte) (f float64, err error) {
     36 	return strconv.ParseFloat(stringView(b), 64)
     37 }
     38 
     39 // ------ parseFloat custom below --------
     40 
     41 // JSON really supports decimal numbers in base 10 notation, with exponent support.
     42 //
     43 // We assume the following:
     44 //   - a lot of floating point numbers in json files will have defined precision
     45 //     (in terms of number of digits after decimal point), etc.
     46 //   - these (referenced above) can be written in exact format.
     47 //
     48 // strconv.ParseFloat has some unnecessary overhead which we can do without
     49 // for the common case:
     50 //
     51 //    - expensive char-by-char check to see if underscores are in right place
     52 //    - testing for and skipping underscores
     53 //    - check if the string matches ignorecase +/- inf, +/- infinity, nan
     54 //    - support for base 16 (0xFFFF...)
     55 //
     56 // The functions below will try a fast-path for floats which can be decoded
     57 // without any loss of precision, meaning they:
     58 //
     59 //    - fits within the significand bits of the 32-bits or 64-bits
     60 //    - exponent fits within the exponent value
     61 //    - there is no truncation (any extra numbers are all trailing zeros)
     62 //
     63 // To figure out what the values are for maxMantDigits, use this idea below:
     64 //
     65 // 2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32)
     66 // 2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
     67 // 2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64)
     68 // 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
     69 //
     70 // Note: we only allow for up to what can comfortably fit into the significand
     71 // ignoring the exponent, and we only try to parse iff significand fits.
     72 
     73 const (
     74 	fMaxMultiplierForExactPow10_64 = 1e15
     75 	fMaxMultiplierForExactPow10_32 = 1e7
     76 
     77 	fUint64Cutoff = (1<<64-1)/10 + 1
     78 	// fUint32Cutoff = (1<<32-1)/10 + 1
     79 
     80 	fBase = 10
     81 )
     82 
     83 const (
     84 	thousand    = 1000
     85 	million     = thousand * thousand
     86 	billion     = thousand * million
     87 	trillion    = thousand * billion
     88 	quadrillion = thousand * trillion
     89 	quintillion = thousand * quadrillion
     90 )
     91 
     92 // Exact powers of 10.
     93 var uint64pow10 = [...]uint64{
     94 	1, 10, 100,
     95 	1 * thousand, 10 * thousand, 100 * thousand,
     96 	1 * million, 10 * million, 100 * million,
     97 	1 * billion, 10 * billion, 100 * billion,
     98 	1 * trillion, 10 * trillion, 100 * trillion,
     99 	1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
    100 	1 * quintillion, 10 * quintillion,
    101 }
    102 var float64pow10 = [...]float64{
    103 	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
    104 	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
    105 	1e20, 1e21, 1e22,
    106 }
    107 var float32pow10 = [...]float32{
    108 	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
    109 }
    110 
    111 type floatinfo struct {
    112 	mantbits uint8
    113 
    114 	// expbits uint8 // (unused)
    115 	// bias    int16 // (unused)
    116 	// is32bit bool // (unused)
    117 
    118 	exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
    119 
    120 	exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0)
    121 
    122 	// maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
    123 
    124 	mantCutoffIsUint64Cutoff bool
    125 
    126 	mantCutoff uint64
    127 }
    128 
    129 var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1}
    130 var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1}
    131 
    132 var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff}
    133 
    134 func noFrac64(fbits uint64) bool {
    135 	if fbits == 0 {
    136 		return true
    137 	}
    138 
    139 	exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias
    140 	// clear top 12+e bits, the integer part; if the rest is 0, then no fraction.
    141 	return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part
    142 }
    143 
    144 func noFrac32(fbits uint32) bool {
    145 	if fbits == 0 {
    146 		return true
    147 	}
    148 
    149 	exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias
    150 	// clear top 9+e bits, the integer part; if the rest is 0, then no fraction.
    151 	return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part
    152 }
    153 
    154 func strconvParseErr(b []byte, fn string) error {
    155 	return &strconv.NumError{
    156 		Func: fn,
    157 		Err:  strconv.ErrSyntax,
    158 		Num:  string(b),
    159 	}
    160 }
    161 
    162 func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
    163 	f = float32(r.mantissa)
    164 	if r.exp == 0 {
    165 	} else if r.exp < 0 { // int / 10^k
    166 		f /= float32pow10[uint8(-r.exp)]
    167 	} else { // exp > 0
    168 		if r.exp > fi32.exactPow10 {
    169 			f *= float32pow10[r.exp-fi32.exactPow10]
    170 			if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range
    171 				fail = true
    172 				return // ok = false
    173 			}
    174 			f *= float32pow10[fi32.exactPow10]
    175 		} else {
    176 			f *= float32pow10[uint8(r.exp)]
    177 		}
    178 	}
    179 	if r.neg {
    180 		f = -f
    181 	}
    182 	return
    183 }
    184 
    185 func parseFloat32_custom(b []byte) (f float32, err error) {
    186 	r := readFloat(b, fi32)
    187 	if r.bad {
    188 		return 0, strconvParseErr(b, "ParseFloat")
    189 	}
    190 	if r.ok {
    191 		f, r.bad = parseFloat32_reader(r)
    192 		if !r.bad {
    193 			return
    194 		}
    195 	}
    196 	return parseFloat32_strconv(b)
    197 }
    198 
    199 func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
    200 	f = float64(r.mantissa)
    201 	if r.exp == 0 {
    202 	} else if r.exp < 0 { // int / 10^k
    203 		f /= float64pow10[-uint8(r.exp)]
    204 	} else { // exp > 0
    205 		if r.exp > fi64.exactPow10 {
    206 			f *= float64pow10[r.exp-fi64.exactPow10]
    207 			if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range
    208 				fail = true
    209 				return
    210 			}
    211 			f *= float64pow10[fi64.exactPow10]
    212 		} else {
    213 			f *= float64pow10[uint8(r.exp)]
    214 		}
    215 	}
    216 	if r.neg {
    217 		f = -f
    218 	}
    219 	return
    220 }
    221 
    222 func parseFloat64_custom(b []byte) (f float64, err error) {
    223 	r := readFloat(b, fi64)
    224 	if r.bad {
    225 		return 0, strconvParseErr(b, "ParseFloat")
    226 	}
    227 	if r.ok {
    228 		f, r.bad = parseFloat64_reader(r)
    229 		if !r.bad {
    230 			return
    231 		}
    232 	}
    233 	return parseFloat64_strconv(b)
    234 }
    235 
    236 func parseUint64_simple(b []byte) (n uint64, ok bool) {
    237 	var i int
    238 	var n1 uint64
    239 	var c uint8
    240 LOOP:
    241 	if i < len(b) {
    242 		c = b[i]
    243 		// unsigned integers don't overflow well on multiplication, so check cutoff here
    244 		// e.g. (maxUint64-5)*10 doesn't overflow well ...
    245 		// if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' {
    246 		if n >= fUint64Cutoff || c < '0' || c > '9' {
    247 			return
    248 		} else if c == '0' {
    249 			n *= fBase
    250 		} else {
    251 			n1 = n
    252 			n = n*fBase + uint64(c-'0')
    253 			if n < n1 {
    254 				return
    255 			}
    256 		}
    257 		i++
    258 		goto LOOP
    259 	}
    260 	ok = true
    261 	return
    262 }
    263 
    264 func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
    265 	f = r.mantissa
    266 	if r.exp == 0 {
    267 	} else if r.exp < 0 { // int / 10^k
    268 		if f%uint64pow10[uint8(-r.exp)] != 0 {
    269 			fail = true
    270 		} else {
    271 			f /= uint64pow10[uint8(-r.exp)]
    272 		}
    273 	} else { // exp > 0
    274 		f *= uint64pow10[uint8(r.exp)]
    275 	}
    276 	return
    277 }
    278 
    279 func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) {
    280 	if len(b) == 0 {
    281 		ok = true
    282 		return
    283 	}
    284 	if b[0] == '-' {
    285 		if len(b) == 1 {
    286 			return
    287 		}
    288 		neg = true
    289 		b = b[1:]
    290 	}
    291 
    292 	u, ok = parseUint64_simple(b)
    293 	if ok {
    294 		return
    295 	}
    296 
    297 	r := readFloat(b, fi64u)
    298 	if r.ok {
    299 		var fail bool
    300 		u, fail = parseUint64_reader(r)
    301 		if fail {
    302 			f, err := parseFloat64(b)
    303 			if err != nil {
    304 				return
    305 			}
    306 			if !noFrac64(math.Float64bits(f)) {
    307 				return
    308 			}
    309 			u = uint64(f)
    310 		}
    311 		ok = true
    312 		return
    313 	}
    314 	return
    315 }
    316 
    317 // parseNumber will return an integer if only composed of [-]?[0-9]+
    318 // Else it will return a float.
    319 func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
    320 	var ok, neg bool
    321 	var f uint64
    322 
    323 	if len(b) == 0 {
    324 		return
    325 	}
    326 
    327 	if b[0] == '-' {
    328 		neg = true
    329 		f, ok = parseUint64_simple(b[1:])
    330 	} else {
    331 		f, ok = parseUint64_simple(b)
    332 	}
    333 
    334 	if ok {
    335 		if neg {
    336 			z.v = valueTypeInt
    337 			if chkOvf.Uint2Int(f, neg) {
    338 				return strconvParseErr(b, "ParseInt")
    339 			}
    340 			z.i = -int64(f)
    341 		} else if preferSignedInt {
    342 			z.v = valueTypeInt
    343 			if chkOvf.Uint2Int(f, neg) {
    344 				return strconvParseErr(b, "ParseInt")
    345 			}
    346 			z.i = int64(f)
    347 		} else {
    348 			z.v = valueTypeUint
    349 			z.u = f
    350 		}
    351 		return
    352 	}
    353 
    354 	z.v = valueTypeFloat
    355 	z.f, err = parseFloat64_custom(b)
    356 	return
    357 }
    358 
    359 type readFloatResult struct {
    360 	mantissa uint64
    361 	exp      int8
    362 	neg      bool
    363 	trunc    bool
    364 	bad      bool // bad decimal string
    365 	hardexp  bool // exponent is hard to handle (> 2 digits, etc)
    366 	ok       bool
    367 	// sawdot   bool
    368 	// sawexp   bool
    369 	//_ [2]bool // padding
    370 }
    371 
    372 func readFloat(s []byte, y floatinfo) (r readFloatResult) {
    373 	var i uint // uint, so that we eliminate bounds checking
    374 	var slen = uint(len(s))
    375 	if slen == 0 {
    376 		// read an empty string as the zero value
    377 		// r.bad = true
    378 		r.ok = true
    379 		return
    380 	}
    381 
    382 	if s[0] == '-' {
    383 		r.neg = true
    384 		i++
    385 	}
    386 
    387 	// we considered punting early if string has length > maxMantDigits, but this doesn't account
    388 	// for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
    389 
    390 	var nd, ndMant, dp int8
    391 	var sawdot, sawexp bool
    392 	var xu uint64
    393 
    394 LOOP:
    395 	for ; i < slen; i++ {
    396 		switch s[i] {
    397 		case '.':
    398 			if sawdot {
    399 				r.bad = true
    400 				return
    401 			}
    402 			sawdot = true
    403 			dp = nd
    404 		case 'e', 'E':
    405 			sawexp = true
    406 			break LOOP
    407 		case '0':
    408 			if nd == 0 {
    409 				dp--
    410 				continue LOOP
    411 			}
    412 			nd++
    413 			if r.mantissa < y.mantCutoff {
    414 				r.mantissa *= fBase
    415 				ndMant++
    416 			}
    417 		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
    418 			nd++
    419 			if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff {
    420 				r.mantissa *= fBase
    421 				xu = r.mantissa + uint64(s[i]-'0')
    422 				if xu < r.mantissa {
    423 					r.trunc = true
    424 					return
    425 				}
    426 				r.mantissa = xu
    427 			} else if r.mantissa < y.mantCutoff {
    428 				// mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
    429 				r.mantissa = r.mantissa*fBase + uint64(s[i]-'0')
    430 			} else {
    431 				r.trunc = true
    432 				return
    433 			}
    434 			ndMant++
    435 		default:
    436 			r.bad = true
    437 			return
    438 		}
    439 	}
    440 
    441 	if !sawdot {
    442 		dp = nd
    443 	}
    444 
    445 	if sawexp {
    446 		i++
    447 		if i < slen {
    448 			var eneg bool
    449 			if s[i] == '+' {
    450 				i++
    451 			} else if s[i] == '-' {
    452 				i++
    453 				eneg = true
    454 			}
    455 			if i < slen {
    456 				// for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
    457 				// exit quick if exponent is more than 2 digits.
    458 				if i+2 < slen {
    459 					r.hardexp = true
    460 					return
    461 				}
    462 				var e int8
    463 				if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
    464 					r.bad = true
    465 					return
    466 				}
    467 				e = int8(s[i] - '0')
    468 				i++
    469 				if i < slen {
    470 					if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
    471 						r.bad = true
    472 						return
    473 					}
    474 					e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
    475 					i++
    476 				}
    477 				if eneg {
    478 					dp -= e
    479 				} else {
    480 					dp += e
    481 				}
    482 			}
    483 		}
    484 	}
    485 
    486 	if r.mantissa != 0 {
    487 		r.exp = dp - ndMant
    488 		// do not set ok=true for cases we cannot handle
    489 		if r.exp < -y.exactPow10 ||
    490 			r.exp > y.exactInts+y.exactPow10 ||
    491 			(y.mantbits != 0 && r.mantissa>>y.mantbits != 0) {
    492 			r.hardexp = true
    493 			return
    494 		}
    495 	}
    496 
    497 	r.ok = true
    498 	return
    499 }