decimal.go (11350B)
1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. 2 // Use of this source code is governed by a MIT license found in the LICENSE file. 3 4 package codec 5 6 import ( 7 "math" 8 "strconv" 9 ) 10 11 // Per go spec, floats are represented in memory as 12 // IEEE single or double precision floating point values. 13 // 14 // We also looked at the source for stdlib math/modf.go, 15 // reviewed https://github.com/chewxy/math32 16 // and read wikipedia documents describing the formats. 17 // 18 // It became clear that we could easily look at the bits to determine 19 // whether any fraction exists. 20 21 func parseFloat32(b []byte) (f float32, err error) { 22 return parseFloat32_custom(b) 23 } 24 25 func parseFloat64(b []byte) (f float64, err error) { 26 return parseFloat64_custom(b) 27 } 28 29 func parseFloat32_strconv(b []byte) (f float32, err error) { 30 f64, err := strconv.ParseFloat(stringView(b), 32) 31 f = float32(f64) 32 return 33 } 34 35 func parseFloat64_strconv(b []byte) (f float64, err error) { 36 return strconv.ParseFloat(stringView(b), 64) 37 } 38 39 // ------ parseFloat custom below -------- 40 41 // JSON really supports decimal numbers in base 10 notation, with exponent support. 42 // 43 // We assume the following: 44 // - a lot of floating point numbers in json files will have defined precision 45 // (in terms of number of digits after decimal point), etc. 46 // - these (referenced above) can be written in exact format. 47 // 48 // strconv.ParseFloat has some unnecessary overhead which we can do without 49 // for the common case: 50 // 51 // - expensive char-by-char check to see if underscores are in right place 52 // - testing for and skipping underscores 53 // - check if the string matches ignorecase +/- inf, +/- infinity, nan 54 // - support for base 16 (0xFFFF...) 55 // 56 // The functions below will try a fast-path for floats which can be decoded 57 // without any loss of precision, meaning they: 58 // 59 // - fits within the significand bits of the 32-bits or 64-bits 60 // - exponent fits within the exponent value 61 // - there is no truncation (any extra numbers are all trailing zeros) 62 // 63 // To figure out what the values are for maxMantDigits, use this idea below: 64 // 65 // 2^23 = 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32) 66 // 2^32 = 42 9496 7296 (between 10^ 9 and 10^10) (full uint32) 67 // 2^52 = 4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64) 68 // 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64) 69 // 70 // Note: we only allow for up to what can comfortably fit into the significand 71 // ignoring the exponent, and we only try to parse iff significand fits. 72 73 const ( 74 fMaxMultiplierForExactPow10_64 = 1e15 75 fMaxMultiplierForExactPow10_32 = 1e7 76 77 fUint64Cutoff = (1<<64-1)/10 + 1 78 // fUint32Cutoff = (1<<32-1)/10 + 1 79 80 fBase = 10 81 ) 82 83 const ( 84 thousand = 1000 85 million = thousand * thousand 86 billion = thousand * million 87 trillion = thousand * billion 88 quadrillion = thousand * trillion 89 quintillion = thousand * quadrillion 90 ) 91 92 // Exact powers of 10. 93 var uint64pow10 = [...]uint64{ 94 1, 10, 100, 95 1 * thousand, 10 * thousand, 100 * thousand, 96 1 * million, 10 * million, 100 * million, 97 1 * billion, 10 * billion, 100 * billion, 98 1 * trillion, 10 * trillion, 100 * trillion, 99 1 * quadrillion, 10 * quadrillion, 100 * quadrillion, 100 1 * quintillion, 10 * quintillion, 101 } 102 var float64pow10 = [...]float64{ 103 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 104 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 105 1e20, 1e21, 1e22, 106 } 107 var float32pow10 = [...]float32{ 108 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 109 } 110 111 type floatinfo struct { 112 mantbits uint8 113 114 // expbits uint8 // (unused) 115 // bias int16 // (unused) 116 // is32bit bool // (unused) 117 118 exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22) 119 120 exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0) 121 122 // maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32 123 124 mantCutoffIsUint64Cutoff bool 125 126 mantCutoff uint64 127 } 128 129 var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1} 130 var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1} 131 132 var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff} 133 134 func noFrac64(fbits uint64) bool { 135 if fbits == 0 { 136 return true 137 } 138 139 exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias 140 // clear top 12+e bits, the integer part; if the rest is 0, then no fraction. 141 return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part 142 } 143 144 func noFrac32(fbits uint32) bool { 145 if fbits == 0 { 146 return true 147 } 148 149 exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias 150 // clear top 9+e bits, the integer part; if the rest is 0, then no fraction. 151 return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part 152 } 153 154 func strconvParseErr(b []byte, fn string) error { 155 return &strconv.NumError{ 156 Func: fn, 157 Err: strconv.ErrSyntax, 158 Num: string(b), 159 } 160 } 161 162 func parseFloat32_reader(r readFloatResult) (f float32, fail bool) { 163 f = float32(r.mantissa) 164 if r.exp == 0 { 165 } else if r.exp < 0 { // int / 10^k 166 f /= float32pow10[uint8(-r.exp)] 167 } else { // exp > 0 168 if r.exp > fi32.exactPow10 { 169 f *= float32pow10[r.exp-fi32.exactPow10] 170 if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range 171 fail = true 172 return // ok = false 173 } 174 f *= float32pow10[fi32.exactPow10] 175 } else { 176 f *= float32pow10[uint8(r.exp)] 177 } 178 } 179 if r.neg { 180 f = -f 181 } 182 return 183 } 184 185 func parseFloat32_custom(b []byte) (f float32, err error) { 186 r := readFloat(b, fi32) 187 if r.bad { 188 return 0, strconvParseErr(b, "ParseFloat") 189 } 190 if r.ok { 191 f, r.bad = parseFloat32_reader(r) 192 if !r.bad { 193 return 194 } 195 } 196 return parseFloat32_strconv(b) 197 } 198 199 func parseFloat64_reader(r readFloatResult) (f float64, fail bool) { 200 f = float64(r.mantissa) 201 if r.exp == 0 { 202 } else if r.exp < 0 { // int / 10^k 203 f /= float64pow10[-uint8(r.exp)] 204 } else { // exp > 0 205 if r.exp > fi64.exactPow10 { 206 f *= float64pow10[r.exp-fi64.exactPow10] 207 if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range 208 fail = true 209 return 210 } 211 f *= float64pow10[fi64.exactPow10] 212 } else { 213 f *= float64pow10[uint8(r.exp)] 214 } 215 } 216 if r.neg { 217 f = -f 218 } 219 return 220 } 221 222 func parseFloat64_custom(b []byte) (f float64, err error) { 223 r := readFloat(b, fi64) 224 if r.bad { 225 return 0, strconvParseErr(b, "ParseFloat") 226 } 227 if r.ok { 228 f, r.bad = parseFloat64_reader(r) 229 if !r.bad { 230 return 231 } 232 } 233 return parseFloat64_strconv(b) 234 } 235 236 func parseUint64_simple(b []byte) (n uint64, ok bool) { 237 var i int 238 var n1 uint64 239 var c uint8 240 LOOP: 241 if i < len(b) { 242 c = b[i] 243 // unsigned integers don't overflow well on multiplication, so check cutoff here 244 // e.g. (maxUint64-5)*10 doesn't overflow well ... 245 // if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' { 246 if n >= fUint64Cutoff || c < '0' || c > '9' { 247 return 248 } else if c == '0' { 249 n *= fBase 250 } else { 251 n1 = n 252 n = n*fBase + uint64(c-'0') 253 if n < n1 { 254 return 255 } 256 } 257 i++ 258 goto LOOP 259 } 260 ok = true 261 return 262 } 263 264 func parseUint64_reader(r readFloatResult) (f uint64, fail bool) { 265 f = r.mantissa 266 if r.exp == 0 { 267 } else if r.exp < 0 { // int / 10^k 268 if f%uint64pow10[uint8(-r.exp)] != 0 { 269 fail = true 270 } else { 271 f /= uint64pow10[uint8(-r.exp)] 272 } 273 } else { // exp > 0 274 f *= uint64pow10[uint8(r.exp)] 275 } 276 return 277 } 278 279 func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) { 280 if len(b) == 0 { 281 ok = true 282 return 283 } 284 if b[0] == '-' { 285 if len(b) == 1 { 286 return 287 } 288 neg = true 289 b = b[1:] 290 } 291 292 u, ok = parseUint64_simple(b) 293 if ok { 294 return 295 } 296 297 r := readFloat(b, fi64u) 298 if r.ok { 299 var fail bool 300 u, fail = parseUint64_reader(r) 301 if fail { 302 f, err := parseFloat64(b) 303 if err != nil { 304 return 305 } 306 if !noFrac64(math.Float64bits(f)) { 307 return 308 } 309 u = uint64(f) 310 } 311 ok = true 312 return 313 } 314 return 315 } 316 317 // parseNumber will return an integer if only composed of [-]?[0-9]+ 318 // Else it will return a float. 319 func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) { 320 var ok, neg bool 321 var f uint64 322 323 if len(b) == 0 { 324 return 325 } 326 327 if b[0] == '-' { 328 neg = true 329 f, ok = parseUint64_simple(b[1:]) 330 } else { 331 f, ok = parseUint64_simple(b) 332 } 333 334 if ok { 335 if neg { 336 z.v = valueTypeInt 337 if chkOvf.Uint2Int(f, neg) { 338 return strconvParseErr(b, "ParseInt") 339 } 340 z.i = -int64(f) 341 } else if preferSignedInt { 342 z.v = valueTypeInt 343 if chkOvf.Uint2Int(f, neg) { 344 return strconvParseErr(b, "ParseInt") 345 } 346 z.i = int64(f) 347 } else { 348 z.v = valueTypeUint 349 z.u = f 350 } 351 return 352 } 353 354 z.v = valueTypeFloat 355 z.f, err = parseFloat64_custom(b) 356 return 357 } 358 359 type readFloatResult struct { 360 mantissa uint64 361 exp int8 362 neg bool 363 trunc bool 364 bad bool // bad decimal string 365 hardexp bool // exponent is hard to handle (> 2 digits, etc) 366 ok bool 367 // sawdot bool 368 // sawexp bool 369 //_ [2]bool // padding 370 } 371 372 func readFloat(s []byte, y floatinfo) (r readFloatResult) { 373 var i uint // uint, so that we eliminate bounds checking 374 var slen = uint(len(s)) 375 if slen == 0 { 376 // read an empty string as the zero value 377 // r.bad = true 378 r.ok = true 379 return 380 } 381 382 if s[0] == '-' { 383 r.neg = true 384 i++ 385 } 386 387 // we considered punting early if string has length > maxMantDigits, but this doesn't account 388 // for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20 389 390 var nd, ndMant, dp int8 391 var sawdot, sawexp bool 392 var xu uint64 393 394 LOOP: 395 for ; i < slen; i++ { 396 switch s[i] { 397 case '.': 398 if sawdot { 399 r.bad = true 400 return 401 } 402 sawdot = true 403 dp = nd 404 case 'e', 'E': 405 sawexp = true 406 break LOOP 407 case '0': 408 if nd == 0 { 409 dp-- 410 continue LOOP 411 } 412 nd++ 413 if r.mantissa < y.mantCutoff { 414 r.mantissa *= fBase 415 ndMant++ 416 } 417 case '1', '2', '3', '4', '5', '6', '7', '8', '9': 418 nd++ 419 if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff { 420 r.mantissa *= fBase 421 xu = r.mantissa + uint64(s[i]-'0') 422 if xu < r.mantissa { 423 r.trunc = true 424 return 425 } 426 r.mantissa = xu 427 } else if r.mantissa < y.mantCutoff { 428 // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0') 429 r.mantissa = r.mantissa*fBase + uint64(s[i]-'0') 430 } else { 431 r.trunc = true 432 return 433 } 434 ndMant++ 435 default: 436 r.bad = true 437 return 438 } 439 } 440 441 if !sawdot { 442 dp = nd 443 } 444 445 if sawexp { 446 i++ 447 if i < slen { 448 var eneg bool 449 if s[i] == '+' { 450 i++ 451 } else if s[i] == '-' { 452 i++ 453 eneg = true 454 } 455 if i < slen { 456 // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17). 457 // exit quick if exponent is more than 2 digits. 458 if i+2 < slen { 459 r.hardexp = true 460 return 461 } 462 var e int8 463 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // 464 r.bad = true 465 return 466 } 467 e = int8(s[i] - '0') 468 i++ 469 if i < slen { 470 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // 471 r.bad = true 472 return 473 } 474 e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0') 475 i++ 476 } 477 if eneg { 478 dp -= e 479 } else { 480 dp += e 481 } 482 } 483 } 484 } 485 486 if r.mantissa != 0 { 487 r.exp = dp - ndMant 488 // do not set ok=true for cases we cannot handle 489 if r.exp < -y.exactPow10 || 490 r.exp > y.exactInts+y.exactPow10 || 491 (y.mantbits != 0 && r.mantissa>>y.mantbits != 0) { 492 r.hardexp = true 493 return 494 } 495 } 496 497 r.ok = true 498 return 499 }