decode.go (14601B)
1 /* 2 * Copyright 2022 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package ast 18 19 import ( 20 `encoding/base64` 21 `runtime` 22 `strconv` 23 `unsafe` 24 25 `github.com/bytedance/sonic/internal/native/types` 26 `github.com/bytedance/sonic/internal/rt` 27 ) 28 29 const _blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n') 30 31 const ( 32 bytesNull = "null" 33 bytesTrue = "true" 34 bytesFalse = "false" 35 bytesObject = "{}" 36 bytesArray = "[]" 37 ) 38 39 func isSpace(c byte) bool { 40 return (int(1<<c) & _blankCharsMask) != 0 41 } 42 43 //go:nocheckptr 44 func skipBlank(src string, pos int) int { 45 se := uintptr(rt.IndexChar(src, len(src))) 46 sp := uintptr(rt.IndexChar(src, pos)) 47 48 for sp < se { 49 if !isSpace(*(*byte)(unsafe.Pointer(sp))) { 50 break 51 } 52 sp += 1 53 } 54 if sp >= se { 55 return -int(types.ERR_EOF) 56 } 57 runtime.KeepAlive(src) 58 return int(sp - uintptr(rt.IndexChar(src, 0))) 59 } 60 61 func decodeNull(src string, pos int) (ret int) { 62 ret = pos + 4 63 if ret > len(src) { 64 return -int(types.ERR_EOF) 65 } 66 if src[pos:ret] == bytesNull { 67 return ret 68 } else { 69 return -int(types.ERR_INVALID_CHAR) 70 } 71 } 72 73 func decodeTrue(src string, pos int) (ret int) { 74 ret = pos + 4 75 if ret > len(src) { 76 return -int(types.ERR_EOF) 77 } 78 if src[pos:ret] == bytesTrue { 79 return ret 80 } else { 81 return -int(types.ERR_INVALID_CHAR) 82 } 83 84 } 85 86 func decodeFalse(src string, pos int) (ret int) { 87 ret = pos + 5 88 if ret > len(src) { 89 return -int(types.ERR_EOF) 90 } 91 if src[pos:ret] == bytesFalse { 92 return ret 93 } 94 return -int(types.ERR_INVALID_CHAR) 95 } 96 97 //go:nocheckptr 98 func decodeString(src string, pos int) (ret int, v string) { 99 ret, ep := skipString(src, pos) 100 if ep == -1 { 101 (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1) 102 (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2 103 return ret, v 104 } 105 106 vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret])) 107 if !ok { 108 return -int(types.ERR_INVALID_CHAR), "" 109 } 110 111 runtime.KeepAlive(src) 112 return ret, rt.Mem2Str(vv) 113 } 114 115 func decodeBinary(src string, pos int) (ret int, v []byte) { 116 var vv string 117 ret, vv = decodeString(src, pos) 118 if ret < 0 { 119 return ret, nil 120 } 121 var err error 122 v, err = base64.StdEncoding.DecodeString(vv) 123 if err != nil { 124 return -int(types.ERR_INVALID_CHAR), nil 125 } 126 return ret, v 127 } 128 129 func isDigit(c byte) bool { 130 return c >= '0' && c <= '9' 131 } 132 133 //go:nocheckptr 134 func decodeInt64(src string, pos int) (ret int, v int64, err error) { 135 sp := uintptr(rt.IndexChar(src, pos)) 136 ss := uintptr(sp) 137 se := uintptr(rt.IndexChar(src, len(src))) 138 if uintptr(sp) >= se { 139 return -int(types.ERR_EOF), 0, nil 140 } 141 142 if c := *(*byte)(unsafe.Pointer(sp)); c == '-' { 143 sp += 1 144 } 145 if sp == se { 146 return -int(types.ERR_EOF), 0, nil 147 } 148 149 for ; sp < se; sp += uintptr(1) { 150 if !isDigit(*(*byte)(unsafe.Pointer(sp))) { 151 break 152 } 153 } 154 155 if sp < se { 156 if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' { 157 return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil 158 } 159 } 160 161 var vv string 162 ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)) 163 (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss) 164 (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos 165 166 v, err = strconv.ParseInt(vv, 10, 64) 167 if err != nil { 168 //NOTICE: allow overflow here 169 if err.(*strconv.NumError).Err == strconv.ErrRange { 170 return ret, 0, err 171 } 172 return -int(types.ERR_INVALID_CHAR), 0, err 173 } 174 175 runtime.KeepAlive(src) 176 return ret, v, nil 177 } 178 179 func isNumberChars(c byte) bool { 180 return (c >= '0' && c <= '9') || c == '+' || c == '-' || c == 'e' || c == 'E' || c == '.' 181 } 182 183 //go:nocheckptr 184 func decodeFloat64(src string, pos int) (ret int, v float64, err error) { 185 sp := uintptr(rt.IndexChar(src, pos)) 186 ss := uintptr(sp) 187 se := uintptr(rt.IndexChar(src, len(src))) 188 if uintptr(sp) >= se { 189 return -int(types.ERR_EOF), 0, nil 190 } 191 192 if c := *(*byte)(unsafe.Pointer(sp)); c == '-' { 193 sp += 1 194 } 195 if sp == se { 196 return -int(types.ERR_EOF), 0, nil 197 } 198 199 for ; sp < se; sp += uintptr(1) { 200 if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) { 201 break 202 } 203 } 204 205 var vv string 206 ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)) 207 (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss) 208 (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos 209 210 v, err = strconv.ParseFloat(vv, 64) 211 if err != nil { 212 //NOTICE: allow overflow here 213 if err.(*strconv.NumError).Err == strconv.ErrRange { 214 return ret, 0, err 215 } 216 return -int(types.ERR_INVALID_CHAR), 0, err 217 } 218 219 runtime.KeepAlive(src) 220 return ret, v, nil 221 } 222 223 func decodeValue(src string, pos int) (ret int, v types.JsonState) { 224 pos = skipBlank(src, pos) 225 if pos < 0 { 226 return pos, types.JsonState{Vt: types.ValueType(pos)} 227 } 228 switch c := src[pos]; c { 229 case 'n': 230 ret = decodeNull(src, pos) 231 if ret < 0 { 232 return ret, types.JsonState{Vt: types.ValueType(ret)} 233 } 234 return ret, types.JsonState{Vt: types.V_NULL} 235 case '"': 236 var ep int 237 ret, ep = skipString(src, pos) 238 if ret < 0 { 239 return ret, types.JsonState{Vt: types.ValueType(ret)} 240 } 241 return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep} 242 case '{': 243 return pos + 1, types.JsonState{Vt: types.V_OBJECT} 244 case '[': 245 return pos + 1, types.JsonState{Vt: types.V_ARRAY} 246 case 't': 247 ret = decodeTrue(src, pos) 248 if ret < 0 { 249 return ret, types.JsonState{Vt: types.ValueType(ret)} 250 } 251 return ret, types.JsonState{Vt: types.V_TRUE} 252 case 'f': 253 ret = decodeFalse(src, pos) 254 if ret < 0 { 255 return ret, types.JsonState{Vt: types.ValueType(ret)} 256 } 257 return ret, types.JsonState{Vt: types.V_FALSE} 258 case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 259 var iv int64 260 ret, iv, _ = decodeInt64(src, pos) 261 if ret >= 0 { 262 return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos} 263 } else if ret != -int(types.ERR_INVALID_NUMBER_FMT) { 264 return ret, types.JsonState{Vt: types.ValueType(ret)} 265 } 266 var fv float64 267 ret, fv, _ = decodeFloat64(src, pos) 268 if ret >= 0 { 269 return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos} 270 } else { 271 return ret, types.JsonState{Vt: types.ValueType(ret)} 272 } 273 default: 274 return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)} 275 } 276 } 277 278 //go:nocheckptr 279 func skipNumber(src string, pos int) (ret int) { 280 sp := uintptr(rt.IndexChar(src, pos)) 281 se := uintptr(rt.IndexChar(src, len(src))) 282 if uintptr(sp) >= se { 283 return -int(types.ERR_EOF) 284 } 285 286 if c := *(*byte)(unsafe.Pointer(sp)); c == '-' { 287 sp += 1 288 } 289 ss := sp 290 291 var pointer bool 292 var exponent bool 293 var lastIsDigit bool 294 var nextNeedDigit = true 295 296 for ; sp < se; sp += uintptr(1) { 297 c := *(*byte)(unsafe.Pointer(sp)) 298 if isDigit(c) { 299 lastIsDigit = true 300 nextNeedDigit = false 301 continue 302 } else if nextNeedDigit { 303 return -int(types.ERR_INVALID_CHAR) 304 } else if c == '.' { 305 if !lastIsDigit || pointer || exponent || sp == ss { 306 return -int(types.ERR_INVALID_CHAR) 307 } 308 pointer = true 309 lastIsDigit = false 310 nextNeedDigit = true 311 continue 312 } else if c == 'e' || c == 'E' { 313 if !lastIsDigit || exponent { 314 return -int(types.ERR_INVALID_CHAR) 315 } 316 if sp == se-1 { 317 return -int(types.ERR_EOF) 318 } 319 exponent = true 320 lastIsDigit = false 321 nextNeedDigit = false 322 continue 323 } else if c == '-' || c == '+' { 324 if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' { 325 return -int(types.ERR_INVALID_CHAR) 326 } 327 lastIsDigit = false 328 nextNeedDigit = true 329 continue 330 } else { 331 break 332 } 333 } 334 335 if nextNeedDigit { 336 return -int(types.ERR_EOF) 337 } 338 339 runtime.KeepAlive(src) 340 return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)) 341 } 342 343 //go:nocheckptr 344 func skipString(src string, pos int) (ret int, ep int) { 345 if pos+1 >= len(src) { 346 return -int(types.ERR_EOF), -1 347 } 348 349 sp := uintptr(rt.IndexChar(src, pos)) 350 se := uintptr(rt.IndexChar(src, len(src))) 351 352 // not start with quote 353 if *(*byte)(unsafe.Pointer(sp)) != '"' { 354 return -int(types.ERR_INVALID_CHAR), -1 355 } 356 sp += 1 357 358 ep = -1 359 for sp < se { 360 c := *(*byte)(unsafe.Pointer(sp)) 361 if c == '\\' { 362 if ep == -1 { 363 ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)) 364 } 365 sp += 2 366 continue 367 } 368 sp += 1 369 if c == '"' { 370 return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep 371 } 372 } 373 374 runtime.KeepAlive(src) 375 // not found the closed quote until EOF 376 return -int(types.ERR_EOF), -1 377 } 378 379 //go:nocheckptr 380 func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) { 381 if pos+1 >= len(src) { 382 return -int(types.ERR_EOF) 383 } 384 385 sp := uintptr(rt.IndexChar(src, pos)) 386 se := uintptr(rt.IndexChar(src, len(src))) 387 388 if *(*byte)(unsafe.Pointer(sp)) != lchar { 389 return -int(types.ERR_INVALID_CHAR) 390 } 391 392 sp += 1 393 nbrace := 1 394 inquote := false 395 396 for sp < se { 397 c := *(*byte)(unsafe.Pointer(sp)) 398 if c == '\\' { 399 sp += 2 400 continue 401 } else if c == '"' { 402 inquote = !inquote 403 } else if c == lchar { 404 if !inquote { 405 nbrace += 1 406 } 407 } else if c == rchar { 408 if !inquote { 409 nbrace -= 1 410 if nbrace == 0 { 411 sp += 1 412 break 413 } 414 } 415 } 416 sp += 1 417 } 418 419 if nbrace != 0 { 420 return -int(types.ERR_INVALID_CHAR) 421 } 422 423 runtime.KeepAlive(src) 424 return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)) 425 } 426 427 func skipValueFast(src string, pos int) (ret int, start int) { 428 pos = skipBlank(src, pos) 429 if pos < 0 { 430 return pos, -1 431 } 432 switch c := src[pos]; c { 433 case 'n': 434 ret = decodeNull(src, pos) 435 case '"': 436 ret, _ = skipString(src, pos) 437 case '{': 438 ret = skipPair(src, pos, '{', '}') 439 case '[': 440 ret = skipPair(src, pos, '[', ']') 441 case 't': 442 ret = decodeTrue(src, pos) 443 case 'f': 444 ret = decodeFalse(src, pos) 445 case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 446 ret = skipNumber(src, pos) 447 default: 448 ret = -int(types.ERR_INVALID_CHAR) 449 } 450 return ret, pos 451 } 452 453 func skipValue(src string, pos int) (ret int, start int) { 454 pos = skipBlank(src, pos) 455 if pos < 0 { 456 return pos, -1 457 } 458 switch c := src[pos]; c { 459 case 'n': 460 ret = decodeNull(src, pos) 461 case '"': 462 ret, _ = skipString(src, pos) 463 case '{': 464 ret, _ = skipObject(src, pos) 465 case '[': 466 ret, _ = skipArray(src, pos) 467 case 't': 468 ret = decodeTrue(src, pos) 469 case 'f': 470 ret = decodeFalse(src, pos) 471 case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 472 ret = skipNumber(src, pos) 473 default: 474 ret = -int(types.ERR_INVALID_CHAR) 475 } 476 return ret, pos 477 } 478 479 func skipObject(src string, pos int) (ret int, start int) { 480 start = skipBlank(src, pos) 481 if start < 0 { 482 return start, -1 483 } 484 485 if src[start] != '{' { 486 return -int(types.ERR_INVALID_CHAR), -1 487 } 488 489 pos = start + 1 490 pos = skipBlank(src, pos) 491 if pos < 0 { 492 return pos, -1 493 } 494 if src[pos] == '}' { 495 return pos + 1, start 496 } 497 498 for { 499 pos, _ = skipString(src, pos) 500 if pos < 0 { 501 return pos, -1 502 } 503 504 pos = skipBlank(src, pos) 505 if pos < 0 { 506 return pos, -1 507 } 508 if src[pos] != ':' { 509 return -int(types.ERR_INVALID_CHAR), -1 510 } 511 512 pos++ 513 pos, _ = skipValue(src, pos) 514 if pos < 0 { 515 return pos, -1 516 } 517 518 pos = skipBlank(src, pos) 519 if pos < 0 { 520 return pos, -1 521 } 522 if src[pos] == '}' { 523 return pos + 1, start 524 } 525 if src[pos] != ',' { 526 return -int(types.ERR_INVALID_CHAR), -1 527 } 528 529 pos++ 530 pos = skipBlank(src, pos) 531 if pos < 0 { 532 return pos, -1 533 } 534 535 } 536 } 537 538 func skipArray(src string, pos int) (ret int, start int) { 539 start = skipBlank(src, pos) 540 if start < 0 { 541 return start, -1 542 } 543 544 if src[start] != '[' { 545 return -int(types.ERR_INVALID_CHAR), -1 546 } 547 548 pos = start + 1 549 pos = skipBlank(src, pos) 550 if pos < 0 { 551 return pos, -1 552 } 553 if src[pos] == ']' { 554 return pos + 1, start 555 } 556 557 for { 558 pos, _ = skipValue(src, pos) 559 if pos < 0 { 560 return pos, -1 561 } 562 563 pos = skipBlank(src, pos) 564 if pos < 0 { 565 return pos, -1 566 } 567 if src[pos] == ']' { 568 return pos + 1, start 569 } 570 if src[pos] != ',' { 571 return -int(types.ERR_INVALID_CHAR), -1 572 } 573 pos++ 574 } 575 }