parser.go (15377B)
1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package ast 18 19 import ( 20 `fmt` 21 `github.com/bytedance/sonic/internal/native/types` 22 `github.com/bytedance/sonic/internal/rt` 23 ) 24 25 const _DEFAULT_NODE_CAP int = 16 26 27 const ( 28 _ERR_NOT_FOUND types.ParsingError = 33 29 _ERR_UNSUPPORT_TYPE types.ParsingError = 34 30 ) 31 32 var ( 33 ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists") 34 ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type") 35 ) 36 37 type Parser struct { 38 p int 39 s string 40 noLazy bool 41 skipValue bool 42 } 43 44 /** Parser Private Methods **/ 45 46 func (self *Parser) delim() types.ParsingError { 47 n := len(self.s) 48 p := self.lspace(self.p) 49 50 /* check for EOF */ 51 if p >= n { 52 return types.ERR_EOF 53 } 54 55 /* check for the delimtier */ 56 if self.s[p] != ':' { 57 return types.ERR_INVALID_CHAR 58 } 59 60 /* update the read pointer */ 61 self.p = p + 1 62 return 0 63 } 64 65 func (self *Parser) object() types.ParsingError { 66 n := len(self.s) 67 p := self.lspace(self.p) 68 69 /* check for EOF */ 70 if p >= n { 71 return types.ERR_EOF 72 } 73 74 /* check for the delimtier */ 75 if self.s[p] != '{' { 76 return types.ERR_INVALID_CHAR 77 } 78 79 /* update the read pointer */ 80 self.p = p + 1 81 return 0 82 } 83 84 func (self *Parser) array() types.ParsingError { 85 n := len(self.s) 86 p := self.lspace(self.p) 87 88 /* check for EOF */ 89 if p >= n { 90 return types.ERR_EOF 91 } 92 93 /* check for the delimtier */ 94 if self.s[p] != '[' { 95 return types.ERR_INVALID_CHAR 96 } 97 98 /* update the read pointer */ 99 self.p = p + 1 100 return 0 101 } 102 103 func (self *Parser) lspace(sp int) int { 104 ns := len(self.s) 105 for ; sp<ns && isSpace(self.s[sp]); sp+=1 {} 106 107 return sp 108 } 109 110 func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) { 111 sp := self.p 112 ns := len(self.s) 113 114 /* check for EOF */ 115 if self.p = self.lspace(sp); self.p >= ns { 116 return Node{}, types.ERR_EOF 117 } 118 119 /* check for empty array */ 120 if self.s[self.p] == ']' { 121 self.p++ 122 return emptyArrayNode, 0 123 } 124 125 /* allocate array space and parse every element */ 126 for { 127 var val Node 128 var err types.ParsingError 129 130 if self.skipValue { 131 /* skip the value */ 132 var start int 133 if start, err = self.skipFast(); err != 0 { 134 return Node{}, err 135 } 136 if self.p > ns { 137 return Node{}, types.ERR_EOF 138 } 139 t := switchRawType(self.s[start]) 140 if t == _V_NONE { 141 return Node{}, types.ERR_INVALID_CHAR 142 } 143 val = newRawNode(self.s[start:self.p], t) 144 }else{ 145 /* decode the value */ 146 if val, err = self.Parse(); err != 0 { 147 return Node{}, err 148 } 149 } 150 151 /* add the value to result */ 152 ret = append(ret, val) 153 self.p = self.lspace(self.p) 154 155 /* check for EOF */ 156 if self.p >= ns { 157 return Node{}, types.ERR_EOF 158 } 159 160 /* check for the next character */ 161 switch self.s[self.p] { 162 case ',' : self.p++ 163 case ']' : self.p++; return NewArray(ret), 0 164 default: 165 if val.isLazy() { 166 return newLazyArray(self, ret), 0 167 } 168 return Node{}, types.ERR_INVALID_CHAR 169 } 170 } 171 } 172 173 func (self *Parser) decodeObject(ret []Pair) (Node, types.ParsingError) { 174 sp := self.p 175 ns := len(self.s) 176 177 /* check for EOF */ 178 if self.p = self.lspace(sp); self.p >= ns { 179 return Node{}, types.ERR_EOF 180 } 181 182 /* check for empty object */ 183 if self.s[self.p] == '}' { 184 self.p++ 185 return emptyObjectNode, 0 186 } 187 188 /* decode each pair */ 189 for { 190 var val Node 191 var njs types.JsonState 192 var err types.ParsingError 193 194 /* decode the key */ 195 if njs = self.decodeValue(); njs.Vt != types.V_STRING { 196 return Node{}, types.ERR_INVALID_CHAR 197 } 198 199 /* extract the key */ 200 idx := self.p - 1 201 key := self.s[njs.Iv:idx] 202 203 /* check for escape sequence */ 204 if njs.Ep != -1 { 205 if key, err = unquote(key); err != 0 { 206 return Node{}, err 207 } 208 } 209 210 /* expect a ':' delimiter */ 211 if err = self.delim(); err != 0 { 212 return Node{}, err 213 } 214 215 216 if self.skipValue { 217 /* skip the value */ 218 var start int 219 if start, err = self.skipFast(); err != 0 { 220 return Node{}, err 221 } 222 if self.p > ns { 223 return Node{}, types.ERR_EOF 224 } 225 t := switchRawType(self.s[start]) 226 if t == _V_NONE { 227 return Node{}, types.ERR_INVALID_CHAR 228 } 229 val = newRawNode(self.s[start:self.p], t) 230 } else { 231 /* decode the value */ 232 if val, err = self.Parse(); err != 0 { 233 return Node{}, err 234 } 235 } 236 237 /* add the value to result */ 238 ret = append(ret, Pair{Key: key, Value: val}) 239 self.p = self.lspace(self.p) 240 241 /* check for EOF */ 242 if self.p >= ns { 243 return Node{}, types.ERR_EOF 244 } 245 246 /* check for the next character */ 247 switch self.s[self.p] { 248 case ',' : self.p++ 249 case '}' : self.p++; return NewObject(ret), 0 250 default: 251 if val.isLazy() { 252 return newLazyObject(self, ret), 0 253 } 254 return Node{}, types.ERR_INVALID_CHAR 255 } 256 } 257 } 258 259 func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) { 260 p := self.p - 1 261 s := self.s[iv:p] 262 263 /* fast path: no escape sequence */ 264 if ep == -1 { 265 return NewString(s), 0 266 } 267 268 /* unquote the string */ 269 out, err := unquote(s) 270 271 /* check for errors */ 272 if err != 0 { 273 return Node{}, err 274 } else { 275 return newBytes(rt.Str2Mem(out)), 0 276 } 277 } 278 279 /** Parser Interface **/ 280 281 func (self *Parser) Pos() int { 282 return self.p 283 } 284 285 func (self *Parser) Parse() (Node, types.ParsingError) { 286 switch val := self.decodeValue(); val.Vt { 287 case types.V_EOF : return Node{}, types.ERR_EOF 288 case types.V_NULL : return nullNode, 0 289 case types.V_TRUE : return trueNode, 0 290 case types.V_FALSE : return falseNode, 0 291 case types.V_STRING : return self.decodeString(val.Iv, val.Ep) 292 case types.V_ARRAY: 293 if self.noLazy { 294 return self.decodeArray(make([]Node, 0, _DEFAULT_NODE_CAP)) 295 } 296 return newLazyArray(self, make([]Node, 0, _DEFAULT_NODE_CAP)), 0 297 case types.V_OBJECT: 298 if self.noLazy { 299 return self.decodeObject(make([]Pair, 0, _DEFAULT_NODE_CAP)) 300 } 301 return newLazyObject(self, make([]Pair, 0, _DEFAULT_NODE_CAP)), 0 302 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0 303 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0 304 default : return Node{}, types.ParsingError(-val.Vt) 305 } 306 } 307 308 func (self *Parser) searchKey(match string) types.ParsingError { 309 ns := len(self.s) 310 if err := self.object(); err != 0 { 311 return err 312 } 313 314 /* check for EOF */ 315 if self.p = self.lspace(self.p); self.p >= ns { 316 return types.ERR_EOF 317 } 318 319 /* check for empty object */ 320 if self.s[self.p] == '}' { 321 self.p++ 322 return _ERR_NOT_FOUND 323 } 324 325 var njs types.JsonState 326 var err types.ParsingError 327 /* decode each pair */ 328 for { 329 330 /* decode the key */ 331 if njs = self.decodeValue(); njs.Vt != types.V_STRING { 332 return types.ERR_INVALID_CHAR 333 } 334 335 /* extract the key */ 336 idx := self.p - 1 337 key := self.s[njs.Iv:idx] 338 339 /* check for escape sequence */ 340 if njs.Ep != -1 { 341 if key, err = unquote(key); err != 0 { 342 return err 343 } 344 } 345 346 /* expect a ':' delimiter */ 347 if err = self.delim(); err != 0 { 348 return err 349 } 350 351 /* skip value */ 352 if key != match { 353 if _, err = self.skipFast(); err != 0 { 354 return err 355 } 356 } else { 357 return 0 358 } 359 360 /* check for EOF */ 361 self.p = self.lspace(self.p) 362 if self.p >= ns { 363 return types.ERR_EOF 364 } 365 366 /* check for the next character */ 367 switch self.s[self.p] { 368 case ',': 369 self.p++ 370 case '}': 371 self.p++ 372 return _ERR_NOT_FOUND 373 default: 374 return types.ERR_INVALID_CHAR 375 } 376 } 377 } 378 379 func (self *Parser) searchIndex(idx int) types.ParsingError { 380 ns := len(self.s) 381 if err := self.array(); err != 0 { 382 return err 383 } 384 385 /* check for EOF */ 386 if self.p = self.lspace(self.p); self.p >= ns { 387 return types.ERR_EOF 388 } 389 390 /* check for empty array */ 391 if self.s[self.p] == ']' { 392 self.p++ 393 return _ERR_NOT_FOUND 394 } 395 396 var err types.ParsingError 397 /* allocate array space and parse every element */ 398 for i := 0; i < idx; i++ { 399 400 /* decode the value */ 401 if _, err = self.skipFast(); err != 0 { 402 return err 403 } 404 405 /* check for EOF */ 406 self.p = self.lspace(self.p) 407 if self.p >= ns { 408 return types.ERR_EOF 409 } 410 411 /* check for the next character */ 412 switch self.s[self.p] { 413 case ',': 414 self.p++ 415 case ']': 416 self.p++ 417 return _ERR_NOT_FOUND 418 default: 419 return types.ERR_INVALID_CHAR 420 } 421 } 422 423 return 0 424 } 425 426 func (self *Node) skipNextNode() *Node { 427 if !self.isLazy() { 428 return nil 429 } 430 431 parser, stack := self.getParserAndArrayStack() 432 ret := stack.v 433 sp := parser.p 434 ns := len(parser.s) 435 436 /* check for EOF */ 437 if parser.p = parser.lspace(sp); parser.p >= ns { 438 return newSyntaxError(parser.syntaxError(types.ERR_EOF)) 439 } 440 441 /* check for empty array */ 442 if parser.s[parser.p] == ']' { 443 parser.p++ 444 self.setArray(ret) 445 return nil 446 } 447 448 var val Node 449 /* skip the value */ 450 if start, err := parser.skipFast(); err != 0 { 451 return newSyntaxError(parser.syntaxError(err)) 452 } else { 453 t := switchRawType(parser.s[start]) 454 if t == _V_NONE { 455 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) 456 } 457 val = newRawNode(parser.s[start:parser.p], t) 458 } 459 460 /* add the value to result */ 461 ret = append(ret, val) 462 parser.p = parser.lspace(parser.p) 463 464 /* check for EOF */ 465 if parser.p >= ns { 466 return newSyntaxError(parser.syntaxError(types.ERR_EOF)) 467 } 468 469 /* check for the next character */ 470 switch parser.s[parser.p] { 471 case ',': 472 parser.p++ 473 self.setLazyArray(parser, ret) 474 return &ret[len(ret)-1] 475 case ']': 476 parser.p++ 477 self.setArray(ret) 478 return &ret[len(ret)-1] 479 default: 480 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) 481 } 482 } 483 484 func (self *Node) skipNextPair() (*Pair) { 485 if !self.isLazy() { 486 return nil 487 } 488 489 parser, stack := self.getParserAndObjectStack() 490 ret := stack.v 491 sp := parser.p 492 ns := len(parser.s) 493 494 /* check for EOF */ 495 if parser.p = parser.lspace(sp); parser.p >= ns { 496 return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} 497 } 498 499 /* check for empty object */ 500 if parser.s[parser.p] == '}' { 501 parser.p++ 502 self.setObject(ret) 503 return nil 504 } 505 506 /* decode one pair */ 507 var val Node 508 var njs types.JsonState 509 var err types.ParsingError 510 511 /* decode the key */ 512 if njs = parser.decodeValue(); njs.Vt != types.V_STRING { 513 return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 514 } 515 516 /* extract the key */ 517 idx := parser.p - 1 518 key := parser.s[njs.Iv:idx] 519 520 /* check for escape sequence */ 521 if njs.Ep != -1 { 522 if key, err = unquote(key); err != 0 { 523 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 524 } 525 } 526 527 /* expect a ':' delimiter */ 528 if err = parser.delim(); err != 0 { 529 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 530 } 531 532 /* skip the value */ 533 if start, err := parser.skipFast(); err != 0 { 534 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 535 } else { 536 t := switchRawType(parser.s[start]) 537 if t == _V_NONE { 538 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 539 } 540 val = newRawNode(parser.s[start:parser.p], t) 541 } 542 543 /* add the value to result */ 544 ret = append(ret, Pair{Key: key, Value: val}) 545 parser.p = parser.lspace(parser.p) 546 547 /* check for EOF */ 548 if parser.p >= ns { 549 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} 550 } 551 552 /* check for the next character */ 553 switch parser.s[parser.p] { 554 case ',': 555 parser.p++ 556 self.setLazyObject(parser, ret) 557 return &ret[len(ret)-1] 558 case '}': 559 parser.p++ 560 self.setObject(ret) 561 return &ret[len(ret)-1] 562 default: 563 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 564 } 565 } 566 567 568 /** Parser Factory **/ 569 570 // Loads parse all json into interface{} 571 func Loads(src string) (int, interface{}, error) { 572 ps := &Parser{s: src} 573 np, err := ps.Parse() 574 575 /* check for errors */ 576 if err != 0 { 577 return 0, nil, ps.ExportError(err) 578 } else { 579 x, err := np.Interface() 580 if err != nil { 581 return 0, nil, err 582 } 583 return ps.Pos(), x, nil 584 } 585 } 586 587 // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number 588 func LoadsUseNumber(src string) (int, interface{}, error) { 589 ps := &Parser{s: src} 590 np, err := ps.Parse() 591 592 /* check for errors */ 593 if err != 0 { 594 return 0, nil, err 595 } else { 596 x, err := np.InterfaceUseNumber() 597 if err != nil { 598 return 0, nil, err 599 } 600 return ps.Pos(), x, nil 601 } 602 } 603 604 func NewParser(src string) *Parser { 605 return &Parser{s: src} 606 } 607 608 // ExportError converts types.ParsingError to std Error 609 func (self *Parser) ExportError(err types.ParsingError) error { 610 if err == _ERR_NOT_FOUND { 611 return ErrNotExist 612 } 613 return fmt.Errorf("%q", SyntaxError{ 614 Pos : self.p, 615 Src : self.s, 616 Code: err, 617 }.Description()) 618 }