gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

parser.go (15377B)


      1 /*
      2  * Copyright 2021 ByteDance Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package ast
     18 
     19 import (
     20     `fmt`
     21     `github.com/bytedance/sonic/internal/native/types`
     22     `github.com/bytedance/sonic/internal/rt`
     23 )
     24 
     25 const _DEFAULT_NODE_CAP int = 16
     26 
     27 const (
     28     _ERR_NOT_FOUND      types.ParsingError = 33
     29     _ERR_UNSUPPORT_TYPE types.ParsingError = 34
     30 )
     31 
     32 var (
     33     ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
     34     ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
     35 )
     36 
     // Parser carries the state of one pass over a JSON string.
     type Parser struct {
         p         int    // current read position inside s
         s         string // the input being parsed
         noLazy    bool   // when set, Parse decodes arrays and objects eagerly instead of returning lazy nodes
         skipValue bool   // when set, decodeArray and decodeObject skip each value and keep it as a raw node
     }
     43 
     44 /** Parser Private Methods **/
     45 
     46 func (self *Parser) delim() types.ParsingError {
     47     n := len(self.s)
     48     p := self.lspace(self.p)
     49 
     50     /* check for EOF */
     51     if p >= n {
     52         return types.ERR_EOF
     53     }
     54 
     55     /* check for the delimtier */
     56     if self.s[p] != ':' {
     57         return types.ERR_INVALID_CHAR
     58     }
     59 
     60     /* update the read pointer */
     61     self.p = p + 1
     62     return 0
     63 }
     64 
     65 func (self *Parser) object() types.ParsingError {
     66     n := len(self.s)
     67     p := self.lspace(self.p)
     68 
     69     /* check for EOF */
     70     if p >= n {
     71         return types.ERR_EOF
     72     }
     73 
     74     /* check for the delimtier */
     75     if self.s[p] != '{' {
     76         return types.ERR_INVALID_CHAR
     77     }
     78 
     79     /* update the read pointer */
     80     self.p = p + 1
     81     return 0
     82 }
     83 
     84 func (self *Parser) array() types.ParsingError {
     85     n := len(self.s)
     86     p := self.lspace(self.p)
     87 
     88     /* check for EOF */
     89     if p >= n {
     90         return types.ERR_EOF
     91     }
     92 
     93     /* check for the delimtier */
     94     if self.s[p] != '[' {
     95         return types.ERR_INVALID_CHAR
     96     }
     97 
     98     /* update the read pointer */
     99     self.p = p + 1
    100     return 0
    101 }
    102 
    103 func (self *Parser) lspace(sp int) int {
    104     ns := len(self.s)
    105     for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
    106 
    107     return sp
    108 }
    109 
    110 func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) {
    111     sp := self.p
    112     ns := len(self.s)
    113 
    114     /* check for EOF */
    115     if self.p = self.lspace(sp); self.p >= ns {
    116         return Node{}, types.ERR_EOF
    117     }
    118 
    119     /* check for empty array */
    120     if self.s[self.p] == ']' {
    121         self.p++
    122         return emptyArrayNode, 0
    123     }
    124 
    125     /* allocate array space and parse every element */
    126     for {
    127         var val Node
    128         var err types.ParsingError
    129 
    130         if self.skipValue {
    131             /* skip the value */
    132             var start int
    133             if start, err = self.skipFast(); err != 0 {
    134                 return Node{}, err
    135             }
    136             if self.p > ns {
    137                 return Node{}, types.ERR_EOF
    138             }
    139             t := switchRawType(self.s[start])
    140             if t == _V_NONE {
    141                 return Node{}, types.ERR_INVALID_CHAR
    142             }
    143             val = newRawNode(self.s[start:self.p], t)
    144         }else{
    145             /* decode the value */
    146             if val, err = self.Parse(); err != 0 {
    147                 return Node{}, err
    148             }
    149         }
    150 
    151         /* add the value to result */
    152         ret = append(ret, val)
    153         self.p = self.lspace(self.p)
    154 
    155         /* check for EOF */
    156         if self.p >= ns {
    157             return Node{}, types.ERR_EOF
    158         }
    159 
    160         /* check for the next character */
    161         switch self.s[self.p] {
    162             case ',' : self.p++
    163             case ']' : self.p++; return NewArray(ret), 0
    164         default:
    165             if val.isLazy() {
    166                 return newLazyArray(self, ret), 0
    167             }
    168             return Node{}, types.ERR_INVALID_CHAR
    169         }
    170     }
    171 }
    172 
    173 func (self *Parser) decodeObject(ret []Pair) (Node, types.ParsingError) {
    174     sp := self.p
    175     ns := len(self.s)
    176 
    177     /* check for EOF */
    178     if self.p = self.lspace(sp); self.p >= ns {
    179         return Node{}, types.ERR_EOF
    180     }
    181 
    182     /* check for empty object */
    183     if self.s[self.p] == '}' {
    184         self.p++
    185         return emptyObjectNode, 0
    186     }
    187 
    188     /* decode each pair */
    189     for {
    190         var val Node
    191         var njs types.JsonState
    192         var err types.ParsingError
    193 
    194         /* decode the key */
    195         if njs = self.decodeValue(); njs.Vt != types.V_STRING {
    196             return Node{}, types.ERR_INVALID_CHAR
    197         }
    198 
    199         /* extract the key */
    200         idx := self.p - 1
    201         key := self.s[njs.Iv:idx]
    202 
    203         /* check for escape sequence */
    204         if njs.Ep != -1 {
    205             if key, err = unquote(key); err != 0 {
    206                 return Node{}, err
    207             }
    208         }
    209 
    210         /* expect a ':' delimiter */
    211         if err = self.delim(); err != 0 {
    212             return Node{}, err
    213         }
    214 
    215         
    216         if self.skipValue {
    217             /* skip the value */
    218             var start int
    219             if start, err = self.skipFast(); err != 0 {
    220                 return Node{}, err
    221             }
    222             if self.p > ns {
    223                 return Node{}, types.ERR_EOF
    224             }
    225             t := switchRawType(self.s[start])
    226             if t == _V_NONE {
    227                 return Node{}, types.ERR_INVALID_CHAR
    228             }
    229             val = newRawNode(self.s[start:self.p], t)
    230         } else {
    231             /* decode the value */
    232             if val, err = self.Parse(); err != 0 {
    233                 return Node{}, err
    234             }
    235         }
    236 
    237         /* add the value to result */
    238         ret = append(ret, Pair{Key: key, Value: val})
    239         self.p = self.lspace(self.p)
    240 
    241         /* check for EOF */
    242         if self.p >= ns {
    243             return Node{}, types.ERR_EOF
    244         }
    245 
    246         /* check for the next character */
    247         switch self.s[self.p] {
    248             case ',' : self.p++
    249             case '}' : self.p++; return NewObject(ret), 0
    250         default:
    251             if val.isLazy() {
    252                 return newLazyObject(self, ret), 0
    253             }
    254             return Node{}, types.ERR_INVALID_CHAR
    255         }
    256     }
    257 }
    258 
    259 func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
    260     p := self.p - 1
    261     s := self.s[iv:p]
    262 
    263     /* fast path: no escape sequence */
    264     if ep == -1 {
    265         return NewString(s), 0
    266     }
    267 
    268     /* unquote the string */
    269     out, err := unquote(s)
    270 
    271     /* check for errors */
    272     if err != 0 {
    273         return Node{}, err
    274     } else {
    275         return newBytes(rt.Str2Mem(out)), 0
    276     }
    277 }
    278 
    279 /** Parser Interface **/
    280 
    281 func (self *Parser) Pos() int {
    282     return self.p
    283 }
    284 
    285 func (self *Parser) Parse() (Node, types.ParsingError) {
    286     switch val := self.decodeValue(); val.Vt {
    287         case types.V_EOF     : return Node{}, types.ERR_EOF
    288         case types.V_NULL    : return nullNode, 0
    289         case types.V_TRUE    : return trueNode, 0
    290         case types.V_FALSE   : return falseNode, 0
    291         case types.V_STRING  : return self.decodeString(val.Iv, val.Ep)
    292         case types.V_ARRAY:
    293             if self.noLazy {
    294                 return self.decodeArray(make([]Node, 0, _DEFAULT_NODE_CAP))
    295             }
    296             return newLazyArray(self, make([]Node, 0, _DEFAULT_NODE_CAP)), 0
    297         case types.V_OBJECT:
    298             if self.noLazy {
    299                 return self.decodeObject(make([]Pair, 0, _DEFAULT_NODE_CAP))
    300             }
    301             return newLazyObject(self, make([]Pair, 0, _DEFAULT_NODE_CAP)), 0
    302         case types.V_DOUBLE  : return NewNumber(self.s[val.Ep:self.p]), 0
    303         case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
    304         default              : return Node{}, types.ParsingError(-val.Vt)
    305     }
    306 }
    307 
    308 func (self *Parser) searchKey(match string) types.ParsingError {
    309     ns := len(self.s)
    310     if err := self.object(); err != 0 {
    311         return err
    312     }
    313 
    314     /* check for EOF */
    315     if self.p = self.lspace(self.p); self.p >= ns {
    316         return types.ERR_EOF
    317     }
    318 
    319     /* check for empty object */
    320     if self.s[self.p] == '}' {
    321         self.p++
    322         return _ERR_NOT_FOUND
    323     }
    324 
    325     var njs types.JsonState
    326     var err types.ParsingError
    327     /* decode each pair */
    328     for {
    329 
    330         /* decode the key */
    331         if njs = self.decodeValue(); njs.Vt != types.V_STRING {
    332             return types.ERR_INVALID_CHAR
    333         }
    334 
    335         /* extract the key */
    336         idx := self.p - 1
    337         key := self.s[njs.Iv:idx]
    338 
    339         /* check for escape sequence */
    340         if njs.Ep != -1 {
    341             if key, err = unquote(key); err != 0 {
    342                 return err
    343             }
    344         }
    345 
    346         /* expect a ':' delimiter */
    347         if err = self.delim(); err != 0 {
    348             return err
    349         }
    350 
    351         /* skip value */
    352         if key != match {
    353             if _, err = self.skipFast(); err != 0 {
    354                 return err
    355             }
    356         } else {
    357             return 0
    358         }
    359 
    360         /* check for EOF */
    361         self.p = self.lspace(self.p)
    362         if self.p >= ns {
    363             return types.ERR_EOF
    364         }
    365 
    366         /* check for the next character */
    367         switch self.s[self.p] {
    368         case ',':
    369             self.p++
    370         case '}':
    371             self.p++
    372             return _ERR_NOT_FOUND
    373         default:
    374             return types.ERR_INVALID_CHAR
    375         }
    376     }
    377 }
    378 
    379 func (self *Parser) searchIndex(idx int) types.ParsingError {
    380     ns := len(self.s)
    381     if err := self.array(); err != 0 {
    382         return err
    383     }
    384 
    385     /* check for EOF */
    386     if self.p = self.lspace(self.p); self.p >= ns {
    387         return types.ERR_EOF
    388     }
    389 
    390     /* check for empty array */
    391     if self.s[self.p] == ']' {
    392         self.p++
    393         return _ERR_NOT_FOUND
    394     }
    395 
    396     var err types.ParsingError
    397     /* allocate array space and parse every element */
    398     for i := 0; i < idx; i++ {
    399 
    400         /* decode the value */
    401         if _, err = self.skipFast(); err != 0 {
    402             return err
    403         }
    404 
    405         /* check for EOF */
    406         self.p = self.lspace(self.p)
    407         if self.p >= ns {
    408             return types.ERR_EOF
    409         }
    410 
    411         /* check for the next character */
    412         switch self.s[self.p] {
    413         case ',':
    414             self.p++
    415         case ']':
    416             self.p++
    417             return _ERR_NOT_FOUND
    418         default:
    419             return types.ERR_INVALID_CHAR
    420         }
    421     }
    422 
    423     return 0
    424 }
    425 
    426 func (self *Node) skipNextNode() *Node {
    427     if !self.isLazy() {
    428         return nil
    429     }
    430 
    431     parser, stack := self.getParserAndArrayStack()
    432     ret := stack.v
    433     sp := parser.p
    434     ns := len(parser.s)
    435 
    436     /* check for EOF */
    437     if parser.p = parser.lspace(sp); parser.p >= ns {
    438         return newSyntaxError(parser.syntaxError(types.ERR_EOF))
    439     }
    440 
    441     /* check for empty array */
    442     if parser.s[parser.p] == ']' {
    443         parser.p++
    444         self.setArray(ret)
    445         return nil
    446     }
    447 
    448     var val Node
    449     /* skip the value */
    450     if start, err := parser.skipFast(); err != 0 {
    451         return newSyntaxError(parser.syntaxError(err))
    452     } else {
    453         t := switchRawType(parser.s[start])
    454         if t == _V_NONE {
    455             return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
    456         }
    457         val = newRawNode(parser.s[start:parser.p], t)
    458     }
    459 
    460     /* add the value to result */
    461     ret = append(ret, val)
    462     parser.p = parser.lspace(parser.p)
    463 
    464     /* check for EOF */
    465     if parser.p >= ns {
    466         return newSyntaxError(parser.syntaxError(types.ERR_EOF))
    467     }
    468 
    469     /* check for the next character */
    470     switch parser.s[parser.p] {
    471     case ',':
    472         parser.p++
    473         self.setLazyArray(parser, ret)
    474         return &ret[len(ret)-1]
    475     case ']':
    476         parser.p++
    477         self.setArray(ret)
    478         return &ret[len(ret)-1]
    479     default:
    480         return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
    481     }
    482 }
    483 
    484 func (self *Node) skipNextPair() (*Pair) {
    485     if !self.isLazy() {
    486         return nil
    487     }
    488 
    489     parser, stack := self.getParserAndObjectStack()
    490     ret := stack.v
    491     sp := parser.p
    492     ns := len(parser.s)
    493 
    494     /* check for EOF */
    495     if parser.p = parser.lspace(sp); parser.p >= ns {
    496         return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
    497     }
    498 
    499     /* check for empty object */
    500     if parser.s[parser.p] == '}' {
    501         parser.p++
    502         self.setObject(ret)
    503         return nil
    504     }
    505 
    506     /* decode one pair */
    507     var val Node
    508     var njs types.JsonState
    509     var err types.ParsingError
    510 
    511     /* decode the key */
    512     if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
    513         return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
    514     }
    515 
    516     /* extract the key */
    517     idx := parser.p - 1
    518     key := parser.s[njs.Iv:idx]
    519 
    520     /* check for escape sequence */
    521     if njs.Ep != -1 {
    522         if key, err = unquote(key); err != 0 {
    523             return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
    524         }
    525     }
    526 
    527     /* expect a ':' delimiter */
    528     if err = parser.delim(); err != 0 {
    529         return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
    530     }
    531 
    532     /* skip the value */
    533     if start, err := parser.skipFast(); err != 0 {
    534         return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
    535     } else {
    536         t := switchRawType(parser.s[start])
    537         if t == _V_NONE {
    538             return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
    539         }
    540         val = newRawNode(parser.s[start:parser.p], t)
    541     }
    542 
    543     /* add the value to result */
    544     ret = append(ret, Pair{Key: key, Value: val})
    545     parser.p = parser.lspace(parser.p)
    546 
    547     /* check for EOF */
    548     if parser.p >= ns {
    549         return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
    550     }
    551 
    552     /* check for the next character */
    553     switch parser.s[parser.p] {
    554     case ',':
    555         parser.p++
    556         self.setLazyObject(parser, ret)
    557         return &ret[len(ret)-1]
    558     case '}':
    559         parser.p++
    560         self.setObject(ret)
    561         return &ret[len(ret)-1]
    562     default:
    563         return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
    564     }
    565 }
    566 
    567 
    568 /** Parser Factory **/
    569 
    570 // Loads parse all json into interface{}
    571 func Loads(src string) (int, interface{}, error) {
    572     ps := &Parser{s: src}
    573     np, err := ps.Parse()
    574 
    575     /* check for errors */
    576     if err != 0 {
    577         return 0, nil, ps.ExportError(err)
    578     } else {
    579         x, err := np.Interface()
    580         if err != nil {
    581             return 0, nil, err
    582         }
    583         return ps.Pos(), x, nil
    584     }
    585 }
    586 
    587 // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
    588 func LoadsUseNumber(src string) (int, interface{}, error) {
    589     ps := &Parser{s: src}
    590     np, err := ps.Parse()
    591 
    592     /* check for errors */
    593     if err != 0 {
    594         return 0, nil, err
    595     } else {
    596         x, err := np.InterfaceUseNumber()
    597         if err != nil {
    598             return 0, nil, err
    599         }
    600         return ps.Pos(), x, nil
    601     }
    602 }
    603 
    604 func NewParser(src string) *Parser {
    605     return &Parser{s: src}
    606 }
    607 
    608 // ExportError converts types.ParsingError to std Error
    609 func (self *Parser) ExportError(err types.ParsingError) error {
    610     if err == _ERR_NOT_FOUND {
    611         return ErrNotExist
    612     }
    613     return fmt.Errorf("%q", SyntaxError{
    614         Pos : self.p,
    615         Src : self.s,
    616         Code: err,
    617     }.Description())
    618 }