gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

unmarshal_text.go (5797B)


      1 package decoder
      2 
      3 import (
      4 	"bytes"
      5 	"encoding"
      6 	"fmt"
      7 	"unicode"
      8 	"unicode/utf16"
      9 	"unicode/utf8"
     10 	"unsafe"
     11 
     12 	"github.com/goccy/go-json/internal/errors"
     13 	"github.com/goccy/go-json/internal/runtime"
     14 )
     15 
     16 type unmarshalTextDecoder struct {
     17 	typ        *runtime.Type
     18 	structName string
     19 	fieldName  string
     20 }
     21 
     22 func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
     23 	return &unmarshalTextDecoder{
     24 		typ:        typ,
     25 		structName: structName,
     26 		fieldName:  fieldName,
     27 	}
     28 }
     29 
     30 func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
     31 	switch e := err.(type) {
     32 	case *errors.UnmarshalTypeError:
     33 		e.Struct = d.structName
     34 		e.Field = d.fieldName
     35 	case *errors.SyntaxError:
     36 		e.Offset = cursor
     37 	}
     38 }
     39 
     40 var (
     41 	nullbytes = []byte(`null`)
     42 )
     43 
     44 func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
     45 	s.skipWhiteSpace()
     46 	start := s.cursor
     47 	if err := s.skipValue(depth); err != nil {
     48 		return err
     49 	}
     50 	src := s.buf[start:s.cursor]
     51 	if len(src) > 0 {
     52 		switch src[0] {
     53 		case '[':
     54 			return &errors.UnmarshalTypeError{
     55 				Value:  "array",
     56 				Type:   runtime.RType2Type(d.typ),
     57 				Offset: s.totalOffset(),
     58 			}
     59 		case '{':
     60 			return &errors.UnmarshalTypeError{
     61 				Value:  "object",
     62 				Type:   runtime.RType2Type(d.typ),
     63 				Offset: s.totalOffset(),
     64 			}
     65 		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
     66 			return &errors.UnmarshalTypeError{
     67 				Value:  "number",
     68 				Type:   runtime.RType2Type(d.typ),
     69 				Offset: s.totalOffset(),
     70 			}
     71 		case 'n':
     72 			if bytes.Equal(src, nullbytes) {
     73 				*(*unsafe.Pointer)(p) = nil
     74 				return nil
     75 			}
     76 		}
     77 	}
     78 	dst := make([]byte, len(src))
     79 	copy(dst, src)
     80 
     81 	if b, ok := unquoteBytes(dst); ok {
     82 		dst = b
     83 	}
     84 	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
     85 		typ: d.typ,
     86 		ptr: p,
     87 	}))
     88 	if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
     89 		d.annotateError(s.cursor, err)
     90 		return err
     91 	}
     92 	return nil
     93 }
     94 
     95 func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
     96 	buf := ctx.Buf
     97 	cursor = skipWhiteSpace(buf, cursor)
     98 	start := cursor
     99 	end, err := skipValue(buf, cursor, depth)
    100 	if err != nil {
    101 		return 0, err
    102 	}
    103 	src := buf[start:end]
    104 	if len(src) > 0 {
    105 		switch src[0] {
    106 		case '[':
    107 			return 0, &errors.UnmarshalTypeError{
    108 				Value:  "array",
    109 				Type:   runtime.RType2Type(d.typ),
    110 				Offset: start,
    111 			}
    112 		case '{':
    113 			return 0, &errors.UnmarshalTypeError{
    114 				Value:  "object",
    115 				Type:   runtime.RType2Type(d.typ),
    116 				Offset: start,
    117 			}
    118 		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    119 			return 0, &errors.UnmarshalTypeError{
    120 				Value:  "number",
    121 				Type:   runtime.RType2Type(d.typ),
    122 				Offset: start,
    123 			}
    124 		case 'n':
    125 			if bytes.Equal(src, nullbytes) {
    126 				*(*unsafe.Pointer)(p) = nil
    127 				return end, nil
    128 			}
    129 		}
    130 	}
    131 
    132 	if s, ok := unquoteBytes(src); ok {
    133 		src = s
    134 	}
    135 	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
    136 		typ: d.typ,
    137 		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
    138 	}))
    139 	if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
    140 		d.annotateError(cursor, err)
    141 		return 0, err
    142 	}
    143 	return end, nil
    144 }
    145 
    146 func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
    147 	return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path")
    148 }
    149 
    150 func unquoteBytes(s []byte) (t []byte, ok bool) {
    151 	length := len(s)
    152 	if length < 2 || s[0] != '"' || s[length-1] != '"' {
    153 		return
    154 	}
    155 	s = s[1 : length-1]
    156 	length -= 2
    157 
    158 	// Check for unusual characters. If there are none,
    159 	// then no unquoting is needed, so return a slice of the
    160 	// original bytes.
    161 	r := 0
    162 	for r < length {
    163 		c := s[r]
    164 		if c == '\\' || c == '"' || c < ' ' {
    165 			break
    166 		}
    167 		if c < utf8.RuneSelf {
    168 			r++
    169 			continue
    170 		}
    171 		rr, size := utf8.DecodeRune(s[r:])
    172 		if rr == utf8.RuneError && size == 1 {
    173 			break
    174 		}
    175 		r += size
    176 	}
    177 	if r == length {
    178 		return s, true
    179 	}
    180 
    181 	b := make([]byte, length+2*utf8.UTFMax)
    182 	w := copy(b, s[0:r])
    183 	for r < length {
    184 		// Out of room? Can only happen if s is full of
    185 		// malformed UTF-8 and we're replacing each
    186 		// byte with RuneError.
    187 		if w >= len(b)-2*utf8.UTFMax {
    188 			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
    189 			copy(nb, b[0:w])
    190 			b = nb
    191 		}
    192 		switch c := s[r]; {
    193 		case c == '\\':
    194 			r++
    195 			if r >= length {
    196 				return
    197 			}
    198 			switch s[r] {
    199 			default:
    200 				return
    201 			case '"', '\\', '/', '\'':
    202 				b[w] = s[r]
    203 				r++
    204 				w++
    205 			case 'b':
    206 				b[w] = '\b'
    207 				r++
    208 				w++
    209 			case 'f':
    210 				b[w] = '\f'
    211 				r++
    212 				w++
    213 			case 'n':
    214 				b[w] = '\n'
    215 				r++
    216 				w++
    217 			case 'r':
    218 				b[w] = '\r'
    219 				r++
    220 				w++
    221 			case 't':
    222 				b[w] = '\t'
    223 				r++
    224 				w++
    225 			case 'u':
    226 				r--
    227 				rr := getu4(s[r:])
    228 				if rr < 0 {
    229 					return
    230 				}
    231 				r += 6
    232 				if utf16.IsSurrogate(rr) {
    233 					rr1 := getu4(s[r:])
    234 					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
    235 						// A valid pair; consume.
    236 						r += 6
    237 						w += utf8.EncodeRune(b[w:], dec)
    238 						break
    239 					}
    240 					// Invalid surrogate; fall back to replacement rune.
    241 					rr = unicode.ReplacementChar
    242 				}
    243 				w += utf8.EncodeRune(b[w:], rr)
    244 			}
    245 
    246 		// Quote, control characters are invalid.
    247 		case c == '"', c < ' ':
    248 			return
    249 
    250 		// ASCII
    251 		case c < utf8.RuneSelf:
    252 			b[w] = c
    253 			r++
    254 			w++
    255 
    256 		// Coerce to well-formed UTF-8.
    257 		default:
    258 			rr, size := utf8.DecodeRune(s[r:])
    259 			r += size
    260 			w += utf8.EncodeRune(b[w:], rr)
    261 		}
    262 	}
    263 	return b[0:w], true
    264 }
    265 
    266 func getu4(s []byte) rune {
    267 	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
    268 		return -1
    269 	}
    270 	var r rune
    271 	for _, c := range s[2:6] {
    272 		switch {
    273 		case '0' <= c && c <= '9':
    274 			c = c - '0'
    275 		case 'a' <= c && c <= 'f':
    276 			c = c - 'a' + 10
    277 		case 'A' <= c && c <= 'F':
    278 			c = c - 'A' + 10
    279 		default:
    280 			return -1
    281 		}
    282 		r = r*16 + rune(c)
    283 	}
    284 	return r
    285 }