gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

iter_str.go (4821B)


      1 package jsoniter
      2 
import (
	"fmt"
	"unicode/utf16"
	"unicode/utf8"
)
      7 
      8 // ReadString read string from iterator
      9 func (iter *Iterator) ReadString() (ret string) {
     10 	c := iter.nextToken()
     11 	if c == '"' {
     12 		for i := iter.head; i < iter.tail; i++ {
     13 			c := iter.buf[i]
     14 			if c == '"' {
     15 				ret = string(iter.buf[iter.head:i])
     16 				iter.head = i + 1
     17 				return ret
     18 			} else if c == '\\' {
     19 				break
     20 			} else if c < ' ' {
     21 				iter.ReportError("ReadString",
     22 					fmt.Sprintf(`invalid control character found: %d`, c))
     23 				return
     24 			}
     25 		}
     26 		return iter.readStringSlowPath()
     27 	} else if c == 'n' {
     28 		iter.skipThreeBytes('u', 'l', 'l')
     29 		return ""
     30 	}
     31 	iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
     32 	return
     33 }
     34 
     35 func (iter *Iterator) readStringSlowPath() (ret string) {
     36 	var str []byte
     37 	var c byte
     38 	for iter.Error == nil {
     39 		c = iter.readByte()
     40 		if c == '"' {
     41 			return string(str)
     42 		}
     43 		if c == '\\' {
     44 			c = iter.readByte()
     45 			str = iter.readEscapedChar(c, str)
     46 		} else {
     47 			str = append(str, c)
     48 		}
     49 	}
     50 	iter.ReportError("readStringSlowPath", "unexpected end of input")
     51 	return
     52 }
     53 
     54 func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
     55 	switch c {
     56 	case 'u':
     57 		r := iter.readU4()
     58 		if utf16.IsSurrogate(r) {
     59 			c = iter.readByte()
     60 			if iter.Error != nil {
     61 				return nil
     62 			}
     63 			if c != '\\' {
     64 				iter.unreadByte()
     65 				str = appendRune(str, r)
     66 				return str
     67 			}
     68 			c = iter.readByte()
     69 			if iter.Error != nil {
     70 				return nil
     71 			}
     72 			if c != 'u' {
     73 				str = appendRune(str, r)
     74 				return iter.readEscapedChar(c, str)
     75 			}
     76 			r2 := iter.readU4()
     77 			if iter.Error != nil {
     78 				return nil
     79 			}
     80 			combined := utf16.DecodeRune(r, r2)
     81 			if combined == '\uFFFD' {
     82 				str = appendRune(str, r)
     83 				str = appendRune(str, r2)
     84 			} else {
     85 				str = appendRune(str, combined)
     86 			}
     87 		} else {
     88 			str = appendRune(str, r)
     89 		}
     90 	case '"':
     91 		str = append(str, '"')
     92 	case '\\':
     93 		str = append(str, '\\')
     94 	case '/':
     95 		str = append(str, '/')
     96 	case 'b':
     97 		str = append(str, '\b')
     98 	case 'f':
     99 		str = append(str, '\f')
    100 	case 'n':
    101 		str = append(str, '\n')
    102 	case 'r':
    103 		str = append(str, '\r')
    104 	case 't':
    105 		str = append(str, '\t')
    106 	default:
    107 		iter.ReportError("readEscapedChar",
    108 			`invalid escape char after \`)
    109 		return nil
    110 	}
    111 	return str
    112 }
    113 
    114 // ReadStringAsSlice read string from iterator without copying into string form.
    115 // The []byte can not be kept, as it will change after next iterator call.
    116 func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
    117 	c := iter.nextToken()
    118 	if c == '"' {
    119 		for i := iter.head; i < iter.tail; i++ {
    120 			// require ascii string and no escape
    121 			// for: field name, base64, number
    122 			if iter.buf[i] == '"' {
    123 				// fast path: reuse the underlying buffer
    124 				ret = iter.buf[iter.head:i]
    125 				iter.head = i + 1
    126 				return ret
    127 			}
    128 		}
    129 		readLen := iter.tail - iter.head
    130 		copied := make([]byte, readLen, readLen*2)
    131 		copy(copied, iter.buf[iter.head:iter.tail])
    132 		iter.head = iter.tail
    133 		for iter.Error == nil {
    134 			c := iter.readByte()
    135 			if c == '"' {
    136 				return copied
    137 			}
    138 			copied = append(copied, c)
    139 		}
    140 		return copied
    141 	}
    142 	iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c}))
    143 	return
    144 }
    145 
    146 func (iter *Iterator) readU4() (ret rune) {
    147 	for i := 0; i < 4; i++ {
    148 		c := iter.readByte()
    149 		if iter.Error != nil {
    150 			return
    151 		}
    152 		if c >= '0' && c <= '9' {
    153 			ret = ret*16 + rune(c-'0')
    154 		} else if c >= 'a' && c <= 'f' {
    155 			ret = ret*16 + rune(c-'a'+10)
    156 		} else if c >= 'A' && c <= 'F' {
    157 			ret = ret*16 + rune(c-'A'+10)
    158 		} else {
    159 			iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c}))
    160 			return
    161 		}
    162 	}
    163 	return ret
    164 }
    165 
    166 const (
    167 	t1 = 0x00 // 0000 0000
    168 	tx = 0x80 // 1000 0000
    169 	t2 = 0xC0 // 1100 0000
    170 	t3 = 0xE0 // 1110 0000
    171 	t4 = 0xF0 // 1111 0000
    172 	t5 = 0xF8 // 1111 1000
    173 
    174 	maskx = 0x3F // 0011 1111
    175 	mask2 = 0x1F // 0001 1111
    176 	mask3 = 0x0F // 0000 1111
    177 	mask4 = 0x07 // 0000 0111
    178 
    179 	rune1Max = 1<<7 - 1
    180 	rune2Max = 1<<11 - 1
    181 	rune3Max = 1<<16 - 1
    182 
    183 	surrogateMin = 0xD800
    184 	surrogateMax = 0xDFFF
    185 
    186 	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
    187 	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
    188 )
    189 
    190 func appendRune(p []byte, r rune) []byte {
    191 	// Negative values are erroneous. Making it unsigned addresses the problem.
    192 	switch i := uint32(r); {
    193 	case i <= rune1Max:
    194 		p = append(p, byte(r))
    195 		return p
    196 	case i <= rune2Max:
    197 		p = append(p, t2|byte(r>>6))
    198 		p = append(p, tx|byte(r)&maskx)
    199 		return p
    200 	case i > maxRune, surrogateMin <= i && i <= surrogateMax:
    201 		r = runeError
    202 		fallthrough
    203 	case i <= rune3Max:
    204 		p = append(p, t3|byte(r>>12))
    205 		p = append(p, tx|byte(r>>6)&maskx)
    206 		p = append(p, tx|byte(r)&maskx)
    207 		return p
    208 	default:
    209 		p = append(p, t4|byte(r>>18))
    210 		p = append(p, tx|byte(r>>12)&maskx)
    211 		p = append(p, tx|byte(r>>6)&maskx)
    212 		p = append(p, tx|byte(r)&maskx)
    213 		return p
    214 	}
    215 }