encoder.go (9263B)
1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package encoder 18 19 import ( 20 `bytes` 21 `encoding/json` 22 `reflect` 23 `runtime` 24 `unsafe` 25 26 `github.com/bytedance/sonic/internal/native` 27 `github.com/bytedance/sonic/internal/native/types` 28 `github.com/bytedance/sonic/internal/rt` 29 `github.com/bytedance/sonic/utf8` 30 `github.com/bytedance/sonic/option` 31 ) 32 33 // Options is a set of encoding options. 34 type Options uint64 35 36 const ( 37 bitSortMapKeys = iota 38 bitEscapeHTML 39 bitCompactMarshaler 40 bitNoQuoteTextMarshaler 41 bitNoNullSliceOrMap 42 bitValidateString 43 44 // used for recursive compile 45 bitPointerValue = 63 46 ) 47 48 const ( 49 // SortMapKeys indicates that the keys of a map needs to be sorted 50 // before serializing into JSON. 51 // WARNING: This hurts performance A LOT, USE WITH CARE. 52 SortMapKeys Options = 1 << bitSortMapKeys 53 54 // EscapeHTML indicates encoder to escape all HTML characters 55 // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). 56 // WARNING: This hurts performance A LOT, USE WITH CARE. 57 EscapeHTML Options = 1 << bitEscapeHTML 58 59 // CompactMarshaler indicates that the output JSON from json.Marshaler 60 // is always compact and needs no validation 61 CompactMarshaler Options = 1 << bitCompactMarshaler 62 63 // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler 64 // is always escaped string and needs no quoting 65 NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler 66 67 // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}', 68 // instead of 'null' 69 NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap 70 71 // ValidateString indicates that encoder should validate the input string 72 // before encoding it into JSON. 73 ValidateString Options = 1 << bitValidateString 74 75 // CompatibleWithStd is used to be compatible with std encoder. 76 CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler 77 ) 78 79 // Encoder represents a specific set of encoder configurations. 80 type Encoder struct { 81 Opts Options 82 prefix string 83 indent string 84 } 85 86 // Encode returns the JSON encoding of v. 87 func (self *Encoder) Encode(v interface{}) ([]byte, error) { 88 if self.indent != "" || self.prefix != "" { 89 return EncodeIndented(v, self.prefix, self.indent, self.Opts) 90 } 91 return Encode(v, self.Opts) 92 } 93 94 // SortKeys enables the SortMapKeys option. 95 func (self *Encoder) SortKeys() *Encoder { 96 self.Opts |= SortMapKeys 97 return self 98 } 99 100 // SetEscapeHTML specifies if option EscapeHTML opens 101 func (self *Encoder) SetEscapeHTML(f bool) { 102 if f { 103 self.Opts |= EscapeHTML 104 } else { 105 self.Opts &= ^EscapeHTML 106 } 107 } 108 109 // SetValidateString specifies if option ValidateString opens 110 func (self *Encoder) SetValidateString(f bool) { 111 if f { 112 self.Opts |= ValidateString 113 } else { 114 self.Opts &= ^ValidateString 115 } 116 } 117 118 // SetCompactMarshaler specifies if option CompactMarshaler opens 119 func (self *Encoder) SetCompactMarshaler(f bool) { 120 if f { 121 self.Opts |= CompactMarshaler 122 } else { 123 self.Opts &= ^CompactMarshaler 124 } 125 } 126 127 // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens 128 func (self *Encoder) SetNoQuoteTextMarshaler(f bool) { 129 if f { 130 self.Opts |= NoQuoteTextMarshaler 131 } else { 132 self.Opts &= ^NoQuoteTextMarshaler 133 } 134 } 135 136 // SetIndent instructs the encoder to format each subsequent encoded 137 // value as if indented by the package-level function EncodeIndent(). 138 // Calling SetIndent("", "") disables indentation. 139 func (enc *Encoder) SetIndent(prefix, indent string) { 140 enc.prefix = prefix 141 enc.indent = indent 142 } 143 144 // Quote returns the JSON-quoted version of s. 145 func Quote(s string) string { 146 var n int 147 var p []byte 148 149 /* check for empty string */ 150 if s == "" { 151 return `""` 152 } 153 154 /* allocate space for result */ 155 n = len(s) + 2 156 p = make([]byte, 0, n) 157 158 /* call the encoder */ 159 _ = encodeString(&p, s) 160 return rt.Mem2Str(p) 161 } 162 163 // Encode returns the JSON encoding of val, encoded with opts. 164 func Encode(val interface{}, opts Options) ([]byte, error) { 165 var ret []byte 166 167 buf := newBytes() 168 err := encodeInto(&buf, val, opts) 169 170 /* check for errors */ 171 if err != nil { 172 freeBytes(buf) 173 return nil, err 174 } 175 176 /* htmlescape or correct UTF-8 if opts enable */ 177 old := buf 178 buf = encodeFinish(old, opts) 179 pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr 180 pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr 181 182 /* return when allocated a new buffer */ 183 if pbuf != pold { 184 freeBytes(old) 185 return buf, nil 186 } 187 188 /* make a copy of the result */ 189 ret = make([]byte, len(buf)) 190 copy(ret, buf) 191 192 freeBytes(buf) 193 /* return the buffer into pool */ 194 return ret, nil 195 } 196 197 // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating 198 // a new one. 199 func EncodeInto(buf *[]byte, val interface{}, opts Options) error { 200 err := encodeInto(buf, val, opts) 201 if err != nil { 202 return err 203 } 204 *buf = encodeFinish(*buf, opts) 205 return err 206 } 207 208 func encodeInto(buf *[]byte, val interface{}, opts Options) error { 209 stk := newStack() 210 efv := rt.UnpackEface(val) 211 err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts)) 212 213 /* return the stack into pool */ 214 if err != nil { 215 resetStack(stk) 216 } 217 freeStack(stk) 218 219 /* avoid GC ahead */ 220 runtime.KeepAlive(buf) 221 runtime.KeepAlive(efv) 222 return err 223 } 224 225 func encodeFinish(buf []byte, opts Options) []byte { 226 if opts & EscapeHTML != 0 { 227 buf = HTMLEscape(nil, buf) 228 } 229 if opts & ValidateString != 0 && !utf8.Validate(buf) { 230 buf = utf8.CorrectWith(nil, buf, `\ufffd`) 231 } 232 return buf 233 } 234 235 var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) 236 237 // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 238 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 239 // so that the JSON will be safe to embed inside HTML <script> tags. 240 // For historical reasons, web browsers don't honor standard HTML 241 // escaping within <script> tags, so an alternative JSON encoding must 242 // be used. 243 func HTMLEscape(dst []byte, src []byte) []byte { 244 return htmlEscape(dst, src) 245 } 246 247 // EncodeIndented is like Encode but applies Indent to format the output. 248 // Each JSON element in the output will begin on a new line beginning with prefix 249 // followed by one or more copies of indent according to the indentation nesting. 250 func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) { 251 var err error 252 var out []byte 253 var buf *bytes.Buffer 254 255 /* encode into the buffer */ 256 out = newBytes() 257 err = EncodeInto(&out, val, opts) 258 259 /* check for errors */ 260 if err != nil { 261 freeBytes(out) 262 return nil, err 263 } 264 265 /* indent the JSON */ 266 buf = newBuffer() 267 err = json.Indent(buf, out, prefix, indent) 268 269 /* check for errors */ 270 if err != nil { 271 freeBytes(out) 272 freeBuffer(buf) 273 return nil, err 274 } 275 276 /* copy to the result buffer */ 277 ret := make([]byte, buf.Len()) 278 copy(ret, buf.Bytes()) 279 280 /* return the buffers into pool */ 281 freeBytes(out) 282 freeBuffer(buf) 283 return ret, nil 284 } 285 286 // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in 287 // order to reduce the first-hit latency. 288 // 289 // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is 290 // a compile option to set the depth of recursive compile for the nested struct type. 291 func Pretouch(vt reflect.Type, opts ...option.CompileOption) error { 292 cfg := option.DefaultCompileOptions() 293 for _, opt := range opts { 294 opt(&cfg) 295 break 296 } 297 return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg) 298 } 299 300 // Valid validates json and returns first non-blank character position, 301 // if it is only one valid json value. 302 // Otherwise returns invalid character position using start. 303 // 304 // Note: it does not check for the invalid UTF-8 characters. 305 func Valid(data []byte) (ok bool, start int) { 306 n := len(data) 307 if n == 0 { 308 return false, -1 309 } 310 s := rt.Mem2Str(data) 311 p := 0 312 m := types.NewStateMachine() 313 ret := native.ValidateOne(&s, &p, m) 314 types.FreeStateMachine(m) 315 316 if ret < 0 { 317 return false, p-1 318 } 319 320 /* check for trailing spaces */ 321 for ;p < n; p++ { 322 if (types.SPACE_MASK & (1 << data[p])) == 0 { 323 return false, p 324 } 325 } 326 327 return true, ret 328 }