encode.go (7190B)
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package text

import (
	"math"
	"math/bits"
	"strconv"
	"strings"
	"unicode/utf8"

	"google.golang.org/protobuf/internal/detrand"
	"google.golang.org/protobuf/internal/errors"
)

// encType represents an encoding type.
//
// Each named value occupies its own bit, so several types can be tested at
// once with a mask such as scalar|messageClose. The zero value stands for
// "nothing written yet".
type encType uint8

const (
	// (1 << iota) / 2 yields 0 for the blank entry, then 1, 2, 4, 8.
	_ encType = (1 << iota) / 2
	// name is recorded after WriteName.
	name
	// scalar is recorded after any of the value writers (WriteString,
	// WriteInt, WriteLiteral, ...).
	scalar
	// messageOpen is recorded after StartMessage.
	messageOpen
	// messageClose is recorded after EndMessage.
	messageClose
)

// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
	// encoderState holds the mutable part of the encoder; it is what
	// Snapshot returns and Reset restores.
	encoderState

	// indent is the per-level indentation string. When it is empty the
	// output is written on a single line.
	indent string
	// delims holds the message delimiters: delims[0] opens a message and
	// delims[1] closes it.
	delims [2]byte
	// outputASCII is forwarded to appendString to request escaping of
	// multi-byte UTF-8 sequences.
	outputASCII bool
}

// encoderState is the portion of Encoder that changes while writing.
type encoderState struct {
	// lastType is the type of the most recently written construct.
	lastType encType
	// indents is the accumulated indentation for the current nesting depth
	// (multi-line mode only).
	indents []byte
	// out is the output buffer returned by Bytes.
	out []byte
}

// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry in a List or Message
// to be preceded by the indent and trailed by a newline.
//
// If delims is not the zero value, it controls the delimiter characters used
// for messages (e.g., "{}" vs "<>").
//
// If outputASCII is true, strings will be serialized in such a way that
// multi-byte UTF-8 sequences are escaped. This property ensures that the
// overall output is ASCII (as opposed to UTF-8).
56 func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) { 57 e := &Encoder{} 58 if len(indent) > 0 { 59 if strings.Trim(indent, " \t") != "" { 60 return nil, errors.New("indent may only be composed of space and tab characters") 61 } 62 e.indent = indent 63 } 64 switch delims { 65 case [2]byte{0, 0}: 66 e.delims = [2]byte{'{', '}'} 67 case [2]byte{'{', '}'}, [2]byte{'<', '>'}: 68 e.delims = delims 69 default: 70 return nil, errors.New("delimiters may only be \"{}\" or \"<>\"") 71 } 72 e.outputASCII = outputASCII 73 74 return e, nil 75 } 76 77 // Bytes returns the content of the written bytes. 78 func (e *Encoder) Bytes() []byte { 79 return e.out 80 } 81 82 // StartMessage writes out the '{' or '<' symbol. 83 func (e *Encoder) StartMessage() { 84 e.prepareNext(messageOpen) 85 e.out = append(e.out, e.delims[0]) 86 } 87 88 // EndMessage writes out the '}' or '>' symbol. 89 func (e *Encoder) EndMessage() { 90 e.prepareNext(messageClose) 91 e.out = append(e.out, e.delims[1]) 92 } 93 94 // WriteName writes out the field name and the separator ':'. 95 func (e *Encoder) WriteName(s string) { 96 e.prepareNext(name) 97 e.out = append(e.out, s...) 98 e.out = append(e.out, ':') 99 } 100 101 // WriteBool writes out the given boolean value. 102 func (e *Encoder) WriteBool(b bool) { 103 if b { 104 e.WriteLiteral("true") 105 } else { 106 e.WriteLiteral("false") 107 } 108 } 109 110 // WriteString writes out the given string value. 111 func (e *Encoder) WriteString(s string) { 112 e.prepareNext(scalar) 113 e.out = appendString(e.out, s, e.outputASCII) 114 } 115 116 func appendString(out []byte, in string, outputASCII bool) []byte { 117 out = append(out, '"') 118 i := indexNeedEscapeInString(in) 119 in, out = in[i:], append(out, in[:i]...) 
120 for len(in) > 0 { 121 switch r, n := utf8.DecodeRuneInString(in); { 122 case r == utf8.RuneError && n == 1: 123 // We do not report invalid UTF-8 because strings in the text format 124 // are used to represent both the proto string and bytes type. 125 r = rune(in[0]) 126 fallthrough 127 case r < ' ' || r == '"' || r == '\\' || r == 0x7f: 128 out = append(out, '\\') 129 switch r { 130 case '"', '\\': 131 out = append(out, byte(r)) 132 case '\n': 133 out = append(out, 'n') 134 case '\r': 135 out = append(out, 'r') 136 case '\t': 137 out = append(out, 't') 138 default: 139 out = append(out, 'x') 140 out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...) 141 out = strconv.AppendUint(out, uint64(r), 16) 142 } 143 in = in[n:] 144 case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f): 145 out = append(out, '\\') 146 if r <= math.MaxUint16 { 147 out = append(out, 'u') 148 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) 149 out = strconv.AppendUint(out, uint64(r), 16) 150 } else { 151 out = append(out, 'U') 152 out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...) 153 out = strconv.AppendUint(out, uint64(r), 16) 154 } 155 in = in[n:] 156 default: 157 i := indexNeedEscapeInString(in[n:]) 158 in, out = in[n+i:], append(out, in[:n+i]...) 159 } 160 } 161 out = append(out, '"') 162 return out 163 } 164 165 // indexNeedEscapeInString returns the index of the character that needs 166 // escaping. If no characters need escaping, this returns the input length. 167 func indexNeedEscapeInString(s string) int { 168 for i := 0; i < len(s); i++ { 169 if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f { 170 return i 171 } 172 } 173 return len(s) 174 } 175 176 // WriteFloat writes out the given float value for given bitSize. 
177 func (e *Encoder) WriteFloat(n float64, bitSize int) { 178 e.prepareNext(scalar) 179 e.out = appendFloat(e.out, n, bitSize) 180 } 181 182 func appendFloat(out []byte, n float64, bitSize int) []byte { 183 switch { 184 case math.IsNaN(n): 185 return append(out, "nan"...) 186 case math.IsInf(n, +1): 187 return append(out, "inf"...) 188 case math.IsInf(n, -1): 189 return append(out, "-inf"...) 190 default: 191 return strconv.AppendFloat(out, n, 'g', -1, bitSize) 192 } 193 } 194 195 // WriteInt writes out the given signed integer value. 196 func (e *Encoder) WriteInt(n int64) { 197 e.prepareNext(scalar) 198 e.out = append(e.out, strconv.FormatInt(n, 10)...) 199 } 200 201 // WriteUint writes out the given unsigned integer value. 202 func (e *Encoder) WriteUint(n uint64) { 203 e.prepareNext(scalar) 204 e.out = append(e.out, strconv.FormatUint(n, 10)...) 205 } 206 207 // WriteLiteral writes out the given string as a literal value without quotes. 208 // This is used for writing enum literal strings. 209 func (e *Encoder) WriteLiteral(s string) { 210 e.prepareNext(scalar) 211 e.out = append(e.out, s...) 212 } 213 214 // prepareNext adds possible space and indentation for the next value based 215 // on last encType and indent option. It also updates e.lastType to next. 216 func (e *Encoder) prepareNext(next encType) { 217 defer func() { 218 e.lastType = next 219 }() 220 221 // Single line. 222 if len(e.indent) == 0 { 223 // Add space after each field before the next one. 224 if e.lastType&(scalar|messageClose) != 0 && next == name { 225 e.out = append(e.out, ' ') 226 // Add a random extra space to make output unstable. 227 if detrand.Bool() { 228 e.out = append(e.out, ' ') 229 } 230 } 231 return 232 } 233 234 // Multi-line. 235 switch { 236 case e.lastType == name: 237 e.out = append(e.out, ' ') 238 // Add a random extra space after name: to make output unstable. 
239 if detrand.Bool() { 240 e.out = append(e.out, ' ') 241 } 242 243 case e.lastType == messageOpen && next != messageClose: 244 e.indents = append(e.indents, e.indent...) 245 e.out = append(e.out, '\n') 246 e.out = append(e.out, e.indents...) 247 248 case e.lastType&(scalar|messageClose) != 0: 249 if next == messageClose { 250 e.indents = e.indents[:len(e.indents)-len(e.indent)] 251 } 252 e.out = append(e.out, '\n') 253 e.out = append(e.out, e.indents...) 254 } 255 } 256 257 // Snapshot returns the current snapshot for use in Reset. 258 func (e *Encoder) Snapshot() encoderState { 259 return e.encoderState 260 } 261 262 // Reset resets the Encoder to the given encoderState from a Snapshot. 263 func (e *Encoder) Reset(es encoderState) { 264 e.encoderState = es 265 } 266 267 // AppendString appends the escaped form of the input string to b. 268 func AppendString(b []byte, s string) []byte { 269 return appendString(b, s, false) 270 }