encoder.go (9579B)
1 // Copyright 2021 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pkgbits 6 7 import ( 8 "bytes" 9 "crypto/md5" 10 "encoding/binary" 11 "go/constant" 12 "io" 13 "math/big" 14 "runtime" 15 ) 16 17 // currentVersion is the current version number. 18 // 19 // - v0: initial prototype 20 // 21 // - v1: adds the flags uint32 word 22 const currentVersion uint32 = 1 23 24 // A PkgEncoder provides methods for encoding a package's Unified IR 25 // export data. 26 type PkgEncoder struct { 27 // elems holds the bitstream for previously encoded elements. 28 elems [numRelocs][]string 29 30 // stringsIdx maps previously encoded strings to their index within 31 // the RelocString section, to allow deduplication. That is, 32 // elems[RelocString][stringsIdx[s]] == s (if present). 33 stringsIdx map[string]Index 34 35 // syncFrames is the number of frames to write at each sync 36 // marker. A negative value means sync markers are omitted. 37 syncFrames int 38 } 39 40 // SyncMarkers reports whether pw uses sync markers. 41 func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } 42 43 // NewPkgEncoder returns an initialized PkgEncoder. 44 // 45 // syncFrames is the number of caller frames that should be serialized 46 // at Sync points. Serializing additional frames results in larger 47 // export data files, but can help diagnosing desync errors in 48 // higher-level Unified IR reader/writer code. If syncFrames is 49 // negative, then sync markers are omitted entirely. 50 func NewPkgEncoder(syncFrames int) PkgEncoder { 51 return PkgEncoder{ 52 stringsIdx: make(map[string]Index), 53 syncFrames: syncFrames, 54 } 55 } 56 57 // DumpTo writes the package's encoded data to out0 and returns the 58 // package fingerprint. 59 func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { 60 h := md5.New() 61 out := io.MultiWriter(out0, h) 62 63 writeUint32 := func(x uint32) { 64 assert(binary.Write(out, binary.LittleEndian, x) == nil) 65 } 66 67 writeUint32(currentVersion) 68 69 var flags uint32 70 if pw.SyncMarkers() { 71 flags |= flagSyncMarkers 72 } 73 writeUint32(flags) 74 75 // Write elemEndsEnds. 76 var sum uint32 77 for _, elems := range &pw.elems { 78 sum += uint32(len(elems)) 79 writeUint32(sum) 80 } 81 82 // Write elemEnds. 83 sum = 0 84 for _, elems := range &pw.elems { 85 for _, elem := range elems { 86 sum += uint32(len(elem)) 87 writeUint32(sum) 88 } 89 } 90 91 // Write elemData. 92 for _, elems := range &pw.elems { 93 for _, elem := range elems { 94 _, err := io.WriteString(out, elem) 95 assert(err == nil) 96 } 97 } 98 99 // Write fingerprint. 100 copy(fingerprint[:], h.Sum(nil)) 101 _, err := out0.Write(fingerprint[:]) 102 assert(err == nil) 103 104 return 105 } 106 107 // StringIdx adds a string value to the strings section, if not 108 // already present, and returns its index. 109 func (pw *PkgEncoder) StringIdx(s string) Index { 110 if idx, ok := pw.stringsIdx[s]; ok { 111 assert(pw.elems[RelocString][idx] == s) 112 return idx 113 } 114 115 idx := Index(len(pw.elems[RelocString])) 116 pw.elems[RelocString] = append(pw.elems[RelocString], s) 117 pw.stringsIdx[s] = idx 118 return idx 119 } 120 121 // NewEncoder returns an Encoder for a new element within the given 122 // section, and encodes the given SyncMarker as the start of the 123 // element bitstream. 124 func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { 125 e := pw.NewEncoderRaw(k) 126 e.Sync(marker) 127 return e 128 } 129 130 // NewEncoderRaw returns an Encoder for a new element within the given 131 // section. 132 // 133 // Most callers should use NewEncoder instead. 134 func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { 135 idx := Index(len(pw.elems[k])) 136 pw.elems[k] = append(pw.elems[k], "") // placeholder 137 138 return Encoder{ 139 p: pw, 140 k: k, 141 Idx: idx, 142 } 143 } 144 145 // An Encoder provides methods for encoding an individual element's 146 // bitstream data. 147 type Encoder struct { 148 p *PkgEncoder 149 150 Relocs []RelocEnt 151 RelocMap map[RelocEnt]uint32 152 Data bytes.Buffer // accumulated element bitstream data 153 154 encodingRelocHeader bool 155 156 k RelocKind 157 Idx Index // index within relocation section 158 } 159 160 // Flush finalizes the element's bitstream and returns its Index. 161 func (w *Encoder) Flush() Index { 162 var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved 163 164 // Backup the data so we write the relocations at the front. 165 var tmp bytes.Buffer 166 io.Copy(&tmp, &w.Data) 167 168 // TODO(mdempsky): Consider writing these out separately so they're 169 // easier to strip, along with function bodies, so that we can prune 170 // down to just the data that's relevant to go/types. 171 if w.encodingRelocHeader { 172 panic("encodingRelocHeader already true; recursive flush?") 173 } 174 w.encodingRelocHeader = true 175 w.Sync(SyncRelocs) 176 w.Len(len(w.Relocs)) 177 for _, rEnt := range w.Relocs { 178 w.Sync(SyncReloc) 179 w.Len(int(rEnt.Kind)) 180 w.Len(int(rEnt.Idx)) 181 } 182 183 io.Copy(&sb, &w.Data) 184 io.Copy(&sb, &tmp) 185 w.p.elems[w.k][w.Idx] = sb.String() 186 187 return w.Idx 188 } 189 190 func (w *Encoder) checkErr(err error) { 191 if err != nil { 192 errorf("unexpected encoding error: %v", err) 193 } 194 } 195 196 func (w *Encoder) rawUvarint(x uint64) { 197 var buf [binary.MaxVarintLen64]byte 198 n := binary.PutUvarint(buf[:], x) 199 _, err := w.Data.Write(buf[:n]) 200 w.checkErr(err) 201 } 202 203 func (w *Encoder) rawVarint(x int64) { 204 // Zig-zag encode. 205 ux := uint64(x) << 1 206 if x < 0 { 207 ux = ^ux 208 } 209 210 w.rawUvarint(ux) 211 } 212 213 func (w *Encoder) rawReloc(r RelocKind, idx Index) int { 214 e := RelocEnt{r, idx} 215 if w.RelocMap != nil { 216 if i, ok := w.RelocMap[e]; ok { 217 return int(i) 218 } 219 } else { 220 w.RelocMap = make(map[RelocEnt]uint32) 221 } 222 223 i := len(w.Relocs) 224 w.RelocMap[e] = uint32(i) 225 w.Relocs = append(w.Relocs, e) 226 return i 227 } 228 229 func (w *Encoder) Sync(m SyncMarker) { 230 if !w.p.SyncMarkers() { 231 return 232 } 233 234 // Writing out stack frame string references requires working 235 // relocations, but writing out the relocations themselves involves 236 // sync markers. To prevent infinite recursion, we simply trim the 237 // stack frame for sync markers within the relocation header. 238 var frames []string 239 if !w.encodingRelocHeader && w.p.syncFrames > 0 { 240 pcs := make([]uintptr, w.p.syncFrames) 241 n := runtime.Callers(2, pcs) 242 frames = fmtFrames(pcs[:n]...) 243 } 244 245 // TODO(mdempsky): Save space by writing out stack frames as a 246 // linked list so we can share common stack frames. 247 w.rawUvarint(uint64(m)) 248 w.rawUvarint(uint64(len(frames))) 249 for _, frame := range frames { 250 w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) 251 } 252 } 253 254 // Bool encodes and writes a bool value into the element bitstream, 255 // and then returns the bool value. 256 // 257 // For simple, 2-alternative encodings, the idiomatic way to call Bool 258 // is something like: 259 // 260 // if w.Bool(x != 0) { 261 // // alternative #1 262 // } else { 263 // // alternative #2 264 // } 265 // 266 // For multi-alternative encodings, use Code instead. 267 func (w *Encoder) Bool(b bool) bool { 268 w.Sync(SyncBool) 269 var x byte 270 if b { 271 x = 1 272 } 273 err := w.Data.WriteByte(x) 274 w.checkErr(err) 275 return b 276 } 277 278 // Int64 encodes and writes an int64 value into the element bitstream. 279 func (w *Encoder) Int64(x int64) { 280 w.Sync(SyncInt64) 281 w.rawVarint(x) 282 } 283 284 // Uint64 encodes and writes a uint64 value into the element bitstream. 285 func (w *Encoder) Uint64(x uint64) { 286 w.Sync(SyncUint64) 287 w.rawUvarint(x) 288 } 289 290 // Len encodes and writes a non-negative int value into the element bitstream. 291 func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } 292 293 // Int encodes and writes an int value into the element bitstream. 294 func (w *Encoder) Int(x int) { w.Int64(int64(x)) } 295 296 // Uint encodes and writes a uint value into the element bitstream. 297 func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } 298 299 // Reloc encodes and writes a relocation for the given (section, 300 // index) pair into the element bitstream. 301 // 302 // Note: Only the index is formally written into the element 303 // bitstream, so bitstream decoders must know from context which 304 // section an encoded relocation refers to. 305 func (w *Encoder) Reloc(r RelocKind, idx Index) { 306 w.Sync(SyncUseReloc) 307 w.Len(w.rawReloc(r, idx)) 308 } 309 310 // Code encodes and writes a Code value into the element bitstream. 311 func (w *Encoder) Code(c Code) { 312 w.Sync(c.Marker()) 313 w.Len(c.Value()) 314 } 315 316 // String encodes and writes a string value into the element 317 // bitstream. 318 // 319 // Internally, strings are deduplicated by adding them to the strings 320 // section (if not already present), and then writing a relocation 321 // into the element bitstream. 322 func (w *Encoder) String(s string) { 323 w.Sync(SyncString) 324 w.Reloc(RelocString, w.p.StringIdx(s)) 325 } 326 327 // Strings encodes and writes a variable-length slice of strings into 328 // the element bitstream. 329 func (w *Encoder) Strings(ss []string) { 330 w.Len(len(ss)) 331 for _, s := range ss { 332 w.String(s) 333 } 334 } 335 336 // Value encodes and writes a constant.Value into the element 337 // bitstream. 338 func (w *Encoder) Value(val constant.Value) { 339 w.Sync(SyncValue) 340 if w.Bool(val.Kind() == constant.Complex) { 341 w.scalar(constant.Real(val)) 342 w.scalar(constant.Imag(val)) 343 } else { 344 w.scalar(val) 345 } 346 } 347 348 func (w *Encoder) scalar(val constant.Value) { 349 switch v := constant.Val(val).(type) { 350 default: 351 errorf("unhandled %v (%v)", val, val.Kind()) 352 case bool: 353 w.Code(ValBool) 354 w.Bool(v) 355 case string: 356 w.Code(ValString) 357 w.String(v) 358 case int64: 359 w.Code(ValInt64) 360 w.Int64(v) 361 case *big.Int: 362 w.Code(ValBigInt) 363 w.bigInt(v) 364 case *big.Rat: 365 w.Code(ValBigRat) 366 w.bigInt(v.Num()) 367 w.bigInt(v.Denom()) 368 case *big.Float: 369 w.Code(ValBigFloat) 370 w.bigFloat(v) 371 } 372 } 373 374 func (w *Encoder) bigInt(v *big.Int) { 375 b := v.Bytes() 376 w.String(string(b)) // TODO: More efficient encoding. 377 w.Bool(v.Sign() < 0) 378 } 379 380 func (w *Encoder) bigFloat(v *big.Float) { 381 b := v.Append(nil, 'p', -1) 382 w.String(string(b)) // TODO: More efficient encoding. 383 }