decoder.go (13645B)
1 // Copyright 2021 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pkgbits 6 7 import ( 8 "encoding/binary" 9 "errors" 10 "fmt" 11 "go/constant" 12 "go/token" 13 "io" 14 "math/big" 15 "os" 16 "runtime" 17 "strings" 18 ) 19 20 // A PkgDecoder provides methods for decoding a package's Unified IR 21 // export data. 22 type PkgDecoder struct { 23 // version is the file format version. 24 version uint32 25 26 // sync indicates whether the file uses sync markers. 27 sync bool 28 29 // pkgPath is the package path for the package to be decoded. 30 // 31 // TODO(mdempsky): Remove; unneeded since CL 391014. 32 pkgPath string 33 34 // elemData is the full data payload of the encoded package. 35 // Elements are densely and contiguously packed together. 36 // 37 // The last 8 bytes of elemData are the package fingerprint. 38 elemData string 39 40 // elemEnds stores the byte-offset end positions of element 41 // bitstreams within elemData. 42 // 43 // For example, element I's bitstream data starts at elemEnds[I-1] 44 // (or 0, if I==0) and ends at elemEnds[I]. 45 // 46 // Note: elemEnds is indexed by absolute indices, not 47 // section-relative indices. 48 elemEnds []uint32 49 50 // elemEndsEnds stores the index-offset end positions of relocation 51 // sections within elemEnds. 52 // 53 // For example, section K's end positions start at elemEndsEnds[K-1] 54 // (or 0, if K==0) and end at elemEndsEnds[K]. 55 elemEndsEnds [numRelocs]uint32 56 57 scratchRelocEnt []RelocEnt 58 } 59 60 // PkgPath returns the package path for the package 61 // 62 // TODO(mdempsky): Remove; unneeded since CL 391014. 63 func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } 64 65 // SyncMarkers reports whether pr uses sync markers. 66 func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } 67 68 // NewPkgDecoder returns a PkgDecoder initialized to read the Unified 69 // IR export data from input. pkgPath is the package path for the 70 // compilation unit that produced the export data. 71 // 72 // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. 73 func NewPkgDecoder(pkgPath, input string) PkgDecoder { 74 pr := PkgDecoder{ 75 pkgPath: pkgPath, 76 } 77 78 // TODO(mdempsky): Implement direct indexing of input string to 79 // avoid copying the position information. 80 81 r := strings.NewReader(input) 82 83 assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) 84 85 switch pr.version { 86 default: 87 panic(fmt.Errorf("unsupported version: %v", pr.version)) 88 case 0: 89 // no flags 90 case 1: 91 var flags uint32 92 assert(binary.Read(r, binary.LittleEndian, &flags) == nil) 93 pr.sync = flags&flagSyncMarkers != 0 94 } 95 96 assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) 97 98 pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) 99 assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) 100 101 pos, err := r.Seek(0, io.SeekCurrent) 102 assert(err == nil) 103 104 pr.elemData = input[pos:] 105 assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) 106 107 return pr 108 } 109 110 // NumElems returns the number of elements in section k. 111 func (pr *PkgDecoder) NumElems(k RelocKind) int { 112 count := int(pr.elemEndsEnds[k]) 113 if k > 0 { 114 count -= int(pr.elemEndsEnds[k-1]) 115 } 116 return count 117 } 118 119 // TotalElems returns the total number of elements across all sections. 120 func (pr *PkgDecoder) TotalElems() int { 121 return len(pr.elemEnds) 122 } 123 124 // Fingerprint returns the package fingerprint. 125 func (pr *PkgDecoder) Fingerprint() [8]byte { 126 var fp [8]byte 127 copy(fp[:], pr.elemData[len(pr.elemData)-8:]) 128 return fp 129 } 130 131 // AbsIdx returns the absolute index for the given (section, index) 132 // pair. 133 func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { 134 absIdx := int(idx) 135 if k > 0 { 136 absIdx += int(pr.elemEndsEnds[k-1]) 137 } 138 if absIdx >= int(pr.elemEndsEnds[k]) { 139 errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) 140 } 141 return absIdx 142 } 143 144 // DataIdx returns the raw element bitstream for the given (section, 145 // index) pair. 146 func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { 147 absIdx := pr.AbsIdx(k, idx) 148 149 var start uint32 150 if absIdx > 0 { 151 start = pr.elemEnds[absIdx-1] 152 } 153 end := pr.elemEnds[absIdx] 154 155 return pr.elemData[start:end] 156 } 157 158 // StringIdx returns the string value for the given string index. 159 func (pr *PkgDecoder) StringIdx(idx Index) string { 160 return pr.DataIdx(RelocString, idx) 161 } 162 163 // NewDecoder returns a Decoder for the given (section, index) pair, 164 // and decodes the given SyncMarker from the element bitstream. 165 func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { 166 r := pr.NewDecoderRaw(k, idx) 167 r.Sync(marker) 168 return r 169 } 170 171 // TempDecoder returns a Decoder for the given (section, index) pair, 172 // and decodes the given SyncMarker from the element bitstream. 173 // If possible the Decoder should be RetireDecoder'd when it is no longer 174 // needed, this will avoid heap allocations. 175 func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { 176 r := pr.TempDecoderRaw(k, idx) 177 r.Sync(marker) 178 return r 179 } 180 181 func (pr *PkgDecoder) RetireDecoder(d *Decoder) { 182 pr.scratchRelocEnt = d.Relocs 183 d.Relocs = nil 184 } 185 186 // NewDecoderRaw returns a Decoder for the given (section, index) pair. 187 // 188 // Most callers should use NewDecoder instead. 189 func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { 190 r := Decoder{ 191 common: pr, 192 k: k, 193 Idx: idx, 194 } 195 196 // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. 197 r.Data = *strings.NewReader(pr.DataIdx(k, idx)) 198 199 r.Sync(SyncRelocs) 200 r.Relocs = make([]RelocEnt, r.Len()) 201 for i := range r.Relocs { 202 r.Sync(SyncReloc) 203 r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} 204 } 205 206 return r 207 } 208 209 func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder { 210 r := Decoder{ 211 common: pr, 212 k: k, 213 Idx: idx, 214 } 215 216 r.Data.Reset(pr.DataIdx(k, idx)) 217 r.Sync(SyncRelocs) 218 l := r.Len() 219 if cap(pr.scratchRelocEnt) >= l { 220 r.Relocs = pr.scratchRelocEnt[:l] 221 pr.scratchRelocEnt = nil 222 } else { 223 r.Relocs = make([]RelocEnt, l) 224 } 225 for i := range r.Relocs { 226 r.Sync(SyncReloc) 227 r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} 228 } 229 230 return r 231 } 232 233 // A Decoder provides methods for decoding an individual element's 234 // bitstream data. 235 type Decoder struct { 236 common *PkgDecoder 237 238 Relocs []RelocEnt 239 Data strings.Reader 240 241 k RelocKind 242 Idx Index 243 } 244 245 func (r *Decoder) checkErr(err error) { 246 if err != nil { 247 errorf("unexpected decoding error: %w", err) 248 } 249 } 250 251 func (r *Decoder) rawUvarint() uint64 { 252 x, err := readUvarint(&r.Data) 253 r.checkErr(err) 254 return x 255 } 256 257 // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint. 258 // This avoids the interface conversion and thus has better escape properties, 259 // which flows up the stack. 260 func readUvarint(r *strings.Reader) (uint64, error) { 261 var x uint64 262 var s uint 263 for i := 0; i < binary.MaxVarintLen64; i++ { 264 b, err := r.ReadByte() 265 if err != nil { 266 if i > 0 && err == io.EOF { 267 err = io.ErrUnexpectedEOF 268 } 269 return x, err 270 } 271 if b < 0x80 { 272 if i == binary.MaxVarintLen64-1 && b > 1 { 273 return x, overflow 274 } 275 return x | uint64(b)<<s, nil 276 } 277 x |= uint64(b&0x7f) << s 278 s += 7 279 } 280 return x, overflow 281 } 282 283 var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer") 284 285 func (r *Decoder) rawVarint() int64 { 286 ux := r.rawUvarint() 287 288 // Zig-zag decode. 289 x := int64(ux >> 1) 290 if ux&1 != 0 { 291 x = ^x 292 } 293 return x 294 } 295 296 func (r *Decoder) rawReloc(k RelocKind, idx int) Index { 297 e := r.Relocs[idx] 298 assert(e.Kind == k) 299 return e.Idx 300 } 301 302 // Sync decodes a sync marker from the element bitstream and asserts 303 // that it matches the expected marker. 304 // 305 // If r.common.sync is false, then Sync is a no-op. 306 func (r *Decoder) Sync(mWant SyncMarker) { 307 if !r.common.sync { 308 return 309 } 310 311 pos, _ := r.Data.Seek(0, io.SeekCurrent) 312 mHave := SyncMarker(r.rawUvarint()) 313 writerPCs := make([]int, r.rawUvarint()) 314 for i := range writerPCs { 315 writerPCs[i] = int(r.rawUvarint()) 316 } 317 318 if mHave == mWant { 319 return 320 } 321 322 // There's some tension here between printing: 323 // 324 // (1) full file paths that tools can recognize (e.g., so emacs 325 // hyperlinks the "file:line" text for easy navigation), or 326 // 327 // (2) short file paths that are easier for humans to read (e.g., by 328 // omitting redundant or irrelevant details, so it's easier to 329 // focus on the useful bits that remain). 330 // 331 // The current formatting favors the former, as it seems more 332 // helpful in practice. But perhaps the formatting could be improved 333 // to better address both concerns. For example, use relative file 334 // paths if they would be shorter, or rewrite file paths to contain 335 // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how 336 // to reliably expand that again. 337 338 fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) 339 340 fmt.Printf("\nfound %v, written at:\n", mHave) 341 if len(writerPCs) == 0 { 342 fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) 343 } 344 for _, pc := range writerPCs { 345 fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) 346 } 347 348 fmt.Printf("\nexpected %v, reading at:\n", mWant) 349 var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? 350 n := runtime.Callers(2, readerPCs[:]) 351 for _, pc := range fmtFrames(readerPCs[:n]...) { 352 fmt.Printf("\t%s\n", pc) 353 } 354 355 // We already printed a stack trace for the reader, so now we can 356 // simply exit. Printing a second one with panic or base.Fatalf 357 // would just be noise. 358 os.Exit(1) 359 } 360 361 // Bool decodes and returns a bool value from the element bitstream. 362 func (r *Decoder) Bool() bool { 363 r.Sync(SyncBool) 364 x, err := r.Data.ReadByte() 365 r.checkErr(err) 366 assert(x < 2) 367 return x != 0 368 } 369 370 // Int64 decodes and returns an int64 value from the element bitstream. 371 func (r *Decoder) Int64() int64 { 372 r.Sync(SyncInt64) 373 return r.rawVarint() 374 } 375 376 // Uint64 decodes and returns a uint64 value from the element bitstream. 377 func (r *Decoder) Uint64() uint64 { 378 r.Sync(SyncUint64) 379 return r.rawUvarint() 380 } 381 382 // Len decodes and returns a non-negative int value from the element bitstream. 383 func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } 384 385 // Int decodes and returns an int value from the element bitstream. 386 func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } 387 388 // Uint decodes and returns a uint value from the element bitstream. 389 func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } 390 391 // Code decodes a Code value from the element bitstream and returns 392 // its ordinal value. It's the caller's responsibility to convert the 393 // result to an appropriate Code type. 394 // 395 // TODO(mdempsky): Ideally this method would have signature "Code[T 396 // Code] T" instead, but we don't allow generic methods and the 397 // compiler can't depend on generics yet anyway. 398 func (r *Decoder) Code(mark SyncMarker) int { 399 r.Sync(mark) 400 return r.Len() 401 } 402 403 // Reloc decodes a relocation of expected section k from the element 404 // bitstream and returns an index to the referenced element. 405 func (r *Decoder) Reloc(k RelocKind) Index { 406 r.Sync(SyncUseReloc) 407 return r.rawReloc(k, r.Len()) 408 } 409 410 // String decodes and returns a string value from the element 411 // bitstream. 412 func (r *Decoder) String() string { 413 r.Sync(SyncString) 414 return r.common.StringIdx(r.Reloc(RelocString)) 415 } 416 417 // Strings decodes and returns a variable-length slice of strings from 418 // the element bitstream. 419 func (r *Decoder) Strings() []string { 420 res := make([]string, r.Len()) 421 for i := range res { 422 res[i] = r.String() 423 } 424 return res 425 } 426 427 // Value decodes and returns a constant.Value from the element 428 // bitstream. 429 func (r *Decoder) Value() constant.Value { 430 r.Sync(SyncValue) 431 isComplex := r.Bool() 432 val := r.scalar() 433 if isComplex { 434 val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) 435 } 436 return val 437 } 438 439 func (r *Decoder) scalar() constant.Value { 440 switch tag := CodeVal(r.Code(SyncVal)); tag { 441 default: 442 panic(fmt.Errorf("unexpected scalar tag: %v", tag)) 443 444 case ValBool: 445 return constant.MakeBool(r.Bool()) 446 case ValString: 447 return constant.MakeString(r.String()) 448 case ValInt64: 449 return constant.MakeInt64(r.Int64()) 450 case ValBigInt: 451 return constant.Make(r.bigInt()) 452 case ValBigRat: 453 num := r.bigInt() 454 denom := r.bigInt() 455 return constant.Make(new(big.Rat).SetFrac(num, denom)) 456 case ValBigFloat: 457 return constant.Make(r.bigFloat()) 458 } 459 } 460 461 func (r *Decoder) bigInt() *big.Int { 462 v := new(big.Int).SetBytes([]byte(r.String())) 463 if r.Bool() { 464 v.Neg(v) 465 } 466 return v 467 } 468 469 func (r *Decoder) bigFloat() *big.Float { 470 v := new(big.Float).SetPrec(512) 471 assert(v.UnmarshalText([]byte(r.String())) == nil) 472 return v 473 } 474 475 // @@@ Helpers 476 477 // TODO(mdempsky): These should probably be removed. I think they're a 478 // smell that the export data format is not yet quite right. 479 480 // PeekPkgPath returns the package path for the specified package 481 // index. 482 func (pr *PkgDecoder) PeekPkgPath(idx Index) string { 483 var path string 484 { 485 r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef) 486 path = r.String() 487 pr.RetireDecoder(&r) 488 } 489 if path == "" { 490 path = pr.pkgPath 491 } 492 return path 493 } 494 495 // PeekObj returns the package path, object name, and CodeObj for the 496 // specified object index. 497 func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { 498 var ridx Index 499 var name string 500 var rcode int 501 { 502 r := pr.TempDecoder(RelocName, idx, SyncObject1) 503 r.Sync(SyncSym) 504 r.Sync(SyncPkg) 505 ridx = r.Reloc(RelocPkg) 506 name = r.String() 507 rcode = r.Code(SyncCodeObj) 508 pr.RetireDecoder(&r) 509 } 510 511 path := pr.PeekPkgPath(ridx) 512 assert(name != "") 513 514 tag := CodeObj(rcode) 515 516 return path, name, tag 517 }