gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

decoder.go (13645B)


      1 // Copyright 2021 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package pkgbits
      6 
      7 import (
      8 	"encoding/binary"
      9 	"errors"
     10 	"fmt"
     11 	"go/constant"
     12 	"go/token"
     13 	"io"
     14 	"math/big"
     15 	"os"
     16 	"runtime"
     17 	"strings"
     18 )
     19 
// A PkgDecoder provides methods for decoding a package's Unified IR
// export data.
//
// It holds the parsed header (version, flags, and the two levels of
// end-offset tables) together with the raw element payload, and hands
// out per-element Decoders on request.
type PkgDecoder struct {
	// version is the file format version.
	version uint32

	// sync indicates whether the file uses sync markers.
	sync bool

	// pkgPath is the package path for the package to be decoded.
	//
	// TODO(mdempsky): Remove; unneeded since CL 391014.
	pkgPath string

	// elemData is the full data payload of the encoded package.
	// Elements are densely and contiguously packed together.
	//
	// The last 8 bytes of elemData are the package fingerprint.
	elemData string

	// elemEnds stores the byte-offset end positions of element
	// bitstreams within elemData.
	//
	// For example, element I's bitstream data starts at elemEnds[I-1]
	// (or 0, if I==0) and ends at elemEnds[I].
	//
	// Note: elemEnds is indexed by absolute indices, not
	// section-relative indices.
	elemEnds []uint32

	// elemEndsEnds stores the index-offset end positions of relocation
	// sections within elemEnds.
	//
	// For example, section K's end positions start at elemEndsEnds[K-1]
	// (or 0, if K==0) and end at elemEndsEnds[K].
	elemEndsEnds [numRelocs]uint32

	// scratchRelocEnt is a spare relocation-entry slice. RetireDecoder
	// stores a retired Decoder's Relocs here, and TempDecoderRaw takes
	// it back (leaving nil behind) when its capacity covers the next
	// element's relocations, so that a temporary decoder can skip
	// allocating a fresh slice.
	scratchRelocEnt []RelocEnt
}
     59 
     60 // PkgPath returns the package path for the package
     61 //
     62 // TODO(mdempsky): Remove; unneeded since CL 391014.
     63 func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
     64 
     65 // SyncMarkers reports whether pr uses sync markers.
     66 func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
     67 
     68 // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
     69 // IR export data from input. pkgPath is the package path for the
     70 // compilation unit that produced the export data.
     71 //
     72 // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
     73 func NewPkgDecoder(pkgPath, input string) PkgDecoder {
     74 	pr := PkgDecoder{
     75 		pkgPath: pkgPath,
     76 	}
     77 
     78 	// TODO(mdempsky): Implement direct indexing of input string to
     79 	// avoid copying the position information.
     80 
     81 	r := strings.NewReader(input)
     82 
     83 	assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
     84 
     85 	switch pr.version {
     86 	default:
     87 		panic(fmt.Errorf("unsupported version: %v", pr.version))
     88 	case 0:
     89 		// no flags
     90 	case 1:
     91 		var flags uint32
     92 		assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
     93 		pr.sync = flags&flagSyncMarkers != 0
     94 	}
     95 
     96 	assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
     97 
     98 	pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
     99 	assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
    100 
    101 	pos, err := r.Seek(0, io.SeekCurrent)
    102 	assert(err == nil)
    103 
    104 	pr.elemData = input[pos:]
    105 	assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
    106 
    107 	return pr
    108 }
    109 
    110 // NumElems returns the number of elements in section k.
    111 func (pr *PkgDecoder) NumElems(k RelocKind) int {
    112 	count := int(pr.elemEndsEnds[k])
    113 	if k > 0 {
    114 		count -= int(pr.elemEndsEnds[k-1])
    115 	}
    116 	return count
    117 }
    118 
    119 // TotalElems returns the total number of elements across all sections.
    120 func (pr *PkgDecoder) TotalElems() int {
    121 	return len(pr.elemEnds)
    122 }
    123 
    124 // Fingerprint returns the package fingerprint.
    125 func (pr *PkgDecoder) Fingerprint() [8]byte {
    126 	var fp [8]byte
    127 	copy(fp[:], pr.elemData[len(pr.elemData)-8:])
    128 	return fp
    129 }
    130 
    131 // AbsIdx returns the absolute index for the given (section, index)
    132 // pair.
    133 func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
    134 	absIdx := int(idx)
    135 	if k > 0 {
    136 		absIdx += int(pr.elemEndsEnds[k-1])
    137 	}
    138 	if absIdx >= int(pr.elemEndsEnds[k]) {
    139 		errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
    140 	}
    141 	return absIdx
    142 }
    143 
    144 // DataIdx returns the raw element bitstream for the given (section,
    145 // index) pair.
    146 func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
    147 	absIdx := pr.AbsIdx(k, idx)
    148 
    149 	var start uint32
    150 	if absIdx > 0 {
    151 		start = pr.elemEnds[absIdx-1]
    152 	}
    153 	end := pr.elemEnds[absIdx]
    154 
    155 	return pr.elemData[start:end]
    156 }
    157 
    158 // StringIdx returns the string value for the given string index.
    159 func (pr *PkgDecoder) StringIdx(idx Index) string {
    160 	return pr.DataIdx(RelocString, idx)
    161 }
    162 
    163 // NewDecoder returns a Decoder for the given (section, index) pair,
    164 // and decodes the given SyncMarker from the element bitstream.
    165 func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
    166 	r := pr.NewDecoderRaw(k, idx)
    167 	r.Sync(marker)
    168 	return r
    169 }
    170 
    171 // TempDecoder returns a Decoder for the given (section, index) pair,
    172 // and decodes the given SyncMarker from the element bitstream.
    173 // If possible the Decoder should be RetireDecoder'd when it is no longer
    174 // needed, this will avoid heap allocations.
    175 func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
    176 	r := pr.TempDecoderRaw(k, idx)
    177 	r.Sync(marker)
    178 	return r
    179 }
    180 
    181 func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
    182 	pr.scratchRelocEnt = d.Relocs
    183 	d.Relocs = nil
    184 }
    185 
    186 // NewDecoderRaw returns a Decoder for the given (section, index) pair.
    187 //
    188 // Most callers should use NewDecoder instead.
    189 func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
    190 	r := Decoder{
    191 		common: pr,
    192 		k:      k,
    193 		Idx:    idx,
    194 	}
    195 
    196 	// TODO(mdempsky) r.data.Reset(...) after #44505 is resolved.
    197 	r.Data = *strings.NewReader(pr.DataIdx(k, idx))
    198 
    199 	r.Sync(SyncRelocs)
    200 	r.Relocs = make([]RelocEnt, r.Len())
    201 	for i := range r.Relocs {
    202 		r.Sync(SyncReloc)
    203 		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
    204 	}
    205 
    206 	return r
    207 }
    208 
    209 func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
    210 	r := Decoder{
    211 		common: pr,
    212 		k:      k,
    213 		Idx:    idx,
    214 	}
    215 
    216 	r.Data.Reset(pr.DataIdx(k, idx))
    217 	r.Sync(SyncRelocs)
    218 	l := r.Len()
    219 	if cap(pr.scratchRelocEnt) >= l {
    220 		r.Relocs = pr.scratchRelocEnt[:l]
    221 		pr.scratchRelocEnt = nil
    222 	} else {
    223 		r.Relocs = make([]RelocEnt, l)
    224 	}
    225 	for i := range r.Relocs {
    226 		r.Sync(SyncReloc)
    227 		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
    228 	}
    229 
    230 	return r
    231 }
    232 
// A Decoder provides methods for decoding an individual element's
// bitstream data.
//
// Decoders are created by PkgDecoder's NewDecoder, NewDecoderRaw,
// TempDecoder, and TempDecoderRaw methods.
type Decoder struct {
	// common is the PkgDecoder this Decoder was created from. It
	// supplies the sync-marker setting, the package path used in
	// desync reports, and string lookups.
	common *PkgDecoder

	// Relocs holds the element's relocation entries, which are read
	// from the front of the bitstream when the Decoder is created.
	Relocs []RelocEnt
	// Data reads the element's bitstream.
	Data   strings.Reader

	// k and Idx are the (section, index) pair the Decoder was created
	// for.
	k   RelocKind
	Idx Index
}
    244 
    245 func (r *Decoder) checkErr(err error) {
    246 	if err != nil {
    247 		errorf("unexpected decoding error: %w", err)
    248 	}
    249 }
    250 
    251 func (r *Decoder) rawUvarint() uint64 {
    252 	x, err := readUvarint(&r.Data)
    253 	r.checkErr(err)
    254 	return x
    255 }
    256 
    257 // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
    258 // This avoids the interface conversion and thus has better escape properties,
    259 // which flows up the stack.
    260 func readUvarint(r *strings.Reader) (uint64, error) {
    261 	var x uint64
    262 	var s uint
    263 	for i := 0; i < binary.MaxVarintLen64; i++ {
    264 		b, err := r.ReadByte()
    265 		if err != nil {
    266 			if i > 0 && err == io.EOF {
    267 				err = io.ErrUnexpectedEOF
    268 			}
    269 			return x, err
    270 		}
    271 		if b < 0x80 {
    272 			if i == binary.MaxVarintLen64-1 && b > 1 {
    273 				return x, overflow
    274 			}
    275 			return x | uint64(b)<<s, nil
    276 		}
    277 		x |= uint64(b&0x7f) << s
    278 		s += 7
    279 	}
    280 	return x, overflow
    281 }
    282 
    283 var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
    284 
    285 func (r *Decoder) rawVarint() int64 {
    286 	ux := r.rawUvarint()
    287 
    288 	// Zig-zag decode.
    289 	x := int64(ux >> 1)
    290 	if ux&1 != 0 {
    291 		x = ^x
    292 	}
    293 	return x
    294 }
    295 
    296 func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
    297 	e := r.Relocs[idx]
    298 	assert(e.Kind == k)
    299 	return e.Idx
    300 }
    301 
    302 // Sync decodes a sync marker from the element bitstream and asserts
    303 // that it matches the expected marker.
    304 //
    305 // If r.common.sync is false, then Sync is a no-op.
    306 func (r *Decoder) Sync(mWant SyncMarker) {
    307 	if !r.common.sync {
    308 		return
    309 	}
    310 
    311 	pos, _ := r.Data.Seek(0, io.SeekCurrent)
    312 	mHave := SyncMarker(r.rawUvarint())
    313 	writerPCs := make([]int, r.rawUvarint())
    314 	for i := range writerPCs {
    315 		writerPCs[i] = int(r.rawUvarint())
    316 	}
    317 
    318 	if mHave == mWant {
    319 		return
    320 	}
    321 
    322 	// There's some tension here between printing:
    323 	//
    324 	// (1) full file paths that tools can recognize (e.g., so emacs
    325 	//     hyperlinks the "file:line" text for easy navigation), or
    326 	//
    327 	// (2) short file paths that are easier for humans to read (e.g., by
    328 	//     omitting redundant or irrelevant details, so it's easier to
    329 	//     focus on the useful bits that remain).
    330 	//
    331 	// The current formatting favors the former, as it seems more
    332 	// helpful in practice. But perhaps the formatting could be improved
    333 	// to better address both concerns. For example, use relative file
    334 	// paths if they would be shorter, or rewrite file paths to contain
    335 	// "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
    336 	// to reliably expand that again.
    337 
    338 	fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
    339 
    340 	fmt.Printf("\nfound %v, written at:\n", mHave)
    341 	if len(writerPCs) == 0 {
    342 		fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
    343 	}
    344 	for _, pc := range writerPCs {
    345 		fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
    346 	}
    347 
    348 	fmt.Printf("\nexpected %v, reading at:\n", mWant)
    349 	var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
    350 	n := runtime.Callers(2, readerPCs[:])
    351 	for _, pc := range fmtFrames(readerPCs[:n]...) {
    352 		fmt.Printf("\t%s\n", pc)
    353 	}
    354 
    355 	// We already printed a stack trace for the reader, so now we can
    356 	// simply exit. Printing a second one with panic or base.Fatalf
    357 	// would just be noise.
    358 	os.Exit(1)
    359 }
    360 
    361 // Bool decodes and returns a bool value from the element bitstream.
    362 func (r *Decoder) Bool() bool {
    363 	r.Sync(SyncBool)
    364 	x, err := r.Data.ReadByte()
    365 	r.checkErr(err)
    366 	assert(x < 2)
    367 	return x != 0
    368 }
    369 
    370 // Int64 decodes and returns an int64 value from the element bitstream.
    371 func (r *Decoder) Int64() int64 {
    372 	r.Sync(SyncInt64)
    373 	return r.rawVarint()
    374 }
    375 
    376 // Uint64 decodes and returns a uint64 value from the element bitstream.
    377 func (r *Decoder) Uint64() uint64 {
    378 	r.Sync(SyncUint64)
    379 	return r.rawUvarint()
    380 }
    381 
    382 // Len decodes and returns a non-negative int value from the element bitstream.
    383 func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
    384 
    385 // Int decodes and returns an int value from the element bitstream.
    386 func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
    387 
    388 // Uint decodes and returns a uint value from the element bitstream.
    389 func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
    390 
    391 // Code decodes a Code value from the element bitstream and returns
    392 // its ordinal value. It's the caller's responsibility to convert the
    393 // result to an appropriate Code type.
    394 //
    395 // TODO(mdempsky): Ideally this method would have signature "Code[T
    396 // Code] T" instead, but we don't allow generic methods and the
    397 // compiler can't depend on generics yet anyway.
    398 func (r *Decoder) Code(mark SyncMarker) int {
    399 	r.Sync(mark)
    400 	return r.Len()
    401 }
    402 
    403 // Reloc decodes a relocation of expected section k from the element
    404 // bitstream and returns an index to the referenced element.
    405 func (r *Decoder) Reloc(k RelocKind) Index {
    406 	r.Sync(SyncUseReloc)
    407 	return r.rawReloc(k, r.Len())
    408 }
    409 
    410 // String decodes and returns a string value from the element
    411 // bitstream.
    412 func (r *Decoder) String() string {
    413 	r.Sync(SyncString)
    414 	return r.common.StringIdx(r.Reloc(RelocString))
    415 }
    416 
    417 // Strings decodes and returns a variable-length slice of strings from
    418 // the element bitstream.
    419 func (r *Decoder) Strings() []string {
    420 	res := make([]string, r.Len())
    421 	for i := range res {
    422 		res[i] = r.String()
    423 	}
    424 	return res
    425 }
    426 
    427 // Value decodes and returns a constant.Value from the element
    428 // bitstream.
    429 func (r *Decoder) Value() constant.Value {
    430 	r.Sync(SyncValue)
    431 	isComplex := r.Bool()
    432 	val := r.scalar()
    433 	if isComplex {
    434 		val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
    435 	}
    436 	return val
    437 }
    438 
    439 func (r *Decoder) scalar() constant.Value {
    440 	switch tag := CodeVal(r.Code(SyncVal)); tag {
    441 	default:
    442 		panic(fmt.Errorf("unexpected scalar tag: %v", tag))
    443 
    444 	case ValBool:
    445 		return constant.MakeBool(r.Bool())
    446 	case ValString:
    447 		return constant.MakeString(r.String())
    448 	case ValInt64:
    449 		return constant.MakeInt64(r.Int64())
    450 	case ValBigInt:
    451 		return constant.Make(r.bigInt())
    452 	case ValBigRat:
    453 		num := r.bigInt()
    454 		denom := r.bigInt()
    455 		return constant.Make(new(big.Rat).SetFrac(num, denom))
    456 	case ValBigFloat:
    457 		return constant.Make(r.bigFloat())
    458 	}
    459 }
    460 
    461 func (r *Decoder) bigInt() *big.Int {
    462 	v := new(big.Int).SetBytes([]byte(r.String()))
    463 	if r.Bool() {
    464 		v.Neg(v)
    465 	}
    466 	return v
    467 }
    468 
    469 func (r *Decoder) bigFloat() *big.Float {
    470 	v := new(big.Float).SetPrec(512)
    471 	assert(v.UnmarshalText([]byte(r.String())) == nil)
    472 	return v
    473 }
    474 
    475 // @@@ Helpers
    476 
    477 // TODO(mdempsky): These should probably be removed. I think they're a
    478 // smell that the export data format is not yet quite right.
    479 
    480 // PeekPkgPath returns the package path for the specified package
    481 // index.
    482 func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
    483 	var path string
    484 	{
    485 		r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
    486 		path = r.String()
    487 		pr.RetireDecoder(&r)
    488 	}
    489 	if path == "" {
    490 		path = pr.pkgPath
    491 	}
    492 	return path
    493 }
    494 
    495 // PeekObj returns the package path, object name, and CodeObj for the
    496 // specified object index.
    497 func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
    498 	var ridx Index
    499 	var name string
    500 	var rcode int
    501 	{
    502 		r := pr.TempDecoder(RelocName, idx, SyncObject1)
    503 		r.Sync(SyncSym)
    504 		r.Sync(SyncPkg)
    505 		ridx = r.Reloc(RelocPkg)
    506 		name = r.String()
    507 		rcode = r.Code(SyncCodeObj)
    508 		pr.RetireDecoder(&r)
    509 	}
    510 
    511 	path := pr.PeekPkgPath(ridx)
    512 	assert(name != "")
    513 
    514 	tag := CodeObj(rcode)
    515 
    516 	return path, name, tag
    517 }