gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

encoder.go (9579B)


      1 // Copyright 2021 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package pkgbits
      6 
      7 import (
      8 	"bytes"
      9 	"crypto/md5"
     10 	"encoding/binary"
     11 	"go/constant"
     12 	"io"
     13 	"math/big"
     14 	"runtime"
     15 )
     16 
     17 // currentVersion is the current version number.
     18 //
     19 //   - v0: initial prototype
     20 //
     21 //   - v1: adds the flags uint32 word
     22 const currentVersion uint32 = 1
     23 
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Elements are accumulated per relocation section via NewEncoder /
// NewEncoderRaw and StringIdx, and the complete result is written out
// with DumpTo.
type PkgEncoder struct {
	// elems holds the bitstream for previously encoded elements.
	// It is indexed first by relocation section and then by the
	// element's index within that section.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the RelocString section, to allow deduplication. That is,
	// elems[RelocString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]Index

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted.
	syncFrames int
}
     39 
     40 // SyncMarkers reports whether pw uses sync markers.
     41 func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
     42 
     43 // NewPkgEncoder returns an initialized PkgEncoder.
     44 //
     45 // syncFrames is the number of caller frames that should be serialized
     46 // at Sync points. Serializing additional frames results in larger
     47 // export data files, but can help diagnosing desync errors in
     48 // higher-level Unified IR reader/writer code. If syncFrames is
     49 // negative, then sync markers are omitted entirely.
     50 func NewPkgEncoder(syncFrames int) PkgEncoder {
     51 	return PkgEncoder{
     52 		stringsIdx: make(map[string]Index),
     53 		syncFrames: syncFrames,
     54 	}
     55 }
     56 
     57 // DumpTo writes the package's encoded data to out0 and returns the
     58 // package fingerprint.
     59 func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
     60 	h := md5.New()
     61 	out := io.MultiWriter(out0, h)
     62 
     63 	writeUint32 := func(x uint32) {
     64 		assert(binary.Write(out, binary.LittleEndian, x) == nil)
     65 	}
     66 
     67 	writeUint32(currentVersion)
     68 
     69 	var flags uint32
     70 	if pw.SyncMarkers() {
     71 		flags |= flagSyncMarkers
     72 	}
     73 	writeUint32(flags)
     74 
     75 	// Write elemEndsEnds.
     76 	var sum uint32
     77 	for _, elems := range &pw.elems {
     78 		sum += uint32(len(elems))
     79 		writeUint32(sum)
     80 	}
     81 
     82 	// Write elemEnds.
     83 	sum = 0
     84 	for _, elems := range &pw.elems {
     85 		for _, elem := range elems {
     86 			sum += uint32(len(elem))
     87 			writeUint32(sum)
     88 		}
     89 	}
     90 
     91 	// Write elemData.
     92 	for _, elems := range &pw.elems {
     93 		for _, elem := range elems {
     94 			_, err := io.WriteString(out, elem)
     95 			assert(err == nil)
     96 		}
     97 	}
     98 
     99 	// Write fingerprint.
    100 	copy(fingerprint[:], h.Sum(nil))
    101 	_, err := out0.Write(fingerprint[:])
    102 	assert(err == nil)
    103 
    104 	return
    105 }
    106 
    107 // StringIdx adds a string value to the strings section, if not
    108 // already present, and returns its index.
    109 func (pw *PkgEncoder) StringIdx(s string) Index {
    110 	if idx, ok := pw.stringsIdx[s]; ok {
    111 		assert(pw.elems[RelocString][idx] == s)
    112 		return idx
    113 	}
    114 
    115 	idx := Index(len(pw.elems[RelocString]))
    116 	pw.elems[RelocString] = append(pw.elems[RelocString], s)
    117 	pw.stringsIdx[s] = idx
    118 	return idx
    119 }
    120 
    121 // NewEncoder returns an Encoder for a new element within the given
    122 // section, and encodes the given SyncMarker as the start of the
    123 // element bitstream.
    124 func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
    125 	e := pw.NewEncoderRaw(k)
    126 	e.Sync(marker)
    127 	return e
    128 }
    129 
    130 // NewEncoderRaw returns an Encoder for a new element within the given
    131 // section.
    132 //
    133 // Most callers should use NewEncoder instead.
    134 func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
    135 	idx := Index(len(pw.elems[k]))
    136 	pw.elems[k] = append(pw.elems[k], "") // placeholder
    137 
    138 	return Encoder{
    139 		p:   pw,
    140 		k:   k,
    141 		Idx: idx,
    142 	}
    143 }
    144 
// An Encoder provides methods for encoding an individual element's
// bitstream data.
//
// Encoders are created by PkgEncoder.NewEncoder or
// PkgEncoder.NewEncoderRaw; Flush stores the finished bitstream back
// into the owning PkgEncoder.
type Encoder struct {
	// p is the PkgEncoder that owns this element.
	p *PkgEncoder

	// Relocs lists the relocation entries used by this element, in
	// order of first use.
	Relocs   []RelocEnt
	// RelocMap maps each entry in Relocs to its index there. It is
	// allocated lazily on first use.
	RelocMap map[RelocEnt]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set by Flush before it writes the
	// relocation header; Sync omits stack frames while it is set.
	encodingRelocHeader bool

	k   RelocKind // relocation section this element belongs to
	Idx Index // index within relocation section
}
    159 
    160 // Flush finalizes the element's bitstream and returns its Index.
    161 func (w *Encoder) Flush() Index {
    162 	var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved
    163 
    164 	// Backup the data so we write the relocations at the front.
    165 	var tmp bytes.Buffer
    166 	io.Copy(&tmp, &w.Data)
    167 
    168 	// TODO(mdempsky): Consider writing these out separately so they're
    169 	// easier to strip, along with function bodies, so that we can prune
    170 	// down to just the data that's relevant to go/types.
    171 	if w.encodingRelocHeader {
    172 		panic("encodingRelocHeader already true; recursive flush?")
    173 	}
    174 	w.encodingRelocHeader = true
    175 	w.Sync(SyncRelocs)
    176 	w.Len(len(w.Relocs))
    177 	for _, rEnt := range w.Relocs {
    178 		w.Sync(SyncReloc)
    179 		w.Len(int(rEnt.Kind))
    180 		w.Len(int(rEnt.Idx))
    181 	}
    182 
    183 	io.Copy(&sb, &w.Data)
    184 	io.Copy(&sb, &tmp)
    185 	w.p.elems[w.k][w.Idx] = sb.String()
    186 
    187 	return w.Idx
    188 }
    189 
    190 func (w *Encoder) checkErr(err error) {
    191 	if err != nil {
    192 		errorf("unexpected encoding error: %v", err)
    193 	}
    194 }
    195 
    196 func (w *Encoder) rawUvarint(x uint64) {
    197 	var buf [binary.MaxVarintLen64]byte
    198 	n := binary.PutUvarint(buf[:], x)
    199 	_, err := w.Data.Write(buf[:n])
    200 	w.checkErr(err)
    201 }
    202 
    203 func (w *Encoder) rawVarint(x int64) {
    204 	// Zig-zag encode.
    205 	ux := uint64(x) << 1
    206 	if x < 0 {
    207 		ux = ^ux
    208 	}
    209 
    210 	w.rawUvarint(ux)
    211 }
    212 
    213 func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
    214 	e := RelocEnt{r, idx}
    215 	if w.RelocMap != nil {
    216 		if i, ok := w.RelocMap[e]; ok {
    217 			return int(i)
    218 		}
    219 	} else {
    220 		w.RelocMap = make(map[RelocEnt]uint32)
    221 	}
    222 
    223 	i := len(w.Relocs)
    224 	w.RelocMap[e] = uint32(i)
    225 	w.Relocs = append(w.Relocs, e)
    226 	return i
    227 }
    228 
    229 func (w *Encoder) Sync(m SyncMarker) {
    230 	if !w.p.SyncMarkers() {
    231 		return
    232 	}
    233 
    234 	// Writing out stack frame string references requires working
    235 	// relocations, but writing out the relocations themselves involves
    236 	// sync markers. To prevent infinite recursion, we simply trim the
    237 	// stack frame for sync markers within the relocation header.
    238 	var frames []string
    239 	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
    240 		pcs := make([]uintptr, w.p.syncFrames)
    241 		n := runtime.Callers(2, pcs)
    242 		frames = fmtFrames(pcs[:n]...)
    243 	}
    244 
    245 	// TODO(mdempsky): Save space by writing out stack frames as a
    246 	// linked list so we can share common stack frames.
    247 	w.rawUvarint(uint64(m))
    248 	w.rawUvarint(uint64(len(frames)))
    249 	for _, frame := range frames {
    250 		w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
    251 	}
    252 }
    253 
    254 // Bool encodes and writes a bool value into the element bitstream,
    255 // and then returns the bool value.
    256 //
    257 // For simple, 2-alternative encodings, the idiomatic way to call Bool
    258 // is something like:
    259 //
    260 //	if w.Bool(x != 0) {
    261 //		// alternative #1
    262 //	} else {
    263 //		// alternative #2
    264 //	}
    265 //
    266 // For multi-alternative encodings, use Code instead.
    267 func (w *Encoder) Bool(b bool) bool {
    268 	w.Sync(SyncBool)
    269 	var x byte
    270 	if b {
    271 		x = 1
    272 	}
    273 	err := w.Data.WriteByte(x)
    274 	w.checkErr(err)
    275 	return b
    276 }
    277 
// Int64 encodes and writes an int64 value into the element bitstream.
//
// The value is preceded by a SyncInt64 sync marker (when sync markers
// are enabled) and written as a zig-zag encoded varint.
func (w *Encoder) Int64(x int64) {
	w.Sync(SyncInt64)
	w.rawVarint(x)
}
    283 
// Uint64 encodes and writes a uint64 value into the element bitstream.
//
// The value is preceded by a SyncUint64 sync marker (when sync markers
// are enabled) and written as an unsigned varint.
func (w *Encoder) Uint64(x uint64) {
	w.Sync(SyncUint64)
	w.rawUvarint(x)
}
    289 
    290 // Len encodes and writes a non-negative int value into the element bitstream.
    291 func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
    292 
    293 // Int encodes and writes an int value into the element bitstream.
    294 func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
    295 
    296 // Uint encodes and writes a uint value into the element bitstream.
    297 func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
    298 
    299 // Reloc encodes and writes a relocation for the given (section,
    300 // index) pair into the element bitstream.
    301 //
    302 // Note: Only the index is formally written into the element
    303 // bitstream, so bitstream decoders must know from context which
    304 // section an encoded relocation refers to.
    305 func (w *Encoder) Reloc(r RelocKind, idx Index) {
    306 	w.Sync(SyncUseReloc)
    307 	w.Len(w.rawReloc(r, idx))
    308 }
    309 
    310 // Code encodes and writes a Code value into the element bitstream.
    311 func (w *Encoder) Code(c Code) {
    312 	w.Sync(c.Marker())
    313 	w.Len(c.Value())
    314 }
    315 
    316 // String encodes and writes a string value into the element
    317 // bitstream.
    318 //
    319 // Internally, strings are deduplicated by adding them to the strings
    320 // section (if not already present), and then writing a relocation
    321 // into the element bitstream.
    322 func (w *Encoder) String(s string) {
    323 	w.Sync(SyncString)
    324 	w.Reloc(RelocString, w.p.StringIdx(s))
    325 }
    326 
    327 // Strings encodes and writes a variable-length slice of strings into
    328 // the element bitstream.
    329 func (w *Encoder) Strings(ss []string) {
    330 	w.Len(len(ss))
    331 	for _, s := range ss {
    332 		w.String(s)
    333 	}
    334 }
    335 
    336 // Value encodes and writes a constant.Value into the element
    337 // bitstream.
    338 func (w *Encoder) Value(val constant.Value) {
    339 	w.Sync(SyncValue)
    340 	if w.Bool(val.Kind() == constant.Complex) {
    341 		w.scalar(constant.Real(val))
    342 		w.scalar(constant.Imag(val))
    343 	} else {
    344 		w.scalar(val)
    345 	}
    346 }
    347 
    348 func (w *Encoder) scalar(val constant.Value) {
    349 	switch v := constant.Val(val).(type) {
    350 	default:
    351 		errorf("unhandled %v (%v)", val, val.Kind())
    352 	case bool:
    353 		w.Code(ValBool)
    354 		w.Bool(v)
    355 	case string:
    356 		w.Code(ValString)
    357 		w.String(v)
    358 	case int64:
    359 		w.Code(ValInt64)
    360 		w.Int64(v)
    361 	case *big.Int:
    362 		w.Code(ValBigInt)
    363 		w.bigInt(v)
    364 	case *big.Rat:
    365 		w.Code(ValBigRat)
    366 		w.bigInt(v.Num())
    367 		w.bigInt(v.Denom())
    368 	case *big.Float:
    369 		w.Code(ValBigFloat)
    370 		w.bigFloat(v)
    371 	}
    372 }
    373 
    374 func (w *Encoder) bigInt(v *big.Int) {
    375 	b := v.Bytes()
    376 	w.String(string(b)) // TODO: More efficient encoding.
    377 	w.Bool(v.Sign() < 0)
    378 }
    379 
    380 func (w *Encoder) bigFloat(v *big.Float) {
    381 	b := v.Append(nil, 'p', -1)
    382 	w.String(string(b)) // TODO: More efficient encoding.
    383 }