gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

lexer.go (3897B)


      1 package buffer
      2 
      3 import (
      4 	"io"
      5 	"io/ioutil"
      6 )
      7 
      8 var nullBuffer = []byte{0}
      9 
     10 // Lexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
     11 // It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
     12 type Lexer struct {
     13 	buf   []byte
     14 	pos   int // index in buf
     15 	start int // index in buf
     16 	err   error
     17 
     18 	restore func()
     19 }
     20 
     21 // NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
     22 // If the io.Reader implements Bytes, that is used instead.
     23 // It will append a NULL at the end of the buffer.
     24 func NewLexer(r io.Reader) *Lexer {
     25 	var b []byte
     26 	if r != nil {
     27 		if buffer, ok := r.(interface {
     28 			Bytes() []byte
     29 		}); ok {
     30 			b = buffer.Bytes()
     31 		} else {
     32 			var err error
     33 			b, err = ioutil.ReadAll(r)
     34 			if err != nil {
     35 				return &Lexer{
     36 					buf: nullBuffer,
     37 					err: err,
     38 				}
     39 			}
     40 		}
     41 	}
     42 	return NewLexerBytes(b)
     43 }
     44 
     45 // NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
     46 // To avoid reallocation, make sure the capacity has room for one more byte.
     47 func NewLexerBytes(b []byte) *Lexer {
     48 	z := &Lexer{
     49 		buf: b,
     50 	}
     51 
     52 	n := len(b)
     53 	if n == 0 {
     54 		z.buf = nullBuffer
     55 	} else {
     56 		// Append NULL to buffer, but try to avoid reallocation
     57 		if cap(b) > n {
     58 			// Overwrite next byte but restore when done
     59 			b = b[:n+1]
     60 			c := b[n]
     61 			b[n] = 0
     62 
     63 			z.buf = b
     64 			z.restore = func() {
     65 				b[n] = c
     66 			}
     67 		} else {
     68 			z.buf = append(b, 0)
     69 		}
     70 	}
     71 	return z
     72 }
     73 
     74 // Restore restores the replaced byte past the end of the buffer by NULL.
     75 func (z *Lexer) Restore() {
     76 	if z.restore != nil {
     77 		z.restore()
     78 		z.restore = nil
     79 	}
     80 }
     81 
     82 // Err returns the error returned from io.Reader or io.EOF when the end has been reached.
     83 func (z *Lexer) Err() error {
     84 	return z.PeekErr(0)
     85 }
     86 
     87 // PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
     88 func (z *Lexer) PeekErr(pos int) error {
     89 	if z.err != nil {
     90 		return z.err
     91 	} else if z.pos+pos >= len(z.buf)-1 {
     92 		return io.EOF
     93 	}
     94 	return nil
     95 }
     96 
     97 // Peek returns the ith byte relative to the end position.
     98 // Peek returns 0 when an error has occurred, Err returns the error.
     99 func (z *Lexer) Peek(pos int) byte {
    100 	pos += z.pos
    101 	return z.buf[pos]
    102 }
    103 
    104 // PeekRune returns the rune and rune length of the ith byte relative to the end position.
    105 func (z *Lexer) PeekRune(pos int) (rune, int) {
    106 	// from unicode/utf8
    107 	c := z.Peek(pos)
    108 	if c < 0xC0 || z.Peek(pos+1) == 0 {
    109 		return rune(c), 1
    110 	} else if c < 0xE0 || z.Peek(pos+2) == 0 {
    111 		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
    112 	} else if c < 0xF0 || z.Peek(pos+3) == 0 {
    113 		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
    114 	}
    115 	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
    116 }
    117 
    118 // Move advances the position.
    119 func (z *Lexer) Move(n int) {
    120 	z.pos += n
    121 }
    122 
    123 // Pos returns a mark to which can be rewinded.
    124 func (z *Lexer) Pos() int {
    125 	return z.pos - z.start
    126 }
    127 
    128 // Rewind rewinds the position to the given position.
    129 func (z *Lexer) Rewind(pos int) {
    130 	z.pos = z.start + pos
    131 }
    132 
    133 // Lexeme returns the bytes of the current selection.
    134 func (z *Lexer) Lexeme() []byte {
    135 	return z.buf[z.start:z.pos:z.pos]
    136 }
    137 
    138 // Skip collapses the position to the end of the selection.
    139 func (z *Lexer) Skip() {
    140 	z.start = z.pos
    141 }
    142 
    143 // Shift returns the bytes of the current selection and collapses the position to the end of the selection.
    144 func (z *Lexer) Shift() []byte {
    145 	b := z.buf[z.start:z.pos:z.pos]
    146 	z.start = z.pos
    147 	return b
    148 }
    149 
    150 // Offset returns the character position in the buffer.
    151 func (z *Lexer) Offset() int {
    152 	return z.pos
    153 }
    154 
    155 // Bytes returns the underlying buffer.
    156 func (z *Lexer) Bytes() []byte {
    157 	return z.buf[: len(z.buf)-1 : len(z.buf)-1]
    158 }
    159 
    160 // Reset resets position to the underlying buffer.
    161 func (z *Lexer) Reset() {
    162 	z.start = 0
    163 	z.pos = 0
    164 }