// lexer.go (3897B)

package buffer

import (
	"io"
	"io/ioutil"
)
// nullBuffer is the buffer used for empty or unreadable input: it holds only
// the NULL terminator. It is shared between lexers and must never be written to.
var nullBuffer = []byte{0}

// Lexer is an in-memory lexer buffer that allows peeking forward and shifting.
// The complete input is held in one byte slice that always ends in a NULL
// byte, so the end of input shows up as a zero byte from Peek. Because a zero
// byte may also be part of the data, use Err or PeekErr to confirm the end.
type Lexer struct {
	buf   []byte
	pos   int // index in buf, end of the current selection
	start int // index in buf, start of the current selection
	err   error

	restore func() // undoes the in-place NULL write done by NewLexerBytes, nil if nothing to undo
}

// NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
// If the io.Reader implements Bytes, that is used instead.
// It will append a NULL at the end of the buffer.
// A nil reader gives an empty Lexer. When reading fails, the returned Lexer is
// empty and Err returns the read error.
func NewLexer(r io.Reader) *Lexer {
	if r == nil {
		return NewLexerBytes(nil)
	}
	if buffer, ok := r.(interface {
		Bytes() []byte
	}); ok {
		return NewLexerBytes(buffer.Bytes())
	}

	b, err := ioutil.ReadAll(r)
	if err != nil {
		return &Lexer{
			buf: nullBuffer,
			err: err,
		}
	}
	return NewLexerBytes(b)
}

// NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
// To avoid reallocation, make sure the capacity has room for one more byte.
// When there is spare capacity, the byte directly past the end of b is
// overwritten in place; call Restore to put the original byte back.
func NewLexerBytes(b []byte) *Lexer {
	n := len(b)
	if n == 0 {
		return &Lexer{buf: nullBuffer}
	}
	if cap(b) == n {
		// No spare capacity: append copies into a new array, the caller's memory is untouched.
		return &Lexer{buf: append(b, 0)}
	}

	// Spare capacity: overwrite the next byte but remember it so Restore can undo the write.
	b = b[:n+1]
	c := b[n]
	b[n] = 0
	return &Lexer{
		buf: b,
		restore: func() {
			b[n] = c
		},
	}
}

// Restore restores the byte past the end of the buffer that was replaced by NULL.
// It does nothing when no byte was replaced or when it has already been called.
func (z *Lexer) Restore() {
	if z.restore != nil {
		z.restore()
		z.restore = nil
	}
}

// Err returns the error returned from io.Reader or io.EOF when the end has been reached.
func (z *Lexer) Err() error {
	return z.PeekErr(0)
}

// PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
// A read error from the io.Reader takes precedence; otherwise io.EOF is
// returned when pos is at or beyond the NULL terminator, and nil when it is not.
func (z *Lexer) PeekErr(pos int) error {
	if z.err != nil {
		return z.err
	}
	if z.pos+pos >= len(z.buf)-1 {
		return io.EOF
	}
	return nil
}

// Peek returns the ith byte relative to the end position.
// Peek returns 0 at the NULL terminator and for any position outside the
// buffer; use Err or PeekErr to tell the end of input from a zero data byte.
func (z *Lexer) Peek(pos int) byte {
	pos += z.pos
	if pos < 0 || pos >= len(z.buf) {
		// Outside the buffer: report it like the terminator instead of panicking.
		return 0
	}
	return z.buf[pos]
}

// PeekRune returns the rune and rune length of the ith byte relative to the end position.
// The decoding is unchecked: continuation bytes are not validated, and a
// sequence cut short by a zero byte is decoded from the bytes before it.
func (z *Lexer) PeekRune(pos int) (rune, int) {
	// from unicode/utf8
	c := z.Peek(pos)
	switch {
	case c < 0xC0 || z.Peek(pos+1) == 0:
		return rune(c), 1
	case c < 0xE0 || z.Peek(pos+2) == 0:
		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
	case c < 0xF0 || z.Peek(pos+3) == 0:
		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
	}
	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
}

// Move advances the position by n bytes. No bounds checking is done.
func (z *Lexer) Move(n int) {
	z.pos += n
}

// Pos returns a mark that can be passed to Rewind; it is the length of the current selection.
func (z *Lexer) Pos() int {
	return z.pos - z.start
}

// Rewind rewinds the position to the given position, relative to the start of the current selection.
func (z *Lexer) Rewind(pos int) {
	z.pos = z.start + pos
}

// Lexeme returns the bytes of the current selection.
// The capacity of the result is capped at its length, so appending to it
// cannot overwrite the rest of the buffer.
func (z *Lexer) Lexeme() []byte {
	return z.buf[z.start:z.pos:z.pos]
}

// Skip collapses the position to the end of the selection.
func (z *Lexer) Skip() {
	z.start = z.pos
}

// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
func (z *Lexer) Shift() []byte {
	b := z.buf[z.start:z.pos:z.pos]
	z.start = z.pos
	return b
}

// Offset returns the character position in the buffer.
func (z *Lexer) Offset() int {
	return z.pos
}

// Bytes returns the underlying buffer without the trailing NULL.
// It returns nil for a zero-value Lexer, which has no buffer.
func (z *Lexer) Bytes() []byte {
	n := len(z.buf) - 1
	if n < 0 {
		return nil
	}
	return z.buf[:n:n]
}

// Reset resets position to the underlying buffer.
func (z *Lexer) Reset() {
	z.start = 0
	z.pos = 0
}