gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

lex.go (8799B)


      1 // Copyright 2013-2022 Frank Schroeder. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 //
      5 // Parts of the lexer are from the template/text/parser package
      6 // For these parts the following applies:
      7 //
      8 // Copyright 2011 The Go Authors. All rights reserved.
      9 // Use of this source code is governed by a BSD-style
     10 // license that can be found in the LICENSE file of the go 1.2
     11 // distribution.
     12 
     13 package properties
     14 
     15 import (
     16 	"fmt"
     17 	"strconv"
     18 	"strings"
     19 	"unicode/utf8"
     20 )
     21 
// item represents a token or text string returned from the scanner.
// Items are created by emit and errorf and delivered to the consumer
// through nextItem.
type item struct {
	typ itemType // The type of this item.
	pos int      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item; for itemError it is the error message.
}
     28 
     29 func (i item) String() string {
     30 	switch {
     31 	case i.typ == itemEOF:
     32 		return "EOF"
     33 	case i.typ == itemError:
     34 		return i.val
     35 	case len(i.val) > 10:
     36 		return fmt.Sprintf("%.10q...", i.val)
     37 	}
     38 	return fmt.Sprintf("%q", i.val)
     39 }
     40 
// itemType identifies the type of lex items.
type itemType int

// The kinds of items the lexer can produce. itemError is the zero value.
const (
	itemError itemType = iota // error occurred; value is text of error
	itemEOF                   // end of input reached
	itemKey     // a key
	itemValue   // a value
	itemComment // a comment
)
     51 
// eof is the sentinel rune value that next returns once the whole
// input has been consumed.
const eof = -1
     54 
// whitespace lists the permitted whitespace characters: space, form
// feed (FF) and TAB. Line terminators are deliberately not included;
// they are recognised separately by isEOL.
const whitespace = " \f\t"
     57 
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn stops the state machine driven by run.
type stateFn func(*lexer) stateFn
     60 
// lexer holds the state of the scanner. It is created by lex, which
// also starts the goroutine that runs the state machine.
type lexer struct {
	input   string    // the string being scanned
	state   stateFn   // the next lexing function to enter
	pos     int       // current position in the input, in bytes
	start   int       // start position of this item
	width   int       // width of last rune read from input; 0 after reading eof
	lastPos int       // position of most recent item returned by nextItem
	runes   []rune    // scanned runes for this item; reset by emit
	items   chan item // channel of scanned items; written by emit/errorf, read by nextItem
}
     72 
     73 // next returns the next rune in the input.
     74 func (l *lexer) next() rune {
     75 	if l.pos >= len(l.input) {
     76 		l.width = 0
     77 		return eof
     78 	}
     79 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
     80 	l.width = w
     81 	l.pos += l.width
     82 	return r
     83 }
     84 
     85 // peek returns but does not consume the next rune in the input.
     86 func (l *lexer) peek() rune {
     87 	r := l.next()
     88 	l.backup()
     89 	return r
     90 }
     91 
     92 // backup steps back one rune. Can only be called once per call of next.
     93 func (l *lexer) backup() {
     94 	l.pos -= l.width
     95 }
     96 
     97 // emit passes an item back to the client.
     98 func (l *lexer) emit(t itemType) {
     99 	i := item{t, l.start, string(l.runes)}
    100 	l.items <- i
    101 	l.start = l.pos
    102 	l.runes = l.runes[:0]
    103 }
    104 
// ignore skips over the pending input before this point by moving the
// item start to the current position. It does not touch the runes
// already collected with appendRune.
func (l *lexer) ignore() {
	l.start = l.pos
}
    109 
// appendRune appends the rune to the value of the item currently being
// scanned. The collected runes become the item value when emit is called.
func (l *lexer) appendRune(r rune) {
	l.runes = append(l.runes, r)
}
    114 
    115 // accept consumes the next rune if it's from the valid set.
    116 func (l *lexer) accept(valid string) bool {
    117 	if strings.ContainsRune(valid, l.next()) {
    118 		return true
    119 	}
    120 	l.backup()
    121 	return false
    122 }
    123 
    124 // acceptRun consumes a run of runes from the valid set.
    125 func (l *lexer) acceptRun(valid string) {
    126 	for strings.ContainsRune(valid, l.next()) {
    127 	}
    128 	l.backup()
    129 }
    130 
    131 // lineNumber reports which line we're on, based on the position of
    132 // the previous item returned by nextItem. Doing it this way
    133 // means we don't have to worry about peek double counting.
    134 func (l *lexer) lineNumber() int {
    135 	return 1 + strings.Count(l.input[:l.lastPos], "\n")
    136 }
    137 
    138 // errorf returns an error token and terminates the scan by passing
    139 // back a nil pointer that will be the next state, terminating l.nextItem.
    140 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
    141 	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
    142 	return nil
    143 }
    144 
    145 // nextItem returns the next item from the input.
    146 func (l *lexer) nextItem() item {
    147 	i := <-l.items
    148 	l.lastPos = i.pos
    149 	return i
    150 }
    151 
    152 // lex creates a new scanner for the input string.
    153 func lex(input string) *lexer {
    154 	l := &lexer{
    155 		input: input,
    156 		items: make(chan item),
    157 		runes: make([]rune, 0, 32),
    158 	}
    159 	go l.run()
    160 	return l
    161 }
    162 
    163 // run runs the state machine for the lexer.
    164 func (l *lexer) run() {
    165 	for l.state = lexBeforeKey(l); l.state != nil; {
    166 		l.state = l.state(l)
    167 	}
    168 }
    169 
    170 // state functions
    171 
    172 // lexBeforeKey scans until a key begins.
    173 func lexBeforeKey(l *lexer) stateFn {
    174 	switch r := l.next(); {
    175 	case isEOF(r):
    176 		l.emit(itemEOF)
    177 		return nil
    178 
    179 	case isEOL(r):
    180 		l.ignore()
    181 		return lexBeforeKey
    182 
    183 	case isComment(r):
    184 		return lexComment
    185 
    186 	case isWhitespace(r):
    187 		l.ignore()
    188 		return lexBeforeKey
    189 
    190 	default:
    191 		l.backup()
    192 		return lexKey
    193 	}
    194 }
    195 
    196 // lexComment scans a comment line. The comment character has already been scanned.
    197 func lexComment(l *lexer) stateFn {
    198 	l.acceptRun(whitespace)
    199 	l.ignore()
    200 	for {
    201 		switch r := l.next(); {
    202 		case isEOF(r):
    203 			l.ignore()
    204 			l.emit(itemEOF)
    205 			return nil
    206 		case isEOL(r):
    207 			l.emit(itemComment)
    208 			return lexBeforeKey
    209 		default:
    210 			l.appendRune(r)
    211 		}
    212 	}
    213 }
    214 
    215 // lexKey scans the key up to a delimiter
    216 func lexKey(l *lexer) stateFn {
    217 	var r rune
    218 
    219 Loop:
    220 	for {
    221 		switch r = l.next(); {
    222 
    223 		case isEscape(r):
    224 			err := l.scanEscapeSequence()
    225 			if err != nil {
    226 				return l.errorf(err.Error())
    227 			}
    228 
    229 		case isEndOfKey(r):
    230 			l.backup()
    231 			break Loop
    232 
    233 		case isEOF(r):
    234 			break Loop
    235 
    236 		default:
    237 			l.appendRune(r)
    238 		}
    239 	}
    240 
    241 	if len(l.runes) > 0 {
    242 		l.emit(itemKey)
    243 	}
    244 
    245 	if isEOF(r) {
    246 		l.emit(itemEOF)
    247 		return nil
    248 	}
    249 
    250 	return lexBeforeValue
    251 }
    252 
    253 // lexBeforeValue scans the delimiter between key and value.
    254 // Leading and trailing whitespace is ignored.
    255 // We expect to be just after the key.
    256 func lexBeforeValue(l *lexer) stateFn {
    257 	l.acceptRun(whitespace)
    258 	l.accept(":=")
    259 	l.acceptRun(whitespace)
    260 	l.ignore()
    261 	return lexValue
    262 }
    263 
    264 // lexValue scans text until the end of the line. We expect to be just after the delimiter.
    265 func lexValue(l *lexer) stateFn {
    266 	for {
    267 		switch r := l.next(); {
    268 		case isEscape(r):
    269 			if isEOL(l.peek()) {
    270 				l.next()
    271 				l.acceptRun(whitespace)
    272 			} else {
    273 				err := l.scanEscapeSequence()
    274 				if err != nil {
    275 					return l.errorf(err.Error())
    276 				}
    277 			}
    278 
    279 		case isEOL(r):
    280 			l.emit(itemValue)
    281 			l.ignore()
    282 			return lexBeforeKey
    283 
    284 		case isEOF(r):
    285 			l.emit(itemValue)
    286 			l.emit(itemEOF)
    287 			return nil
    288 
    289 		default:
    290 			l.appendRune(r)
    291 		}
    292 	}
    293 }
    294 
    295 // scanEscapeSequence scans either one of the escaped characters
    296 // or a unicode literal. We expect to be after the escape character.
    297 func (l *lexer) scanEscapeSequence() error {
    298 	switch r := l.next(); {
    299 
    300 	case isEscapedCharacter(r):
    301 		l.appendRune(decodeEscapedCharacter(r))
    302 		return nil
    303 
    304 	case atUnicodeLiteral(r):
    305 		return l.scanUnicodeLiteral()
    306 
    307 	case isEOF(r):
    308 		return fmt.Errorf("premature EOF")
    309 
    310 	// silently drop the escape character and append the rune as is
    311 	default:
    312 		l.appendRune(r)
    313 		return nil
    314 	}
    315 }
    316 
    317 // scans a unicode literal in the form \uXXXX. We expect to be after the \u.
    318 func (l *lexer) scanUnicodeLiteral() error {
    319 	// scan the digits
    320 	d := make([]rune, 4)
    321 	for i := 0; i < 4; i++ {
    322 		d[i] = l.next()
    323 		if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) {
    324 			return fmt.Errorf("invalid unicode literal")
    325 		}
    326 	}
    327 
    328 	// decode the digits into a rune
    329 	r, err := strconv.ParseInt(string(d), 16, 0)
    330 	if err != nil {
    331 		return err
    332 	}
    333 
    334 	l.appendRune(rune(r))
    335 	return nil
    336 }
    337 
    338 // decodeEscapedCharacter returns the unescaped rune. We expect to be after the escape character.
    339 func decodeEscapedCharacter(r rune) rune {
    340 	switch r {
    341 	case 'f':
    342 		return '\f'
    343 	case 'n':
    344 		return '\n'
    345 	case 'r':
    346 		return '\r'
    347 	case 't':
    348 		return '\t'
    349 	default:
    350 		return r
    351 	}
    352 }
    353 
    354 // atUnicodeLiteral reports whether we are at a unicode literal.
    355 // The escape character has already been consumed.
    356 func atUnicodeLiteral(r rune) bool {
    357 	return r == 'u'
    358 }
    359 
    360 // isComment reports whether we are at the start of a comment.
    361 func isComment(r rune) bool {
    362 	return r == '#' || r == '!'
    363 }
    364 
    365 // isEndOfKey reports whether the rune terminates the current key.
    366 func isEndOfKey(r rune) bool {
    367 	return strings.ContainsRune(" \f\t\r\n:=", r)
    368 }
    369 
// isEOF reports whether r is the eof sentinel, i.e. whether the end of
// the input has been reached.
func isEOF(r rune) bool {
	return r == eof
}
    374 
    375 // isEOL reports whether we are at a new line character.
    376 func isEOL(r rune) bool {
    377 	return r == '\n' || r == '\r'
    378 }
    379 
    380 // isEscape reports whether the rune is the escape character which
    381 // prefixes unicode literals and other escaped characters.
    382 func isEscape(r rune) bool {
    383 	return r == '\\'
    384 }
    385 
    386 // isEscapedCharacter reports whether we are at one of the characters that need escaping.
    387 // The escape character has already been consumed.
    388 func isEscapedCharacter(r rune) bool {
    389 	return strings.ContainsRune(" :=fnrt", r)
    390 }
    391 
    392 // isWhitespace reports whether the rune is a whitespace character.
    393 func isWhitespace(r rune) bool {
    394 	return strings.ContainsRune(whitespace, r)
    395 }