gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

parser.go (26946B)


      1 package unstable
      2 
      3 import (
      4 	"bytes"
      5 	"fmt"
      6 	"unicode"
      7 
      8 	"github.com/pelletier/go-toml/v2/internal/characters"
      9 	"github.com/pelletier/go-toml/v2/internal/danger"
     10 )
     11 
     12 // ParserError describes an error relative to the content of the document.
     13 //
     14 // It cannot outlive the instance of Parser it refers to, and may cause panics
     15 // if the parser is reset.
     16 type ParserError struct {
     17 	Highlight []byte
     18 	Message   string
     19 	Key       []string // optional
     20 }
     21 
     22 // Error is the implementation of the error interface.
     23 func (e *ParserError) Error() string {
     24 	return e.Message
     25 }
     26 
     27 // NewParserError is a convenience function to create a ParserError
     28 //
     29 // Warning: Highlight needs to be a subslice of Parser.data, so only slices
     30 // returned by Parser.Raw are valid candidates.
     31 func NewParserError(highlight []byte, format string, args ...interface{}) error {
     32 	return &ParserError{
     33 		Highlight: highlight,
     34 		Message:   fmt.Errorf(format, args...).Error(),
     35 	}
     36 }
     37 
     38 // Parser scans over a TOML-encoded document and generates an iterative AST.
     39 //
     40 // To prime the Parser, first reset it with the contents of a TOML document.
     41 // Then, process all top-level expressions sequentially. See Example.
     42 //
     43 // Don't forget to check Error() after you're done parsing.
     44 //
     45 // Each top-level expression needs to be fully processed before calling
     46 // NextExpression() again. Otherwise, calls to various Node methods may panic if
     47 // the parser has moved on the next expression.
     48 //
     49 // For performance reasons, go-toml doesn't make a copy of the input bytes to
     50 // the parser. Make sure to copy all the bytes you need to outlive the slice
     51 // given to the parser.
     52 type Parser struct {
     53 	data    []byte
     54 	builder builder
     55 	ref     reference
     56 	left    []byte
     57 	err     error
     58 	first   bool
     59 
     60 	KeepComments bool
     61 }
     62 
     63 // Data returns the slice provided to the last call to Reset.
     64 func (p *Parser) Data() []byte {
     65 	return p.data
     66 }
     67 
     68 // Range returns a range description that corresponds to a given slice of the
     69 // input. If the argument is not a subslice of the parser input, this function
     70 // panics.
     71 func (p *Parser) Range(b []byte) Range {
     72 	return Range{
     73 		Offset: uint32(danger.SubsliceOffset(p.data, b)),
     74 		Length: uint32(len(b)),
     75 	}
     76 }
     77 
     78 // Raw returns the slice corresponding to the bytes in the given range.
     79 func (p *Parser) Raw(raw Range) []byte {
     80 	return p.data[raw.Offset : raw.Offset+raw.Length]
     81 }
     82 
     83 // Reset brings the parser to its initial state for a given input. It wipes an
     84 // reuses internal storage to reduce allocation.
     85 func (p *Parser) Reset(b []byte) {
     86 	p.builder.Reset()
     87 	p.ref = invalidReference
     88 	p.data = b
     89 	p.left = b
     90 	p.err = nil
     91 	p.first = true
     92 }
     93 
     94 // NextExpression parses the next top-level expression. If an expression was
     95 // successfully parsed, it returns true. If the parser is at the end of the
     96 // document or an error occurred, it returns false.
     97 //
     98 // Retrieve the parsed expression with Expression().
     99 func (p *Parser) NextExpression() bool {
    100 	if len(p.left) == 0 || p.err != nil {
    101 		return false
    102 	}
    103 
    104 	p.builder.Reset()
    105 	p.ref = invalidReference
    106 
    107 	for {
    108 		if len(p.left) == 0 || p.err != nil {
    109 			return false
    110 		}
    111 
    112 		if !p.first {
    113 			p.left, p.err = p.parseNewline(p.left)
    114 		}
    115 
    116 		if len(p.left) == 0 || p.err != nil {
    117 			return false
    118 		}
    119 
    120 		p.ref, p.left, p.err = p.parseExpression(p.left)
    121 
    122 		if p.err != nil {
    123 			return false
    124 		}
    125 
    126 		p.first = false
    127 
    128 		if p.ref.Valid() {
    129 			return true
    130 		}
    131 	}
    132 }
    133 
    134 // Expression returns a pointer to the node representing the last successfully
    135 // parsed expression.
    136 func (p *Parser) Expression() *Node {
    137 	return p.builder.NodeAt(p.ref)
    138 }
    139 
    140 // Error returns any error that has occurred during parsing.
    141 func (p *Parser) Error() error {
    142 	return p.err
    143 }
    144 
    145 // Position describes a position in the input.
    146 type Position struct {
    147 	// Number of bytes from the beginning of the input.
    148 	Offset int
    149 	// Line number, starting at 1.
    150 	Line int
    151 	// Column number, starting at 1.
    152 	Column int
    153 }
    154 
    155 // Shape describes the position of a range in the input.
    156 type Shape struct {
    157 	Start Position
    158 	End   Position
    159 }
    160 
    161 func (p *Parser) position(b []byte) Position {
    162 	offset := danger.SubsliceOffset(p.data, b)
    163 
    164 	lead := p.data[:offset]
    165 
    166 	return Position{
    167 		Offset: offset,
    168 		Line:   bytes.Count(lead, []byte{'\n'}) + 1,
    169 		Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
    170 	}
    171 }
    172 
    173 // Shape returns the shape of the given range in the input.  Will
    174 // panic if the range is not a subslice of the input.
    175 func (p *Parser) Shape(r Range) Shape {
    176 	raw := p.Raw(r)
    177 	return Shape{
    178 		Start: p.position(raw),
    179 		End:   p.position(raw[r.Length:]),
    180 	}
    181 }
    182 
    183 func (p *Parser) parseNewline(b []byte) ([]byte, error) {
    184 	if b[0] == '\n' {
    185 		return b[1:], nil
    186 	}
    187 
    188 	if b[0] == '\r' {
    189 		_, rest, err := scanWindowsNewline(b)
    190 		return rest, err
    191 	}
    192 
    193 	return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
    194 }
    195 
    196 func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
    197 	ref := invalidReference
    198 	data, rest, err := scanComment(b)
    199 	if p.KeepComments && err == nil {
    200 		ref = p.builder.Push(Node{
    201 			Kind: Comment,
    202 			Raw:  p.Range(data),
    203 			Data: data,
    204 		})
    205 	}
    206 	return ref, rest, err
    207 }
    208 
    209 func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
    210 	// expression =  ws [ comment ]
    211 	// expression =/ ws keyval ws [ comment ]
    212 	// expression =/ ws table ws [ comment ]
    213 	ref := invalidReference
    214 
    215 	b = p.parseWhitespace(b)
    216 
    217 	if len(b) == 0 {
    218 		return ref, b, nil
    219 	}
    220 
    221 	if b[0] == '#' {
    222 		ref, rest, err := p.parseComment(b)
    223 		return ref, rest, err
    224 	}
    225 
    226 	if b[0] == '\n' || b[0] == '\r' {
    227 		return ref, b, nil
    228 	}
    229 
    230 	var err error
    231 	if b[0] == '[' {
    232 		ref, b, err = p.parseTable(b)
    233 	} else {
    234 		ref, b, err = p.parseKeyval(b)
    235 	}
    236 
    237 	if err != nil {
    238 		return ref, nil, err
    239 	}
    240 
    241 	b = p.parseWhitespace(b)
    242 
    243 	if len(b) > 0 && b[0] == '#' {
    244 		cref, rest, err := p.parseComment(b)
    245 		if cref != invalidReference {
    246 			p.builder.Chain(ref, cref)
    247 		}
    248 		return ref, rest, err
    249 	}
    250 
    251 	return ref, b, nil
    252 }
    253 
    254 func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
    255 	// table = std-table / array-table
    256 	if len(b) > 1 && b[1] == '[' {
    257 		return p.parseArrayTable(b)
    258 	}
    259 
    260 	return p.parseStdTable(b)
    261 }
    262 
    263 func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
    264 	// array-table = array-table-open key array-table-close
    265 	// array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
    266 	// array-table-close = ws %x5D.5D  ; ]] Double right square bracket
    267 	ref := p.builder.Push(Node{
    268 		Kind: ArrayTable,
    269 	})
    270 
    271 	b = b[2:]
    272 	b = p.parseWhitespace(b)
    273 
    274 	k, b, err := p.parseKey(b)
    275 	if err != nil {
    276 		return ref, nil, err
    277 	}
    278 
    279 	p.builder.AttachChild(ref, k)
    280 	b = p.parseWhitespace(b)
    281 
    282 	b, err = expect(']', b)
    283 	if err != nil {
    284 		return ref, nil, err
    285 	}
    286 
    287 	b, err = expect(']', b)
    288 
    289 	return ref, b, err
    290 }
    291 
    292 func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
    293 	// std-table = std-table-open key std-table-close
    294 	// std-table-open  = %x5B ws     ; [ Left square bracket
    295 	// std-table-close = ws %x5D     ; ] Right square bracket
    296 	ref := p.builder.Push(Node{
    297 		Kind: Table,
    298 	})
    299 
    300 	b = b[1:]
    301 	b = p.parseWhitespace(b)
    302 
    303 	key, b, err := p.parseKey(b)
    304 	if err != nil {
    305 		return ref, nil, err
    306 	}
    307 
    308 	p.builder.AttachChild(ref, key)
    309 
    310 	b = p.parseWhitespace(b)
    311 
    312 	b, err = expect(']', b)
    313 
    314 	return ref, b, err
    315 }
    316 
    317 func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
    318 	// keyval = key keyval-sep val
    319 	ref := p.builder.Push(Node{
    320 		Kind: KeyValue,
    321 	})
    322 
    323 	key, b, err := p.parseKey(b)
    324 	if err != nil {
    325 		return invalidReference, nil, err
    326 	}
    327 
    328 	// keyval-sep = ws %x3D ws ; =
    329 
    330 	b = p.parseWhitespace(b)
    331 
    332 	if len(b) == 0 {
    333 		return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
    334 	}
    335 
    336 	b, err = expect('=', b)
    337 	if err != nil {
    338 		return invalidReference, nil, err
    339 	}
    340 
    341 	b = p.parseWhitespace(b)
    342 
    343 	valRef, b, err := p.parseVal(b)
    344 	if err != nil {
    345 		return ref, b, err
    346 	}
    347 
    348 	p.builder.Chain(valRef, key)
    349 	p.builder.AttachChild(ref, valRef)
    350 
    351 	return ref, b, err
    352 }
    353 
    354 //nolint:cyclop,funlen
    355 func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
    356 	// val = string / boolean / array / inline-table / date-time / float / integer
    357 	ref := invalidReference
    358 
    359 	if len(b) == 0 {
    360 		return ref, nil, NewParserError(b, "expected value, not eof")
    361 	}
    362 
    363 	var err error
    364 	c := b[0]
    365 
    366 	switch c {
    367 	case '"':
    368 		var raw []byte
    369 		var v []byte
    370 		if scanFollowsMultilineBasicStringDelimiter(b) {
    371 			raw, v, b, err = p.parseMultilineBasicString(b)
    372 		} else {
    373 			raw, v, b, err = p.parseBasicString(b)
    374 		}
    375 
    376 		if err == nil {
    377 			ref = p.builder.Push(Node{
    378 				Kind: String,
    379 				Raw:  p.Range(raw),
    380 				Data: v,
    381 			})
    382 		}
    383 
    384 		return ref, b, err
    385 	case '\'':
    386 		var raw []byte
    387 		var v []byte
    388 		if scanFollowsMultilineLiteralStringDelimiter(b) {
    389 			raw, v, b, err = p.parseMultilineLiteralString(b)
    390 		} else {
    391 			raw, v, b, err = p.parseLiteralString(b)
    392 		}
    393 
    394 		if err == nil {
    395 			ref = p.builder.Push(Node{
    396 				Kind: String,
    397 				Raw:  p.Range(raw),
    398 				Data: v,
    399 			})
    400 		}
    401 
    402 		return ref, b, err
    403 	case 't':
    404 		if !scanFollowsTrue(b) {
    405 			return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
    406 		}
    407 
    408 		ref = p.builder.Push(Node{
    409 			Kind: Bool,
    410 			Data: b[:4],
    411 		})
    412 
    413 		return ref, b[4:], nil
    414 	case 'f':
    415 		if !scanFollowsFalse(b) {
    416 			return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
    417 		}
    418 
    419 		ref = p.builder.Push(Node{
    420 			Kind: Bool,
    421 			Data: b[:5],
    422 		})
    423 
    424 		return ref, b[5:], nil
    425 	case '[':
    426 		return p.parseValArray(b)
    427 	case '{':
    428 		return p.parseInlineTable(b)
    429 	default:
    430 		return p.parseIntOrFloatOrDateTime(b)
    431 	}
    432 }
    433 
    434 func atmost(b []byte, n int) []byte {
    435 	if n >= len(b) {
    436 		return b
    437 	}
    438 
    439 	return b[:n]
    440 }
    441 
    442 func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
    443 	v, rest, err := scanLiteralString(b)
    444 	if err != nil {
    445 		return nil, nil, nil, err
    446 	}
    447 
    448 	return v, v[1 : len(v)-1], rest, nil
    449 }
    450 
    451 func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
    452 	// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
    453 	// inline-table-open  = %x7B ws     ; {
    454 	// inline-table-close = ws %x7D     ; }
    455 	// inline-table-sep   = ws %x2C ws  ; , Comma
    456 	// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
    457 	parent := p.builder.Push(Node{
    458 		Kind: InlineTable,
    459 		Raw:  p.Range(b[:1]),
    460 	})
    461 
    462 	first := true
    463 
    464 	var child reference
    465 
    466 	b = b[1:]
    467 
    468 	var err error
    469 
    470 	for len(b) > 0 {
    471 		previousB := b
    472 		b = p.parseWhitespace(b)
    473 
    474 		if len(b) == 0 {
    475 			return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
    476 		}
    477 
    478 		if b[0] == '}' {
    479 			break
    480 		}
    481 
    482 		if !first {
    483 			b, err = expect(',', b)
    484 			if err != nil {
    485 				return parent, nil, err
    486 			}
    487 			b = p.parseWhitespace(b)
    488 		}
    489 
    490 		var kv reference
    491 
    492 		kv, b, err = p.parseKeyval(b)
    493 		if err != nil {
    494 			return parent, nil, err
    495 		}
    496 
    497 		if first {
    498 			p.builder.AttachChild(parent, kv)
    499 		} else {
    500 			p.builder.Chain(child, kv)
    501 		}
    502 		child = kv
    503 
    504 		first = false
    505 	}
    506 
    507 	rest, err := expect('}', b)
    508 
    509 	return parent, rest, err
    510 }
    511 
    512 //nolint:funlen,cyclop
    513 func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
    514 	// array = array-open [ array-values ] ws-comment-newline array-close
    515 	// array-open =  %x5B ; [
    516 	// array-close = %x5D ; ]
    517 	// array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
    518 	// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
    519 	// array-sep = %x2C  ; , Comma
    520 	// ws-comment-newline = *( wschar / [ comment ] newline )
    521 	arrayStart := b
    522 	b = b[1:]
    523 
    524 	parent := p.builder.Push(Node{
    525 		Kind: Array,
    526 	})
    527 
    528 	// First indicates whether the parser is looking for the first element
    529 	// (non-comment) of the array.
    530 	first := true
    531 
    532 	lastChild := invalidReference
    533 
    534 	addChild := func(valueRef reference) {
    535 		if lastChild == invalidReference {
    536 			p.builder.AttachChild(parent, valueRef)
    537 		} else {
    538 			p.builder.Chain(lastChild, valueRef)
    539 		}
    540 		lastChild = valueRef
    541 	}
    542 
    543 	var err error
    544 	for len(b) > 0 {
    545 		cref := invalidReference
    546 		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
    547 		if err != nil {
    548 			return parent, nil, err
    549 		}
    550 
    551 		if cref != invalidReference {
    552 			addChild(cref)
    553 		}
    554 
    555 		if len(b) == 0 {
    556 			return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
    557 		}
    558 
    559 		if b[0] == ']' {
    560 			break
    561 		}
    562 
    563 		if b[0] == ',' {
    564 			if first {
    565 				return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
    566 			}
    567 			b = b[1:]
    568 
    569 			cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
    570 			if err != nil {
    571 				return parent, nil, err
    572 			}
    573 			if cref != invalidReference {
    574 				addChild(cref)
    575 			}
    576 		} else if !first {
    577 			return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
    578 		}
    579 
    580 		// TOML allows trailing commas in arrays.
    581 		if len(b) > 0 && b[0] == ']' {
    582 			break
    583 		}
    584 
    585 		var valueRef reference
    586 		valueRef, b, err = p.parseVal(b)
    587 		if err != nil {
    588 			return parent, nil, err
    589 		}
    590 
    591 		addChild(valueRef)
    592 
    593 		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
    594 		if err != nil {
    595 			return parent, nil, err
    596 		}
    597 		if cref != invalidReference {
    598 			addChild(cref)
    599 		}
    600 
    601 		first = false
    602 	}
    603 
    604 	rest, err := expect(']', b)
    605 
    606 	return parent, rest, err
    607 }
    608 
    609 func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
    610 	rootCommentRef := invalidReference
    611 	latestCommentRef := invalidReference
    612 
    613 	addComment := func(ref reference) {
    614 		if rootCommentRef == invalidReference {
    615 			rootCommentRef = ref
    616 		} else if latestCommentRef == invalidReference {
    617 			p.builder.AttachChild(rootCommentRef, ref)
    618 			latestCommentRef = ref
    619 		} else {
    620 			p.builder.Chain(latestCommentRef, ref)
    621 			latestCommentRef = ref
    622 		}
    623 	}
    624 
    625 	for len(b) > 0 {
    626 		var err error
    627 		b = p.parseWhitespace(b)
    628 
    629 		if len(b) > 0 && b[0] == '#' {
    630 			var ref reference
    631 			ref, b, err = p.parseComment(b)
    632 			if err != nil {
    633 				return invalidReference, nil, err
    634 			}
    635 			if ref != invalidReference {
    636 				addComment(ref)
    637 			}
    638 		}
    639 
    640 		if len(b) == 0 {
    641 			break
    642 		}
    643 
    644 		if b[0] == '\n' || b[0] == '\r' {
    645 			b, err = p.parseNewline(b)
    646 			if err != nil {
    647 				return invalidReference, nil, err
    648 			}
    649 		} else {
    650 			break
    651 		}
    652 	}
    653 
    654 	return rootCommentRef, b, nil
    655 }
    656 
    657 func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
    658 	token, rest, err := scanMultilineLiteralString(b)
    659 	if err != nil {
    660 		return nil, nil, nil, err
    661 	}
    662 
    663 	i := 3
    664 
    665 	// skip the immediate new line
    666 	if token[i] == '\n' {
    667 		i++
    668 	} else if token[i] == '\r' && token[i+1] == '\n' {
    669 		i += 2
    670 	}
    671 
    672 	return token, token[i : len(token)-3], rest, err
    673 }
    674 
    675 //nolint:funlen,gocognit,cyclop
    676 func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
    677 	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
    678 	// ml-basic-string-delim
    679 	// ml-basic-string-delim = 3quotation-mark
    680 	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
    681 	//
    682 	// mlb-content = mlb-char / newline / mlb-escaped-nl
    683 	// mlb-char = mlb-unescaped / escaped
    684 	// mlb-quotes = 1*2quotation-mark
    685 	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
    686 	// mlb-escaped-nl = escape ws newline *( wschar / newline )
    687 	token, escaped, rest, err := scanMultilineBasicString(b)
    688 	if err != nil {
    689 		return nil, nil, nil, err
    690 	}
    691 
    692 	i := 3
    693 
    694 	// skip the immediate new line
    695 	if token[i] == '\n' {
    696 		i++
    697 	} else if token[i] == '\r' && token[i+1] == '\n' {
    698 		i += 2
    699 	}
    700 
    701 	// fast path
    702 	startIdx := i
    703 	endIdx := len(token) - len(`"""`)
    704 
    705 	if !escaped {
    706 		str := token[startIdx:endIdx]
    707 		verr := characters.Utf8TomlValidAlreadyEscaped(str)
    708 		if verr.Zero() {
    709 			return token, str, rest, nil
    710 		}
    711 		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
    712 	}
    713 
    714 	var builder bytes.Buffer
    715 
    716 	// The scanner ensures that the token starts and ends with quotes and that
    717 	// escapes are balanced.
    718 	for i < len(token)-3 {
    719 		c := token[i]
    720 
    721 		//nolint:nestif
    722 		if c == '\\' {
    723 			// When the last non-whitespace character on a line is an unescaped \,
    724 			// it will be trimmed along with all whitespace (including newlines) up
    725 			// to the next non-whitespace character or closing delimiter.
    726 
    727 			isLastNonWhitespaceOnLine := false
    728 			j := 1
    729 		findEOLLoop:
    730 			for ; j < len(token)-3-i; j++ {
    731 				switch token[i+j] {
    732 				case ' ', '\t':
    733 					continue
    734 				case '\r':
    735 					if token[i+j+1] == '\n' {
    736 						continue
    737 					}
    738 				case '\n':
    739 					isLastNonWhitespaceOnLine = true
    740 				}
    741 				break findEOLLoop
    742 			}
    743 			if isLastNonWhitespaceOnLine {
    744 				i += j
    745 				for ; i < len(token)-3; i++ {
    746 					c := token[i]
    747 					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
    748 						i--
    749 						break
    750 					}
    751 				}
    752 				i++
    753 				continue
    754 			}
    755 
    756 			// handle escaping
    757 			i++
    758 			c = token[i]
    759 
    760 			switch c {
    761 			case '"', '\\':
    762 				builder.WriteByte(c)
    763 			case 'b':
    764 				builder.WriteByte('\b')
    765 			case 'f':
    766 				builder.WriteByte('\f')
    767 			case 'n':
    768 				builder.WriteByte('\n')
    769 			case 'r':
    770 				builder.WriteByte('\r')
    771 			case 't':
    772 				builder.WriteByte('\t')
    773 			case 'e':
    774 				builder.WriteByte(0x1B)
    775 			case 'u':
    776 				x, err := hexToRune(atmost(token[i+1:], 4), 4)
    777 				if err != nil {
    778 					return nil, nil, nil, err
    779 				}
    780 				builder.WriteRune(x)
    781 				i += 4
    782 			case 'U':
    783 				x, err := hexToRune(atmost(token[i+1:], 8), 8)
    784 				if err != nil {
    785 					return nil, nil, nil, err
    786 				}
    787 
    788 				builder.WriteRune(x)
    789 				i += 8
    790 			default:
    791 				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
    792 			}
    793 			i++
    794 		} else {
    795 			size := characters.Utf8ValidNext(token[i:])
    796 			if size == 0 {
    797 				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
    798 			}
    799 			builder.Write(token[i : i+size])
    800 			i += size
    801 		}
    802 	}
    803 
    804 	return token, builder.Bytes(), rest, nil
    805 }
    806 
    807 func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
    808 	// key = simple-key / dotted-key
    809 	// simple-key = quoted-key / unquoted-key
    810 	//
    811 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
    812 	// quoted-key = basic-string / literal-string
    813 	// dotted-key = simple-key 1*( dot-sep simple-key )
    814 	//
    815 	// dot-sep   = ws %x2E ws  ; . Period
    816 	raw, key, b, err := p.parseSimpleKey(b)
    817 	if err != nil {
    818 		return invalidReference, nil, err
    819 	}
    820 
    821 	ref := p.builder.Push(Node{
    822 		Kind: Key,
    823 		Raw:  p.Range(raw),
    824 		Data: key,
    825 	})
    826 
    827 	for {
    828 		b = p.parseWhitespace(b)
    829 		if len(b) > 0 && b[0] == '.' {
    830 			b = p.parseWhitespace(b[1:])
    831 
    832 			raw, key, b, err = p.parseSimpleKey(b)
    833 			if err != nil {
    834 				return ref, nil, err
    835 			}
    836 
    837 			p.builder.PushAndChain(Node{
    838 				Kind: Key,
    839 				Raw:  p.Range(raw),
    840 				Data: key,
    841 			})
    842 		} else {
    843 			break
    844 		}
    845 	}
    846 
    847 	return ref, b, nil
    848 }
    849 
    850 func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
    851 	if len(b) == 0 {
    852 		return nil, nil, nil, NewParserError(b, "expected key but found none")
    853 	}
    854 
    855 	// simple-key = quoted-key / unquoted-key
    856 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
    857 	// quoted-key = basic-string / literal-string
    858 	switch {
    859 	case b[0] == '\'':
    860 		return p.parseLiteralString(b)
    861 	case b[0] == '"':
    862 		return p.parseBasicString(b)
    863 	case isUnquotedKeyChar(b[0]):
    864 		key, rest = scanUnquotedKey(b)
    865 		return key, key, rest, nil
    866 	default:
    867 		return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
    868 	}
    869 }
    870 
    871 //nolint:funlen,cyclop
    872 func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
    873 	// basic-string = quotation-mark *basic-char quotation-mark
    874 	// quotation-mark = %x22            ; "
    875 	// basic-char = basic-unescaped / escaped
    876 	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
    877 	// escaped = escape escape-seq-char
    878 	// escape-seq-char =  %x22         ; "    quotation mark  U+0022
    879 	// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
    880 	// escape-seq-char =/ %x62         ; b    backspace       U+0008
    881 	// escape-seq-char =/ %x66         ; f    form feed       U+000C
    882 	// escape-seq-char =/ %x6E         ; n    line feed       U+000A
    883 	// escape-seq-char =/ %x72         ; r    carriage return U+000D
    884 	// escape-seq-char =/ %x74         ; t    tab             U+0009
    885 	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
    886 	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
    887 	token, escaped, rest, err := scanBasicString(b)
    888 	if err != nil {
    889 		return nil, nil, nil, err
    890 	}
    891 
    892 	startIdx := len(`"`)
    893 	endIdx := len(token) - len(`"`)
    894 
    895 	// Fast path. If there is no escape sequence, the string should just be
    896 	// an UTF-8 encoded string, which is the same as Go. In that case,
    897 	// validate the string and return a direct reference to the buffer.
    898 	if !escaped {
    899 		str := token[startIdx:endIdx]
    900 		verr := characters.Utf8TomlValidAlreadyEscaped(str)
    901 		if verr.Zero() {
    902 			return token, str, rest, nil
    903 		}
    904 		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
    905 	}
    906 
    907 	i := startIdx
    908 
    909 	var builder bytes.Buffer
    910 
    911 	// The scanner ensures that the token starts and ends with quotes and that
    912 	// escapes are balanced.
    913 	for i < len(token)-1 {
    914 		c := token[i]
    915 		if c == '\\' {
    916 			i++
    917 			c = token[i]
    918 
    919 			switch c {
    920 			case '"', '\\':
    921 				builder.WriteByte(c)
    922 			case 'b':
    923 				builder.WriteByte('\b')
    924 			case 'f':
    925 				builder.WriteByte('\f')
    926 			case 'n':
    927 				builder.WriteByte('\n')
    928 			case 'r':
    929 				builder.WriteByte('\r')
    930 			case 't':
    931 				builder.WriteByte('\t')
    932 			case 'e':
    933 				builder.WriteByte(0x1B)
    934 			case 'u':
    935 				x, err := hexToRune(token[i+1:len(token)-1], 4)
    936 				if err != nil {
    937 					return nil, nil, nil, err
    938 				}
    939 
    940 				builder.WriteRune(x)
    941 				i += 4
    942 			case 'U':
    943 				x, err := hexToRune(token[i+1:len(token)-1], 8)
    944 				if err != nil {
    945 					return nil, nil, nil, err
    946 				}
    947 
    948 				builder.WriteRune(x)
    949 				i += 8
    950 			default:
    951 				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
    952 			}
    953 			i++
    954 		} else {
    955 			size := characters.Utf8ValidNext(token[i:])
    956 			if size == 0 {
    957 				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
    958 			}
    959 			builder.Write(token[i : i+size])
    960 			i += size
    961 		}
    962 	}
    963 
    964 	return token, builder.Bytes(), rest, nil
    965 }
    966 
    967 func hexToRune(b []byte, length int) (rune, error) {
    968 	if len(b) < length {
    969 		return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
    970 	}
    971 	b = b[:length]
    972 
    973 	var r uint32
    974 	for i, c := range b {
    975 		d := uint32(0)
    976 		switch {
    977 		case '0' <= c && c <= '9':
    978 			d = uint32(c - '0')
    979 		case 'a' <= c && c <= 'f':
    980 			d = uint32(c - 'a' + 10)
    981 		case 'A' <= c && c <= 'F':
    982 			d = uint32(c - 'A' + 10)
    983 		default:
    984 			return -1, NewParserError(b[i:i+1], "non-hex character")
    985 		}
    986 		r = r*16 + d
    987 	}
    988 
    989 	if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
    990 		return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
    991 	}
    992 
    993 	return rune(r), nil
    994 }
    995 
    996 func (p *Parser) parseWhitespace(b []byte) []byte {
    997 	// ws = *wschar
    998 	// wschar =  %x20  ; Space
    999 	// wschar =/ %x09  ; Horizontal tab
   1000 	_, rest := scanWhitespace(b)
   1001 
   1002 	return rest
   1003 }
   1004 
   1005 //nolint:cyclop
   1006 func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
   1007 	switch b[0] {
   1008 	case 'i':
   1009 		if !scanFollowsInf(b) {
   1010 			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
   1011 		}
   1012 
   1013 		return p.builder.Push(Node{
   1014 			Kind: Float,
   1015 			Data: b[:3],
   1016 		}), b[3:], nil
   1017 	case 'n':
   1018 		if !scanFollowsNan(b) {
   1019 			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
   1020 		}
   1021 
   1022 		return p.builder.Push(Node{
   1023 			Kind: Float,
   1024 			Data: b[:3],
   1025 		}), b[3:], nil
   1026 	case '+', '-':
   1027 		return p.scanIntOrFloat(b)
   1028 	}
   1029 
   1030 	if len(b) < 3 {
   1031 		return p.scanIntOrFloat(b)
   1032 	}
   1033 
   1034 	s := 5
   1035 	if len(b) < s {
   1036 		s = len(b)
   1037 	}
   1038 
   1039 	for idx, c := range b[:s] {
   1040 		if isDigit(c) {
   1041 			continue
   1042 		}
   1043 
   1044 		if idx == 2 && c == ':' || (idx == 4 && c == '-') {
   1045 			return p.scanDateTime(b)
   1046 		}
   1047 
   1048 		break
   1049 	}
   1050 
   1051 	return p.scanIntOrFloat(b)
   1052 }
   1053 
   1054 func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
   1055 	// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
   1056 	// followed by a digit.
   1057 	hasDate := false
   1058 	hasTime := false
   1059 	hasTz := false
   1060 	seenSpace := false
   1061 
   1062 	i := 0
   1063 byteLoop:
   1064 	for ; i < len(b); i++ {
   1065 		c := b[i]
   1066 
   1067 		switch {
   1068 		case isDigit(c):
   1069 		case c == '-':
   1070 			hasDate = true
   1071 			const minOffsetOfTz = 8
   1072 			if i >= minOffsetOfTz {
   1073 				hasTz = true
   1074 			}
   1075 		case c == 'T' || c == 't' || c == ':' || c == '.':
   1076 			hasTime = true
   1077 		case c == '+' || c == '-' || c == 'Z' || c == 'z':
   1078 			hasTz = true
   1079 		case c == ' ':
   1080 			if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
   1081 				i += 2
   1082 				// Avoid reaching past the end of the document in case the time
   1083 				// is malformed. See TestIssue585.
   1084 				if i >= len(b) {
   1085 					i--
   1086 				}
   1087 				seenSpace = true
   1088 				hasTime = true
   1089 			} else {
   1090 				break byteLoop
   1091 			}
   1092 		default:
   1093 			break byteLoop
   1094 		}
   1095 	}
   1096 
   1097 	var kind Kind
   1098 
   1099 	if hasTime {
   1100 		if hasDate {
   1101 			if hasTz {
   1102 				kind = DateTime
   1103 			} else {
   1104 				kind = LocalDateTime
   1105 			}
   1106 		} else {
   1107 			kind = LocalTime
   1108 		}
   1109 	} else {
   1110 		kind = LocalDate
   1111 	}
   1112 
   1113 	return p.builder.Push(Node{
   1114 		Kind: kind,
   1115 		Data: b[:i],
   1116 	}), b[i:], nil
   1117 }
   1118 
   1119 //nolint:funlen,gocognit,cyclop
   1120 func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
   1121 	i := 0
   1122 
   1123 	if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
   1124 		var isValidRune validRuneFn
   1125 
   1126 		switch b[1] {
   1127 		case 'x':
   1128 			isValidRune = isValidHexRune
   1129 		case 'o':
   1130 			isValidRune = isValidOctalRune
   1131 		case 'b':
   1132 			isValidRune = isValidBinaryRune
   1133 		default:
   1134 			i++
   1135 		}
   1136 
   1137 		if isValidRune != nil {
   1138 			i += 2
   1139 			for ; i < len(b); i++ {
   1140 				if !isValidRune(b[i]) {
   1141 					break
   1142 				}
   1143 			}
   1144 		}
   1145 
   1146 		return p.builder.Push(Node{
   1147 			Kind: Integer,
   1148 			Data: b[:i],
   1149 		}), b[i:], nil
   1150 	}
   1151 
   1152 	isFloat := false
   1153 
   1154 	for ; i < len(b); i++ {
   1155 		c := b[i]
   1156 
   1157 		if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
   1158 			continue
   1159 		}
   1160 
   1161 		if c == '.' || c == 'e' || c == 'E' {
   1162 			isFloat = true
   1163 
   1164 			continue
   1165 		}
   1166 
   1167 		if c == 'i' {
   1168 			if scanFollowsInf(b[i:]) {
   1169 				return p.builder.Push(Node{
   1170 					Kind: Float,
   1171 					Data: b[:i+3],
   1172 				}), b[i+3:], nil
   1173 			}
   1174 
   1175 			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
   1176 		}
   1177 
   1178 		if c == 'n' {
   1179 			if scanFollowsNan(b[i:]) {
   1180 				return p.builder.Push(Node{
   1181 					Kind: Float,
   1182 					Data: b[:i+3],
   1183 				}), b[i+3:], nil
   1184 			}
   1185 
   1186 			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
   1187 		}
   1188 
   1189 		break
   1190 	}
   1191 
   1192 	if i == 0 {
   1193 		return invalidReference, b, NewParserError(b, "incomplete number")
   1194 	}
   1195 
   1196 	kind := Integer
   1197 
   1198 	if isFloat {
   1199 		kind = Float
   1200 	}
   1201 
   1202 	return p.builder.Push(Node{
   1203 		Kind: kind,
   1204 		Data: b[:i],
   1205 	}), b[i:], nil
   1206 }
   1207 
   1208 func isDigit(r byte) bool {
   1209 	return r >= '0' && r <= '9'
   1210 }
   1211 
   1212 type validRuneFn func(r byte) bool
   1213 
   1214 func isValidHexRune(r byte) bool {
   1215 	return r >= 'a' && r <= 'f' ||
   1216 		r >= 'A' && r <= 'F' ||
   1217 		r >= '0' && r <= '9' ||
   1218 		r == '_'
   1219 }
   1220 
   1221 func isValidOctalRune(r byte) bool {
   1222 	return r >= '0' && r <= '7' || r == '_'
   1223 }
   1224 
   1225 func isValidBinaryRune(r byte) bool {
   1226 	return r == '0' || r == '1' || r == '_'
   1227 }
   1228 
   1229 func expect(x byte, b []byte) ([]byte, error) {
   1230 	if len(b) == 0 {
   1231 		return nil, NewParserError(b, "expected character %c but the document ended here", x)
   1232 	}
   1233 
   1234 	if b[0] != x {
   1235 		return nil, NewParserError(b[0:1], "expected character %c", x)
   1236 	}
   1237 
   1238 	return b[1:], nil
   1239 }