parser.go (26946B)
1 package unstable 2 3 import ( 4 "bytes" 5 "fmt" 6 "unicode" 7 8 "github.com/pelletier/go-toml/v2/internal/characters" 9 "github.com/pelletier/go-toml/v2/internal/danger" 10 ) 11 12 // ParserError describes an error relative to the content of the document. 13 // 14 // It cannot outlive the instance of Parser it refers to, and may cause panics 15 // if the parser is reset. 16 type ParserError struct { 17 Highlight []byte 18 Message string 19 Key []string // optional 20 } 21 22 // Error is the implementation of the error interface. 23 func (e *ParserError) Error() string { 24 return e.Message 25 } 26 27 // NewParserError is a convenience function to create a ParserError 28 // 29 // Warning: Highlight needs to be a subslice of Parser.data, so only slices 30 // returned by Parser.Raw are valid candidates. 31 func NewParserError(highlight []byte, format string, args ...interface{}) error { 32 return &ParserError{ 33 Highlight: highlight, 34 Message: fmt.Errorf(format, args...).Error(), 35 } 36 } 37 38 // Parser scans over a TOML-encoded document and generates an iterative AST. 39 // 40 // To prime the Parser, first reset it with the contents of a TOML document. 41 // Then, process all top-level expressions sequentially. See Example. 42 // 43 // Don't forget to check Error() after you're done parsing. 44 // 45 // Each top-level expression needs to be fully processed before calling 46 // NextExpression() again. Otherwise, calls to various Node methods may panic if 47 // the parser has moved on the next expression. 48 // 49 // For performance reasons, go-toml doesn't make a copy of the input bytes to 50 // the parser. Make sure to copy all the bytes you need to outlive the slice 51 // given to the parser. 52 type Parser struct { 53 data []byte 54 builder builder 55 ref reference 56 left []byte 57 err error 58 first bool 59 60 KeepComments bool 61 } 62 63 // Data returns the slice provided to the last call to Reset. 
64 func (p *Parser) Data() []byte { 65 return p.data 66 } 67 68 // Range returns a range description that corresponds to a given slice of the 69 // input. If the argument is not a subslice of the parser input, this function 70 // panics. 71 func (p *Parser) Range(b []byte) Range { 72 return Range{ 73 Offset: uint32(danger.SubsliceOffset(p.data, b)), 74 Length: uint32(len(b)), 75 } 76 } 77 78 // Raw returns the slice corresponding to the bytes in the given range. 79 func (p *Parser) Raw(raw Range) []byte { 80 return p.data[raw.Offset : raw.Offset+raw.Length] 81 } 82 83 // Reset brings the parser to its initial state for a given input. It wipes an 84 // reuses internal storage to reduce allocation. 85 func (p *Parser) Reset(b []byte) { 86 p.builder.Reset() 87 p.ref = invalidReference 88 p.data = b 89 p.left = b 90 p.err = nil 91 p.first = true 92 } 93 94 // NextExpression parses the next top-level expression. If an expression was 95 // successfully parsed, it returns true. If the parser is at the end of the 96 // document or an error occurred, it returns false. 97 // 98 // Retrieve the parsed expression with Expression(). 99 func (p *Parser) NextExpression() bool { 100 if len(p.left) == 0 || p.err != nil { 101 return false 102 } 103 104 p.builder.Reset() 105 p.ref = invalidReference 106 107 for { 108 if len(p.left) == 0 || p.err != nil { 109 return false 110 } 111 112 if !p.first { 113 p.left, p.err = p.parseNewline(p.left) 114 } 115 116 if len(p.left) == 0 || p.err != nil { 117 return false 118 } 119 120 p.ref, p.left, p.err = p.parseExpression(p.left) 121 122 if p.err != nil { 123 return false 124 } 125 126 p.first = false 127 128 if p.ref.Valid() { 129 return true 130 } 131 } 132 } 133 134 // Expression returns a pointer to the node representing the last successfully 135 // parsed expression. 136 func (p *Parser) Expression() *Node { 137 return p.builder.NodeAt(p.ref) 138 } 139 140 // Error returns any error that has occurred during parsing. 
func (p *Parser) Error() error {
	// p.err is assigned by NextExpression and cleared by Reset.
	return p.err
}

// Position describes a position in the input.
type Position struct {
	// Number of bytes from the beginning of the input.
	Offset int
	// Line number, starting at 1.
	Line int
	// Column number, starting at 1.
	Column int
}

// Shape describes the position of a range in the input.
type Shape struct {
	Start Position
	End   Position
}

// position computes the Position of the first byte of b within the parser
// input. The offset comes from danger.SubsliceOffset(p.data, b), so b is
// expected to be a subslice of p.data.
//
// Line is one plus the number of '\n' bytes before the offset. Column is the
// number of bytes since the last '\n' (bytes, not runes); when there is no
// preceding newline, bytes.LastIndex returns -1 and the column is offset+1.
func (p *Parser) position(b []byte) Position {
	offset := danger.SubsliceOffset(p.data, b)

	lead := p.data[:offset]

	return Position{
		Offset: offset,
		Line:   bytes.Count(lead, []byte{'\n'}) + 1,
		Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
	}
}

// Shape returns the shape of the given range in the input. Will
// panic if the range is not a subslice of the input.
//
// End is the position of the empty slice located right after the range.
func (p *Parser) Shape(r Range) Shape {
	raw := p.Raw(r)
	return Shape{
		Start: p.position(raw),
		End:   p.position(raw[r.Length:]),
	}
}

// parseNewline consumes one line ending at the start of b and returns the
// remaining input. A lone '\n' is consumed directly; input starting with '\r'
// is delegated to scanWindowsNewline. Anything else yields a ParserError
// highlighting the first byte.
//
// b must not be empty: b[0] is read without a length check.
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
	if b[0] == '\n' {
		return b[1:], nil
	}

	if b[0] == '\r' {
		_, rest, err := scanWindowsNewline(b)
		return rest, err
	}

	return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
}

// parseComment scans a comment at the start of b with scanComment and returns
// the remaining input together with the scanner's error.
//
// A Comment node is pushed only when KeepComments is set and scanning
// succeeded; in every other case the returned reference is invalidReference.
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
	ref := invalidReference
	data, rest, err := scanComment(b)
	if p.KeepComments && err == nil {
		ref = p.builder.Push(Node{
			Kind: Comment,
			Raw:  p.Range(data),
			Data: data,
		})
	}
	return ref, rest, err
}

// parseExpression parses one top-level expression at the start of b: an
// optional comment, a key/value pair, or a table header, the latter two
// optionally followed by a comment.
//
// It returns the reference of the node produced, the remaining input, and an
// error. The reference is invalidReference when the line held nothing that
// produces a node (empty input, a bare line ending, or a comment that was not
// kept). The line ending itself is not consumed here.
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
	// expression =  ws [ comment ]
	// expression =/ ws keyval ws [ comment ]
	// expression =/ ws table ws [ comment ]
	ref := invalidReference

	b = p.parseWhitespace(b)

	if len(b) == 0 {
		return ref, b, nil
	}

	// Comment-only line.
	if b[0] == '#' {
		ref, rest, err := p.parseComment(b)
		return ref, rest, err
	}

	// Blank line: leave the newline for the caller to consume.
	if b[0] == '\n' || b[0] == '\r' {
		return ref, b, nil
	}

	var err error
	if b[0] == '[' {
		ref, b, err = p.parseTable(b)
	} else {
		ref, b, err = p.parseKeyval(b)
	}

	if err != nil {
		return ref, nil, err
	}

	b = p.parseWhitespace(b)

	// Optional trailing comment; when a node was created for it, it is
	// chained after the expression node.
	if len(b) > 0 && b[0] == '#' {
		cref, rest, err := p.parseComment(b)
		if cref != invalidReference {
			p.builder.Chain(ref, cref)
		}
		return ref, rest, err
	}

	return ref, b, nil
}

// parseTable dispatches between the two table header forms. b starts with
// '['; a second '[' selects the array-table parser, anything else the
// standard-table parser.
func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
	// table = std-table / array-table
	if len(b) > 1 && b[1] == '[' {
		return p.parseArrayTable(b)
	}

	return p.parseStdTable(b)
}

// parseArrayTable parses an array-table header ("[[key]]"). b must start with
// "[[": the first two bytes are skipped without being checked.
//
// The ArrayTable node is pushed before the key is parsed and its reference is
// returned even on error; on error the returned rest is nil.
func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
	// array-table = array-table-open key array-table-close
	// array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
	// array-table-close = ws %x5D.5D  ; ]] Double right square bracket
	ref := p.builder.Push(Node{
		Kind: ArrayTable,
	})

	b = b[2:]
	b = p.parseWhitespace(b)

	k, b, err := p.parseKey(b)
	if err != nil {
		return ref, nil, err
	}

	p.builder.AttachChild(ref, k)
	b = p.parseWhitespace(b)

	// The closing delimiter is two consecutive ']' bytes.
	b, err = expect(']', b)
	if err != nil {
		return ref, nil, err
	}

	b, err = expect(']', b)

	return ref, b, err
}

// parseStdTable parses a standard table header ("[key]"). b must start with
// '[': the first byte is skipped without being checked.
//
// The Table node is pushed before the key is parsed and its reference is
// returned even on error.
func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
	// std-table = std-table-open key std-table-close
	// std-table-open  = %x5B ws     ; [ Left square bracket
	// std-table-close = ws %x5D     ; ] Right square bracket
	ref := p.builder.Push(Node{
		Kind: Table,
	})

	b = b[1:]
	b = p.parseWhitespace(b)

	key, b, err := p.parseKey(b)
	if err != nil {
		return ref, nil, err
	}

	p.builder.AttachChild(ref, key)

	b = p.parseWhitespace(b)

	b, err = expect(']', b)

	return ref, b, err
}

// parseKeyval parses "key = value" at the start of b and returns the
// KeyValue node, the remaining input, and an error.
//
// The KeyValue node is pushed first. On success the value node is attached as
// its child and the key is chained after the value.
//
// NOTE(review): failures before the value return (invalidReference, nil, err),
// whereas a parseVal failure returns ref and whatever rest parseVal produced.
// Callers visible here test err first, so this looks harmless — confirm.
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
	// keyval = key keyval-sep val
	ref := p.builder.Push(Node{
		Kind: KeyValue,
	})

	key, b, err := p.parseKey(b)
	if err != nil {
		return invalidReference, nil, err
	}

	// keyval-sep = ws %x3D ws ; =

	b = p.parseWhitespace(b)

	// Dedicated message for a document that ends right after the key; expect
	// would otherwise report a generic end-of-document error.
	if len(b) == 0 {
		return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
	}

	b, err = expect('=', b)
	if err != nil {
		return invalidReference, nil, err
	}

	b = p.parseWhitespace(b)

	valRef, b, err := p.parseVal(b)
	if err != nil {
		return ref, b, err
	}

	p.builder.Chain(valRef, key)
	p.builder.AttachChild(ref, valRef)

	return ref, b, err
}

// parseVal parses a single TOML value at the start of b, choosing the
// concrete parser from the first byte: '"' and '\'' for strings, 't' / 'f'
// for booleans, '[' for arrays, '{' for inline tables, and anything else for
// numbers and date-times.
//
// It returns the node reference, the remaining input, and an error. Empty
// input is an error.
//
//nolint:cyclop,funlen
func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
	// val = string / boolean / array / inline-table / date-time / float / integer
	ref := invalidReference

	if len(b) == 0 {
		return ref, nil, NewParserError(b, "expected value, not eof")
	}

	var err error
	c := b[0]

	switch c {
	case '"':
		// Basic string, single-line or multi-line. raw is the whole token,
		// v is the decoded content.
		var raw []byte
		var v []byte
		if scanFollowsMultilineBasicStringDelimiter(b) {
			raw, v, b, err = p.parseMultilineBasicString(b)
		} else {
			raw, v, b, err = p.parseBasicString(b)
		}

		if err == nil {
			ref = p.builder.Push(Node{
				Kind: String,
				Raw:  p.Range(raw),
				Data: v,
			})
		}

		return ref, b, err
	case '\'':
		// Literal string, single-line or multi-line.
		var raw []byte
		var v []byte
		if scanFollowsMultilineLiteralStringDelimiter(b) {
			raw, v, b, err = p.parseMultilineLiteralString(b)
		} else {
			raw, v, b, err = p.parseLiteralString(b)
		}

		if err == nil {
			ref = p.builder.Push(Node{
				Kind: String,
				Raw:  p.Range(raw),
				Data: v,
			})
		}

		return ref, b, err
	case 't':
		if !scanFollowsTrue(b) {
			return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
		}

		// 4 == len("true")
		ref = p.builder.Push(Node{
			Kind: Bool,
			Data: b[:4],
		})

		return ref, b[4:], nil
	case 'f':
		if !scanFollowsFalse(b) {
			return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
		}

		// 5 == len("false")
		ref = p.builder.Push(Node{
			Kind: Bool,
			Data: b[:5],
		})

		return ref, b[5:], nil
	case '[':
		return p.parseValArray(b)
	case '{':
		return p.parseInlineTable(b)
	default:
		return p.parseIntOrFloatOrDateTime(b)
	}
}

// atmost returns the first n bytes of b, or all of b when it holds n bytes or
// fewer. It is used to build error highlights without slicing out of range.
func atmost(b []byte, n int) []byte {
	if n >= len(b) {
		return b
	}

	return b[:n]
}

// parseLiteralString parses a single-line literal string ('...') at the start
// of b. It returns the full token including both quotes, the content with the
// first and last byte removed, and the remaining input.
func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
	v, rest, err := scanLiteralString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	return v, v[1 : len(v)-1], rest, nil
}

// parseInlineTable parses an inline table ("{ k = v, ... }"). b must start
// with '{': the first byte is skipped without being checked.
//
// The InlineTable node's Raw covers only the opening brace. The first
// key/value node is attached as the child of the table and each following
// one is chained after the previous. The table reference is returned even on
// error.
func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
	// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
	// inline-table-open  = %x7B ws     ; {
	// inline-table-close = ws %x7D     ; }
	// inline-table-sep   = ws %x2C ws  ; , Comma
	// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
	parent := p.builder.Push(Node{
		Kind: InlineTable,
		Raw:  p.Range(b[:1]),
	})

	first := true

	// child is the most recently added key/value node.
	var child reference

	b = b[1:]

	var err error

	for len(b) > 0 {
		// Kept so that an error can point at a byte that still exists when
		// only whitespace remained.
		previousB := b
		b = p.parseWhitespace(b)

		if len(b) == 0 {
			return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
		}

		if b[0] == '}' {
			break
		}

		// Every pair after the first must be preceded by a comma.
		if !first {
			b, err = expect(',', b)
			if err != nil {
				return parent, nil, err
			}
			b = p.parseWhitespace(b)
		}

		var kv reference

		kv, b, err = p.parseKeyval(b)
		if err != nil {
			return parent, nil, err
		}

		if first {
			p.builder.AttachChild(parent, kv)
		} else {
			p.builder.Chain(child, kv)
		}
		child = kv

		first = false
	}

	// Also reports the case where the input ran out before the closing brace.
	rest, err := expect('}', b)

	return parent, rest, err
}

// parseValArray parses an array value ("[ v, v, ... ]"), including the
// whitespace, newlines and comments allowed between elements. b must start
// with '[': the first byte is skipped without being checked.
//
// Values, and any comment nodes produced between them, are linked under the
// Array node in source order: the first one is attached as the child and each
// following one is chained after the previous. The array reference is
// returned even on error.
//
//nolint:funlen,cyclop
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
	// array = array-open [ array-values ] ws-comment-newline array-close
	// array-open =  %x5B ; [
	// array-close = %x5D ; ]
	// array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
	// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
	// array-sep = %x2C  ; , Comma
	// ws-comment-newline = *( wschar / [ comment ] newline )
	arrayStart := b
	b = b[1:]

	parent := p.builder.Push(Node{
		Kind: Array,
	})

	// First indicates whether the parser is looking for the first element
	// (non-comment) of the array.
	first := true

	lastChild := invalidReference

	// addChild links valueRef under parent: as its child when nothing has
	// been added yet, otherwise chained after the previously added node.
	addChild := func(valueRef reference) {
		if lastChild == invalidReference {
			p.builder.AttachChild(parent, valueRef)
		} else {
			p.builder.Chain(lastChild, valueRef)
		}
		lastChild = valueRef
	}

	var err error
	for len(b) > 0 {
		cref := invalidReference
		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
		if err != nil {
			return parent, nil, err
		}

		if cref != invalidReference {
			addChild(cref)
		}

		if len(b) == 0 {
			return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
		}

		if b[0] == ']' {
			break
		}

		if b[0] == ',' {
			if first {
				return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
			}
			b = b[1:]

			cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
			if err != nil {
				return parent, nil, err
			}
			if cref != invalidReference {
				addChild(cref)
			}
		} else if !first {
			return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
		}

		// TOML allows trailing commas in arrays.
		if len(b) > 0 && b[0] == ']' {
			break
		}

		var valueRef reference
		valueRef, b, err = p.parseVal(b)
		if err != nil {
			return parent, nil, err
		}

		addChild(valueRef)

		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
		if err != nil {
			return parent, nil, err
		}
		if cref != invalidReference {
			addChild(cref)
		}

		first = false
	}

	// Also reports the case where the input ran out before the closing
	// bracket.
	rest, err := expect(']', b)

	return parent, rest, err
}

// parseOptionalWhitespaceCommentNewline consumes any run of whitespace,
// comments and line endings at the start of b and returns the remaining
// input.
//
// The returned reference is the first comment node produced, or
// invalidReference when there was none (comment nodes only exist when
// KeepComments is set, see parseComment). Further comments hang off that
// first one: the second is attached as its child and later ones are chained
// after the most recent comment.
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
	rootCommentRef := invalidReference
	latestCommentRef := invalidReference

	addComment := func(ref reference) {
		if rootCommentRef == invalidReference {
			rootCommentRef = ref
		} else if latestCommentRef == invalidReference {
			p.builder.AttachChild(rootCommentRef, ref)
			latestCommentRef = ref
		} else {
			p.builder.Chain(latestCommentRef, ref)
			latestCommentRef = ref
		}
	}

	for len(b) > 0 {
		var err error
		b = p.parseWhitespace(b)

		if len(b) > 0 && b[0] == '#' {
			var ref reference
			ref, b, err = p.parseComment(b)
			if err != nil {
				return invalidReference, nil, err
			}
			if ref != invalidReference {
				addComment(ref)
			}
		}

		if len(b) == 0 {
			break
		}

		// Keep looping only while line endings are being consumed; any other
		// byte ends the run.
		if b[0] == '\n' || b[0] == '\r' {
			b, err = p.parseNewline(b)
			if err != nil {
				return invalidReference, nil, err
			}
		} else {
			break
		}
	}

	return rootCommentRef, b, nil
}

// parseMultilineLiteralString parses a multi-line literal string ('''...''')
// at the start of b. It returns the full token, the content, and the
// remaining input.
//
// The content is the token without its first and last three bytes (the
// delimiters), minus one line ending ("\n" or "\r\n") directly after the
// opening delimiter. No escape processing is performed.
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
	token, rest, err := scanMultilineLiteralString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Index of the first byte after the opening delimiter.
	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	return token, token[i : len(token)-3], rest, err
}

// parseMultilineBasicString parses a multi-line basic string ("""...""") at
// the start of b. It returns the full token, the decoded content, and the
// remaining input.
//
// One line ending directly after the opening delimiter is dropped. When the
// scanner reports no escapes, the content is validated and returned as a
// subslice of the input. Otherwise it is decoded into a new buffer:
// escape sequences are expanded and a backslash that ends a line removes
// itself together with all following whitespace and line endings.
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
	// ml-basic-string-delim
	// ml-basic-string-delim = 3quotation-mark
	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
	//
	// mlb-content = mlb-char / newline / mlb-escaped-nl
	// mlb-char = mlb-unescaped / escaped
	// mlb-quotes = 1*2quotation-mark
	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// mlb-escaped-nl = escape ws newline *( wschar / newline )
	token, escaped, rest, err := scanMultilineBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Index of the first byte after the opening delimiter.
	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	// fast path
	startIdx := i
	endIdx := len(token) - len(`"""`)

	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-3 {
		c := token[i]

		//nolint:nestif
		if c == '\\' {
			// When the last non-whitespace character on a line is an unescaped \,
			// it will be trimmed along with all whitespace (including newlines) up
			// to the next non-whitespace character or closing delimiter.

			// Look ahead from the backslash: spaces, tabs and the '\r' of a
			// "\r\n" pair are skipped; reaching '\n' means the backslash ends
			// the line. Any other byte stops the search.
			isLastNonWhitespaceOnLine := false
			j := 1
		findEOLLoop:
			for ; j < len(token)-3-i; j++ {
				switch token[i+j] {
				case ' ', '\t':
					continue
				case '\r':
					if token[i+j+1] == '\n' {
						continue
					}
				case '\n':
					isLastNonWhitespaceOnLine = true
				}
				break findEOLLoop
			}
			if isLastNonWhitespaceOnLine {
				// Jump to the '\n' found above, then skip every following
				// whitespace or line-ending byte. The i-- / i++ pair leaves i
				// on the first byte that must be kept.
				i += j
				for ; i < len(token)-3; i++ {
					c := token[i]
					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
						i--
						break
					}
				}
				i++
				continue
			}

			// handle escaping
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				// \uXXXX: four hex digits; i is advanced past them.
				x, err := hexToRune(atmost(token[i+1:], 4), 4)
				if err != nil {
					return nil, nil, nil, err
				}
				builder.WriteRune(x)
				i += 4
			case 'U':
				// \UXXXXXXXX: eight hex digits; i is advanced past them.
				x, err := hexToRune(atmost(token[i+1:], 8), 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Copy one character verbatim; a size of 0 is treated as an
			// invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}

// parseKey parses a simple or dotted key at the start of b.
//
// A Key node is pushed for the first part and its reference is returned;
// each further part, separated by '.' with optional surrounding whitespace,
// is added with builder.PushAndChain. Whitespace following the key is
// consumed as well. When a later part fails to parse, the reference of the
// first part is still returned along with the error.
func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
	// key = simple-key / dotted-key
	// simple-key = quoted-key / unquoted-key
	//
	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
	// quoted-key = basic-string / literal-string
	// dotted-key = simple-key 1*( dot-sep simple-key )
	//
	// dot-sep   = ws %x2E ws  ; . Period
	raw, key, b, err := p.parseSimpleKey(b)
	if err != nil {
		return invalidReference, nil, err
	}

	ref := p.builder.Push(Node{
		Kind: Key,
		Raw:  p.Range(raw),
		Data: key,
	})

	for {
		b = p.parseWhitespace(b)
		if len(b) > 0 && b[0] == '.' {
			b = p.parseWhitespace(b[1:])

			raw, key, b, err = p.parseSimpleKey(b)
			if err != nil {
				return ref, nil, err
			}

			p.builder.PushAndChain(Node{
				Kind: Key,
				Raw:  p.Range(raw),
				Data: key,
			})
		} else {
			break
		}
	}

	return ref, b, nil
}

// parseSimpleKey parses one key part at the start of b: a literal string, a
// basic string, or an unquoted key. It returns the raw token, the key bytes,
// and the remaining input. For an unquoted key, raw and key are the same
// slice. Empty input, or a first byte that starts none of the three forms,
// is an error.
func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
	if len(b) == 0 {
		return nil, nil, nil, NewParserError(b, "expected key but found none")
	}

	// simple-key = quoted-key / unquoted-key
	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
	// quoted-key = basic-string / literal-string
	switch {
	case b[0] == '\'':
		return p.parseLiteralString(b)
	case b[0] == '"':
		return p.parseBasicString(b)
	case isUnquotedKeyChar(b[0]):
		key, rest = scanUnquotedKey(b)
		return key, key, rest, nil
	default:
		return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
	}
}

// parseBasicString parses a single-line basic string ("...") at the start of
// b. It returns the full token including both quotes, the decoded content,
// and the remaining input.
//
// When the scanner reports no escapes, the content is validated and returned
// as a subslice of the input; otherwise escapes are expanded into a new
// buffer. Besides the escapes listed in the grammar below, the code also
// accepts \e (written as byte 0x1B).
//
//nolint:funlen,cyclop
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// basic-string = quotation-mark *basic-char quotation-mark
	// quotation-mark = %x22            ; "
	// basic-char = basic-unescaped / escaped
	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// escaped = escape escape-seq-char
	// escape-seq-char =  %x22         ; "    quotation mark  U+0022
	// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
	// escape-seq-char =/ %x62         ; b    backspace       U+0008
	// escape-seq-char =/ %x66         ; f    form feed       U+000C
	// escape-seq-char =/ %x6E         ; n    line feed       U+000A
	// escape-seq-char =/ %x72         ; r    carriage return U+000D
	// escape-seq-char =/ %x74         ; t    tab             U+0009
	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
	token, escaped, rest, err := scanBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	startIdx := len(`"`)
	endIdx := len(token) - len(`"`)

	// Fast path. If there is no escape sequence, the string should just be
	// an UTF-8 encoded string, which is the same as Go. In that case,
	// validate the string and return a direct reference to the buffer.
	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	i := startIdx

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-1 {
		c := token[i]
		if c == '\\' {
			// Move onto the byte that selects the escape.
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				// \uXXXX: the slice handed to hexToRune excludes the closing
				// quote, so a truncated sequence is reported as too short.
				x, err := hexToRune(token[i+1:len(token)-1], 4)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 4
			case 'U':
				// \UXXXXXXXX: same as above with eight hex digits.
				x, err := hexToRune(token[i+1:len(token)-1], 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Copy one character verbatim; a size of 0 is treated as an
			// invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}

// hexToRune decodes the first length bytes of b as hexadecimal digits (either
// case) and returns the resulting code point.
//
// It returns -1 and a ParserError when b holds fewer than length bytes, when
// a byte is not a hex digit, or when the value is above unicode.MaxRune or
// inside the surrogate range [0xD800, 0xE000).
func hexToRune(b []byte, length int) (rune, error) {
	if len(b) < length {
		return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
	}
	b = b[:length]

	var r uint32
	for i, c := range b {
		d := uint32(0)
		switch {
		case '0' <= c && c <= '9':
			d = uint32(c - '0')
		case 'a' <= c && c <= 'f':
			d = uint32(c - 'a' + 10)
		case 'A' <= c && c <= 'F':
			d = uint32(c - 'A' + 10)
		default:
			return -1, NewParserError(b[i:i+1], "non-hex character")
		}
		r = r*16 + d
	}

	if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
		return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
	}

	return rune(r), nil
}

// parseWhitespace skips the whitespace at the start of b, as recognised by
// scanWhitespace, and returns the remaining input.
func (p *Parser) parseWhitespace(b []byte) []byte {
	// ws = *wschar
	// wschar =  %x20  ; Space
	// wschar =/ %x09  ; Horizontal tab
	_, rest := scanWhitespace(b)

	return rest
}

// parseIntOrFloatOrDateTime parses a value that is neither a string, a
// boolean, an array nor an inline table. b must not be empty: b[0] is read
// without a length check.
//
// "inf" and "nan" are recognised directly as Float nodes. A leading sign goes
// straight to the number scanner. Otherwise the first bytes (at most five)
// are inspected: a ':' at index 2 or a '-' at index 4, preceded only by
// digits, selects the date/time scanner (as in a leading "HH:" or "YYYY-");
// everything else is scanned as a number.
//
//nolint:cyclop
func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
	switch b[0] {
	case 'i':
		if !scanFollowsInf(b) {
			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
		}

		return p.builder.Push(Node{
			Kind: Float,
			Data: b[:3],
		}), b[3:], nil
	case 'n':
		if !scanFollowsNan(b) {
			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
		}

		return p.builder.Push(Node{
			Kind: Float,
			Data: b[:3],
		}), b[3:], nil
	case '+', '-':
		return p.scanIntOrFloat(b)
	}

	// Too short to hold the ':' or '-' that would identify a time or a date.
	if len(b) < 3 {
		return p.scanIntOrFloat(b)
	}

	s := 5
	if len(b) < s {
		s = len(b)
	}

	for idx, c := range b[:s] {
		if isDigit(c) {
			continue
		}

		if idx == 2 && c == ':' || (idx == 4 && c == '-') {
			return p.scanDateTime(b)
		}

		break
	}

	return p.scanIntOrFloat(b)
}

// scanDateTime delimits a date and/or time value at the start of b and pushes
// a node whose Kind is derived from the characters seen: DateTime,
// LocalDateTime, LocalTime or LocalDate. The value itself is not validated
// here; this function never returns a non-nil error.
func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
	// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
	// followed by a digit.
	hasDate := false
	hasTime := false
	hasTz := false
	seenSpace := false

	i := 0
byteLoop:
	for ; i < len(b); i++ {
		c := b[i]

		switch {
		case isDigit(c):
		case c == '-':
			// A '-' marks a date; from index 8 onwards it is also counted as
			// the sign of a timezone offset.
			hasDate = true
			const minOffsetOfTz = 8
			if i >= minOffsetOfTz {
				hasTz = true
			}
		case c == 'T' || c == 't' || c == ':' || c == '.':
			hasTime = true
		// NOTE(review): the `c == '-'` term below can never match because the
		// earlier `c == '-'` case always wins.
		case c == '+' || c == '-' || c == 'Z' || c == 'z':
			hasTz = true
		case c == ' ':
			// At most one space is accepted, and only when followed by a
			// digit (date and time separated by a space).
			if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
				// NOTE(review): together with the loop's own i++, this moves
				// past the space, the digit after it, and one more byte that
				// is never classified by the switch — confirm this is
				// intended.
				i += 2
				// Avoid reaching past the end of the document in case the time
				// is malformed. See TestIssue585.
				if i >= len(b) {
					i--
				}
				seenSpace = true
				hasTime = true
			} else {
				break byteLoop
			}
		default:
			break byteLoop
		}
	}

	var kind Kind

	if hasTime {
		if hasDate {
			if hasTz {
				kind = DateTime
			} else {
				kind = LocalDateTime
			}
		} else {
			kind = LocalTime
		}
	} else {
		kind = LocalDate
	}

	return p.builder.Push(Node{
		Kind: kind,
		Data: b[:i],
	}), b[i:], nil
}

// scanIntOrFloat delimits an integer or float at the start of b and pushes an
// Integer or Float node holding the raw bytes. Only the extent of the number
// is determined here; its syntax is not validated.
//
// Input of more than two bytes that starts with '0' not followed by '.', 'e'
// or 'E' is always an Integer: with an 'x', 'o' or 'b' prefix the matching
// digits (and '_') are consumed, otherwise only the single '0' is.
//
// In the general case digits, '+', '-' and '_' are consumed; '.', 'e' or 'E'
// turn the result into a Float; an 'i' or 'n' must start "inf" or "nan".
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
	i := 0

	if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
		var isValidRune validRuneFn

		switch b[1] {
		case 'x':
			isValidRune = isValidHexRune
		case 'o':
			isValidRune = isValidOctalRune
		case 'b':
			isValidRune = isValidBinaryRune
		default:
			// No base prefix: the number is just the leading "0".
			i++
		}

		if isValidRune != nil {
			// Skip the two-byte prefix, then consume the digits of that base.
			i += 2
			for ; i < len(b); i++ {
				if !isValidRune(b[i]) {
					break
				}
			}
		}

		return p.builder.Push(Node{
			Kind: Integer,
			Data: b[:i],
		}), b[i:], nil
	}

	isFloat := false

	for ; i < len(b); i++ {
		c := b[i]

		if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
			continue
		}

		if c == '.' || c == 'e' || c == 'E' {
			isFloat = true

			continue
		}

		if c == 'i' {
			// Possibly signed "inf": the node includes what was scanned so far.
			if scanFollowsInf(b[i:]) {
				return p.builder.Push(Node{
					Kind: Float,
					Data: b[:i+3],
				}), b[i+3:], nil
			}

			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
		}

		if c == 'n' {
			// Possibly signed "nan": the node includes what was scanned so far.
			if scanFollowsNan(b[i:]) {
				return p.builder.Push(Node{
					Kind: Float,
					Data: b[:i+3],
				}), b[i+3:], nil
			}

			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
		}

		break
	}

	// Nothing was consumed: the first byte cannot start a number.
	if i == 0 {
		return invalidReference, b, NewParserError(b, "incomplete number")
	}

	kind := Integer

	if isFloat {
		kind = Float
	}

	return p.builder.Push(Node{
		Kind: kind,
		Data: b[:i],
	}), b[i:], nil
}

// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return r >= '0' && r <= '9'
}

// validRuneFn reports whether a byte may appear in the digits of a prefixed
// integer literal.
type validRuneFn func(r byte) bool

// isValidHexRune reports whether r is a hexadecimal digit (either case) or
// '_'.
func isValidHexRune(r byte) bool {
	return r >= 'a' && r <= 'f' ||
		r >= 'A' && r <= 'F' ||
		r >= '0' && r <= '9' ||
		r == '_'
}

// isValidOctalRune reports whether r is an octal digit or '_'.
func isValidOctalRune(r byte) bool {
	return r >= '0' && r <= '7' || r == '_'
}

// isValidBinaryRune reports whether r is '0', '1' or '_'.
func isValidBinaryRune(r byte) bool {
	return r == '0' || r == '1' || r == '_'
}

// expect checks that b starts with the byte x and returns b without that
// byte. It returns a ParserError when b is empty or starts with a different
// byte; in the latter case the error highlights the offending byte.
func expect(x byte, b []byte) ([]byte, error) {
	if len(b) == 0 {
		return nil, NewParserError(b, "expected character %c but the document ended here", x)
	}

	if b[0] != x {
		return nil, NewParserError(b[0:1], "expected character %c", x)
	}

	return b[1:], nil
}