scannerc.go (77594B)
1 package yaml 2 3 import ( 4 "bytes" 5 "fmt" 6 ) 7 8 // Introduction 9 // ************ 10 // 11 // The following notes assume that you are familiar with the YAML specification 12 // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in 13 // some cases we are less restrictive than it requires. 14 // 15 // The process of transforming a YAML stream into a sequence of events is 16 // divided into two steps: Scanning and Parsing. 17 // 18 // The Scanner transforms the input stream into a sequence of tokens, while the 19 // Parser transforms the sequence of tokens produced by the Scanner into a 20 // sequence of parsing events. 21 // 22 // The Scanner is rather clever and complicated. The Parser, on the contrary, 23 // is a straightforward implementation of a recursive-descent parser (or, 24 // LL(1) parser, as it is usually called). 25 // 26 // Actually there are two issues of Scanning that might be called "clever", the 27 // rest is quite straightforward. The issues are "block collection start" and 28 // "simple keys". Both issues are explained below in detail. 29 // 30 // Here the Scanning step is explained and implemented. We start with the list 31 // of all the tokens produced by the Scanner together with short descriptions. 32 // 33 // Now, tokens: 34 // 35 // STREAM-START(encoding) # The stream start. 36 // STREAM-END # The stream end. 37 // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. 38 // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. 39 // DOCUMENT-START # '---' 40 // DOCUMENT-END # '...' 41 // BLOCK-SEQUENCE-START # Indentation increase denoting a block 42 // BLOCK-MAPPING-START # sequence or a block mapping. 43 // BLOCK-END # Indentation decrease. 44 // FLOW-SEQUENCE-START # '[' 45 // FLOW-SEQUENCE-END # ']' 46 // FLOW-MAPPING-START # '{' 47 // FLOW-MAPPING-END # '}' 48 // BLOCK-ENTRY # '-' 49 // FLOW-ENTRY # ',' 50 // KEY # '?' or nothing (simple keys).
51 // VALUE # ':' 52 // ALIAS(anchor) # '*anchor' 53 // ANCHOR(anchor) # '&anchor' 54 // TAG(handle,suffix) # '!handle!suffix' 55 // SCALAR(value,style) # A scalar. 56 // 57 // The following two tokens are "virtual" tokens denoting the beginning and the 58 // end of the stream: 59 // 60 // STREAM-START(encoding) 61 // STREAM-END 62 // 63 // We pass the information about the input stream encoding with the 64 // STREAM-START token. 65 // 66 // The next two tokens are responsible for directives: 67 // 68 // VERSION-DIRECTIVE(major,minor) 69 // TAG-DIRECTIVE(handle,prefix) 70 // 71 // Example: 72 // 73 // %YAML 1.1 74 // %TAG ! !foo 75 // %TAG !yaml! tag:yaml.org,2002: 76 // --- 77 // 78 // The corresponding sequence of tokens: 79 // 80 // STREAM-START(utf-8) 81 // VERSION-DIRECTIVE(1,1) 82 // TAG-DIRECTIVE("!","!foo") 83 // TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:") 84 // DOCUMENT-START 85 // STREAM-END 86 // 87 // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole 88 // line. 89 // 90 // The document start and end indicators are represented by: 91 // 92 // DOCUMENT-START 93 // DOCUMENT-END 94 // 95 // Note that if a YAML stream contains an implicit document (without '---' 96 // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be 97 // produced. 98 // 99 // In the following examples, we present whole documents together with the 100 // produced tokens. 101 // 102 // 1. An implicit document: 103 // 104 // 'a scalar' 105 // 106 // Tokens: 107 // 108 // STREAM-START(utf-8) 109 // SCALAR("a scalar",single-quoted) 110 // STREAM-END 111 // 112 // 2. An explicit document: 113 // 114 // --- 115 // 'a scalar' 116 // ... 117 // 118 // Tokens: 119 // 120 // STREAM-START(utf-8) 121 // DOCUMENT-START 122 // SCALAR("a scalar",single-quoted) 123 // DOCUMENT-END 124 // STREAM-END 125 // 126 // 3.
Several documents in a stream: 127 // 128 // 'a scalar' 129 // --- 130 // 'another scalar' 131 // --- 132 // 'yet another scalar' 133 // 134 // Tokens: 135 // 136 // STREAM-START(utf-8) 137 // SCALAR("a scalar",single-quoted) 138 // DOCUMENT-START 139 // SCALAR("another scalar",single-quoted) 140 // DOCUMENT-START 141 // SCALAR("yet another scalar",single-quoted) 142 // STREAM-END 143 // 144 // We have already introduced the SCALAR token above. The following tokens are 145 // used to describe aliases, anchors, tag, and scalars: 146 // 147 // ALIAS(anchor) 148 // ANCHOR(anchor) 149 // TAG(handle,suffix) 150 // SCALAR(value,style) 151 // 152 // The following series of examples illustrate the usage of these tokens: 153 // 154 // 1. A recursive sequence: 155 // 156 // &A [ *A ] 157 // 158 // Tokens: 159 // 160 // STREAM-START(utf-8) 161 // ANCHOR("A") 162 // FLOW-SEQUENCE-START 163 // ALIAS("A") 164 // FLOW-SEQUENCE-END 165 // STREAM-END 166 // 167 // 2. A tagged scalar: 168 // 169 // !!float "3.14" # A good approximation. 170 // 171 // Tokens: 172 // 173 // STREAM-START(utf-8) 174 // TAG("!!","float") 175 // SCALAR("3.14",double-quoted) 176 // STREAM-END 177 // 178 // 3. Various scalar styles: 179 // 180 // --- # Implicit empty plain scalars do not produce tokens. 181 // --- a plain scalar 182 // --- 'a single-quoted scalar' 183 // --- "a double-quoted scalar" 184 // --- |- 185 // a literal scalar 186 // --- >- 187 // a folded 188 // scalar 189 // 190 // Tokens: 191 // 192 // STREAM-START(utf-8) 193 // DOCUMENT-START 194 // DOCUMENT-START 195 // SCALAR("a plain scalar",plain) 196 // DOCUMENT-START 197 // SCALAR("a single-quoted scalar",single-quoted) 198 // DOCUMENT-START 199 // SCALAR("a double-quoted scalar",double-quoted) 200 // DOCUMENT-START 201 // SCALAR("a literal scalar",literal) 202 // DOCUMENT-START 203 // SCALAR("a folded scalar",folded) 204 // STREAM-END 205 // 206 // Now it's time to review collection-related tokens. 
We will start with 207 // flow collections: 208 // 209 // FLOW-SEQUENCE-START 210 // FLOW-SEQUENCE-END 211 // FLOW-MAPPING-START 212 // FLOW-MAPPING-END 213 // FLOW-ENTRY 214 // KEY 215 // VALUE 216 // 217 // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and 218 // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' 219 // correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the 220 // indicators '?' and ':', which are used for denoting mapping keys and values, 221 // are represented by the KEY and VALUE tokens. 222 // 223 // The following examples show flow collections: 224 // 225 // 1. A flow sequence: 226 // 227 // [item 1, item 2, item 3] 228 // 229 // Tokens: 230 // 231 // STREAM-START(utf-8) 232 // FLOW-SEQUENCE-START 233 // SCALAR("item 1",plain) 234 // FLOW-ENTRY 235 // SCALAR("item 2",plain) 236 // FLOW-ENTRY 237 // SCALAR("item 3",plain) 238 // FLOW-SEQUENCE-END 239 // STREAM-END 240 // 241 // 2. A flow mapping: 242 // 243 // { 244 // a simple key: a value, # Note that the KEY token is produced. 245 // ? a complex key: another value, 246 // } 247 // 248 // Tokens: 249 // 250 // STREAM-START(utf-8) 251 // FLOW-MAPPING-START 252 // KEY 253 // SCALAR("a simple key",plain) 254 // VALUE 255 // SCALAR("a value",plain) 256 // FLOW-ENTRY 257 // KEY 258 // SCALAR("a complex key",plain) 259 // VALUE 260 // SCALAR("another value",plain) 261 // FLOW-ENTRY 262 // FLOW-MAPPING-END 263 // STREAM-END 264 // 265 // A simple key is a key which is not denoted by the '?' indicator. Note that 266 // the Scanner still produce the KEY token whenever it encounters a simple key. 
267 // 268 // For scanning block collections, the following tokens are used (note that we 269 // repeat KEY and VALUE here): 270 // 271 // BLOCK-SEQUENCE-START 272 // BLOCK-MAPPING-START 273 // BLOCK-END 274 // BLOCK-ENTRY 275 // KEY 276 // VALUE 277 // 278 // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation 279 // increase that precedes a block collection (cf. the INDENT token in Python). 280 // The token BLOCK-END denotes indentation decrease that ends a block collection 281 // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities 282 // that make detection of these tokens more complex. 283 // 284 // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators 285 // '-', '?', and ':' respectively. 286 // 287 // The following examples show how the tokens BLOCK-SEQUENCE-START, 288 // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: 289 // 290 // 1. Block sequences: 291 // 292 // - item 1 293 // - item 2 294 // - 295 // - item 3.1 296 // - item 3.2 297 // - 298 // key 1: value 1 299 // key 2: value 2 300 // 301 // Tokens: 302 // 303 // STREAM-START(utf-8) 304 // BLOCK-SEQUENCE-START 305 // BLOCK-ENTRY 306 // SCALAR("item 1",plain) 307 // BLOCK-ENTRY 308 // SCALAR("item 2",plain) 309 // BLOCK-ENTRY 310 // BLOCK-SEQUENCE-START 311 // BLOCK-ENTRY 312 // SCALAR("item 3.1",plain) 313 // BLOCK-ENTRY 314 // SCALAR("item 3.2",plain) 315 // BLOCK-END 316 // BLOCK-ENTRY 317 // BLOCK-MAPPING-START 318 // KEY 319 // SCALAR("key 1",plain) 320 // VALUE 321 // SCALAR("value 1",plain) 322 // KEY 323 // SCALAR("key 2",plain) 324 // VALUE 325 // SCALAR("value 2",plain) 326 // BLOCK-END 327 // BLOCK-END 328 // STREAM-END 329 // 330 // 2. Block mappings: 331 // 332 // a simple key: a value # The KEY token is produced here. 333 // ?
a complex key 334 // : another value 335 // a mapping: 336 // key 1: value 1 337 // key 2: value 2 338 // a sequence: 339 // - item 1 340 // - item 2 341 // 342 // Tokens: 343 // 344 // STREAM-START(utf-8) 345 // BLOCK-MAPPING-START 346 // KEY 347 // SCALAR("a simple key",plain) 348 // VALUE 349 // SCALAR("a value",plain) 350 // KEY 351 // SCALAR("a complex key",plain) 352 // VALUE 353 // SCALAR("another value",plain) 354 // KEY 355 // SCALAR("a mapping",plain) 356 // BLOCK-MAPPING-START 357 // KEY 358 // SCALAR("key 1",plain) 359 // VALUE 360 // SCALAR("value 1",plain) 361 // KEY 362 // SCALAR("key 2",plain) 363 // VALUE 364 // SCALAR("value 2",plain) 365 // BLOCK-END 366 // KEY 367 // SCALAR("a sequence",plain) 368 // VALUE 369 // BLOCK-SEQUENCE-START 370 // BLOCK-ENTRY 371 // SCALAR("item 1",plain) 372 // BLOCK-ENTRY 373 // SCALAR("item 2",plain) 374 // BLOCK-END 375 // BLOCK-END 376 // STREAM-END 377 // 378 // YAML does not always require to start a new block collection from a new 379 // line. If the current line contains only '-', '?', and ':' indicators, a new 380 // block collection may start at the current line. The following examples 381 // illustrate this case: 382 // 383 // 1. Collections in a sequence: 384 // 385 // - - item 1 386 // - item 2 387 // - key 1: value 1 388 // key 2: value 2 389 // - ? 
complex key 390 // : complex value 391 // 392 // Tokens: 393 // 394 // STREAM-START(utf-8) 395 // BLOCK-SEQUENCE-START 396 // BLOCK-ENTRY 397 // BLOCK-SEQUENCE-START 398 // BLOCK-ENTRY 399 // SCALAR("item 1",plain) 400 // BLOCK-ENTRY 401 // SCALAR("item 2",plain) 402 // BLOCK-END 403 // BLOCK-ENTRY 404 // BLOCK-MAPPING-START 405 // KEY 406 // SCALAR("key 1",plain) 407 // VALUE 408 // SCALAR("value 1",plain) 409 // KEY 410 // SCALAR("key 2",plain) 411 // VALUE 412 // SCALAR("value 2",plain) 413 // BLOCK-END 414 // BLOCK-ENTRY 415 // BLOCK-MAPPING-START 416 // KEY 417 // SCALAR("complex key") 418 // VALUE 419 // SCALAR("complex value") 420 // BLOCK-END 421 // BLOCK-END 422 // STREAM-END 423 // 424 // 2. Collections in a mapping: 425 // 426 // ? a sequence 427 // : - item 1 428 // - item 2 429 // ? a mapping 430 // : key 1: value 1 431 // key 2: value 2 432 // 433 // Tokens: 434 // 435 // STREAM-START(utf-8) 436 // BLOCK-MAPPING-START 437 // KEY 438 // SCALAR("a sequence",plain) 439 // VALUE 440 // BLOCK-SEQUENCE-START 441 // BLOCK-ENTRY 442 // SCALAR("item 1",plain) 443 // BLOCK-ENTRY 444 // SCALAR("item 2",plain) 445 // BLOCK-END 446 // KEY 447 // SCALAR("a mapping",plain) 448 // VALUE 449 // BLOCK-MAPPING-START 450 // KEY 451 // SCALAR("key 1",plain) 452 // VALUE 453 // SCALAR("value 1",plain) 454 // KEY 455 // SCALAR("key 2",plain) 456 // VALUE 457 // SCALAR("value 2",plain) 458 // BLOCK-END 459 // BLOCK-END 460 // STREAM-END 461 // 462 // YAML also permits non-indented sequences if they are included into a block 463 // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: 464 // 465 // key: 466 // - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 
467 // - item 2 468 // 469 // Tokens: 470 // 471 // STREAM-START(utf-8) 472 // BLOCK-MAPPING-START 473 // KEY 474 // SCALAR("key",plain) 475 // VALUE 476 // BLOCK-ENTRY 477 // SCALAR("item 1",plain) 478 // BLOCK-ENTRY 479 // SCALAR("item 2",plain) 480 // BLOCK-END 481 // 482 483 // Ensure that the buffer contains the required number of characters. 484 // Return true on success, false on failure (reader error or memory error). 485 func cache(parser *yaml_parser_t, length int) bool { 486 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) 487 return parser.unread >= length || yaml_parser_update_buffer(parser, length) 488 } 489 490 // Advance the buffer pointer. 491 func skip(parser *yaml_parser_t) { 492 parser.mark.index++ 493 parser.mark.column++ 494 parser.unread-- 495 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 496 } 497 498 func skip_line(parser *yaml_parser_t) { 499 if is_crlf(parser.buffer, parser.buffer_pos) { 500 parser.mark.index += 2 501 parser.mark.column = 0 502 parser.mark.line++ 503 parser.unread -= 2 504 parser.buffer_pos += 2 505 } else if is_break(parser.buffer, parser.buffer_pos) { 506 parser.mark.index++ 507 parser.mark.column = 0 508 parser.mark.line++ 509 parser.unread-- 510 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 511 } 512 } 513 514 // Copy a character to a string buffer and advance pointers. 515 func read(parser *yaml_parser_t, s []byte) []byte { 516 w := width(parser.buffer[parser.buffer_pos]) 517 if w == 0 { 518 panic("invalid character sequence") 519 } 520 if len(s) == 0 { 521 s = make([]byte, 0, 32) 522 } 523 if w == 1 && len(s)+w <= cap(s) { 524 s = s[:len(s)+1] 525 s[len(s)-1] = parser.buffer[parser.buffer_pos] 526 parser.buffer_pos++ 527 } else { 528 s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) 
529 parser.buffer_pos += w 530 } 531 parser.mark.index++ 532 parser.mark.column++ 533 parser.unread-- 534 return s 535 } 536 537 // Copy a line break character to a string buffer and advance pointers. 538 func read_line(parser *yaml_parser_t, s []byte) []byte { 539 buf := parser.buffer 540 pos := parser.buffer_pos 541 switch { 542 case buf[pos] == '\r' && buf[pos+1] == '\n': 543 // CR LF . LF 544 s = append(s, '\n') 545 parser.buffer_pos += 2 546 parser.mark.index++ 547 parser.unread-- 548 case buf[pos] == '\r' || buf[pos] == '\n': 549 // CR|LF . LF 550 s = append(s, '\n') 551 parser.buffer_pos += 1 552 case buf[pos] == '\xC2' && buf[pos+1] == '\x85': 553 // NEL . LF 554 s = append(s, '\n') 555 parser.buffer_pos += 2 556 case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): 557 // LS|PS . LS|PS 558 s = append(s, buf[parser.buffer_pos:pos+3]...) 559 parser.buffer_pos += 3 560 default: 561 return s 562 } 563 parser.mark.index++ 564 parser.mark.column = 0 565 parser.mark.line++ 566 parser.unread-- 567 return s 568 } 569 570 // Get the next token. 571 func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { 572 // Erase the token object. 573 *token = yaml_token_t{} // [Go] Is this necessary? 574 575 // No tokens after STREAM-END or error. 576 if parser.stream_end_produced || parser.error != yaml_NO_ERROR { 577 return true 578 } 579 580 // Ensure that the tokens queue contains enough tokens. 581 if !parser.token_available { 582 if !yaml_parser_fetch_more_tokens(parser) { 583 return false 584 } 585 } 586 587 // Fetch the next token from the queue. 588 *token = parser.tokens[parser.tokens_head] 589 parser.tokens_head++ 590 parser.tokens_parsed++ 591 parser.token_available = false 592 593 if token.typ == yaml_STREAM_END_TOKEN { 594 parser.stream_end_produced = true 595 } 596 return true 597 } 598 599 // Set the scanner error and return false. 
600 func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { 601 parser.error = yaml_SCANNER_ERROR 602 parser.context = context 603 parser.context_mark = context_mark 604 parser.problem = problem 605 parser.problem_mark = parser.mark 606 return false 607 } 608 609 func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { 610 context := "while parsing a tag" 611 if directive { 612 context = "while parsing a %TAG directive" 613 } 614 return yaml_parser_set_scanner_error(parser, context, context_mark, problem) 615 } 616 617 func trace(args ...interface{}) func() { 618 pargs := append([]interface{}{"+++"}, args...) 619 fmt.Println(pargs...) 620 pargs = append([]interface{}{"---"}, args...) 621 return func() { fmt.Println(pargs...) } 622 } 623 624 // Ensure that the tokens queue contains at least one token which can be 625 // returned to the Parser. 626 func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { 627 // While we need more tokens to fetch, do it. 628 for { 629 if parser.tokens_head != len(parser.tokens) { 630 // If queue is non-empty, check if any potential simple key may 631 // occupy the head position. 632 head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] 633 if !ok { 634 break 635 } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { 636 return false 637 } else if !valid { 638 break 639 } 640 } 641 // Fetch the next token. 642 if !yaml_parser_fetch_next_token(parser) { 643 return false 644 } 645 } 646 647 parser.token_available = true 648 return true 649 } 650 651 // The dispatcher for token fetchers. 652 func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { 653 // Ensure that the buffer is initialized. 654 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 655 return false 656 } 657 658 // Check if we just started scanning. 
Fetch STREAM-START then. 659 if !parser.stream_start_produced { 660 return yaml_parser_fetch_stream_start(parser) 661 } 662 663 // Eat whitespaces and comments until we reach the next token. 664 if !yaml_parser_scan_to_next_token(parser) { 665 return false 666 } 667 668 // Check the indentation level against the current column. 669 if !yaml_parser_unroll_indent(parser, parser.mark.column) { 670 return false 671 } 672 673 // Ensure that the buffer contains at least 4 characters. 4 is the length 674 // of the longest indicators ('--- ' and '... '). 675 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 676 return false 677 } 678 679 // Is it the end of the stream? 680 if is_z(parser.buffer, parser.buffer_pos) { 681 return yaml_parser_fetch_stream_end(parser) 682 } 683 684 // Is it a directive? 685 if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { 686 return yaml_parser_fetch_directive(parser) 687 } 688 689 buf := parser.buffer 690 pos := parser.buffer_pos 691 692 // Is it the document start indicator? 693 if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { 694 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) 695 } 696 697 // Is it the document end indicator? 698 if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { 699 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) 700 } 701 702 // Is it the flow sequence start indicator? 703 if buf[pos] == '[' { 704 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) 705 } 706 707 // Is it the flow mapping start indicator? 708 if parser.buffer[parser.buffer_pos] == '{' { 709 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) 710 } 711 712 // Is it the flow sequence end indicator? 
713 if parser.buffer[parser.buffer_pos] == ']' { 714 return yaml_parser_fetch_flow_collection_end(parser, 715 yaml_FLOW_SEQUENCE_END_TOKEN) 716 } 717 718 // Is it the flow mapping end indicator? 719 if parser.buffer[parser.buffer_pos] == '}' { 720 return yaml_parser_fetch_flow_collection_end(parser, 721 yaml_FLOW_MAPPING_END_TOKEN) 722 } 723 724 // Is it the flow entry indicator? 725 if parser.buffer[parser.buffer_pos] == ',' { 726 return yaml_parser_fetch_flow_entry(parser) 727 } 728 729 // Is it the block entry indicator? 730 if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { 731 return yaml_parser_fetch_block_entry(parser) 732 } 733 734 // Is it the key indicator? 735 if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 736 return yaml_parser_fetch_key(parser) 737 } 738 739 // Is it the value indicator? 740 if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 741 return yaml_parser_fetch_value(parser) 742 } 743 744 // Is it an alias? 745 if parser.buffer[parser.buffer_pos] == '*' { 746 return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) 747 } 748 749 // Is it an anchor? 750 if parser.buffer[parser.buffer_pos] == '&' { 751 return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) 752 } 753 754 // Is it a tag? 755 if parser.buffer[parser.buffer_pos] == '!' { 756 return yaml_parser_fetch_tag(parser) 757 } 758 759 // Is it a literal scalar? 760 if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { 761 return yaml_parser_fetch_block_scalar(parser, true) 762 } 763 764 // Is it a folded scalar? 765 if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { 766 return yaml_parser_fetch_block_scalar(parser, false) 767 } 768 769 // Is it a single-quoted scalar? 
770 if parser.buffer[parser.buffer_pos] == '\'' { 771 return yaml_parser_fetch_flow_scalar(parser, true) 772 } 773 774 // Is it a double-quoted scalar? 775 if parser.buffer[parser.buffer_pos] == '"' { 776 return yaml_parser_fetch_flow_scalar(parser, false) 777 } 778 779 // Is it a plain scalar? 780 // 781 // A plain scalar may start with any non-blank characters except 782 // 783 // '-', '?', ':', ',', '[', ']', '{', '}', 784 // '#', '&', '*', '!', '|', '>', '\'', '\"', 785 // '%', '@', '`'. 786 // 787 // In the block context (and, for the '-' indicator, in the flow context 788 // too), it may also start with the characters 789 // 790 // '-', '?', ':' 791 // 792 // if it is followed by a non-space character. 793 // 794 // The last rule is more restrictive than the specification requires. 795 // [Go] Make this logic more reasonable. 796 //switch parser.buffer[parser.buffer_pos] { 797 //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': 798 //} 799 if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || 800 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || 801 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || 802 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 803 parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || 804 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || 805 parser.buffer[parser.buffer_pos] == '!' 
|| parser.buffer[parser.buffer_pos] == '|' || 806 parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || 807 parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || 808 parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || 809 (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || 810 (parser.flow_level == 0 && 811 (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && 812 !is_blankz(parser.buffer, parser.buffer_pos+1)) { 813 return yaml_parser_fetch_plain_scalar(parser) 814 } 815 816 // If we don't determine the token type so far, it is an error. 817 return yaml_parser_set_scanner_error(parser, 818 "while scanning for the next token", parser.mark, 819 "found character that cannot start any token") 820 } 821 822 func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { 823 if !simple_key.possible { 824 return false, true 825 } 826 827 // The 1.2 specification says: 828 // 829 // "If the ? indicator is omitted, parsing needs to see past the 830 // implicit key to recognize it as such. To limit the amount of 831 // lookahead required, the “:” indicator must appear at most 1024 832 // Unicode characters beyond the start of the key. In addition, the key 833 // is restricted to a single line." 834 // 835 if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { 836 // Check if the potential simple key to be removed is required. 837 if simple_key.required { 838 return false, yaml_parser_set_scanner_error(parser, 839 "while scanning a simple key", simple_key.mark, 840 "could not find expected ':'") 841 } 842 simple_key.possible = false 843 return false, true 844 } 845 return true, true 846 } 847 848 // Check if a simple key may start at the current position and add it if 849 // needed. 
850 func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { 851 // A simple key is required at the current position if the scanner is in 852 // the block context and the current column coincides with the indentation 853 // level. 854 855 required := parser.flow_level == 0 && parser.indent == parser.mark.column 856 857 // 858 // If the current position may start a simple key, save it. 859 // 860 if parser.simple_key_allowed { 861 simple_key := yaml_simple_key_t{ 862 possible: true, 863 required: required, 864 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 865 mark: parser.mark, 866 } 867 868 if !yaml_parser_remove_simple_key(parser) { 869 return false 870 } 871 parser.simple_keys[len(parser.simple_keys)-1] = simple_key 872 parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 873 } 874 return true 875 } 876 877 // Remove a potential simple key at the current flow level. 878 func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { 879 i := len(parser.simple_keys) - 1 880 if parser.simple_keys[i].possible { 881 // If the key is required, it is an error. 882 if parser.simple_keys[i].required { 883 return yaml_parser_set_scanner_error(parser, 884 "while scanning a simple key", parser.simple_keys[i].mark, 885 "could not find expected ':'") 886 } 887 // Remove the key from the stack. 888 parser.simple_keys[i].possible = false 889 delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) 890 } 891 return true 892 } 893 894 // max_flow_level limits the flow_level 895 const max_flow_level = 10000 896 897 // Increase the flow level and resize the simple key list if needed. 898 func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { 899 // Reset the simple key on the next level. 
900 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ 901 possible: false, 902 required: false, 903 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 904 mark: parser.mark, 905 }) 906 907 // Increase the flow level. 908 parser.flow_level++ 909 if parser.flow_level > max_flow_level { 910 return yaml_parser_set_scanner_error(parser, 911 "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, 912 fmt.Sprintf("exceeded max depth of %d", max_flow_level)) 913 } 914 return true 915 } 916 917 // Decrease the flow level. 918 func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { 919 if parser.flow_level > 0 { 920 parser.flow_level-- 921 last := len(parser.simple_keys) - 1 922 delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) 923 parser.simple_keys = parser.simple_keys[:last] 924 } 925 return true 926 } 927 928 // max_indents limits the indents stack size 929 const max_indents = 10000 930 931 // Push the current indentation level to the stack and set the new level 932 // the current column is greater than the indentation level. In this case, 933 // append or insert the specified token into the token queue. 934 func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { 935 // In the flow context, do nothing. 936 if parser.flow_level > 0 { 937 return true 938 } 939 940 if parser.indent < column { 941 // Push the current indentation level to the stack and set the new 942 // indentation level. 943 parser.indents = append(parser.indents, parser.indent) 944 parser.indent = column 945 if len(parser.indents) > max_indents { 946 return yaml_parser_set_scanner_error(parser, 947 "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, 948 fmt.Sprintf("exceeded max depth of %d", max_indents)) 949 } 950 951 // Create a token and insert it into the queue. 
952 token := yaml_token_t{ 953 typ: typ, 954 start_mark: mark, 955 end_mark: mark, 956 } 957 if number > -1 { 958 number -= parser.tokens_parsed 959 } 960 yaml_insert_token(parser, number, &token) 961 } 962 return true 963 } 964 965 // Pop indentation levels from the indents stack until the current level 966 // becomes less or equal to the column. For each indentation level, append 967 // the BLOCK-END token. 968 func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { 969 // In the flow context, do nothing. 970 if parser.flow_level > 0 { 971 return true 972 } 973 974 // Loop through the indentation levels in the stack. 975 for parser.indent > column { 976 // Create a token and append it to the queue. 977 token := yaml_token_t{ 978 typ: yaml_BLOCK_END_TOKEN, 979 start_mark: parser.mark, 980 end_mark: parser.mark, 981 } 982 yaml_insert_token(parser, -1, &token) 983 984 // Pop the indentation level. 985 parser.indent = parser.indents[len(parser.indents)-1] 986 parser.indents = parser.indents[:len(parser.indents)-1] 987 } 988 return true 989 } 990 991 // Initialize the scanner and produce the STREAM-START token. 992 func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { 993 994 // Set the initial indentation. 995 parser.indent = -1 996 997 // Initialize the simple key stack. 998 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) 999 1000 parser.simple_keys_by_tok = make(map[int]int) 1001 1002 // A simple key is allowed at the beginning of the stream. 1003 parser.simple_key_allowed = true 1004 1005 // We have started. 1006 parser.stream_start_produced = true 1007 1008 // Create the STREAM-START token and append it to the queue. 1009 token := yaml_token_t{ 1010 typ: yaml_STREAM_START_TOKEN, 1011 start_mark: parser.mark, 1012 end_mark: parser.mark, 1013 encoding: parser.encoding, 1014 } 1015 yaml_insert_token(parser, -1, &token) 1016 return true 1017 } 1018 1019 // Produce the STREAM-END token and shut down the scanner. 
func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {

	// Force new line: if the stream ends in the middle of a line, move the
	// mark to column 0 of the following line.
	if parser.mark.column != 0 {
		parser.mark.column = 0
		parser.mark.line++
	}

	// Reset the indentation level. Unrolling to -1 closes every block
	// collection that is still open.
	if !yaml_parser_unroll_indent(parser, -1) {
		return false
	}

	// Reset simple keys. This fails if a required simple key is pending.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	parser.simple_key_allowed = false

	// Create the STREAM-END token and append it to the queue.
	token := yaml_token_t{
		typ:        yaml_STREAM_END_TOKEN,
		start_mark: parser.mark,
		end_mark:   parser.mark,
	}
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
//
// Before the directive is scanned, all open block collections are closed,
// the pending simple key is dropped and simple keys are disallowed. Returns
// false if any of these steps or the directive scan itself fails.
func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
	// Reset the indentation level.
	if !yaml_parser_unroll_indent(parser, -1) {
		return false
	}

	// Reset simple keys.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	parser.simple_key_allowed = false

	// Create the VERSION-DIRECTIVE or TAG-DIRECTIVE token.
	token := yaml_token_t{}
	if !yaml_parser_scan_directive(parser, &token) {
		return false
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the DOCUMENT-START or DOCUMENT-END token.
//
// typ selects which of the two tokens is created. The indicator itself
// ('---' or '...') is three characters long, which is why exactly three
// characters are skipped.
func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
	// Reset the indentation level.
	if !yaml_parser_unroll_indent(parser, -1) {
		return false
	}

	// Reset simple keys.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	parser.simple_key_allowed = false

	// Consume the token.
	start_mark := parser.mark

	skip(parser)
	skip(parser)
	skip(parser)

	end_mark := parser.mark

	// Create the DOCUMENT-START or DOCUMENT-END token.
	token := yaml_token_t{
		typ:        typ,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
//
// typ selects which of the two tokens is created. Returns false if saving
// the simple key or increasing the flow level fails.
func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
	// The indicators '[' and '{' may start a simple key.
	if !yaml_parser_save_simple_key(parser) {
		return false
	}

	// Increase the flow level.
	if !yaml_parser_increase_flow_level(parser) {
		return false
	}

	// A simple key may follow the indicators '[' and '{'.
	parser.simple_key_allowed = true

	// Consume the token.
	start_mark := parser.mark
	skip(parser)
	end_mark := parser.mark

	// Create the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
	token := yaml_token_t{
		typ:        typ,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
//
// typ selects which of the two tokens is created. Returns false if removing
// the pending simple key fails.
func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
	// Reset any potential simple key on the current flow level.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	// Decrease the flow level.
	if !yaml_parser_decrease_flow_level(parser) {
		return false
	}

	// No simple keys after the indicators ']' and '}'.
	parser.simple_key_allowed = false

	// Consume the token.

	start_mark := parser.mark
	skip(parser)
	end_mark := parser.mark

	// Create the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
	token := yaml_token_t{
		typ:        typ,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the FLOW-ENTRY token.
//
// Returns false if removing the pending simple key fails.
func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
	// Reset any potential simple keys on the current flow level.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	// Simple keys are allowed after ','.
	parser.simple_key_allowed = true

	// Consume the token.
	start_mark := parser.mark
	skip(parser)
	end_mark := parser.mark

	// Create the FLOW-ENTRY token and append it to the queue.
	token := yaml_token_t{
		typ:        yaml_FLOW_ENTRY_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the BLOCK-ENTRY token.
func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {
	// Check if the scanner is in the block context.
	if parser.flow_level == 0 {
		// Check if we are allowed to start a new entry.
		if !parser.simple_key_allowed {
			return yaml_parser_set_scanner_error(parser, "", parser.mark,
				"block sequence entries are not allowed in this context")
		}
		// Add the BLOCK-SEQUENCE-START token if needed.
		if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {
			return false
		}
	} else {
		// It is an error for the '-' indicator to occur in the flow context,
		// but we let the Parser detect and report about it because the Parser
		// is able to point to the context.
	}

	// Reset any potential simple keys on the current flow level.
1216 if !yaml_parser_remove_simple_key(parser) { 1217 return false 1218 } 1219 1220 // Simple keys are allowed after '-'. 1221 parser.simple_key_allowed = true 1222 1223 // Consume the token. 1224 start_mark := parser.mark 1225 skip(parser) 1226 end_mark := parser.mark 1227 1228 // Create the BLOCK-ENTRY token and append it to the queue. 1229 token := yaml_token_t{ 1230 typ: yaml_BLOCK_ENTRY_TOKEN, 1231 start_mark: start_mark, 1232 end_mark: end_mark, 1233 } 1234 yaml_insert_token(parser, -1, &token) 1235 return true 1236 } 1237 1238 // Produce the KEY token. 1239 func yaml_parser_fetch_key(parser *yaml_parser_t) bool { 1240 1241 // In the block context, additional checks are required. 1242 if parser.flow_level == 0 { 1243 // Check if we are allowed to start a new key (not nessesary simple). 1244 if !parser.simple_key_allowed { 1245 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1246 "mapping keys are not allowed in this context") 1247 } 1248 // Add the BLOCK-MAPPING-START token if needed. 1249 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1250 return false 1251 } 1252 } 1253 1254 // Reset any potential simple keys on the current flow level. 1255 if !yaml_parser_remove_simple_key(parser) { 1256 return false 1257 } 1258 1259 // Simple keys are allowed after '?' in the block context. 1260 parser.simple_key_allowed = parser.flow_level == 0 1261 1262 // Consume the token. 1263 start_mark := parser.mark 1264 skip(parser) 1265 end_mark := parser.mark 1266 1267 // Create the KEY token and append it to the queue. 1268 token := yaml_token_t{ 1269 typ: yaml_KEY_TOKEN, 1270 start_mark: start_mark, 1271 end_mark: end_mark, 1272 } 1273 yaml_insert_token(parser, -1, &token) 1274 return true 1275 } 1276 1277 // Produce the VALUE token. 
1278 func yaml_parser_fetch_value(parser *yaml_parser_t) bool { 1279 1280 simple_key := &parser.simple_keys[len(parser.simple_keys)-1] 1281 1282 // Have we found a simple key? 1283 if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { 1284 return false 1285 1286 } else if valid { 1287 1288 // Create the KEY token and insert it into the queue. 1289 token := yaml_token_t{ 1290 typ: yaml_KEY_TOKEN, 1291 start_mark: simple_key.mark, 1292 end_mark: simple_key.mark, 1293 } 1294 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) 1295 1296 // In the block context, we may need to add the BLOCK-MAPPING-START token. 1297 if !yaml_parser_roll_indent(parser, simple_key.mark.column, 1298 simple_key.token_number, 1299 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { 1300 return false 1301 } 1302 1303 // Remove the simple key. 1304 simple_key.possible = false 1305 delete(parser.simple_keys_by_tok, simple_key.token_number) 1306 1307 // A simple key cannot follow another simple key. 1308 parser.simple_key_allowed = false 1309 1310 } else { 1311 // The ':' indicator follows a complex key. 1312 1313 // In the block context, extra checks are required. 1314 if parser.flow_level == 0 { 1315 1316 // Check if we are allowed to start a complex value. 1317 if !parser.simple_key_allowed { 1318 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1319 "mapping values are not allowed in this context") 1320 } 1321 1322 // Add the BLOCK-MAPPING-START token if needed. 1323 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1324 return false 1325 } 1326 } 1327 1328 // Simple keys after ':' are allowed in the block context. 1329 parser.simple_key_allowed = parser.flow_level == 0 1330 } 1331 1332 // Consume the token. 1333 start_mark := parser.mark 1334 skip(parser) 1335 end_mark := parser.mark 1336 1337 // Create the VALUE token and append it to the queue. 
1338 token := yaml_token_t{ 1339 typ: yaml_VALUE_TOKEN, 1340 start_mark: start_mark, 1341 end_mark: end_mark, 1342 } 1343 yaml_insert_token(parser, -1, &token) 1344 return true 1345 } 1346 1347 // Produce the ALIAS or ANCHOR token. 1348 func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1349 // An anchor or an alias could be a simple key. 1350 if !yaml_parser_save_simple_key(parser) { 1351 return false 1352 } 1353 1354 // A simple key cannot follow an anchor or an alias. 1355 parser.simple_key_allowed = false 1356 1357 // Create the ALIAS or ANCHOR token and append it to the queue. 1358 var token yaml_token_t 1359 if !yaml_parser_scan_anchor(parser, &token, typ) { 1360 return false 1361 } 1362 yaml_insert_token(parser, -1, &token) 1363 return true 1364 } 1365 1366 // Produce the TAG token. 1367 func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { 1368 // A tag could be a simple key. 1369 if !yaml_parser_save_simple_key(parser) { 1370 return false 1371 } 1372 1373 // A simple key cannot follow a tag. 1374 parser.simple_key_allowed = false 1375 1376 // Create the TAG token and append it to the queue. 1377 var token yaml_token_t 1378 if !yaml_parser_scan_tag(parser, &token) { 1379 return false 1380 } 1381 yaml_insert_token(parser, -1, &token) 1382 return true 1383 } 1384 1385 // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 1386 func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { 1387 // Remove any potential simple keys. 1388 if !yaml_parser_remove_simple_key(parser) { 1389 return false 1390 } 1391 1392 // A simple key may follow a block scalar. 1393 parser.simple_key_allowed = true 1394 1395 // Create the SCALAR token and append it to the queue. 
1396 var token yaml_token_t 1397 if !yaml_parser_scan_block_scalar(parser, &token, literal) { 1398 return false 1399 } 1400 yaml_insert_token(parser, -1, &token) 1401 return true 1402 } 1403 1404 // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 1405 func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { 1406 // A plain scalar could be a simple key. 1407 if !yaml_parser_save_simple_key(parser) { 1408 return false 1409 } 1410 1411 // A simple key cannot follow a flow scalar. 1412 parser.simple_key_allowed = false 1413 1414 // Create the SCALAR token and append it to the queue. 1415 var token yaml_token_t 1416 if !yaml_parser_scan_flow_scalar(parser, &token, single) { 1417 return false 1418 } 1419 yaml_insert_token(parser, -1, &token) 1420 return true 1421 } 1422 1423 // Produce the SCALAR(...,plain) token. 1424 func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { 1425 // A plain scalar could be a simple key. 1426 if !yaml_parser_save_simple_key(parser) { 1427 return false 1428 } 1429 1430 // A simple key cannot follow a flow scalar. 1431 parser.simple_key_allowed = false 1432 1433 // Create the SCALAR token and append it to the queue. 1434 var token yaml_token_t 1435 if !yaml_parser_scan_plain_scalar(parser, &token) { 1436 return false 1437 } 1438 yaml_insert_token(parser, -1, &token) 1439 return true 1440 } 1441 1442 // Eat whitespaces and comments until the next token is found. 1443 func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { 1444 1445 // Until the next token is not found. 1446 for { 1447 // Allow the BOM mark to start a line. 1448 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1449 return false 1450 } 1451 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { 1452 skip(parser) 1453 } 1454 1455 // Eat whitespaces. 
1456 // Tabs are allowed: 1457 // - in the flow context 1458 // - in the block context, but not at the beginning of the line or 1459 // after '-', '?', or ':' (complex value). 1460 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1461 return false 1462 } 1463 1464 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { 1465 skip(parser) 1466 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1467 return false 1468 } 1469 } 1470 1471 // Eat a comment until a line break. 1472 if parser.buffer[parser.buffer_pos] == '#' { 1473 for !is_breakz(parser.buffer, parser.buffer_pos) { 1474 skip(parser) 1475 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1476 return false 1477 } 1478 } 1479 } 1480 1481 // If it is a line break, eat it. 1482 if is_break(parser.buffer, parser.buffer_pos) { 1483 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1484 return false 1485 } 1486 skip_line(parser) 1487 1488 // In the block context, a new line may start a simple key. 1489 if parser.flow_level == 0 { 1490 parser.simple_key_allowed = true 1491 } 1492 } else { 1493 break // We have found a token. 1494 } 1495 } 1496 1497 return true 1498 } 1499 1500 // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 1501 // 1502 // Scope: 1503 // %YAML 1.1 # a comment \n 1504 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1505 // %TAG !yaml! tag:yaml.org,2002: \n 1506 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1507 // 1508 func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { 1509 // Eat '%'. 1510 start_mark := parser.mark 1511 skip(parser) 1512 1513 // Scan the directive name. 1514 var name []byte 1515 if !yaml_parser_scan_directive_name(parser, start_mark, &name) { 1516 return false 1517 } 1518 1519 // Is it a YAML directive? 1520 if bytes.Equal(name, []byte("YAML")) { 1521 // Scan the VERSION directive value. 
1522 var major, minor int8 1523 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { 1524 return false 1525 } 1526 end_mark := parser.mark 1527 1528 // Create a VERSION-DIRECTIVE token. 1529 *token = yaml_token_t{ 1530 typ: yaml_VERSION_DIRECTIVE_TOKEN, 1531 start_mark: start_mark, 1532 end_mark: end_mark, 1533 major: major, 1534 minor: minor, 1535 } 1536 1537 // Is it a TAG directive? 1538 } else if bytes.Equal(name, []byte("TAG")) { 1539 // Scan the TAG directive value. 1540 var handle, prefix []byte 1541 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { 1542 return false 1543 } 1544 end_mark := parser.mark 1545 1546 // Create a TAG-DIRECTIVE token. 1547 *token = yaml_token_t{ 1548 typ: yaml_TAG_DIRECTIVE_TOKEN, 1549 start_mark: start_mark, 1550 end_mark: end_mark, 1551 value: handle, 1552 prefix: prefix, 1553 } 1554 1555 // Unknown directive. 1556 } else { 1557 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1558 start_mark, "found unknown directive name") 1559 return false 1560 } 1561 1562 // Eat the rest of the line including any comments. 1563 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1564 return false 1565 } 1566 1567 for is_blank(parser.buffer, parser.buffer_pos) { 1568 skip(parser) 1569 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1570 return false 1571 } 1572 } 1573 1574 if parser.buffer[parser.buffer_pos] == '#' { 1575 for !is_breakz(parser.buffer, parser.buffer_pos) { 1576 skip(parser) 1577 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1578 return false 1579 } 1580 } 1581 } 1582 1583 // Check if we are at the end of the line. 1584 if !is_breakz(parser.buffer, parser.buffer_pos) { 1585 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1586 start_mark, "did not find expected comment or line break") 1587 return false 1588 } 1589 1590 // Eat a line break. 
1591 if is_break(parser.buffer, parser.buffer_pos) { 1592 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1593 return false 1594 } 1595 skip_line(parser) 1596 } 1597 1598 return true 1599 } 1600 1601 // Scan the directive name. 1602 // 1603 // Scope: 1604 // %YAML 1.1 # a comment \n 1605 // ^^^^ 1606 // %TAG !yaml! tag:yaml.org,2002: \n 1607 // ^^^ 1608 // 1609 func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { 1610 // Consume the directive name. 1611 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1612 return false 1613 } 1614 1615 var s []byte 1616 for is_alpha(parser.buffer, parser.buffer_pos) { 1617 s = read(parser, s) 1618 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1619 return false 1620 } 1621 } 1622 1623 // Check if the name is empty. 1624 if len(s) == 0 { 1625 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1626 start_mark, "could not find expected directive name") 1627 return false 1628 } 1629 1630 // Check for an blank character after the name. 1631 if !is_blankz(parser.buffer, parser.buffer_pos) { 1632 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1633 start_mark, "found unexpected non-alphabetical character") 1634 return false 1635 } 1636 *name = s 1637 return true 1638 } 1639 1640 // Scan the value of VERSION-DIRECTIVE. 1641 // 1642 // Scope: 1643 // %YAML 1.1 # a comment \n 1644 // ^^^^^^ 1645 func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { 1646 // Eat whitespaces. 1647 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1648 return false 1649 } 1650 for is_blank(parser.buffer, parser.buffer_pos) { 1651 skip(parser) 1652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1653 return false 1654 } 1655 } 1656 1657 // Consume the major version number. 
1658 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { 1659 return false 1660 } 1661 1662 // Eat '.'. 1663 if parser.buffer[parser.buffer_pos] != '.' { 1664 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1665 start_mark, "did not find expected digit or '.' character") 1666 } 1667 1668 skip(parser) 1669 1670 // Consume the minor version number. 1671 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { 1672 return false 1673 } 1674 return true 1675 } 1676 1677 const max_number_length = 2 1678 1679 // Scan the version number of VERSION-DIRECTIVE. 1680 // 1681 // Scope: 1682 // %YAML 1.1 # a comment \n 1683 // ^ 1684 // %YAML 1.1 # a comment \n 1685 // ^ 1686 func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { 1687 1688 // Repeat while the next character is digit. 1689 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1690 return false 1691 } 1692 var value, length int8 1693 for is_digit(parser.buffer, parser.buffer_pos) { 1694 // Check if the number is too long. 1695 length++ 1696 if length > max_number_length { 1697 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1698 start_mark, "found extremely long version number") 1699 } 1700 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) 1701 skip(parser) 1702 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1703 return false 1704 } 1705 } 1706 1707 // Check if the number was present. 1708 if length == 0 { 1709 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1710 start_mark, "did not find expected version number") 1711 } 1712 *number = value 1713 return true 1714 } 1715 1716 // Scan the value of a TAG-DIRECTIVE token. 1717 // 1718 // Scope: 1719 // %TAG !yaml! 
tag:yaml.org,2002: \n 1720 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1721 // 1722 func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { 1723 var handle_value, prefix_value []byte 1724 1725 // Eat whitespaces. 1726 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1727 return false 1728 } 1729 1730 for is_blank(parser.buffer, parser.buffer_pos) { 1731 skip(parser) 1732 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1733 return false 1734 } 1735 } 1736 1737 // Scan a handle. 1738 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { 1739 return false 1740 } 1741 1742 // Expect a whitespace. 1743 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1744 return false 1745 } 1746 if !is_blank(parser.buffer, parser.buffer_pos) { 1747 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1748 start_mark, "did not find expected whitespace") 1749 return false 1750 } 1751 1752 // Eat whitespaces. 1753 for is_blank(parser.buffer, parser.buffer_pos) { 1754 skip(parser) 1755 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1756 return false 1757 } 1758 } 1759 1760 // Scan a prefix. 1761 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { 1762 return false 1763 } 1764 1765 // Expect a whitespace or line break. 1766 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1767 return false 1768 } 1769 if !is_blankz(parser.buffer, parser.buffer_pos) { 1770 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1771 start_mark, "did not find expected whitespace or line break") 1772 return false 1773 } 1774 1775 *handle = handle_value 1776 *prefix = prefix_value 1777 return true 1778 } 1779 1780 func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { 1781 var s []byte 1782 1783 // Eat the indicator character. 
1784 start_mark := parser.mark 1785 skip(parser) 1786 1787 // Consume the value. 1788 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1789 return false 1790 } 1791 1792 for is_alpha(parser.buffer, parser.buffer_pos) { 1793 s = read(parser, s) 1794 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1795 return false 1796 } 1797 } 1798 1799 end_mark := parser.mark 1800 1801 /* 1802 * Check if length of the anchor is greater than 0 and it is followed by 1803 * a whitespace character or one of the indicators: 1804 * 1805 * '?', ':', ',', ']', '}', '%', '@', '`'. 1806 */ 1807 1808 if len(s) == 0 || 1809 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || 1810 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || 1811 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || 1812 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || 1813 parser.buffer[parser.buffer_pos] == '`') { 1814 context := "while scanning an alias" 1815 if typ == yaml_ANCHOR_TOKEN { 1816 context = "while scanning an anchor" 1817 } 1818 yaml_parser_set_scanner_error(parser, context, start_mark, 1819 "did not find expected alphabetic or numeric character") 1820 return false 1821 } 1822 1823 // Create a token. 1824 *token = yaml_token_t{ 1825 typ: typ, 1826 start_mark: start_mark, 1827 end_mark: end_mark, 1828 value: s, 1829 } 1830 1831 return true 1832 } 1833 1834 /* 1835 * Scan a TAG token. 1836 */ 1837 1838 func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { 1839 var handle, suffix []byte 1840 1841 start_mark := parser.mark 1842 1843 // Check if the tag is in the canonical form. 
1844 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1845 return false 1846 } 1847 1848 if parser.buffer[parser.buffer_pos+1] == '<' { 1849 // Keep the handle as '' 1850 1851 // Eat '!<' 1852 skip(parser) 1853 skip(parser) 1854 1855 // Consume the tag value. 1856 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1857 return false 1858 } 1859 1860 // Check for '>' and eat it. 1861 if parser.buffer[parser.buffer_pos] != '>' { 1862 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1863 start_mark, "did not find the expected '>'") 1864 return false 1865 } 1866 1867 skip(parser) 1868 } else { 1869 // The tag has either the '!suffix' or the '!handle!suffix' form. 1870 1871 // First, try to scan a handle. 1872 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { 1873 return false 1874 } 1875 1876 // Check if it is, indeed, handle. 1877 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { 1878 // Scan the suffix now. 1879 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1880 return false 1881 } 1882 } else { 1883 // It wasn't a handle after all. Scan the rest of the tag. 1884 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { 1885 return false 1886 } 1887 1888 // Set the handle to '!'. 1889 handle = []byte{'!'} 1890 1891 // A special case: the '!' tag. Set the handle to '' and the 1892 // suffix to '!'. 1893 if len(suffix) == 0 { 1894 handle, suffix = suffix, handle 1895 } 1896 } 1897 } 1898 1899 // Check the character which ends the tag. 1900 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1901 return false 1902 } 1903 if !is_blankz(parser.buffer, parser.buffer_pos) { 1904 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1905 start_mark, "did not find expected whitespace or line break") 1906 return false 1907 } 1908 1909 end_mark := parser.mark 1910 1911 // Create a token. 
1912 *token = yaml_token_t{ 1913 typ: yaml_TAG_TOKEN, 1914 start_mark: start_mark, 1915 end_mark: end_mark, 1916 value: handle, 1917 suffix: suffix, 1918 } 1919 return true 1920 } 1921 1922 // Scan a tag handle. 1923 func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { 1924 // Check the initial '!' character. 1925 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1926 return false 1927 } 1928 if parser.buffer[parser.buffer_pos] != '!' { 1929 yaml_parser_set_scanner_tag_error(parser, directive, 1930 start_mark, "did not find expected '!'") 1931 return false 1932 } 1933 1934 var s []byte 1935 1936 // Copy the '!' character. 1937 s = read(parser, s) 1938 1939 // Copy all subsequent alphabetical and numerical characters. 1940 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1941 return false 1942 } 1943 for is_alpha(parser.buffer, parser.buffer_pos) { 1944 s = read(parser, s) 1945 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1946 return false 1947 } 1948 } 1949 1950 // Check if the trailing character is '!' and copy it. 1951 if parser.buffer[parser.buffer_pos] == '!' { 1952 s = read(parser, s) 1953 } else { 1954 // It's either the '!' tag or not really a tag handle. If it's a %TAG 1955 // directive, it's an error. If it's a tag token, it must be a part of URI. 1956 if directive && string(s) != "!" { 1957 yaml_parser_set_scanner_tag_error(parser, directive, 1958 start_mark, "did not find expected '!'") 1959 return false 1960 } 1961 } 1962 1963 *handle = s 1964 return true 1965 } 1966 1967 // Scan a tag. 1968 func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { 1969 //size_t length = head ? strlen((char *)head) : 0 1970 var s []byte 1971 hasTag := len(head) > 0 1972 1973 // Copy the head if needed. 1974 // 1975 // Note that we don't copy the leading '!' character. 
1976 if len(head) > 1 { 1977 s = append(s, head[1:]...) 1978 } 1979 1980 // Scan the tag. 1981 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1982 return false 1983 } 1984 1985 // The set of characters that may appear in URI is as follows: 1986 // 1987 // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', 1988 // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', 1989 // '%'. 1990 // [Go] Convert this into more reasonable logic. 1991 for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || 1992 parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || 1993 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || 1994 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || 1995 parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || 1996 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || 1997 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || 1998 parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || 1999 parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || 2000 parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || 2001 parser.buffer[parser.buffer_pos] == '%' { 2002 // Check if it is a URI-escape sequence. 
2003 if parser.buffer[parser.buffer_pos] == '%' { 2004 if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { 2005 return false 2006 } 2007 } else { 2008 s = read(parser, s) 2009 } 2010 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2011 return false 2012 } 2013 hasTag = true 2014 } 2015 2016 if !hasTag { 2017 yaml_parser_set_scanner_tag_error(parser, directive, 2018 start_mark, "did not find expected tag URI") 2019 return false 2020 } 2021 *uri = s 2022 return true 2023 } 2024 2025 // Decode an URI-escape sequence corresponding to a single UTF-8 character. 2026 func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool { 2027 2028 // Decode the required number of characters. 2029 w := 1024 2030 for w > 0 { 2031 // Check for a URI-escaped octet. 2032 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { 2033 return false 2034 } 2035 2036 if !(parser.buffer[parser.buffer_pos] == '%' && 2037 is_hex(parser.buffer, parser.buffer_pos+1) && 2038 is_hex(parser.buffer, parser.buffer_pos+2)) { 2039 return yaml_parser_set_scanner_tag_error(parser, directive, 2040 start_mark, "did not find URI escaped octet") 2041 } 2042 2043 // Get the octet. 2044 octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2)) 2045 2046 // If it is the leading octet, determine the length of the UTF-8 sequence. 2047 if w == 1024 { 2048 w = width(octet) 2049 if w == 0 { 2050 return yaml_parser_set_scanner_tag_error(parser, directive, 2051 start_mark, "found an incorrect leading UTF-8 octet") 2052 } 2053 } else { 2054 // Check if the trailing octet is correct. 2055 if octet&0xC0 != 0x80 { 2056 return yaml_parser_set_scanner_tag_error(parser, directive, 2057 start_mark, "found an incorrect trailing UTF-8 octet") 2058 } 2059 } 2060 2061 // Copy the octet and move the pointers. 
2062 *s = append(*s, octet) 2063 skip(parser) 2064 skip(parser) 2065 skip(parser) 2066 w-- 2067 } 2068 return true 2069 } 2070 2071 // Scan a block scalar. 2072 func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool { 2073 // Eat the indicator '|' or '>'. 2074 start_mark := parser.mark 2075 skip(parser) 2076 2077 // Scan the additional block scalar indicators. 2078 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2079 return false 2080 } 2081 2082 // Check for a chomping indicator. 2083 var chomping, increment int 2084 if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { 2085 // Set the chomping method and eat the indicator. 2086 if parser.buffer[parser.buffer_pos] == '+' { 2087 chomping = +1 2088 } else { 2089 chomping = -1 2090 } 2091 skip(parser) 2092 2093 // Check for an indentation indicator. 2094 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2095 return false 2096 } 2097 if is_digit(parser.buffer, parser.buffer_pos) { 2098 // Check that the indentation is greater than 0. 2099 if parser.buffer[parser.buffer_pos] == '0' { 2100 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 2101 start_mark, "found an indentation indicator equal to 0") 2102 return false 2103 } 2104 2105 // Get the indentation level and eat the indicator. 2106 increment = as_digit(parser.buffer, parser.buffer_pos) 2107 skip(parser) 2108 } 2109 2110 } else if is_digit(parser.buffer, parser.buffer_pos) { 2111 // Do the same as above, but in the opposite order. 
2112 2113 if parser.buffer[parser.buffer_pos] == '0' { 2114 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 2115 start_mark, "found an indentation indicator equal to 0") 2116 return false 2117 } 2118 increment = as_digit(parser.buffer, parser.buffer_pos) 2119 skip(parser) 2120 2121 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2122 return false 2123 } 2124 if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { 2125 if parser.buffer[parser.buffer_pos] == '+' { 2126 chomping = +1 2127 } else { 2128 chomping = -1 2129 } 2130 skip(parser) 2131 } 2132 } 2133 2134 // Eat whitespaces and comments to the end of the line. 2135 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2136 return false 2137 } 2138 for is_blank(parser.buffer, parser.buffer_pos) { 2139 skip(parser) 2140 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2141 return false 2142 } 2143 } 2144 if parser.buffer[parser.buffer_pos] == '#' { 2145 for !is_breakz(parser.buffer, parser.buffer_pos) { 2146 skip(parser) 2147 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2148 return false 2149 } 2150 } 2151 } 2152 2153 // Check if we are at the end of the line. 2154 if !is_breakz(parser.buffer, parser.buffer_pos) { 2155 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 2156 start_mark, "did not find expected comment or line break") 2157 return false 2158 } 2159 2160 // Eat a line break. 2161 if is_break(parser.buffer, parser.buffer_pos) { 2162 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2163 return false 2164 } 2165 skip_line(parser) 2166 } 2167 2168 end_mark := parser.mark 2169 2170 // Set the indentation level if it was specified. 
2171 var indent int 2172 if increment > 0 { 2173 if parser.indent >= 0 { 2174 indent = parser.indent + increment 2175 } else { 2176 indent = increment 2177 } 2178 } 2179 2180 // Scan the leading line breaks and determine the indentation level if needed. 2181 var s, leading_break, trailing_breaks []byte 2182 if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { 2183 return false 2184 } 2185 2186 // Scan the block scalar content. 2187 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2188 return false 2189 } 2190 var leading_blank, trailing_blank bool 2191 for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) { 2192 // We are at the beginning of a non-empty line. 2193 2194 // Is it a trailing whitespace? 2195 trailing_blank = is_blank(parser.buffer, parser.buffer_pos) 2196 2197 // Check if we need to fold the leading line break. 2198 if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' { 2199 // Do we need to join the lines by space? 2200 if len(trailing_breaks) == 0 { 2201 s = append(s, ' ') 2202 } 2203 } else { 2204 s = append(s, leading_break...) 2205 } 2206 leading_break = leading_break[:0] 2207 2208 // Append the remaining line breaks. 2209 s = append(s, trailing_breaks...) 2210 trailing_breaks = trailing_breaks[:0] 2211 2212 // Is it a leading whitespace? 2213 leading_blank = is_blank(parser.buffer, parser.buffer_pos) 2214 2215 // Consume the current line. 2216 for !is_breakz(parser.buffer, parser.buffer_pos) { 2217 s = read(parser, s) 2218 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2219 return false 2220 } 2221 } 2222 2223 // Consume the line break. 2224 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2225 return false 2226 } 2227 2228 leading_break = read_line(parser, leading_break) 2229 2230 // Eat the following indentation spaces and line breaks. 
2231 if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { 2232 return false 2233 } 2234 } 2235 2236 // Chomp the tail. 2237 if chomping != -1 { 2238 s = append(s, leading_break...) 2239 } 2240 if chomping == 1 { 2241 s = append(s, trailing_breaks...) 2242 } 2243 2244 // Create a token. 2245 *token = yaml_token_t{ 2246 typ: yaml_SCALAR_TOKEN, 2247 start_mark: start_mark, 2248 end_mark: end_mark, 2249 value: s, 2250 style: yaml_LITERAL_SCALAR_STYLE, 2251 } 2252 if !literal { 2253 token.style = yaml_FOLDED_SCALAR_STYLE 2254 } 2255 return true 2256 } 2257 2258 // Scan indentation spaces and line breaks for a block scalar. Determine the 2259 // indentation level if needed. 2260 func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool { 2261 *end_mark = parser.mark 2262 2263 // Eat the indentation spaces and line breaks. 2264 max_indent := 0 2265 for { 2266 // Eat the indentation spaces. 2267 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2268 return false 2269 } 2270 for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) { 2271 skip(parser) 2272 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2273 return false 2274 } 2275 } 2276 if parser.mark.column > max_indent { 2277 max_indent = parser.mark.column 2278 } 2279 2280 // Check for a tab character messing the indentation. 2281 if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) { 2282 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 2283 start_mark, "found a tab character where an indentation space is expected") 2284 } 2285 2286 // Have we found a non-empty line? 2287 if !is_break(parser.buffer, parser.buffer_pos) { 2288 break 2289 } 2290 2291 // Consume the line break. 
2292 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2293 return false 2294 } 2295 // [Go] Should really be returning breaks instead. 2296 *breaks = read_line(parser, *breaks) 2297 *end_mark = parser.mark 2298 } 2299 2300 // Determine the indentation level if needed. 2301 if *indent == 0 { 2302 *indent = max_indent 2303 if *indent < parser.indent+1 { 2304 *indent = parser.indent + 1 2305 } 2306 if *indent < 1 { 2307 *indent = 1 2308 } 2309 } 2310 return true 2311 } 2312 2313 // Scan a quoted scalar. 2314 func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { 2315 // Eat the left quote. 2316 start_mark := parser.mark 2317 skip(parser) 2318 2319 // Consume the content of the quoted scalar. 2320 var s, leading_break, trailing_breaks, whitespaces []byte 2321 for { 2322 // Check that there are no document indicators at the beginning of the line. 2323 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 2324 return false 2325 } 2326 2327 if parser.mark.column == 0 && 2328 ((parser.buffer[parser.buffer_pos+0] == '-' && 2329 parser.buffer[parser.buffer_pos+1] == '-' && 2330 parser.buffer[parser.buffer_pos+2] == '-') || 2331 (parser.buffer[parser.buffer_pos+0] == '.' && 2332 parser.buffer[parser.buffer_pos+1] == '.' && 2333 parser.buffer[parser.buffer_pos+2] == '.')) && 2334 is_blankz(parser.buffer, parser.buffer_pos+3) { 2335 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 2336 start_mark, "found unexpected document indicator") 2337 return false 2338 } 2339 2340 // Check for EOF. 2341 if is_z(parser.buffer, parser.buffer_pos) { 2342 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 2343 start_mark, "found unexpected end of stream") 2344 return false 2345 } 2346 2347 // Consume non-blank characters. 
2348 leading_blanks := false 2349 for !is_blankz(parser.buffer, parser.buffer_pos) { 2350 if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { 2351 // Is is an escaped single quote. 2352 s = append(s, '\'') 2353 skip(parser) 2354 skip(parser) 2355 2356 } else if single && parser.buffer[parser.buffer_pos] == '\'' { 2357 // It is a right single quote. 2358 break 2359 } else if !single && parser.buffer[parser.buffer_pos] == '"' { 2360 // It is a right double quote. 2361 break 2362 2363 } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { 2364 // It is an escaped line break. 2365 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { 2366 return false 2367 } 2368 skip(parser) 2369 skip_line(parser) 2370 leading_blanks = true 2371 break 2372 2373 } else if !single && parser.buffer[parser.buffer_pos] == '\\' { 2374 // It is an escape sequence. 2375 code_length := 0 2376 2377 // Check the escape character. 
2378 switch parser.buffer[parser.buffer_pos+1] { 2379 case '0': 2380 s = append(s, 0) 2381 case 'a': 2382 s = append(s, '\x07') 2383 case 'b': 2384 s = append(s, '\x08') 2385 case 't', '\t': 2386 s = append(s, '\x09') 2387 case 'n': 2388 s = append(s, '\x0A') 2389 case 'v': 2390 s = append(s, '\x0B') 2391 case 'f': 2392 s = append(s, '\x0C') 2393 case 'r': 2394 s = append(s, '\x0D') 2395 case 'e': 2396 s = append(s, '\x1B') 2397 case ' ': 2398 s = append(s, '\x20') 2399 case '"': 2400 s = append(s, '"') 2401 case '\'': 2402 s = append(s, '\'') 2403 case '\\': 2404 s = append(s, '\\') 2405 case 'N': // NEL (#x85) 2406 s = append(s, '\xC2') 2407 s = append(s, '\x85') 2408 case '_': // #xA0 2409 s = append(s, '\xC2') 2410 s = append(s, '\xA0') 2411 case 'L': // LS (#x2028) 2412 s = append(s, '\xE2') 2413 s = append(s, '\x80') 2414 s = append(s, '\xA8') 2415 case 'P': // PS (#x2029) 2416 s = append(s, '\xE2') 2417 s = append(s, '\x80') 2418 s = append(s, '\xA9') 2419 case 'x': 2420 code_length = 2 2421 case 'u': 2422 code_length = 4 2423 case 'U': 2424 code_length = 8 2425 default: 2426 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2427 start_mark, "found unknown escape character") 2428 return false 2429 } 2430 2431 skip(parser) 2432 skip(parser) 2433 2434 // Consume an arbitrary escape code. 2435 if code_length > 0 { 2436 var value int 2437 2438 // Scan the character value. 2439 if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { 2440 return false 2441 } 2442 for k := 0; k < code_length; k++ { 2443 if !is_hex(parser.buffer, parser.buffer_pos+k) { 2444 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2445 start_mark, "did not find expected hexdecimal number") 2446 return false 2447 } 2448 value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) 2449 } 2450 2451 // Check the value and write the character. 
2452 if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { 2453 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2454 start_mark, "found invalid Unicode character escape code") 2455 return false 2456 } 2457 if value <= 0x7F { 2458 s = append(s, byte(value)) 2459 } else if value <= 0x7FF { 2460 s = append(s, byte(0xC0+(value>>6))) 2461 s = append(s, byte(0x80+(value&0x3F))) 2462 } else if value <= 0xFFFF { 2463 s = append(s, byte(0xE0+(value>>12))) 2464 s = append(s, byte(0x80+((value>>6)&0x3F))) 2465 s = append(s, byte(0x80+(value&0x3F))) 2466 } else { 2467 s = append(s, byte(0xF0+(value>>18))) 2468 s = append(s, byte(0x80+((value>>12)&0x3F))) 2469 s = append(s, byte(0x80+((value>>6)&0x3F))) 2470 s = append(s, byte(0x80+(value&0x3F))) 2471 } 2472 2473 // Advance the pointer. 2474 for k := 0; k < code_length; k++ { 2475 skip(parser) 2476 } 2477 } 2478 } else { 2479 // It is a non-escaped non-blank character. 2480 s = read(parser, s) 2481 } 2482 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2483 return false 2484 } 2485 } 2486 2487 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2488 return false 2489 } 2490 2491 // Check if we are at the end of the scalar. 2492 if single { 2493 if parser.buffer[parser.buffer_pos] == '\'' { 2494 break 2495 } 2496 } else { 2497 if parser.buffer[parser.buffer_pos] == '"' { 2498 break 2499 } 2500 } 2501 2502 // Consume blank characters. 2503 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { 2504 if is_blank(parser.buffer, parser.buffer_pos) { 2505 // Consume a space or a tab character. 2506 if !leading_blanks { 2507 whitespaces = read(parser, whitespaces) 2508 } else { 2509 skip(parser) 2510 } 2511 } else { 2512 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2513 return false 2514 } 2515 2516 // Check if it is a first line break. 
2517 if !leading_blanks { 2518 whitespaces = whitespaces[:0] 2519 leading_break = read_line(parser, leading_break) 2520 leading_blanks = true 2521 } else { 2522 trailing_breaks = read_line(parser, trailing_breaks) 2523 } 2524 } 2525 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2526 return false 2527 } 2528 } 2529 2530 // Join the whitespaces or fold line breaks. 2531 if leading_blanks { 2532 // Do we need to fold line breaks? 2533 if len(leading_break) > 0 && leading_break[0] == '\n' { 2534 if len(trailing_breaks) == 0 { 2535 s = append(s, ' ') 2536 } else { 2537 s = append(s, trailing_breaks...) 2538 } 2539 } else { 2540 s = append(s, leading_break...) 2541 s = append(s, trailing_breaks...) 2542 } 2543 trailing_breaks = trailing_breaks[:0] 2544 leading_break = leading_break[:0] 2545 } else { 2546 s = append(s, whitespaces...) 2547 whitespaces = whitespaces[:0] 2548 } 2549 } 2550 2551 // Eat the right quote. 2552 skip(parser) 2553 end_mark := parser.mark 2554 2555 // Create a token. 2556 *token = yaml_token_t{ 2557 typ: yaml_SCALAR_TOKEN, 2558 start_mark: start_mark, 2559 end_mark: end_mark, 2560 value: s, 2561 style: yaml_SINGLE_QUOTED_SCALAR_STYLE, 2562 } 2563 if !single { 2564 token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE 2565 } 2566 return true 2567 } 2568 2569 // Scan a plain scalar. 2570 func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { 2571 2572 var s, leading_break, trailing_breaks, whitespaces []byte 2573 var leading_blanks bool 2574 var indent = parser.indent + 1 2575 2576 start_mark := parser.mark 2577 end_mark := parser.mark 2578 2579 // Consume the content of the plain scalar. 2580 for { 2581 // Check for a document indicator. 
2582 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 2583 return false 2584 } 2585 if parser.mark.column == 0 && 2586 ((parser.buffer[parser.buffer_pos+0] == '-' && 2587 parser.buffer[parser.buffer_pos+1] == '-' && 2588 parser.buffer[parser.buffer_pos+2] == '-') || 2589 (parser.buffer[parser.buffer_pos+0] == '.' && 2590 parser.buffer[parser.buffer_pos+1] == '.' && 2591 parser.buffer[parser.buffer_pos+2] == '.')) && 2592 is_blankz(parser.buffer, parser.buffer_pos+3) { 2593 break 2594 } 2595 2596 // Check for a comment. 2597 if parser.buffer[parser.buffer_pos] == '#' { 2598 break 2599 } 2600 2601 // Consume non-blank characters. 2602 for !is_blankz(parser.buffer, parser.buffer_pos) { 2603 2604 // Check for indicators that may end a plain scalar. 2605 if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || 2606 (parser.flow_level > 0 && 2607 (parser.buffer[parser.buffer_pos] == ',' || 2608 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || 2609 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 2610 parser.buffer[parser.buffer_pos] == '}')) { 2611 break 2612 } 2613 2614 // Check if we need to join whitespaces and breaks. 2615 if leading_blanks || len(whitespaces) > 0 { 2616 if leading_blanks { 2617 // Do we need to fold line breaks? 2618 if leading_break[0] == '\n' { 2619 if len(trailing_breaks) == 0 { 2620 s = append(s, ' ') 2621 } else { 2622 s = append(s, trailing_breaks...) 2623 } 2624 } else { 2625 s = append(s, leading_break...) 2626 s = append(s, trailing_breaks...) 2627 } 2628 trailing_breaks = trailing_breaks[:0] 2629 leading_break = leading_break[:0] 2630 leading_blanks = false 2631 } else { 2632 s = append(s, whitespaces...) 2633 whitespaces = whitespaces[:0] 2634 } 2635 } 2636 2637 // Copy the character. 
2638 s = read(parser, s) 2639 2640 end_mark = parser.mark 2641 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2642 return false 2643 } 2644 } 2645 2646 // Is it the end? 2647 if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { 2648 break 2649 } 2650 2651 // Consume blank characters. 2652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2653 return false 2654 } 2655 2656 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { 2657 if is_blank(parser.buffer, parser.buffer_pos) { 2658 2659 // Check for tab characters that abuse indentation. 2660 if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { 2661 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 2662 start_mark, "found a tab character that violates indentation") 2663 return false 2664 } 2665 2666 // Consume a space or a tab character. 2667 if !leading_blanks { 2668 whitespaces = read(parser, whitespaces) 2669 } else { 2670 skip(parser) 2671 } 2672 } else { 2673 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2674 return false 2675 } 2676 2677 // Check if it is a first line break. 2678 if !leading_blanks { 2679 whitespaces = whitespaces[:0] 2680 leading_break = read_line(parser, leading_break) 2681 leading_blanks = true 2682 } else { 2683 trailing_breaks = read_line(parser, trailing_breaks) 2684 } 2685 } 2686 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2687 return false 2688 } 2689 } 2690 2691 // Check indentation level. 2692 if parser.flow_level == 0 && parser.mark.column < indent { 2693 break 2694 } 2695 } 2696 2697 // Create a token. 2698 *token = yaml_token_t{ 2699 typ: yaml_SCALAR_TOKEN, 2700 start_mark: start_mark, 2701 end_mark: end_mark, 2702 value: s, 2703 style: yaml_PLAIN_SCALAR_STYLE, 2704 } 2705 2706 // Note that we change the 'simple_key_allowed' flag. 
2707 if leading_blanks { 2708 parser.simple_key_allowed = true 2709 } 2710 return true 2711 }