scannerc.go (87960B)
1 // 2 // Copyright (c) 2011-2019 Canonical Ltd 3 // Copyright (c) 2006-2010 Kirill Simonov 4 // 5 // Permission is hereby granted, free of charge, to any person obtaining a copy of 6 // this software and associated documentation files (the "Software"), to deal in 7 // the Software without restriction, including without limitation the rights to 8 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 // of the Software, and to permit persons to whom the Software is furnished to do 10 // so, subject to the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be included in all 13 // copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 // SOFTWARE. 22 23 package yaml 24 25 import ( 26 "bytes" 27 "fmt" 28 ) 29 30 // Introduction 31 // ************ 32 // 33 // The following notes assume that you are familiar with the YAML specification 34 // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in 35 // some cases we are less restrictive that it requires. 36 // 37 // The process of transforming a YAML stream into a sequence of events is 38 // divided on two steps: Scanning and Parsing. 39 // 40 // The Scanner transforms the input stream into a sequence of tokens, while the 41 // parser transform the sequence of tokens produced by the Scanner into a 42 // sequence of parsing events. 43 // 44 // The Scanner is rather clever and complicated. 
The Parser, on the contrary, 45 // is a straightforward implementation of a recursive-descent parser (or, 46 // LL(1) parser, as it is usually called). 47 // 48 // Actually there are two issues of Scanning that might be called "clever", the 49 // rest is quite straightforward. The issues are "block collection start" and 50 // "simple keys". Both issues are explained below in detail. 51 // 52 // Here the Scanning step is explained and implemented. We start with the list 53 // of all the tokens produced by the Scanner together with short descriptions. 54 // 55 // Now, tokens: 56 // 57 // STREAM-START(encoding) # The stream start. 58 // STREAM-END # The stream end. 59 // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. 60 // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. 61 // DOCUMENT-START # '---' 62 // DOCUMENT-END # '...' 63 // BLOCK-SEQUENCE-START # Indentation increase denoting a block 64 // BLOCK-MAPPING-START # sequence or a block mapping. 65 // BLOCK-END # Indentation decrease. 66 // FLOW-SEQUENCE-START # '[' 67 // FLOW-SEQUENCE-END # ']' 68 // FLOW-MAPPING-START # '{' 69 // FLOW-MAPPING-END # '}' 70 // BLOCK-ENTRY # '-' 71 // FLOW-ENTRY # ',' 72 // KEY # '?' or nothing (simple keys). 73 // VALUE # ':' 74 // ALIAS(anchor) # '*anchor' 75 // ANCHOR(anchor) # '&anchor' 76 // TAG(handle,suffix) # '!handle!suffix' 77 // SCALAR(value,style) # A scalar. 78 // 79 // The following two tokens are "virtual" tokens denoting the beginning and the 80 // end of the stream: 81 // 82 // STREAM-START(encoding) 83 // STREAM-END 84 // 85 // We pass the information about the input stream encoding with the 86 // STREAM-START token. 87 // 88 // The next two tokens are responsible for directives: 89 // 90 // VERSION-DIRECTIVE(major,minor) 91 // TAG-DIRECTIVE(handle,prefix) 92 // 93 // Example: 94 // 95 // %YAML 1.1 96 // %TAG ! !foo 97 // %TAG !yaml! 
tag:yaml.org,2002: 98 // --- 99 // 100 // The corresponding sequence of tokens: 101 // 102 // STREAM-START(utf-8) 103 // VERSION-DIRECTIVE(1,1) 104 // TAG-DIRECTIVE("!","!foo") 105 // TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:") 106 // DOCUMENT-START 107 // STREAM-END 108 // 109 // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole 110 // line. 111 // 112 // The document start and end indicators are represented by: 113 // 114 // DOCUMENT-START 115 // DOCUMENT-END 116 // 117 // Note that if a YAML stream contains an implicit document (without '---' 118 // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be 119 // produced. 120 // 121 // In the following examples, we present whole documents together with the 122 // produced tokens. 123 // 124 // 1. An implicit document: 125 // 126 // 'a scalar' 127 // 128 // Tokens: 129 // 130 // STREAM-START(utf-8) 131 // SCALAR("a scalar",single-quoted) 132 // STREAM-END 133 // 134 // 2. An explicit document: 135 // 136 // --- 137 // 'a scalar' 138 // ... 139 // 140 // Tokens: 141 // 142 // STREAM-START(utf-8) 143 // DOCUMENT-START 144 // SCALAR("a scalar",single-quoted) 145 // DOCUMENT-END 146 // STREAM-END 147 // 148 // 3. Several documents in a stream: 149 // 150 // 'a scalar' 151 // --- 152 // 'another scalar' 153 // --- 154 // 'yet another scalar' 155 // 156 // Tokens: 157 // 158 // STREAM-START(utf-8) 159 // SCALAR("a scalar",single-quoted) 160 // DOCUMENT-START 161 // SCALAR("another scalar",single-quoted) 162 // DOCUMENT-START 163 // SCALAR("yet another scalar",single-quoted) 164 // STREAM-END 165 // 166 // We have already introduced the SCALAR token above. The following tokens are 167 // used to describe aliases, anchors, tags, and scalars: 168 // 169 // ALIAS(anchor) 170 // ANCHOR(anchor) 171 // TAG(handle,suffix) 172 // SCALAR(value,style) 173 // 174 // The following series of examples illustrate the usage of these tokens: 175 // 176 // 1. 
A recursive sequence: 177 // 178 // &A [ *A ] 179 // 180 // Tokens: 181 // 182 // STREAM-START(utf-8) 183 // ANCHOR("A") 184 // FLOW-SEQUENCE-START 185 // ALIAS("A") 186 // FLOW-SEQUENCE-END 187 // STREAM-END 188 // 189 // 2. A tagged scalar: 190 // 191 // !!float "3.14" # A good approximation. 192 // 193 // Tokens: 194 // 195 // STREAM-START(utf-8) 196 // TAG("!!","float") 197 // SCALAR("3.14",double-quoted) 198 // STREAM-END 199 // 200 // 3. Various scalar styles: 201 // 202 // --- # Implicit empty plain scalars do not produce tokens. 203 // --- a plain scalar 204 // --- 'a single-quoted scalar' 205 // --- "a double-quoted scalar" 206 // --- |- 207 // a literal scalar 208 // --- >- 209 // a folded 210 // scalar 211 // 212 // Tokens: 213 // 214 // STREAM-START(utf-8) 215 // DOCUMENT-START 216 // DOCUMENT-START 217 // SCALAR("a plain scalar",plain) 218 // DOCUMENT-START 219 // SCALAR("a single-quoted scalar",single-quoted) 220 // DOCUMENT-START 221 // SCALAR("a double-quoted scalar",double-quoted) 222 // DOCUMENT-START 223 // SCALAR("a literal scalar",literal) 224 // DOCUMENT-START 225 // SCALAR("a folded scalar",folded) 226 // STREAM-END 227 // 228 // Now it's time to review collection-related tokens. We will start with 229 // flow collections: 230 // 231 // FLOW-SEQUENCE-START 232 // FLOW-SEQUENCE-END 233 // FLOW-MAPPING-START 234 // FLOW-MAPPING-END 235 // FLOW-ENTRY 236 // KEY 237 // VALUE 238 // 239 // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and 240 // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' 241 // correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the 242 // indicators '?' and ':', which are used for denoting mapping keys and values, 243 // are represented by the KEY and VALUE tokens. 244 // 245 // The following examples show flow collections: 246 // 247 // 1. 
A flow sequence: 248 // 249 // [item 1, item 2, item 3] 250 // 251 // Tokens: 252 // 253 // STREAM-START(utf-8) 254 // FLOW-SEQUENCE-START 255 // SCALAR("item 1",plain) 256 // FLOW-ENTRY 257 // SCALAR("item 2",plain) 258 // FLOW-ENTRY 259 // SCALAR("item 3",plain) 260 // FLOW-SEQUENCE-END 261 // STREAM-END 262 // 263 // 2. A flow mapping: 264 // 265 // { 266 // a simple key: a value, # Note that the KEY token is produced. 267 // ? a complex key: another value, 268 // } 269 // 270 // Tokens: 271 // 272 // STREAM-START(utf-8) 273 // FLOW-MAPPING-START 274 // KEY 275 // SCALAR("a simple key",plain) 276 // VALUE 277 // SCALAR("a value",plain) 278 // FLOW-ENTRY 279 // KEY 280 // SCALAR("a complex key",plain) 281 // VALUE 282 // SCALAR("another value",plain) 283 // FLOW-ENTRY 284 // FLOW-MAPPING-END 285 // STREAM-END 286 // 287 // A simple key is a key which is not denoted by the '?' indicator. Note that 288 // the Scanner still produces the KEY token whenever it encounters a simple key. 289 // 290 // For scanning block collections, the following tokens are used (note that we 291 // repeat KEY and VALUE here): 292 // 293 // BLOCK-SEQUENCE-START 294 // BLOCK-MAPPING-START 295 // BLOCK-END 296 // BLOCK-ENTRY 297 // KEY 298 // VALUE 299 // 300 // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation 301 // increase that precedes a block collection (cf. the INDENT token in Python). 302 // The token BLOCK-END denotes indentation decrease that ends a block collection 303 // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities 304 // that make detection of these tokens more complex. 305 // 306 // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators 307 // '-', '?', and ':' correspondingly. 308 // 309 // The following examples show how the tokens BLOCK-SEQUENCE-START, 310 // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: 311 // 312 // 1. 
Block sequences: 313 // 314 // - item 1 315 // - item 2 316 // - 317 // - item 3.1 318 // - item 3.2 319 // - 320 // key 1: value 1 321 // key 2: value 2 322 // 323 // Tokens: 324 // 325 // STREAM-START(utf-8) 326 // BLOCK-SEQUENCE-START 327 // BLOCK-ENTRY 328 // SCALAR("item 1",plain) 329 // BLOCK-ENTRY 330 // SCALAR("item 2",plain) 331 // BLOCK-ENTRY 332 // BLOCK-SEQUENCE-START 333 // BLOCK-ENTRY 334 // SCALAR("item 3.1",plain) 335 // BLOCK-ENTRY 336 // SCALAR("item 3.2",plain) 337 // BLOCK-END 338 // BLOCK-ENTRY 339 // BLOCK-MAPPING-START 340 // KEY 341 // SCALAR("key 1",plain) 342 // VALUE 343 // SCALAR("value 1",plain) 344 // KEY 345 // SCALAR("key 2",plain) 346 // VALUE 347 // SCALAR("value 2",plain) 348 // BLOCK-END 349 // BLOCK-END 350 // STREAM-END 351 // 352 // 2. Block mappings: 353 // 354 // a simple key: a value # The KEY token is produced here. 355 // ? a complex key 356 // : another value 357 // a mapping: 358 // key 1: value 1 359 // key 2: value 2 360 // a sequence: 361 // - item 1 362 // - item 2 363 // 364 // Tokens: 365 // 366 // STREAM-START(utf-8) 367 // BLOCK-MAPPING-START 368 // KEY 369 // SCALAR("a simple key",plain) 370 // VALUE 371 // SCALAR("a value",plain) 372 // KEY 373 // SCALAR("a complex key",plain) 374 // VALUE 375 // SCALAR("another value",plain) 376 // KEY 377 // SCALAR("a mapping",plain) // VALUE 378 // BLOCK-MAPPING-START 379 // KEY 380 // SCALAR("key 1",plain) 381 // VALUE 382 // SCALAR("value 1",plain) 383 // KEY 384 // SCALAR("key 2",plain) 385 // VALUE 386 // SCALAR("value 2",plain) 387 // BLOCK-END 388 // KEY 389 // SCALAR("a sequence",plain) 390 // VALUE 391 // BLOCK-SEQUENCE-START 392 // BLOCK-ENTRY 393 // SCALAR("item 1",plain) 394 // BLOCK-ENTRY 395 // SCALAR("item 2",plain) 396 // BLOCK-END 397 // BLOCK-END 398 // STREAM-END 399 // 400 // YAML does not always require a new block collection to start on a new 401 // line. 
If the current line contains only '-', '?', and ':' indicators, a new 402 // block collection may start at the current line. The following examples 403 // illustrate this case: 404 // 405 // 1. Collections in a sequence: 406 // 407 // - - item 1 408 // - item 2 409 // - key 1: value 1 410 // key 2: value 2 411 // - ? complex key 412 // : complex value 413 // 414 // Tokens: 415 // 416 // STREAM-START(utf-8) 417 // BLOCK-SEQUENCE-START 418 // BLOCK-ENTRY 419 // BLOCK-SEQUENCE-START 420 // BLOCK-ENTRY 421 // SCALAR("item 1",plain) 422 // BLOCK-ENTRY 423 // SCALAR("item 2",plain) 424 // BLOCK-END 425 // BLOCK-ENTRY 426 // BLOCK-MAPPING-START 427 // KEY 428 // SCALAR("key 1",plain) 429 // VALUE 430 // SCALAR("value 1",plain) 431 // KEY 432 // SCALAR("key 2",plain) 433 // VALUE 434 // SCALAR("value 2",plain) 435 // BLOCK-END 436 // BLOCK-ENTRY 437 // BLOCK-MAPPING-START 438 // KEY 439 // SCALAR("complex key") 440 // VALUE 441 // SCALAR("complex value") 442 // BLOCK-END 443 // BLOCK-END 444 // STREAM-END 445 // 446 // 2. Collections in a mapping: 447 // 448 // ? a sequence 449 // : - item 1 450 // - item 2 451 // ? a mapping 452 // : key 1: value 1 453 // key 2: value 2 454 // 455 // Tokens: 456 // 457 // STREAM-START(utf-8) 458 // BLOCK-MAPPING-START 459 // KEY 460 // SCALAR("a sequence",plain) 461 // VALUE 462 // BLOCK-SEQUENCE-START 463 // BLOCK-ENTRY 464 // SCALAR("item 1",plain) 465 // BLOCK-ENTRY 466 // SCALAR("item 2",plain) 467 // BLOCK-END 468 // KEY 469 // SCALAR("a mapping",plain) 470 // VALUE 471 // BLOCK-MAPPING-START 472 // KEY 473 // SCALAR("key 1",plain) 474 // VALUE 475 // SCALAR("value 1",plain) 476 // KEY 477 // SCALAR("key 2",plain) 478 // VALUE 479 // SCALAR("value 2",plain) 480 // BLOCK-END 481 // BLOCK-END 482 // STREAM-END 483 // 484 // YAML also permits non-indented sequences if they are included into a block 485 // mapping. 
In this case, the token BLOCK-SEQUENCE-START is not produced: 486 // 487 // key: 488 // - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 489 // - item 2 490 // 491 // Tokens: 492 // 493 // STREAM-START(utf-8) 494 // BLOCK-MAPPING-START 495 // KEY 496 // SCALAR("key",plain) 497 // VALUE 498 // BLOCK-ENTRY 499 // SCALAR("item 1",plain) 500 // BLOCK-ENTRY 501 // SCALAR("item 2",plain) 502 // BLOCK-END 503 // 504 505 // Ensure that the buffer contains the required number of characters. 506 // Return true on success, false on failure (reader error or memory error). 507 func cache(parser *yaml_parser_t, length int) bool { 508 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) 509 return parser.unread >= length || yaml_parser_update_buffer(parser, length) 510 } 511 512 // Advance the buffer pointer. 513 func skip(parser *yaml_parser_t) { 514 if !is_blank(parser.buffer, parser.buffer_pos) { 515 parser.newlines = 0 516 } 517 parser.mark.index++ 518 parser.mark.column++ 519 parser.unread-- 520 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 521 } 522 523 func skip_line(parser *yaml_parser_t) { 524 if is_crlf(parser.buffer, parser.buffer_pos) { 525 parser.mark.index += 2 526 parser.mark.column = 0 527 parser.mark.line++ 528 parser.unread -= 2 529 parser.buffer_pos += 2 530 parser.newlines++ 531 } else if is_break(parser.buffer, parser.buffer_pos) { 532 parser.mark.index++ 533 parser.mark.column = 0 534 parser.mark.line++ 535 parser.unread-- 536 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 537 parser.newlines++ 538 } 539 } 540 541 // Copy a character to a string buffer and advance pointers. 
func read(parser *yaml_parser_t, s []byte) []byte {
	// The character at parser.buffer_pos is appended to s and the resulting
	// slice is returned; callers must use the returned slice because it may
	// have been reallocated.

	// Any non-blank character resets the newlines counter.
	if !is_blank(parser.buffer, parser.buffer_pos) {
		parser.newlines = 0
	}
	// w is used below as the number of bytes occupied by the character.
	w := width(parser.buffer[parser.buffer_pos])
	if w == 0 {
		panic("invalid character sequence")
	}
	// Start empty strings with a small capacity so that the single-byte
	// fast path below can be taken for the following characters.
	if len(s) == 0 {
		s = make([]byte, 0, 32)
	}
	if w == 1 && len(s)+w <= cap(s) {
		// Fast path: one byte and enough spare capacity, so extend the
		// slice in place instead of calling append.
		s = s[:len(s)+1]
		s[len(s)-1] = parser.buffer[parser.buffer_pos]
		parser.buffer_pos++
	} else {
		s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)
		parser.buffer_pos += w
	}
	// The mark counts characters, not bytes.
	parser.mark.index++
	parser.mark.column++
	parser.unread--
	return s
}

// Copy a line break character to a string buffer and advance pointers.
//
// CR LF, CR, LF and NEL are all appended to s as a single '\n'; LS and PS
// are appended unchanged. If the current position is not one of these line
// breaks, s is returned as is and the parser is left untouched. The returned
// slice must be used by the caller since it may have been reallocated.
func read_line(parser *yaml_parser_t, s []byte) []byte {
	buf := parser.buffer
	pos := parser.buffer_pos
	switch {
	case buf[pos] == '\r' && buf[pos+1] == '\n':
		// CR LF . LF
		s = append(s, '\n')
		parser.buffer_pos += 2
		// CR LF is two characters: account for the first one here, the
		// second one is accounted for by the shared code after the switch.
		parser.mark.index++
		parser.unread--
	case buf[pos] == '\r' || buf[pos] == '\n':
		// CR|LF . LF
		s = append(s, '\n')
		parser.buffer_pos += 1
	case buf[pos] == '\xC2' && buf[pos+1] == '\x85':
		// NEL . LF
		s = append(s, '\n')
		parser.buffer_pos += 2
	case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):
		// LS|PS . LS|PS
		s = append(s, buf[parser.buffer_pos:pos+3]...)
		parser.buffer_pos += 3
	default:
		return s
	}
	// Shared bookkeeping for every recognized break: one character consumed
	// and the mark moved to the start of the next line.
	parser.mark.index++
	parser.mark.column = 0
	parser.mark.line++
	parser.unread--
	parser.newlines++
	return s
}

// Get the next token.
//
// On success the token is stored in *token and true is returned. Once the
// STREAM-END token has been produced, or when the parser already carries an
// error, *token is left zeroed and true is returned. False is returned only
// when fetching more tokens fails.
func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {
	// Erase the token object.
	*token = yaml_token_t{} // [Go] Is this necessary?

	// No tokens after STREAM-END or error.
	if parser.stream_end_produced || parser.error != yaml_NO_ERROR {
		return true
	}

	// Ensure that the tokens queue contains enough tokens.
	if !parser.token_available {
		if !yaml_parser_fetch_more_tokens(parser) {
			return false
		}
	}

	// Fetch the next token from the queue.
	*token = parser.tokens[parser.tokens_head]
	parser.tokens_head++
	parser.tokens_parsed++
	// Force the availability check to run again on the next call.
	parser.token_available = false

	if token.typ == yaml_STREAM_END_TOKEN {
		parser.stream_end_produced = true
	}
	return true
}

// Set the scanner error and return false.
//
// context and context_mark describe what was being scanned and where it
// started; problem describes what went wrong. The problem mark is taken from
// the current parser position. The constant false result lets callers write
// "return yaml_parser_set_scanner_error(...)".
func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool {
	parser.error = yaml_SCANNER_ERROR
	parser.context = context
	parser.context_mark = context_mark
	parser.problem = problem
	parser.problem_mark = parser.mark
	return false
}

// Set a scanner error for a problem found while scanning a tag. The context
// message depends on whether the tag belongs to a %TAG directive. Always
// returns false.
func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {
	context := "while parsing a tag"
	if directive {
		context = "while parsing a %TAG directive"
	}
	return yaml_parser_set_scanner_error(parser, context, context_mark, problem)
}

// trace is a debugging helper. It prints args prefixed with "+++" right
// away and returns a function that prints the same args prefixed with "---",
// intended to be used as "defer trace(...)()".
// NOTE(review): no caller is visible in this part of the file.
func trace(args ...interface{}) func() {
	pargs := append([]interface{}{"+++"}, args...)
	fmt.Println(pargs...)
	pargs = append([]interface{}{"---"}, args...)
	return func() { fmt.Println(pargs...) }
}

// Ensure that the tokens queue contains at least one token which can be
// returned to the Parser.
//
// Returns false if a simple key check or a token fetch fails; otherwise
// marks a token as available and returns true.
func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
	// While we need more tokens to fetch, do it.
	for {
		// [Go] The comment parsing logic requires a lookahead of two tokens
		// so that foot comments may be parsed in time of associating them
		// with the tokens that are parsed before them, and also for line
		// comments to be transformed into head comments in some edge cases.
		if parser.tokens_head < len(parser.tokens)-2 {
			// If a potential simple key is at the head position, we need to fetch
			// the next token to disambiguate it.
			head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
			if !ok {
				// The head token is not a simple key candidate: stop fetching.
				break
			} else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
				return false
			} else if !valid {
				// The candidate is no longer possible: stop fetching.
				break
			}
		}
		// Fetch the next token.
		if !yaml_parser_fetch_next_token(parser) {
			return false
		}
	}

	parser.token_available = true
	return true
}

// The dispatcher for token fetchers.
//
// It skips to the next token, closes block collections that end at the new
// column, and then calls the fetcher selected by the character(s) at the
// current position. The result is named because the deferred closure
// installed below may turn a successful fetch into a failure when scanning
// the trailing line comment fails.
func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
	// Ensure that the buffer is initialized.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// Check if we just started scanning.  Fetch STREAM-START then.
	if !parser.stream_start_produced {
		return yaml_parser_fetch_stream_start(parser)
	}

	// Remember where the whitespace/comment scan started.
	scan_mark := parser.mark

	// Eat whitespaces and comments until we reach the next token.
	if !yaml_parser_scan_to_next_token(parser) {
		return false
	}

	// [Go] While unrolling indents, transform the head comments of prior
	// indentation levels observed after scan_start into foot comments at
	// the respective indexes.

	// Check the indentation level against the current column.
	if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {
		return false
	}

	// Ensure that the buffer contains at least 4 characters.  4 is the length
	// of the longest indicators ('--- ' and '... ').
	if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
		return false
	}

	// Is it the end of the stream?
	if is_z(parser.buffer, parser.buffer_pos) {
		return yaml_parser_fetch_stream_end(parser)
	}

	// Is it a directive?
	if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {
		return yaml_parser_fetch_directive(parser)
	}

	buf := parser.buffer
	pos := parser.buffer_pos

	// Is it the document start indicator?
	if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {
		return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)
	}

	// Is it the document end indicator?
	if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) {
		return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)
	}

	// The fetchers dispatched above return before this point, so the line
	// comment handling below applies only to the tokens fetched after it.
	comment_mark := parser.mark
	if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') {
		// Associate any following comments with the prior token.
		comment_mark = parser.tokens[len(parser.tokens)-1].start_mark
	}
	defer func() {
		// Runs after the selected fetcher returned; a failed fetch is
		// passed through unchanged.
		if !ok {
			return
		}
		if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN {
			// Sequence indicators alone have no line comments. It becomes
			// a head comment for whatever follows.
			return
		}
		if !yaml_parser_scan_line_comment(parser, comment_mark) {
			ok = false
			return
		}
	}()

	// Is it the flow sequence start indicator?
	if buf[pos] == '[' {
		return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)
	}

	// Is it the flow mapping start indicator?
	if parser.buffer[parser.buffer_pos] == '{' {
		return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)
	}

	// Is it the flow sequence end indicator?
	if parser.buffer[parser.buffer_pos] == ']' {
		return yaml_parser_fetch_flow_collection_end(parser,
			yaml_FLOW_SEQUENCE_END_TOKEN)
	}

	// Is it the flow mapping end indicator?
	if parser.buffer[parser.buffer_pos] == '}' {
		return yaml_parser_fetch_flow_collection_end(parser,
			yaml_FLOW_MAPPING_END_TOKEN)
	}

	// Is it the flow entry indicator?
	if parser.buffer[parser.buffer_pos] == ',' {
		return yaml_parser_fetch_flow_entry(parser)
	}

	// Is it the block entry indicator?
	if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {
		return yaml_parser_fetch_block_entry(parser)
	}

	// Is it the key indicator?
	if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
		return yaml_parser_fetch_key(parser)
	}

	// Is it the value indicator?
	if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
		return yaml_parser_fetch_value(parser)
	}

	// Is it an alias?
	if parser.buffer[parser.buffer_pos] == '*' {
		return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)
	}

	// Is it an anchor?
	if parser.buffer[parser.buffer_pos] == '&' {
		return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)
	}

	// Is it a tag?
	if parser.buffer[parser.buffer_pos] == '!' {
		return yaml_parser_fetch_tag(parser)
	}

	// Is it a literal scalar?
	if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {
		return yaml_parser_fetch_block_scalar(parser, true)
	}

	// Is it a folded scalar?
	if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {
		return yaml_parser_fetch_block_scalar(parser, false)
	}

	// Is it a single-quoted scalar?
	if parser.buffer[parser.buffer_pos] == '\'' {
		return yaml_parser_fetch_flow_scalar(parser, true)
	}

	// Is it a double-quoted scalar?
	if parser.buffer[parser.buffer_pos] == '"' {
		return yaml_parser_fetch_flow_scalar(parser, false)
	}

	// Is it a plain scalar?
	//
	// A plain scalar may start with any non-blank characters except
	//
	//      '-', '?', ':', ',', '[', ']', '{', '}',
	//      '#', '&', '*', '!', '|', '>', '\'', '\"',
	//      '%', '@', '`'.
	//
	// In the block context (and, for the '-' indicator, in the flow context
	// too), it may also start with the characters
	//
	//      '-', '?', ':'
	//
	// if it is followed by a non-space character.
	//
	// The last rule is more restrictive than the specification requires.
	// [Go] TODO Make this logic more reasonable.
	//switch parser.buffer[parser.buffer_pos] {
	//case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`':
	//}
	if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||
		parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||
		parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||
		parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
		parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||
		parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||
		parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||
		parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||
		parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||
		parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||
		(parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||
		(parser.flow_level == 0 &&
			(parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&
			!is_blankz(parser.buffer, parser.buffer_pos+1)) {
		return yaml_parser_fetch_plain_scalar(parser)
	}

	// If we don't determine the token type so far, it is an error.
	return yaml_parser_set_scanner_error(parser,
		"while scanning for the next token", parser.mark,
		"found character that cannot start any token")
}

// yaml_simple_key_is_valid reports whether simple_key can still become a
// simple key at the current parser position.
//
// valid is true while the key is possible and within the limits checked
// below. ok is false only when a required key has expired, in which case a
// scanner error has been set on the parser. An expired key that is not
// required is marked as no longer possible.
func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {
	if !simple_key.possible {
		return false, true
	}

	// The 1.2 specification says:
	//
	//     "If the ? indicator is omitted, parsing needs to see past the
	//     implicit key to recognize it as such. To limit the amount of
	//     lookahead required, the “:” indicator must appear at most 1024
	//     Unicode characters beyond the start of the key. In addition, the key
	//     is restricted to a single line."
	//
	if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
		// Check if the potential simple key to be removed is required.
		if simple_key.required {
			return false, yaml_parser_set_scanner_error(parser,
				"while scanning a simple key", simple_key.mark,
				"could not find expected ':'")
		}
		simple_key.possible = false
		return false, true
	}
	return true, true
}

// Check if a simple key may start at the current position and add it if
// needed.
911 func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { 912 // A simple key is required at the current position if the scanner is in 913 // the block context and the current column coincides with the indentation 914 // level. 915 916 required := parser.flow_level == 0 && parser.indent == parser.mark.column 917 918 // 919 // If the current position may start a simple key, save it. 920 // 921 if parser.simple_key_allowed { 922 simple_key := yaml_simple_key_t{ 923 possible: true, 924 required: required, 925 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 926 mark: parser.mark, 927 } 928 929 if !yaml_parser_remove_simple_key(parser) { 930 return false 931 } 932 parser.simple_keys[len(parser.simple_keys)-1] = simple_key 933 parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 934 } 935 return true 936 } 937 938 // Remove a potential simple key at the current flow level. 939 func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { 940 i := len(parser.simple_keys) - 1 941 if parser.simple_keys[i].possible { 942 // If the key is required, it is an error. 943 if parser.simple_keys[i].required { 944 return yaml_parser_set_scanner_error(parser, 945 "while scanning a simple key", parser.simple_keys[i].mark, 946 "could not find expected ':'") 947 } 948 // Remove the key from the stack. 949 parser.simple_keys[i].possible = false 950 delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) 951 } 952 return true 953 } 954 955 // max_flow_level limits the flow_level 956 const max_flow_level = 10000 957 958 // Increase the flow level and resize the simple key list if needed. 959 func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { 960 // Reset the simple key on the next level. 
961 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ 962 possible: false, 963 required: false, 964 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 965 mark: parser.mark, 966 }) 967 968 // Increase the flow level. 969 parser.flow_level++ 970 if parser.flow_level > max_flow_level { 971 return yaml_parser_set_scanner_error(parser, 972 "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, 973 fmt.Sprintf("exceeded max depth of %d", max_flow_level)) 974 } 975 return true 976 } 977 978 // Decrease the flow level. 979 func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { 980 if parser.flow_level > 0 { 981 parser.flow_level-- 982 last := len(parser.simple_keys) - 1 983 delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) 984 parser.simple_keys = parser.simple_keys[:last] 985 } 986 return true 987 } 988 989 // max_indents limits the indents stack size 990 const max_indents = 10000 991 992 // Push the current indentation level to the stack and set the new level 993 // the current column is greater than the indentation level. In this case, 994 // append or insert the specified token into the token queue. 995 func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { 996 // In the flow context, do nothing. 997 if parser.flow_level > 0 { 998 return true 999 } 1000 1001 if parser.indent < column { 1002 // Push the current indentation level to the stack and set the new 1003 // indentation level. 1004 parser.indents = append(parser.indents, parser.indent) 1005 parser.indent = column 1006 if len(parser.indents) > max_indents { 1007 return yaml_parser_set_scanner_error(parser, 1008 "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, 1009 fmt.Sprintf("exceeded max depth of %d", max_indents)) 1010 } 1011 1012 // Create a token and insert it into the queue. 
1013 token := yaml_token_t{ 1014 typ: typ, 1015 start_mark: mark, 1016 end_mark: mark, 1017 } 1018 if number > -1 { 1019 number -= parser.tokens_parsed 1020 } 1021 yaml_insert_token(parser, number, &token) 1022 } 1023 return true 1024 } 1025 1026 // Pop indentation levels from the indents stack until the current level 1027 // becomes less or equal to the column. For each indentation level, append 1028 // the BLOCK-END token. 1029 func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool { 1030 // In the flow context, do nothing. 1031 if parser.flow_level > 0 { 1032 return true 1033 } 1034 1035 block_mark := scan_mark 1036 block_mark.index-- 1037 1038 // Loop through the indentation levels in the stack. 1039 for parser.indent > column { 1040 1041 // [Go] Reposition the end token before potential following 1042 // foot comments of parent blocks. For that, search 1043 // backwards for recent comments that were at the same 1044 // indent as the block that is ending now. 1045 stop_index := block_mark.index 1046 for i := len(parser.comments) - 1; i >= 0; i-- { 1047 comment := &parser.comments[i] 1048 1049 if comment.end_mark.index < stop_index { 1050 // Don't go back beyond the start of the comment/whitespace scan, unless column < 0. 1051 // If requested indent column is < 0, then the document is over and everything else 1052 // is a foot anyway. 1053 break 1054 } 1055 if comment.start_mark.column == parser.indent+1 { 1056 // This is a good match. But maybe there's a former comment 1057 // at that same indent level, so keep searching. 1058 block_mark = comment.start_mark 1059 } 1060 1061 // While the end of the former comment matches with 1062 // the start of the following one, we know there's 1063 // nothing in between and scanning is still safe. 1064 stop_index = comment.scan_mark.index 1065 } 1066 1067 // Create a token and append it to the queue. 
1068 token := yaml_token_t{ 1069 typ: yaml_BLOCK_END_TOKEN, 1070 start_mark: block_mark, 1071 end_mark: block_mark, 1072 } 1073 yaml_insert_token(parser, -1, &token) 1074 1075 // Pop the indentation level. 1076 parser.indent = parser.indents[len(parser.indents)-1] 1077 parser.indents = parser.indents[:len(parser.indents)-1] 1078 } 1079 return true 1080 } 1081 1082 // Initialize the scanner and produce the STREAM-START token. 1083 func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { 1084 1085 // Set the initial indentation. 1086 parser.indent = -1 1087 1088 // Initialize the simple key stack. 1089 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) 1090 1091 parser.simple_keys_by_tok = make(map[int]int) 1092 1093 // A simple key is allowed at the beginning of the stream. 1094 parser.simple_key_allowed = true 1095 1096 // We have started. 1097 parser.stream_start_produced = true 1098 1099 // Create the STREAM-START token and append it to the queue. 1100 token := yaml_token_t{ 1101 typ: yaml_STREAM_START_TOKEN, 1102 start_mark: parser.mark, 1103 end_mark: parser.mark, 1104 encoding: parser.encoding, 1105 } 1106 yaml_insert_token(parser, -1, &token) 1107 return true 1108 } 1109 1110 // Produce the STREAM-END token and shut down the scanner. 1111 func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { 1112 1113 // Force new line. 1114 if parser.mark.column != 0 { 1115 parser.mark.column = 0 1116 parser.mark.line++ 1117 } 1118 1119 // Reset the indentation level. 1120 if !yaml_parser_unroll_indent(parser, -1, parser.mark) { 1121 return false 1122 } 1123 1124 // Reset simple keys. 1125 if !yaml_parser_remove_simple_key(parser) { 1126 return false 1127 } 1128 1129 parser.simple_key_allowed = false 1130 1131 // Create the STREAM-END token and append it to the queue. 
1132 token := yaml_token_t{ 1133 typ: yaml_STREAM_END_TOKEN, 1134 start_mark: parser.mark, 1135 end_mark: parser.mark, 1136 } 1137 yaml_insert_token(parser, -1, &token) 1138 return true 1139 } 1140 1141 // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. 1142 func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { 1143 // Reset the indentation level. 1144 if !yaml_parser_unroll_indent(parser, -1, parser.mark) { 1145 return false 1146 } 1147 1148 // Reset simple keys. 1149 if !yaml_parser_remove_simple_key(parser) { 1150 return false 1151 } 1152 1153 parser.simple_key_allowed = false 1154 1155 // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. 1156 token := yaml_token_t{} 1157 if !yaml_parser_scan_directive(parser, &token) { 1158 return false 1159 } 1160 // Append the token to the queue. 1161 yaml_insert_token(parser, -1, &token) 1162 return true 1163 } 1164 1165 // Produce the DOCUMENT-START or DOCUMENT-END token. 1166 func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1167 // Reset the indentation level. 1168 if !yaml_parser_unroll_indent(parser, -1, parser.mark) { 1169 return false 1170 } 1171 1172 // Reset simple keys. 1173 if !yaml_parser_remove_simple_key(parser) { 1174 return false 1175 } 1176 1177 parser.simple_key_allowed = false 1178 1179 // Consume the token. 1180 start_mark := parser.mark 1181 1182 skip(parser) 1183 skip(parser) 1184 skip(parser) 1185 1186 end_mark := parser.mark 1187 1188 // Create the DOCUMENT-START or DOCUMENT-END token. 1189 token := yaml_token_t{ 1190 typ: typ, 1191 start_mark: start_mark, 1192 end_mark: end_mark, 1193 } 1194 // Append the token to the queue. 1195 yaml_insert_token(parser, -1, &token) 1196 return true 1197 } 1198 1199 // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. 1200 func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1201 1202 // The indicators '[' and '{' may start a simple key. 
1203 if !yaml_parser_save_simple_key(parser) { 1204 return false 1205 } 1206 1207 // Increase the flow level. 1208 if !yaml_parser_increase_flow_level(parser) { 1209 return false 1210 } 1211 1212 // A simple key may follow the indicators '[' and '{'. 1213 parser.simple_key_allowed = true 1214 1215 // Consume the token. 1216 start_mark := parser.mark 1217 skip(parser) 1218 end_mark := parser.mark 1219 1220 // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. 1221 token := yaml_token_t{ 1222 typ: typ, 1223 start_mark: start_mark, 1224 end_mark: end_mark, 1225 } 1226 // Append the token to the queue. 1227 yaml_insert_token(parser, -1, &token) 1228 return true 1229 } 1230 1231 // Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. 1232 func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1233 // Reset any potential simple key on the current flow level. 1234 if !yaml_parser_remove_simple_key(parser) { 1235 return false 1236 } 1237 1238 // Decrease the flow level. 1239 if !yaml_parser_decrease_flow_level(parser) { 1240 return false 1241 } 1242 1243 // No simple keys after the indicators ']' and '}'. 1244 parser.simple_key_allowed = false 1245 1246 // Consume the token. 1247 1248 start_mark := parser.mark 1249 skip(parser) 1250 end_mark := parser.mark 1251 1252 // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. 1253 token := yaml_token_t{ 1254 typ: typ, 1255 start_mark: start_mark, 1256 end_mark: end_mark, 1257 } 1258 // Append the token to the queue. 1259 yaml_insert_token(parser, -1, &token) 1260 return true 1261 } 1262 1263 // Produce the FLOW-ENTRY token. 1264 func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { 1265 // Reset any potential simple keys on the current flow level. 1266 if !yaml_parser_remove_simple_key(parser) { 1267 return false 1268 } 1269 1270 // Simple keys are allowed after ','. 1271 parser.simple_key_allowed = true 1272 1273 // Consume the token. 
1274 start_mark := parser.mark 1275 skip(parser) 1276 end_mark := parser.mark 1277 1278 // Create the FLOW-ENTRY token and append it to the queue. 1279 token := yaml_token_t{ 1280 typ: yaml_FLOW_ENTRY_TOKEN, 1281 start_mark: start_mark, 1282 end_mark: end_mark, 1283 } 1284 yaml_insert_token(parser, -1, &token) 1285 return true 1286 } 1287 1288 // Produce the BLOCK-ENTRY token. 1289 func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { 1290 // Check if the scanner is in the block context. 1291 if parser.flow_level == 0 { 1292 // Check if we are allowed to start a new entry. 1293 if !parser.simple_key_allowed { 1294 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1295 "block sequence entries are not allowed in this context") 1296 } 1297 // Add the BLOCK-SEQUENCE-START token if needed. 1298 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { 1299 return false 1300 } 1301 } else { 1302 // It is an error for the '-' indicator to occur in the flow context, 1303 // but we let the Parser detect and report about it because the Parser 1304 // is able to point to the context. 1305 } 1306 1307 // Reset any potential simple keys on the current flow level. 1308 if !yaml_parser_remove_simple_key(parser) { 1309 return false 1310 } 1311 1312 // Simple keys are allowed after '-'. 1313 parser.simple_key_allowed = true 1314 1315 // Consume the token. 1316 start_mark := parser.mark 1317 skip(parser) 1318 end_mark := parser.mark 1319 1320 // Create the BLOCK-ENTRY token and append it to the queue. 1321 token := yaml_token_t{ 1322 typ: yaml_BLOCK_ENTRY_TOKEN, 1323 start_mark: start_mark, 1324 end_mark: end_mark, 1325 } 1326 yaml_insert_token(parser, -1, &token) 1327 return true 1328 } 1329 1330 // Produce the KEY token. 1331 func yaml_parser_fetch_key(parser *yaml_parser_t) bool { 1332 1333 // In the block context, additional checks are required. 
1334 if parser.flow_level == 0 { 1335 // Check if we are allowed to start a new key (not nessesary simple). 1336 if !parser.simple_key_allowed { 1337 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1338 "mapping keys are not allowed in this context") 1339 } 1340 // Add the BLOCK-MAPPING-START token if needed. 1341 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1342 return false 1343 } 1344 } 1345 1346 // Reset any potential simple keys on the current flow level. 1347 if !yaml_parser_remove_simple_key(parser) { 1348 return false 1349 } 1350 1351 // Simple keys are allowed after '?' in the block context. 1352 parser.simple_key_allowed = parser.flow_level == 0 1353 1354 // Consume the token. 1355 start_mark := parser.mark 1356 skip(parser) 1357 end_mark := parser.mark 1358 1359 // Create the KEY token and append it to the queue. 1360 token := yaml_token_t{ 1361 typ: yaml_KEY_TOKEN, 1362 start_mark: start_mark, 1363 end_mark: end_mark, 1364 } 1365 yaml_insert_token(parser, -1, &token) 1366 return true 1367 } 1368 1369 // Produce the VALUE token. 1370 func yaml_parser_fetch_value(parser *yaml_parser_t) bool { 1371 1372 simple_key := &parser.simple_keys[len(parser.simple_keys)-1] 1373 1374 // Have we found a simple key? 1375 if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { 1376 return false 1377 1378 } else if valid { 1379 1380 // Create the KEY token and insert it into the queue. 1381 token := yaml_token_t{ 1382 typ: yaml_KEY_TOKEN, 1383 start_mark: simple_key.mark, 1384 end_mark: simple_key.mark, 1385 } 1386 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) 1387 1388 // In the block context, we may need to add the BLOCK-MAPPING-START token. 1389 if !yaml_parser_roll_indent(parser, simple_key.mark.column, 1390 simple_key.token_number, 1391 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { 1392 return false 1393 } 1394 1395 // Remove the simple key. 
1396 simple_key.possible = false 1397 delete(parser.simple_keys_by_tok, simple_key.token_number) 1398 1399 // A simple key cannot follow another simple key. 1400 parser.simple_key_allowed = false 1401 1402 } else { 1403 // The ':' indicator follows a complex key. 1404 1405 // In the block context, extra checks are required. 1406 if parser.flow_level == 0 { 1407 1408 // Check if we are allowed to start a complex value. 1409 if !parser.simple_key_allowed { 1410 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1411 "mapping values are not allowed in this context") 1412 } 1413 1414 // Add the BLOCK-MAPPING-START token if needed. 1415 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1416 return false 1417 } 1418 } 1419 1420 // Simple keys after ':' are allowed in the block context. 1421 parser.simple_key_allowed = parser.flow_level == 0 1422 } 1423 1424 // Consume the token. 1425 start_mark := parser.mark 1426 skip(parser) 1427 end_mark := parser.mark 1428 1429 // Create the VALUE token and append it to the queue. 1430 token := yaml_token_t{ 1431 typ: yaml_VALUE_TOKEN, 1432 start_mark: start_mark, 1433 end_mark: end_mark, 1434 } 1435 yaml_insert_token(parser, -1, &token) 1436 return true 1437 } 1438 1439 // Produce the ALIAS or ANCHOR token. 1440 func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1441 // An anchor or an alias could be a simple key. 1442 if !yaml_parser_save_simple_key(parser) { 1443 return false 1444 } 1445 1446 // A simple key cannot follow an anchor or an alias. 1447 parser.simple_key_allowed = false 1448 1449 // Create the ALIAS or ANCHOR token and append it to the queue. 1450 var token yaml_token_t 1451 if !yaml_parser_scan_anchor(parser, &token, typ) { 1452 return false 1453 } 1454 yaml_insert_token(parser, -1, &token) 1455 return true 1456 } 1457 1458 // Produce the TAG token. 
1459 func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { 1460 // A tag could be a simple key. 1461 if !yaml_parser_save_simple_key(parser) { 1462 return false 1463 } 1464 1465 // A simple key cannot follow a tag. 1466 parser.simple_key_allowed = false 1467 1468 // Create the TAG token and append it to the queue. 1469 var token yaml_token_t 1470 if !yaml_parser_scan_tag(parser, &token) { 1471 return false 1472 } 1473 yaml_insert_token(parser, -1, &token) 1474 return true 1475 } 1476 1477 // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 1478 func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { 1479 // Remove any potential simple keys. 1480 if !yaml_parser_remove_simple_key(parser) { 1481 return false 1482 } 1483 1484 // A simple key may follow a block scalar. 1485 parser.simple_key_allowed = true 1486 1487 // Create the SCALAR token and append it to the queue. 1488 var token yaml_token_t 1489 if !yaml_parser_scan_block_scalar(parser, &token, literal) { 1490 return false 1491 } 1492 yaml_insert_token(parser, -1, &token) 1493 return true 1494 } 1495 1496 // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 1497 func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { 1498 // A plain scalar could be a simple key. 1499 if !yaml_parser_save_simple_key(parser) { 1500 return false 1501 } 1502 1503 // A simple key cannot follow a flow scalar. 1504 parser.simple_key_allowed = false 1505 1506 // Create the SCALAR token and append it to the queue. 1507 var token yaml_token_t 1508 if !yaml_parser_scan_flow_scalar(parser, &token, single) { 1509 return false 1510 } 1511 yaml_insert_token(parser, -1, &token) 1512 return true 1513 } 1514 1515 // Produce the SCALAR(...,plain) token. 1516 func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { 1517 // A plain scalar could be a simple key. 
1518 if !yaml_parser_save_simple_key(parser) { 1519 return false 1520 } 1521 1522 // A simple key cannot follow a flow scalar. 1523 parser.simple_key_allowed = false 1524 1525 // Create the SCALAR token and append it to the queue. 1526 var token yaml_token_t 1527 if !yaml_parser_scan_plain_scalar(parser, &token) { 1528 return false 1529 } 1530 yaml_insert_token(parser, -1, &token) 1531 return true 1532 } 1533 1534 // Eat whitespaces and comments until the next token is found. 1535 func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { 1536 1537 scan_mark := parser.mark 1538 1539 // Until the next token is not found. 1540 for { 1541 // Allow the BOM mark to start a line. 1542 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1543 return false 1544 } 1545 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { 1546 skip(parser) 1547 } 1548 1549 // Eat whitespaces. 1550 // Tabs are allowed: 1551 // - in the flow context 1552 // - in the block context, but not at the beginning of the line or 1553 // after '-', '?', or ':' (complex value). 1554 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1555 return false 1556 } 1557 1558 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { 1559 skip(parser) 1560 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1561 return false 1562 } 1563 } 1564 1565 // Check if we just had a line comment under a sequence entry that 1566 // looks more like a header to the following content. Similar to this: 1567 // 1568 // - # The comment 1569 // - Some data 1570 // 1571 // If so, transform the line comment to a head comment and reposition. 
1572 if len(parser.comments) > 0 && len(parser.tokens) > 1 { 1573 tokenA := parser.tokens[len(parser.tokens)-2] 1574 tokenB := parser.tokens[len(parser.tokens)-1] 1575 comment := &parser.comments[len(parser.comments)-1] 1576 if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) { 1577 // If it was in the prior line, reposition so it becomes a 1578 // header of the follow up token. Otherwise, keep it in place 1579 // so it becomes a header of the former. 1580 comment.head = comment.line 1581 comment.line = nil 1582 if comment.start_mark.line == parser.mark.line-1 { 1583 comment.token_mark = parser.mark 1584 } 1585 } 1586 } 1587 1588 // Eat a comment until a line break. 1589 if parser.buffer[parser.buffer_pos] == '#' { 1590 if !yaml_parser_scan_comments(parser, scan_mark) { 1591 return false 1592 } 1593 } 1594 1595 // If it is a line break, eat it. 1596 if is_break(parser.buffer, parser.buffer_pos) { 1597 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1598 return false 1599 } 1600 skip_line(parser) 1601 1602 // In the block context, a new line may start a simple key. 1603 if parser.flow_level == 0 { 1604 parser.simple_key_allowed = true 1605 } 1606 } else { 1607 break // We have found a token. 1608 } 1609 } 1610 1611 return true 1612 } 1613 1614 // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 1615 // 1616 // Scope: 1617 // %YAML 1.1 # a comment \n 1618 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1619 // %TAG !yaml! tag:yaml.org,2002: \n 1620 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1621 // 1622 func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { 1623 // Eat '%'. 1624 start_mark := parser.mark 1625 skip(parser) 1626 1627 // Scan the directive name. 1628 var name []byte 1629 if !yaml_parser_scan_directive_name(parser, start_mark, &name) { 1630 return false 1631 } 1632 1633 // Is it a YAML directive? 
1634 if bytes.Equal(name, []byte("YAML")) { 1635 // Scan the VERSION directive value. 1636 var major, minor int8 1637 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { 1638 return false 1639 } 1640 end_mark := parser.mark 1641 1642 // Create a VERSION-DIRECTIVE token. 1643 *token = yaml_token_t{ 1644 typ: yaml_VERSION_DIRECTIVE_TOKEN, 1645 start_mark: start_mark, 1646 end_mark: end_mark, 1647 major: major, 1648 minor: minor, 1649 } 1650 1651 // Is it a TAG directive? 1652 } else if bytes.Equal(name, []byte("TAG")) { 1653 // Scan the TAG directive value. 1654 var handle, prefix []byte 1655 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { 1656 return false 1657 } 1658 end_mark := parser.mark 1659 1660 // Create a TAG-DIRECTIVE token. 1661 *token = yaml_token_t{ 1662 typ: yaml_TAG_DIRECTIVE_TOKEN, 1663 start_mark: start_mark, 1664 end_mark: end_mark, 1665 value: handle, 1666 prefix: prefix, 1667 } 1668 1669 // Unknown directive. 1670 } else { 1671 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1672 start_mark, "found unknown directive name") 1673 return false 1674 } 1675 1676 // Eat the rest of the line including any comments. 1677 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1678 return false 1679 } 1680 1681 for is_blank(parser.buffer, parser.buffer_pos) { 1682 skip(parser) 1683 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1684 return false 1685 } 1686 } 1687 1688 if parser.buffer[parser.buffer_pos] == '#' { 1689 // [Go] Discard this inline comment for the time being. 1690 //if !yaml_parser_scan_line_comment(parser, start_mark) { 1691 // return false 1692 //} 1693 for !is_breakz(parser.buffer, parser.buffer_pos) { 1694 skip(parser) 1695 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1696 return false 1697 } 1698 } 1699 } 1700 1701 // Check if we are at the end of the line. 
1702 if !is_breakz(parser.buffer, parser.buffer_pos) { 1703 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1704 start_mark, "did not find expected comment or line break") 1705 return false 1706 } 1707 1708 // Eat a line break. 1709 if is_break(parser.buffer, parser.buffer_pos) { 1710 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1711 return false 1712 } 1713 skip_line(parser) 1714 } 1715 1716 return true 1717 } 1718 1719 // Scan the directive name. 1720 // 1721 // Scope: 1722 // %YAML 1.1 # a comment \n 1723 // ^^^^ 1724 // %TAG !yaml! tag:yaml.org,2002: \n 1725 // ^^^ 1726 // 1727 func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { 1728 // Consume the directive name. 1729 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1730 return false 1731 } 1732 1733 var s []byte 1734 for is_alpha(parser.buffer, parser.buffer_pos) { 1735 s = read(parser, s) 1736 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1737 return false 1738 } 1739 } 1740 1741 // Check if the name is empty. 1742 if len(s) == 0 { 1743 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1744 start_mark, "could not find expected directive name") 1745 return false 1746 } 1747 1748 // Check for an blank character after the name. 1749 if !is_blankz(parser.buffer, parser.buffer_pos) { 1750 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1751 start_mark, "found unexpected non-alphabetical character") 1752 return false 1753 } 1754 *name = s 1755 return true 1756 } 1757 1758 // Scan the value of VERSION-DIRECTIVE. 1759 // 1760 // Scope: 1761 // %YAML 1.1 # a comment \n 1762 // ^^^^^^ 1763 func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { 1764 // Eat whitespaces. 
1765 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1766 return false 1767 } 1768 for is_blank(parser.buffer, parser.buffer_pos) { 1769 skip(parser) 1770 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1771 return false 1772 } 1773 } 1774 1775 // Consume the major version number. 1776 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { 1777 return false 1778 } 1779 1780 // Eat '.'. 1781 if parser.buffer[parser.buffer_pos] != '.' { 1782 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1783 start_mark, "did not find expected digit or '.' character") 1784 } 1785 1786 skip(parser) 1787 1788 // Consume the minor version number. 1789 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { 1790 return false 1791 } 1792 return true 1793 } 1794 1795 const max_number_length = 2 1796 1797 // Scan the version number of VERSION-DIRECTIVE. 1798 // 1799 // Scope: 1800 // %YAML 1.1 # a comment \n 1801 // ^ 1802 // %YAML 1.1 # a comment \n 1803 // ^ 1804 func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { 1805 1806 // Repeat while the next character is digit. 1807 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1808 return false 1809 } 1810 var value, length int8 1811 for is_digit(parser.buffer, parser.buffer_pos) { 1812 // Check if the number is too long. 1813 length++ 1814 if length > max_number_length { 1815 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1816 start_mark, "found extremely long version number") 1817 } 1818 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) 1819 skip(parser) 1820 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1821 return false 1822 } 1823 } 1824 1825 // Check if the number was present. 
1826 if length == 0 { 1827 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1828 start_mark, "did not find expected version number") 1829 } 1830 *number = value 1831 return true 1832 } 1833 1834 // Scan the value of a TAG-DIRECTIVE token. 1835 // 1836 // Scope: 1837 // %TAG !yaml! tag:yaml.org,2002: \n 1838 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1839 // 1840 func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { 1841 var handle_value, prefix_value []byte 1842 1843 // Eat whitespaces. 1844 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1845 return false 1846 } 1847 1848 for is_blank(parser.buffer, parser.buffer_pos) { 1849 skip(parser) 1850 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1851 return false 1852 } 1853 } 1854 1855 // Scan a handle. 1856 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { 1857 return false 1858 } 1859 1860 // Expect a whitespace. 1861 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1862 return false 1863 } 1864 if !is_blank(parser.buffer, parser.buffer_pos) { 1865 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1866 start_mark, "did not find expected whitespace") 1867 return false 1868 } 1869 1870 // Eat whitespaces. 1871 for is_blank(parser.buffer, parser.buffer_pos) { 1872 skip(parser) 1873 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1874 return false 1875 } 1876 } 1877 1878 // Scan a prefix. 1879 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { 1880 return false 1881 } 1882 1883 // Expect a whitespace or line break. 
1884 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1885 return false 1886 } 1887 if !is_blankz(parser.buffer, parser.buffer_pos) { 1888 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1889 start_mark, "did not find expected whitespace or line break") 1890 return false 1891 } 1892 1893 *handle = handle_value 1894 *prefix = prefix_value 1895 return true 1896 } 1897 1898 func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { 1899 var s []byte 1900 1901 // Eat the indicator character. 1902 start_mark := parser.mark 1903 skip(parser) 1904 1905 // Consume the value. 1906 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1907 return false 1908 } 1909 1910 for is_alpha(parser.buffer, parser.buffer_pos) { 1911 s = read(parser, s) 1912 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1913 return false 1914 } 1915 } 1916 1917 end_mark := parser.mark 1918 1919 /* 1920 * Check if length of the anchor is greater than 0 and it is followed by 1921 * a whitespace character or one of the indicators: 1922 * 1923 * '?', ':', ',', ']', '}', '%', '@', '`'. 1924 */ 1925 1926 if len(s) == 0 || 1927 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || 1928 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || 1929 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || 1930 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || 1931 parser.buffer[parser.buffer_pos] == '`') { 1932 context := "while scanning an alias" 1933 if typ == yaml_ANCHOR_TOKEN { 1934 context = "while scanning an anchor" 1935 } 1936 yaml_parser_set_scanner_error(parser, context, start_mark, 1937 "did not find expected alphabetic or numeric character") 1938 return false 1939 } 1940 1941 // Create a token. 
1942 *token = yaml_token_t{ 1943 typ: typ, 1944 start_mark: start_mark, 1945 end_mark: end_mark, 1946 value: s, 1947 } 1948 1949 return true 1950 } 1951 1952 /* 1953 * Scan a TAG token. 1954 */ 1955 1956 func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { 1957 var handle, suffix []byte 1958 1959 start_mark := parser.mark 1960 1961 // Check if the tag is in the canonical form. 1962 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1963 return false 1964 } 1965 1966 if parser.buffer[parser.buffer_pos+1] == '<' { 1967 // Keep the handle as '' 1968 1969 // Eat '!<' 1970 skip(parser) 1971 skip(parser) 1972 1973 // Consume the tag value. 1974 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1975 return false 1976 } 1977 1978 // Check for '>' and eat it. 1979 if parser.buffer[parser.buffer_pos] != '>' { 1980 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1981 start_mark, "did not find the expected '>'") 1982 return false 1983 } 1984 1985 skip(parser) 1986 } else { 1987 // The tag has either the '!suffix' or the '!handle!suffix' form. 1988 1989 // First, try to scan a handle. 1990 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { 1991 return false 1992 } 1993 1994 // Check if it is, indeed, handle. 1995 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { 1996 // Scan the suffix now. 1997 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1998 return false 1999 } 2000 } else { 2001 // It wasn't a handle after all. Scan the rest of the tag. 2002 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { 2003 return false 2004 } 2005 2006 // Set the handle to '!'. 2007 handle = []byte{'!'} 2008 2009 // A special case: the '!' tag. Set the handle to '' and the 2010 // suffix to '!'. 2011 if len(suffix) == 0 { 2012 handle, suffix = suffix, handle 2013 } 2014 } 2015 } 2016 2017 // Check the character which ends the tag. 
2018 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2019 return false 2020 } 2021 if !is_blankz(parser.buffer, parser.buffer_pos) { 2022 yaml_parser_set_scanner_error(parser, "while scanning a tag", 2023 start_mark, "did not find expected whitespace or line break") 2024 return false 2025 } 2026 2027 end_mark := parser.mark 2028 2029 // Create a token. 2030 *token = yaml_token_t{ 2031 typ: yaml_TAG_TOKEN, 2032 start_mark: start_mark, 2033 end_mark: end_mark, 2034 value: handle, 2035 suffix: suffix, 2036 } 2037 return true 2038 }

// Scan a tag handle.
//
// A handle is a leading '!', followed by any run of is_alpha characters,
// optionally closed by a second '!'. The bytes consumed are stored in *handle.
//
// directive selects the error context (it is forwarded to
// yaml_parser_set_scanner_tag_error) and makes the closing '!' mandatory
// unless the handle is exactly "!". start_mark is only used for error
// reporting.
//
// Returns false if the buffer could not be refilled or a scanner error was set.
func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {
	// Check the initial '!' character.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	if parser.buffer[parser.buffer_pos] != '!' {
		yaml_parser_set_scanner_tag_error(parser, directive,
			start_mark, "did not find expected '!'")
		return false
	}

	var s []byte

	// Copy the '!' character.
	s = read(parser, s)

	// Copy all subsequent alphabetical and numerical characters.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	for is_alpha(parser.buffer, parser.buffer_pos) {
		s = read(parser, s)
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
	}

	// Check if the trailing character is '!' and copy it.
	if parser.buffer[parser.buffer_pos] == '!' {
		s = read(parser, s)
	} else {
		// It's either the '!' tag or not really a tag handle. If it's a %TAG
		// directive, it's an error. If it's a tag token, it must be a part of URI.
		if directive && string(s) != "!" {
			yaml_parser_set_scanner_tag_error(parser, directive,
				start_mark, "did not find expected '!'")
			return false
		}
	}

	*handle = s
	return true
}

// is_tag_uri_punct reports whether c is one of the punctuation characters
// accepted inside a tag URI in addition to the characters matched by is_alpha:
//
//	';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '.', '!', '~', '*',
//	'\'', '(', ')', '[', ']', '%'
func is_tag_uri_punct(c byte) bool {
	switch c {
	case ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '.', '!', '~', '*',
		'\'', '(', ')', '[', ']', '%':
		return true
	}
	return false
}

// Scan a tag URI.
//
// head holds text the caller has already consumed and that belongs to the URI;
// everything after its first byte is copied to the front of the result. The
// scanned URI (with %XX escapes decoded) is stored in *uri.
//
// directive and start_mark are used for error reporting only. An error is
// reported when head is empty and no URI character is found at the current
// position.
//
// Returns false if the buffer could not be refilled or a scanner error was set.
func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
	var s []byte
	hasTag := len(head) > 0

	// Copy the head if needed.
	//
	// Note that we don't copy the leading '!' character.
	if len(head) > 1 {
		s = append(s, head[1:]...)
	}

	// Scan the tag.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// The set of characters that may appear in URI is as follows:
	//
	//      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
	//      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
	//      '%'.
	//
	// is_alpha is relied upon for the alphanumerics, '_' and '-'; the
	// remaining punctuation is matched by is_tag_uri_punct.
	for is_alpha(parser.buffer, parser.buffer_pos) || is_tag_uri_punct(parser.buffer[parser.buffer_pos]) {
		// Check if it is a URI-escape sequence.
		if parser.buffer[parser.buffer_pos] == '%' {
			if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
				return false
			}
		} else {
			s = read(parser, s)
		}
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		hasTag = true
	}

	if !hasTag {
		yaml_parser_set_scanner_tag_error(parser, directive,
			start_mark, "did not find expected tag URI")
		return false
	}
	*uri = s
	return true
}

// Decode an URI-escape sequence corresponding to a single UTF-8 character.
//
// The parser must be positioned at a '%'. Consecutive "%XX" escapes are
// decoded and the resulting octets appended to *s. The first octet decides,
// via width, how many octets make up the character; every following octet
// must have the form 10xxxxxx.
//
// directive and start_mark are used for error reporting only. Returns false if
// the buffer could not be refilled, or the result of
// yaml_parser_set_scanner_tag_error when the sequence is malformed.
func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {

	// Decode the required number of characters.
	//
	// w is the number of octets still expected; 1024 is a sentinel meaning
	// "the leading octet has not been seen yet".
	w := 1024
	for w > 0 {
		// Check for a URI-escaped octet.
		if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
			return false
		}

		if !(parser.buffer[parser.buffer_pos] == '%' &&
			is_hex(parser.buffer, parser.buffer_pos+1) &&
			is_hex(parser.buffer, parser.buffer_pos+2)) {
			return yaml_parser_set_scanner_tag_error(parser, directive,
				start_mark, "did not find URI escaped octet")
		}

		// Get the octet.
		octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))

		// If it is the leading octet, determine the length of the UTF-8 sequence.
		if w == 1024 {
			w = width(octet)
			if w == 0 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect leading UTF-8 octet")
			}
		} else {
			// Check if the trailing octet is correct.
			if octet&0xC0 != 0x80 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect trailing UTF-8 octet")
			}
		}

		// Copy the octet and move the pointers.
		*s = append(*s, octet)
		skip(parser)
		skip(parser)
		skip(parser)
		w--
	}
	return true
}

// Scan a block scalar.
//
// The parser must be positioned at the '|' or '>' indicator. literal selects
// the style of the produced token (literal vs. folded) and disables line
// folding. The header may carry a chomping indicator ('+' or '-') and an
// indentation indicator ('1'-'9') in either order, followed by optional blanks
// and a comment up to the end of the line.
//
// On success *token is set to a yaml_SCALAR_TOKEN holding the scalar text.
// Returns false if the buffer could not be refilled or a scanner error was set.
func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
	// Eat the indicator '|' or '>'.
	start_mark := parser.mark
	skip(parser)

	// Scan the additional block scalar indicators.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// Check for a chomping indicator.
	//
	// chomping is +1 for '+', -1 for '-', and 0 when no indicator is given.
	// increment is the explicit indentation indicator, or 0 when absent.
	var chomping, increment int
	if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
		// Set the chomping method and eat the indicator.
		if parser.buffer[parser.buffer_pos] == '+' {
			chomping = +1
		} else {
			chomping = -1
		}
		skip(parser)

		// Check for an indentation indicator.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if is_digit(parser.buffer, parser.buffer_pos) {
			// Check that the indentation is greater than 0.
			if parser.buffer[parser.buffer_pos] == '0' {
				yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
					start_mark, "found an indentation indicator equal to 0")
				return false
			}

			// Get the indentation level and eat the indicator.
			increment = as_digit(parser.buffer, parser.buffer_pos)
			skip(parser)
		}

	} else if is_digit(parser.buffer, parser.buffer_pos) {
		// Do the same as above, but in the opposite order.

		if parser.buffer[parser.buffer_pos] == '0' {
			yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found an indentation indicator equal to 0")
			return false
		}
		increment = as_digit(parser.buffer, parser.buffer_pos)
		skip(parser)

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
			if parser.buffer[parser.buffer_pos] == '+' {
				chomping = +1
			} else {
				chomping = -1
			}
			skip(parser)
		}
	}

	// Eat whitespaces and comments to the end of the line.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	for is_blank(parser.buffer, parser.buffer_pos) {
		skip(parser)
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
	}
	if parser.buffer[parser.buffer_pos] == '#' {
		if !yaml_parser_scan_line_comment(parser, start_mark) {
			return false
		}
		// Skip whatever is left of the line after the comment scan.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
	}

	// Check if we are at the end of the line.
	if !is_breakz(parser.buffer, parser.buffer_pos) {
		yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
			start_mark, "did not find expected comment or line break")
		return false
	}

	// Eat a line break.
	if is_break(parser.buffer, parser.buffer_pos) {
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		skip_line(parser)
	}

	end_mark := parser.mark

	// Set the indentation level if it was specified.
	//
	// indent stays 0 otherwise, which tells
	// yaml_parser_scan_block_scalar_breaks to detect it from the content.
	var indent int
	if increment > 0 {
		if parser.indent >= 0 {
			indent = parser.indent + increment
		} else {
			indent = increment
		}
	}

	// Scan the leading line breaks and determine the indentation level if needed.
	var s, leading_break, trailing_breaks []byte
	if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
		return false
	}

	// Scan the block scalar content.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	var leading_blank, trailing_blank bool
	for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
		// We are at the beginning of a non-empty line.

		// Is it a trailing whitespace?
		trailing_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Check if we need to fold the leading line break.
		if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
			// Do we need to join the lines by space?
			if len(trailing_breaks) == 0 {
				s = append(s, ' ')
			}
		} else {
			s = append(s, leading_break...)
		}
		leading_break = leading_break[:0]

		// Append the remaining line breaks.
		s = append(s, trailing_breaks...)
		trailing_breaks = trailing_breaks[:0]

		// Is it a leading whitespace?
		leading_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Consume the current line.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			s = read(parser, s)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}

		leading_break = read_line(parser, leading_break)

		// Eat the following indentation spaces and line breaks.
		if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
			return false
		}
	}

	// Chomp the tail.
	//
	// '-' drops the final line break, no indicator keeps only the final line
	// break, and '+' additionally keeps the trailing empty lines.
	if chomping != -1 {
		s = append(s, leading_break...)
	}
	if chomping == 1 {
		s = append(s, trailing_breaks...)
	}

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_LITERAL_SCALAR_STYLE,
	}
	if !literal {
		token.style = yaml_FOLDED_SCALAR_STYLE
	}
	return true
}

// Scan indentation spaces and line breaks for a block scalar. Determine the
// indentation level if needed.
//
// Leading spaces are skipped up to column *indent (or without limit while
// *indent is 0) and every line break of an otherwise empty line is appended to
// *breaks. *end_mark is set to the position at entry and then updated after
// each consumed line break.
//
// If *indent is 0 on entry it is set to the largest column reached, but never
// less than parser.indent+1 and never less than 1.
//
// Returns false if the buffer could not be refilled, or the result of
// yaml_parser_set_scanner_error when a tab is found where indentation is
// expected. start_mark is used for error reporting only.
func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
	*end_mark = parser.mark

	// Eat the indentation spaces and line breaks.
	max_indent := 0
	for {
		// Eat the indentation spaces.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
		if parser.mark.column > max_indent {
			max_indent = parser.mark.column
		}

		// Check for a tab character messing the indentation.
		if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
			return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found a tab character where an indentation space is expected")
		}

		// Have we found a non-empty line?
		if !is_break(parser.buffer, parser.buffer_pos) {
			break
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		// [Go] Should really be returning breaks instead.
		*breaks = read_line(parser, *breaks)
		*end_mark = parser.mark
	}

	// Determine the indentation level if needed.
	if *indent == 0 {
		*indent = max_indent
		if *indent < parser.indent+1 {
			*indent = parser.indent + 1
		}
		if *indent < 1 {
			*indent = 1
		}
	}
	return true
}

// Scan a quoted scalar.
//
// The parser must be positioned at the opening quote. single selects
// single-quoted rules (only '' is an escape) over double-quoted rules
// (backslash escapes, including \x, \u and \U code points and escaped line
// breaks). Line breaks inside the scalar are folded: a lone break becomes a
// space, additional breaks are kept.
//
// On success *token is set to a yaml_SCALAR_TOKEN holding the decoded text.
// Returns false if the buffer could not be refilled or a scanner error was
// set (document indicator or end of stream inside the scalar, unknown escape,
// bad hex digits, invalid code point).
func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
	// Eat the left quote.
	start_mark := parser.mark
	skip(parser)

	// Consume the content of the quoted scalar.
	var s, leading_break, trailing_breaks, whitespaces []byte
	for {
		// Check that there are no document indicators at the beginning of the line.
		if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
			return false
		}

		if parser.mark.column == 0 &&
			((parser.buffer[parser.buffer_pos+0] == '-' &&
				parser.buffer[parser.buffer_pos+1] == '-' &&
				parser.buffer[parser.buffer_pos+2] == '-') ||
				(parser.buffer[parser.buffer_pos+0] == '.' &&
					parser.buffer[parser.buffer_pos+1] == '.' &&
					parser.buffer[parser.buffer_pos+2] == '.')) &&
			is_blankz(parser.buffer, parser.buffer_pos+3) {
			yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
				start_mark, "found unexpected document indicator")
			return false
		}

		// Check for EOF.
		if is_z(parser.buffer, parser.buffer_pos) {
			yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
				start_mark, "found unexpected end of stream")
			return false
		}

		// Consume non-blank characters.
		leading_blanks := false
		for !is_blankz(parser.buffer, parser.buffer_pos) {
			if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
				// It is an escaped single quote.
				s = append(s, '\'')
				skip(parser)
				skip(parser)

			} else if single && parser.buffer[parser.buffer_pos] == '\'' {
				// It is a right single quote.
				break
			} else if !single && parser.buffer[parser.buffer_pos] == '"' {
				// It is a right double quote.
				break

			} else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
				// It is an escaped line break.
				if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
					return false
				}
				skip(parser)
				skip_line(parser)
				leading_blanks = true
				break

			} else if !single && parser.buffer[parser.buffer_pos] == '\\' {
				// It is an escape sequence.
				//
				// code_length is the number of hex digits that follow the
				// escape character, or 0 for single-character escapes.
				code_length := 0

				// Check the escape character.
				switch parser.buffer[parser.buffer_pos+1] {
				case '0':
					s = append(s, 0)
				case 'a':
					s = append(s, '\x07')
				case 'b':
					s = append(s, '\x08')
				case 't', '\t':
					s = append(s, '\x09')
				case 'n':
					s = append(s, '\x0A')
				case 'v':
					s = append(s, '\x0B')
				case 'f':
					s = append(s, '\x0C')
				case 'r':
					s = append(s, '\x0D')
				case 'e':
					s = append(s, '\x1B')
				case ' ':
					s = append(s, '\x20')
				case '"':
					s = append(s, '"')
				case '\'':
					s = append(s, '\'')
				case '\\':
					s = append(s, '\\')
				case 'N': // NEL (#x85)
					s = append(s, '\xC2')
					s = append(s, '\x85')
				case '_': // #xA0
					s = append(s, '\xC2')
					s = append(s, '\xA0')
				case 'L': // LS (#x2028)
					s = append(s, '\xE2')
					s = append(s, '\x80')
					s = append(s, '\xA8')
				case 'P': // PS (#x2029)
					s = append(s, '\xE2')
					s = append(s, '\x80')
					s = append(s, '\xA9')
				case 'x':
					code_length = 2
				case 'u':
					code_length = 4
				case 'U':
					code_length = 8
				default:
					yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
						start_mark, "found unknown escape character")
					return false
				}

				// Eat the backslash and the escape character.
				skip(parser)
				skip(parser)

				// Consume an arbitrary escape code.
				if code_length > 0 {
					var value int

					// Scan the character value.
					if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
						return false
					}
					for k := 0; k < code_length; k++ {
						if !is_hex(parser.buffer, parser.buffer_pos+k) {
							yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
								start_mark, "did not find expected hexdecimal number")
							return false
						}
						value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k)
					}

					// Check the value and write the character.
					//
					// Surrogates and values above U+10FFFF are rejected; the
					// rest is encoded as UTF-8 by hand.
					if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
						yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
							start_mark, "found invalid Unicode character escape code")
						return false
					}
					if value <= 0x7F {
						s = append(s, byte(value))
					} else if value <= 0x7FF {
						s = append(s, byte(0xC0+(value>>6)))
						s = append(s, byte(0x80+(value&0x3F)))
					} else if value <= 0xFFFF {
						s = append(s, byte(0xE0+(value>>12)))
						s = append(s, byte(0x80+((value>>6)&0x3F)))
						s = append(s, byte(0x80+(value&0x3F)))
					} else {
						s = append(s, byte(0xF0+(value>>18)))
						s = append(s, byte(0x80+((value>>12)&0x3F)))
						s = append(s, byte(0x80+((value>>6)&0x3F)))
						s = append(s, byte(0x80+(value&0x3F)))
					}

					// Advance the pointer.
					for k := 0; k < code_length; k++ {
						skip(parser)
					}
				}
			} else {
				// It is a non-escaped non-blank character.
				s = read(parser, s)
			}
			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
				return false
			}
		}

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}

		// Check if we are at the end of the scalar.
		if single {
			if parser.buffer[parser.buffer_pos] == '\'' {
				break
			}
		} else {
			if parser.buffer[parser.buffer_pos] == '"' {
				break
			}
		}

		// Consume blank characters.
		for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
			if is_blank(parser.buffer, parser.buffer_pos) {
				// Consume a space or a tab character.
				if !leading_blanks {
					whitespaces = read(parser, whitespaces)
				} else {
					skip(parser)
				}
			} else {
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}

				// Check if it is a first line break.
				if !leading_blanks {
					whitespaces = whitespaces[:0]
					leading_break = read_line(parser, leading_break)
					leading_blanks = true
				} else {
					trailing_breaks = read_line(parser, trailing_breaks)
				}
			}
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Join the whitespaces or fold line breaks.
		if leading_blanks {
			// Do we need to fold line breaks?
			if len(leading_break) > 0 && leading_break[0] == '\n' {
				if len(trailing_breaks) == 0 {
					s = append(s, ' ')
				} else {
					s = append(s, trailing_breaks...)
				}
			} else {
				s = append(s, leading_break...)
				s = append(s, trailing_breaks...)
			}
			trailing_breaks = trailing_breaks[:0]
			leading_break = leading_break[:0]
		} else {
			s = append(s, whitespaces...)
			whitespaces = whitespaces[:0]
		}
	}

	// Eat the right quote.
	skip(parser)
	end_mark := parser.mark

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_SINGLE_QUOTED_SCALAR_STYLE,
	}
	if !single {
		token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
	}
	return true
}

// Scan a plain scalar.
//
// Scanning starts at the current position and stops at a document indicator
// in column 0, a '#' at the start of a word, a ':' followed by a blank or end
// of input, a flow indicator (',', '?', '[', ']', '{', '}') while inside a
// flow collection, or, in block context, a line indented less than
// parser.indent+1. Interior line breaks are folded and interior blanks are
// preserved; trailing blanks and breaks are not part of the value.
//
// On success *token is set to a yaml_SCALAR_TOKEN and, if the scan ended
// after at least one line break, parser.simple_key_allowed is set to true.
// Returns false if the buffer could not be refilled or a scanner error was set.
func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {

	var s, leading_break, trailing_breaks, whitespaces []byte
	var leading_blanks bool
	var indent = parser.indent + 1

	start_mark := parser.mark
	end_mark := parser.mark

	// Consume the content of the plain scalar.
	for {
		// Check for a document indicator.
		if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
			return false
		}
		if parser.mark.column == 0 &&
			((parser.buffer[parser.buffer_pos+0] == '-' &&
				parser.buffer[parser.buffer_pos+1] == '-' &&
				parser.buffer[parser.buffer_pos+2] == '-') ||
				(parser.buffer[parser.buffer_pos+0] == '.' &&
					parser.buffer[parser.buffer_pos+1] == '.' &&
					parser.buffer[parser.buffer_pos+2] == '.')) &&
			is_blankz(parser.buffer, parser.buffer_pos+3) {
			break
		}

		// Check for a comment.
		if parser.buffer[parser.buffer_pos] == '#' {
			break
		}

		// Consume non-blank characters.
		for !is_blankz(parser.buffer, parser.buffer_pos) {

			// Check for indicators that may end a plain scalar.
			if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
				(parser.flow_level > 0 &&
					(parser.buffer[parser.buffer_pos] == ',' ||
						parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
						parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
						parser.buffer[parser.buffer_pos] == '}')) {
				break
			}

			// Check if we need to join whitespaces and breaks.
			if leading_blanks || len(whitespaces) > 0 {
				if leading_blanks {
					// Do we need to fold line breaks?
					//
					// leading_blanks is only set right after leading_break
					// has been filled by read_line below.
					if leading_break[0] == '\n' {
						if len(trailing_breaks) == 0 {
							s = append(s, ' ')
						} else {
							s = append(s, trailing_breaks...)
						}
					} else {
						s = append(s, leading_break...)
						s = append(s, trailing_breaks...)
					}
					trailing_breaks = trailing_breaks[:0]
					leading_break = leading_break[:0]
					leading_blanks = false
				} else {
					s = append(s, whitespaces...)
					whitespaces = whitespaces[:0]
				}
			}

			// Copy the character.
			s = read(parser, s)

			end_mark = parser.mark
			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
				return false
			}
		}

		// Is it the end?
		if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
			break
		}

		// Consume blank characters.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}

		for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
			if is_blank(parser.buffer, parser.buffer_pos) {

				// Check for tab characters that abuse indentation.
				if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
					yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
						start_mark, "found a tab character that violates indentation")
					return false
				}

				// Consume a space or a tab character.
				if !leading_blanks {
					whitespaces = read(parser, whitespaces)
				} else {
					skip(parser)
				}
			} else {
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}

				// Check if it is a first line break.
				if !leading_blanks {
					whitespaces = whitespaces[:0]
					leading_break = read_line(parser, leading_break)
					leading_blanks = true
				} else {
					trailing_breaks = read_line(parser, trailing_breaks)
				}
			}
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Check indentation level.
		if parser.flow_level == 0 && parser.mark.column < indent {
			break
		}
	}

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_PLAIN_SCALAR_STYLE,
	}

	// Note that we change the 'simple_key_allowed' flag.
	if leading_blanks {
		parser.simple_key_allowed = true
	}
	return true
}

// Scan a comment that sits on the remainder of the current line.
//
// Nothing is done when parser.newlines > 0. Otherwise up to 512 bytes are
// peeked past blanks; if the first non-blank byte is '#', input is consumed up
// to the end of the line and the text from '#' onwards is appended to
// parser.comments as a line comment, tagged with token_mark.
//
// A failed buffer refill while peeking just ends the look-ahead; a failed
// refill while consuming makes the function return false.
func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
	if parser.newlines > 0 {
		return true
	}

	var start_mark yaml_mark_t
	var text []byte

	for peek := 0; peek < 512; peek++ {
		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
			break
		}
		if is_blank(parser.buffer, parser.buffer_pos+peek) {
			continue
		}
		if parser.buffer[parser.buffer_pos+peek] == '#' {
			// seen is the stream index of the '#'; everything before it is
			// skipped, everything from it to the end of the line is kept.
			seen := parser.mark.index + peek
			for {
				if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
					return false
				}
				if is_breakz(parser.buffer, parser.buffer_pos) {
					if parser.mark.index >= seen {
						break
					}
					if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
						return false
					}
					skip_line(parser)
				} else if parser.mark.index >= seen {
					if len(text) == 0 {
						start_mark = parser.mark
					}
					text = read(parser, text)
				} else {
					skip(parser)
				}
			}
		}
		// Only the first non-blank byte is examined.
		break
	}
	if len(text) > 0 {
		parser.comments = append(parser.comments, yaml_comment_t{
			token_mark: token_mark,
			start_mark: start_mark,
			line:       text,
		})
	}
	return true
}

// Scan the comment lines that follow the current position and record them in
// parser.comments as foot comments (belonging to preceding content) or as a
// head comment (belonging to whatever comes next).
//
// The reference token is the last entry of parser.tokens, or the one before it
// when the last is a flow entry. Consecutive comment lines are joined with
// '\n'. Accumulated text is flushed as a foot comment when a closing flow
// bracket is reached, when the first empty line follows a comment that starts
// on the computed foot line or is dedented below the current indentation, or
// when a later comment line is dedented and at a different column. Whatever
// text remains at the end is recorded as a head comment.
//
// Returns false only if the buffer could not be refilled while consuming a
// comment line. The function indexes parser.tokens without a length check, so
// callers must ensure it is non-empty.
func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
	token := parser.tokens[len(parser.tokens)-1]

	if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
		token = parser.tokens[len(parser.tokens)-2]
	}

	var token_mark = token.start_mark
	var start_mark yaml_mark_t
	var next_indent = parser.indent
	if next_indent < 0 {
		next_indent = 0
	}

	var recent_empty = false
	var first_empty = parser.newlines <= 1

	// line and column track the position of the byte being peeked at.
	var line = parser.mark.line
	var column = parser.mark.column

	var text []byte

	// The foot line is the place where a comment must start to
	// still be considered as a foot of the prior content.
	// If there's some content in the currently parsed line, then
	// the foot is the line below it.
	var foot_line = -1
	if scan_mark.line > 0 {
		foot_line = parser.mark.line - parser.newlines + 1
		if parser.newlines == 0 && parser.mark.column > 1 {
			foot_line++
		}
	}

	var peek = 0
	for ; peek < 512; peek++ {
		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
			break
		}
		column++
		if is_blank(parser.buffer, parser.buffer_pos+peek) {
			continue
		}
		c := parser.buffer[parser.buffer_pos+peek]
		var close_flow = parser.flow_level > 0 && (c == ']' || c == '}')
		if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) {
			// Got line break or terminator.
			if close_flow || !recent_empty {
				if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) {
					// This is the first empty line and there were no empty lines before,
					// so this initial part of the comment is a foot of the prior token
					// instead of being a head for the following one. Split it up.
					// Alternatively, this might also be the last comment inside a flow
					// scope, so it must be a footer.
					if len(text) > 0 {
						if start_mark.column-1 < next_indent {
							// If dedented it's unrelated to the prior token.
							token_mark = start_mark
						}
						parser.comments = append(parser.comments, yaml_comment_t{
							scan_mark:  scan_mark,
							token_mark: token_mark,
							start_mark: start_mark,
							end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
							foot:       text,
						})
						scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
						token_mark = scan_mark
						text = nil
					}
				} else {
					if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
						text = append(text, '\n')
					}
				}
			}
			if !is_break(parser.buffer, parser.buffer_pos+peek) {
				break
			}
			first_empty = false
			recent_empty = true
			column = 0
			line++
			continue
		}

		if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) {
			// The comment at the different indentation is a foot of the
			// preceding data rather than a head of the upcoming one.
			parser.comments = append(parser.comments, yaml_comment_t{
				scan_mark:  scan_mark,
				token_mark: token_mark,
				start_mark: start_mark,
				end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
				foot:       text,
			})
			scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
			token_mark = scan_mark
			text = nil
		}

		if parser.buffer[parser.buffer_pos+peek] != '#' {
			break
		}

		if len(text) == 0 {
			start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
		} else {
			text = append(text, '\n')
		}

		recent_empty = false

		// Consume until after the consumed comment line.
		seen := parser.mark.index + peek
		for {
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
			if is_breakz(parser.buffer, parser.buffer_pos) {
				if parser.mark.index >= seen {
					break
				}
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}
				skip_line(parser)
			} else if parser.mark.index >= seen {
				text = read(parser, text)
			} else {
				skip(parser)
			}
		}

		// The parser now sits on the terminator of the comment line. Restart
		// the look-ahead from here; the loop's peek++ steps over that byte.
		peek = 0
		column = 0
		line = parser.mark.line
		next_indent = parser.indent
		if next_indent < 0 {
			next_indent = 0
		}
	}

	if len(text) > 0 {
		parser.comments = append(parser.comments, yaml_comment_t{
			scan_mark:  scan_mark,
			token_mark: start_mark,
			start_mark: start_mark,
			end_mark:   yaml_mark_t{parser.mark.index + peek - 1, line, column},
			head:       text,
		})
	}
	return true
}