parser.go (34022B)
1 // Package parser contains stuff that are related to parsing a Markdown text. 2 package parser 3 4 import ( 5 "fmt" 6 "strings" 7 "sync" 8 9 "github.com/yuin/goldmark/ast" 10 "github.com/yuin/goldmark/text" 11 "github.com/yuin/goldmark/util" 12 ) 13 14 // A Reference interface represents a link reference in Markdown text. 15 type Reference interface { 16 // String implements Stringer. 17 String() string 18 19 // Label returns a label of the reference. 20 Label() []byte 21 22 // Destination returns a destination(URL) of the reference. 23 Destination() []byte 24 25 // Title returns a title of the reference. 26 Title() []byte 27 } 28 29 type reference struct { 30 label []byte 31 destination []byte 32 title []byte 33 } 34 35 // NewReference returns a new Reference. 36 func NewReference(label, destination, title []byte) Reference { 37 return &reference{label, destination, title} 38 } 39 40 func (r *reference) Label() []byte { 41 return r.label 42 } 43 44 func (r *reference) Destination() []byte { 45 return r.destination 46 } 47 48 func (r *reference) Title() []byte { 49 return r.title 50 } 51 52 func (r *reference) String() string { 53 return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title) 54 } 55 56 // An IDs interface is a collection of the element ids. 57 type IDs interface { 58 // Generate generates a new element id. 59 Generate(value []byte, kind ast.NodeKind) []byte 60 61 // Put puts a given element id to the used ids table. 
62 Put(value []byte) 63 } 64 65 type ids struct { 66 values map[string]bool 67 } 68 69 func newIDs() IDs { 70 return &ids{ 71 values: map[string]bool{}, 72 } 73 } 74 75 func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte { 76 value = util.TrimLeftSpace(value) 77 value = util.TrimRightSpace(value) 78 result := []byte{} 79 for i := 0; i < len(value); { 80 v := value[i] 81 l := util.UTF8Len(v) 82 i += int(l) 83 if l != 1 { 84 continue 85 } 86 if util.IsAlphaNumeric(v) { 87 if 'A' <= v && v <= 'Z' { 88 v += 'a' - 'A' 89 } 90 result = append(result, v) 91 } else if util.IsSpace(v) || v == '-' || v == '_' { 92 result = append(result, '-') 93 } 94 } 95 if len(result) == 0 { 96 if kind == ast.KindHeading { 97 result = []byte("heading") 98 } else { 99 result = []byte("id") 100 } 101 } 102 if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok { 103 s.values[util.BytesToReadOnlyString(result)] = true 104 return result 105 } 106 for i := 1; ; i++ { 107 newResult := fmt.Sprintf("%s-%d", result, i) 108 if _, ok := s.values[newResult]; !ok { 109 s.values[newResult] = true 110 return []byte(newResult) 111 } 112 113 } 114 } 115 116 func (s *ids) Put(value []byte) { 117 s.values[util.BytesToReadOnlyString(value)] = true 118 } 119 120 // ContextKey is a key that is used to set arbitrary values to the context. 121 type ContextKey int 122 123 // ContextKeyMax is a maximum value of the ContextKey. 124 var ContextKeyMax ContextKey 125 126 // NewContextKey return a new ContextKey value. 127 func NewContextKey() ContextKey { 128 ContextKeyMax++ 129 return ContextKeyMax 130 } 131 132 // A Context interface holds a information that are necessary to parse 133 // Markdown text. 134 type Context interface { 135 // String implements Stringer. 136 String() string 137 138 // Get returns a value associated with the given key. 139 Get(ContextKey) interface{} 140 141 // ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value. 
142 ComputeIfAbsent(ContextKey, func() interface{}) interface{} 143 144 // Set sets the given value to the context. 145 Set(ContextKey, interface{}) 146 147 // AddReference adds the given reference to this context. 148 AddReference(Reference) 149 150 // Reference returns (a reference, true) if a reference associated with 151 // the given label exists, otherwise (nil, false). 152 Reference(label string) (Reference, bool) 153 154 // References returns a list of references. 155 References() []Reference 156 157 // IDs returns a collection of the element ids. 158 IDs() IDs 159 160 // BlockOffset returns a first non-space character position on current line. 161 // This value is valid only for BlockParser.Open. 162 // BlockOffset returns -1 if current line is blank. 163 BlockOffset() int 164 165 // BlockOffset sets a first non-space character position on current line. 166 // This value is valid only for BlockParser.Open. 167 SetBlockOffset(int) 168 169 // BlockIndent returns an indent width on current line. 170 // This value is valid only for BlockParser.Open. 171 // BlockIndent returns -1 if current line is blank. 172 BlockIndent() int 173 174 // BlockIndent sets an indent width on current line. 175 // This value is valid only for BlockParser.Open. 176 SetBlockIndent(int) 177 178 // FirstDelimiter returns a first delimiter of the current delimiter list. 179 FirstDelimiter() *Delimiter 180 181 // LastDelimiter returns a last delimiter of the current delimiter list. 182 LastDelimiter() *Delimiter 183 184 // PushDelimiter appends the given delimiter to the tail of the current 185 // delimiter list. 186 PushDelimiter(delimiter *Delimiter) 187 188 // RemoveDelimiter removes the given delimiter from the current delimiter list. 189 RemoveDelimiter(d *Delimiter) 190 191 // ClearDelimiters clears the current delimiter list. 192 ClearDelimiters(bottom ast.Node) 193 194 // OpenedBlocks returns a list of nodes that are currently in parsing. 
195 OpenedBlocks() []Block 196 197 // SetOpenedBlocks sets a list of nodes that are currently in parsing. 198 SetOpenedBlocks([]Block) 199 200 // LastOpenedBlock returns a last node that is currently in parsing. 201 LastOpenedBlock() Block 202 203 // IsInLinkLabel returns true if current position seems to be in link label. 204 IsInLinkLabel() bool 205 } 206 207 // A ContextConfig struct is a data structure that holds configuration of the Context. 208 type ContextConfig struct { 209 IDs IDs 210 } 211 212 // An ContextOption is a functional option type for the Context. 213 type ContextOption func(*ContextConfig) 214 215 // WithIDs is a functional option for the Context. 216 func WithIDs(ids IDs) ContextOption { 217 return func(c *ContextConfig) { 218 c.IDs = ids 219 } 220 } 221 222 type parseContext struct { 223 store []interface{} 224 ids IDs 225 refs map[string]Reference 226 blockOffset int 227 blockIndent int 228 delimiters *Delimiter 229 lastDelimiter *Delimiter 230 openedBlocks []Block 231 } 232 233 // NewContext returns a new Context. 
234 func NewContext(options ...ContextOption) Context { 235 cfg := &ContextConfig{ 236 IDs: newIDs(), 237 } 238 for _, option := range options { 239 option(cfg) 240 } 241 242 return &parseContext{ 243 store: make([]interface{}, ContextKeyMax+1), 244 refs: map[string]Reference{}, 245 ids: cfg.IDs, 246 blockOffset: -1, 247 blockIndent: -1, 248 delimiters: nil, 249 lastDelimiter: nil, 250 openedBlocks: []Block{}, 251 } 252 } 253 254 func (p *parseContext) Get(key ContextKey) interface{} { 255 return p.store[key] 256 } 257 258 func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} { 259 v := p.store[key] 260 if v == nil { 261 v = f() 262 p.store[key] = v 263 } 264 return v 265 } 266 267 func (p *parseContext) Set(key ContextKey, value interface{}) { 268 p.store[key] = value 269 } 270 271 func (p *parseContext) IDs() IDs { 272 return p.ids 273 } 274 275 func (p *parseContext) BlockOffset() int { 276 return p.blockOffset 277 } 278 279 func (p *parseContext) SetBlockOffset(v int) { 280 p.blockOffset = v 281 } 282 283 func (p *parseContext) BlockIndent() int { 284 return p.blockIndent 285 } 286 287 func (p *parseContext) SetBlockIndent(v int) { 288 p.blockIndent = v 289 } 290 291 func (p *parseContext) LastDelimiter() *Delimiter { 292 return p.lastDelimiter 293 } 294 295 func (p *parseContext) FirstDelimiter() *Delimiter { 296 return p.delimiters 297 } 298 299 func (p *parseContext) PushDelimiter(d *Delimiter) { 300 if p.delimiters == nil { 301 p.delimiters = d 302 p.lastDelimiter = d 303 } else { 304 l := p.lastDelimiter 305 p.lastDelimiter = d 306 l.NextDelimiter = d 307 d.PreviousDelimiter = l 308 } 309 } 310 311 func (p *parseContext) RemoveDelimiter(d *Delimiter) { 312 if d.PreviousDelimiter == nil { 313 p.delimiters = d.NextDelimiter 314 } else { 315 d.PreviousDelimiter.NextDelimiter = d.NextDelimiter 316 if d.NextDelimiter != nil { 317 d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter 318 } 319 } 320 if d.NextDelimiter == nil 
{ 321 p.lastDelimiter = d.PreviousDelimiter 322 } 323 if p.delimiters != nil { 324 p.delimiters.PreviousDelimiter = nil 325 } 326 if p.lastDelimiter != nil { 327 p.lastDelimiter.NextDelimiter = nil 328 } 329 d.NextDelimiter = nil 330 d.PreviousDelimiter = nil 331 if d.Length != 0 { 332 ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment) 333 } else { 334 d.Parent().RemoveChild(d.Parent(), d) 335 } 336 } 337 338 func (p *parseContext) ClearDelimiters(bottom ast.Node) { 339 if p.lastDelimiter == nil { 340 return 341 } 342 var c ast.Node 343 for c = p.lastDelimiter; c != nil && c != bottom; { 344 prev := c.PreviousSibling() 345 if d, ok := c.(*Delimiter); ok { 346 p.RemoveDelimiter(d) 347 } 348 c = prev 349 } 350 } 351 352 func (p *parseContext) AddReference(ref Reference) { 353 key := util.ToLinkReference(ref.Label()) 354 if _, ok := p.refs[key]; !ok { 355 p.refs[key] = ref 356 } 357 } 358 359 func (p *parseContext) Reference(label string) (Reference, bool) { 360 v, ok := p.refs[label] 361 return v, ok 362 } 363 364 func (p *parseContext) References() []Reference { 365 ret := make([]Reference, 0, len(p.refs)) 366 for _, v := range p.refs { 367 ret = append(ret, v) 368 } 369 return ret 370 } 371 372 func (p *parseContext) String() string { 373 refs := []string{} 374 for _, r := range p.refs { 375 refs = append(refs, r.String()) 376 } 377 378 return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ",")) 379 } 380 381 func (p *parseContext) OpenedBlocks() []Block { 382 return p.openedBlocks 383 } 384 385 func (p *parseContext) SetOpenedBlocks(v []Block) { 386 p.openedBlocks = v 387 } 388 389 func (p *parseContext) LastOpenedBlock() Block { 390 if l := len(p.openedBlocks); l != 0 { 391 return p.openedBlocks[l-1] 392 } 393 return Block{} 394 } 395 396 func (p *parseContext) IsInLinkLabel() bool { 397 tlist := p.Get(linkLabelStateKey) 398 return tlist != nil 399 } 400 401 // State represents parser's state. 
402 // State is designed to use as a bit flag. 403 type State int 404 405 const ( 406 none State = 1 << iota 407 408 // Continue indicates parser can continue parsing. 409 Continue 410 411 // Close indicates parser cannot parse anymore. 412 Close 413 414 // HasChildren indicates parser may have child blocks. 415 HasChildren 416 417 // NoChildren indicates parser does not have child blocks. 418 NoChildren 419 420 // RequireParagraph indicates parser requires that the last node 421 // must be a paragraph and is not converted to other nodes by 422 // ParagraphTransformers. 423 RequireParagraph 424 ) 425 426 // A Config struct is a data structure that holds configuration of the Parser. 427 type Config struct { 428 Options map[OptionName]interface{} 429 BlockParsers util.PrioritizedSlice /*<BlockParser>*/ 430 InlineParsers util.PrioritizedSlice /*<InlineParser>*/ 431 ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/ 432 ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/ 433 EscapedSpace bool 434 } 435 436 // NewConfig returns a new Config. 437 func NewConfig() *Config { 438 return &Config{ 439 Options: map[OptionName]interface{}{}, 440 BlockParsers: util.PrioritizedSlice{}, 441 InlineParsers: util.PrioritizedSlice{}, 442 ParagraphTransformers: util.PrioritizedSlice{}, 443 ASTTransformers: util.PrioritizedSlice{}, 444 } 445 } 446 447 // An Option interface is a functional option type for the Parser. 448 type Option interface { 449 SetParserOption(*Config) 450 } 451 452 // OptionName is a name of parser options. 453 type OptionName string 454 455 // Attribute is an option name that spacify attributes of elements. 456 const optAttribute OptionName = "Attribute" 457 458 type withAttribute struct { 459 } 460 461 func (o *withAttribute) SetParserOption(c *Config) { 462 c.Options[optAttribute] = true 463 } 464 465 // WithAttribute is a functional option that enables custom attributes. 
466 func WithAttribute() Option { 467 return &withAttribute{} 468 } 469 470 // A Parser interface parses Markdown text into AST nodes. 471 type Parser interface { 472 // Parse parses the given Markdown text into AST nodes. 473 Parse(reader text.Reader, opts ...ParseOption) ast.Node 474 475 // AddOption adds the given option to this parser. 476 AddOptions(...Option) 477 } 478 479 // A SetOptioner interface sets the given option to the object. 480 type SetOptioner interface { 481 // SetOption sets the given option to the object. 482 // Unacceptable options may be passed. 483 // Thus implementations must ignore unacceptable options. 484 SetOption(name OptionName, value interface{}) 485 } 486 487 // A BlockParser interface parses a block level element like Paragraph, List, 488 // Blockquote etc. 489 type BlockParser interface { 490 // Trigger returns a list of characters that triggers Parse method of 491 // this parser. 492 // If Trigger returns a nil, Open will be called with any lines. 493 Trigger() []byte 494 495 // Open parses the current line and returns a result of parsing. 496 // 497 // Open must not parse beyond the current line. 498 // If Open has been able to parse the current line, Open must advance a reader 499 // position by consumed byte length. 500 // 501 // If Open has not been able to parse the current line, Open should returns 502 // (nil, NoChildren). If Open has been able to parse the current line, Open 503 // should returns a new Block node and returns HasChildren or NoChildren. 504 Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) 505 506 // Continue parses the current line and returns a result of parsing. 507 // 508 // Continue must not parse beyond the current line. 509 // If Continue has been able to parse the current line, Continue must advance 510 // a reader position by consumed byte length. 511 // 512 // If Continue has not been able to parse the current line, Continue should 513 // returns Close. 
If Continue has been able to parse the current line, 514 // Continue should returns (Continue | NoChildren) or 515 // (Continue | HasChildren) 516 Continue(node ast.Node, reader text.Reader, pc Context) State 517 518 // Close will be called when the parser returns Close. 519 Close(node ast.Node, reader text.Reader, pc Context) 520 521 // CanInterruptParagraph returns true if the parser can interrupt paragraphs, 522 // otherwise false. 523 CanInterruptParagraph() bool 524 525 // CanAcceptIndentedLine returns true if the parser can open new node when 526 // the given line is being indented more than 3 spaces. 527 CanAcceptIndentedLine() bool 528 } 529 530 // An InlineParser interface parses an inline level element like CodeSpan, Link etc. 531 type InlineParser interface { 532 // Trigger returns a list of characters that triggers Parse method of 533 // this parser. 534 // Trigger characters must be a punctuation or a halfspace. 535 // Halfspaces triggers this parser when character is any spaces characters or 536 // a head of line 537 Trigger() []byte 538 539 // Parse parse the given block into an inline node. 540 // 541 // Parse can parse beyond the current line. 542 // If Parse has been able to parse the current line, it must advance a reader 543 // position by consumed byte length. 544 Parse(parent ast.Node, block text.Reader, pc Context) ast.Node 545 } 546 547 // A CloseBlocker interface is a callback function that will be 548 // called when block is closed in the inline parsing. 549 type CloseBlocker interface { 550 // CloseBlock will be called when a block is closed. 551 CloseBlock(parent ast.Node, block text.Reader, pc Context) 552 } 553 554 // A ParagraphTransformer transforms parsed Paragraph nodes. 555 // For example, link references are searched in parsed Paragraphs. 556 type ParagraphTransformer interface { 557 // Transform transforms the given paragraph. 
558 Transform(node *ast.Paragraph, reader text.Reader, pc Context) 559 } 560 561 // ASTTransformer transforms entire Markdown document AST tree. 562 type ASTTransformer interface { 563 // Transform transforms the given AST tree. 564 Transform(node *ast.Document, reader text.Reader, pc Context) 565 } 566 567 // DefaultBlockParsers returns a new list of default BlockParsers. 568 // Priorities of default BlockParsers are: 569 // 570 // SetextHeadingParser, 100 571 // ThematicBreakParser, 200 572 // ListParser, 300 573 // ListItemParser, 400 574 // CodeBlockParser, 500 575 // ATXHeadingParser, 600 576 // FencedCodeBlockParser, 700 577 // BlockquoteParser, 800 578 // HTMLBlockParser, 900 579 // ParagraphParser, 1000 580 func DefaultBlockParsers() []util.PrioritizedValue { 581 return []util.PrioritizedValue{ 582 util.Prioritized(NewSetextHeadingParser(), 100), 583 util.Prioritized(NewThematicBreakParser(), 200), 584 util.Prioritized(NewListParser(), 300), 585 util.Prioritized(NewListItemParser(), 400), 586 util.Prioritized(NewCodeBlockParser(), 500), 587 util.Prioritized(NewATXHeadingParser(), 600), 588 util.Prioritized(NewFencedCodeBlockParser(), 700), 589 util.Prioritized(NewBlockquoteParser(), 800), 590 util.Prioritized(NewHTMLBlockParser(), 900), 591 util.Prioritized(NewParagraphParser(), 1000), 592 } 593 } 594 595 // DefaultInlineParsers returns a new list of default InlineParsers. 
596 // Priorities of default InlineParsers are: 597 // 598 // CodeSpanParser, 100 599 // LinkParser, 200 600 // AutoLinkParser, 300 601 // RawHTMLParser, 400 602 // EmphasisParser, 500 603 func DefaultInlineParsers() []util.PrioritizedValue { 604 return []util.PrioritizedValue{ 605 util.Prioritized(NewCodeSpanParser(), 100), 606 util.Prioritized(NewLinkParser(), 200), 607 util.Prioritized(NewAutoLinkParser(), 300), 608 util.Prioritized(NewRawHTMLParser(), 400), 609 util.Prioritized(NewEmphasisParser(), 500), 610 } 611 } 612 613 // DefaultParagraphTransformers returns a new list of default ParagraphTransformers. 614 // Priorities of default ParagraphTransformers are: 615 // 616 // LinkReferenceParagraphTransformer, 100 617 func DefaultParagraphTransformers() []util.PrioritizedValue { 618 return []util.PrioritizedValue{ 619 util.Prioritized(LinkReferenceParagraphTransformer, 100), 620 } 621 } 622 623 // A Block struct holds a node and correspond parser pair. 624 type Block struct { 625 // Node is a BlockNode. 626 Node ast.Node 627 // Parser is a BlockParser. 628 Parser BlockParser 629 } 630 631 type parser struct { 632 options map[OptionName]interface{} 633 blockParsers [256][]BlockParser 634 freeBlockParsers []BlockParser 635 inlineParsers [256][]InlineParser 636 closeBlockers []CloseBlocker 637 paragraphTransformers []ParagraphTransformer 638 astTransformers []ASTTransformer 639 escapedSpace bool 640 config *Config 641 initSync sync.Once 642 } 643 644 type withBlockParsers struct { 645 value []util.PrioritizedValue 646 } 647 648 func (o *withBlockParsers) SetParserOption(c *Config) { 649 c.BlockParsers = append(c.BlockParsers, o.value...) 650 } 651 652 // WithBlockParsers is a functional option that allow you to add 653 // BlockParsers to the parser. 
654 func WithBlockParsers(bs ...util.PrioritizedValue) Option { 655 return &withBlockParsers{bs} 656 } 657 658 type withInlineParsers struct { 659 value []util.PrioritizedValue 660 } 661 662 func (o *withInlineParsers) SetParserOption(c *Config) { 663 c.InlineParsers = append(c.InlineParsers, o.value...) 664 } 665 666 // WithInlineParsers is a functional option that allow you to add 667 // InlineParsers to the parser. 668 func WithInlineParsers(bs ...util.PrioritizedValue) Option { 669 return &withInlineParsers{bs} 670 } 671 672 type withParagraphTransformers struct { 673 value []util.PrioritizedValue 674 } 675 676 func (o *withParagraphTransformers) SetParserOption(c *Config) { 677 c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...) 678 } 679 680 // WithParagraphTransformers is a functional option that allow you to add 681 // ParagraphTransformers to the parser. 682 func WithParagraphTransformers(ps ...util.PrioritizedValue) Option { 683 return &withParagraphTransformers{ps} 684 } 685 686 type withASTTransformers struct { 687 value []util.PrioritizedValue 688 } 689 690 func (o *withASTTransformers) SetParserOption(c *Config) { 691 c.ASTTransformers = append(c.ASTTransformers, o.value...) 692 } 693 694 // WithASTTransformers is a functional option that allow you to add 695 // ASTTransformers to the parser. 696 func WithASTTransformers(ps ...util.PrioritizedValue) Option { 697 return &withASTTransformers{ps} 698 } 699 700 type withEscapedSpace struct { 701 } 702 703 func (o *withEscapedSpace) SetParserOption(c *Config) { 704 c.EscapedSpace = true 705 } 706 707 // WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers. 
func WithEscapedSpace() Option {
	return &withEscapedSpace{}
}

// withOption is the Option behind WithOption: a single name/value pair.
type withOption struct {
	name  OptionName
	value interface{}
}

// SetParserOption stores the held value under the held name in c.Options.
func (o *withOption) SetParserOption(c *Config) {
	c.Options[o.name] = o.value
}

// WithOption is a functional option that allows you to set
// an arbitrary option to the parser.
func WithOption(name OptionName, value interface{}) Option {
	return &withOption{name, value}
}

// NewParser returns a new Parser with given options.
// The options are only collected into a Config here; the parser lookup
// tables are built from it on the first call to Parse.
func NewParser(options ...Option) Parser {
	config := NewConfig()
	for _, opt := range options {
		opt.SetParserOption(config)
	}

	p := &parser{
		options: map[OptionName]interface{}{},
		config:  config,
	}

	return p
}

// AddOptions applies opts to the pending configuration.
//
// NOTE(review): Parse sets p.config to nil after its one-time
// initialization, so options added after the first Parse receive a nil
// *Config — callers presumably have to add options before parsing.
func (p *parser) AddOptions(opts ...Option) {
	for _, opt := range opts {
		opt.SetParserOption(p.config)
	}
}

// addBlockParser registers v.Value as a block parser. It panics if the
// value is not a BlockParser. If the value implements SetOptioner, every
// entry of options is passed to it. A parser whose Trigger returns nil is
// added to freeBlockParsers; otherwise it is added to the table entry of
// each trigger byte.
func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
	bp, ok := v.Value.(BlockParser)
	if !ok {
		panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
	}
	tcs := bp.Trigger()
	so, ok := v.Value.(SetOptioner)
	if ok {
		for oname, ovalue := range options {
			so.SetOption(oname, ovalue)
		}
	}
	if tcs == nil {
		p.freeBlockParsers = append(p.freeBlockParsers, bp)
	} else {
		for _, tc := range tcs {
			if p.blockParsers[tc] == nil {
				p.blockParsers[tc] = []BlockParser{}
			}
			p.blockParsers[tc] = append(p.blockParsers[tc], bp)
		}
	}
}

// addInlineParser registers v.Value as an inline parser. It panics if the
// value is not an InlineParser. If the value implements SetOptioner, every
// entry of options is passed to it; if it implements CloseBlocker, it is
// also recorded in closeBlockers. The parser is added to the table entry
// of each trigger byte.
func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
	ip, ok := v.Value.(InlineParser)
	if !ok {
		panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
	}
	tcs := ip.Trigger()
	so, ok := v.Value.(SetOptioner)
	if ok {
		for oname, ovalue := range options {
			so.SetOption(oname, ovalue)
		}
	}
	if cb, ok := ip.(CloseBlocker); ok {
		p.closeBlockers = append(p.closeBlockers, cb)
	}
	for _, tc := range tcs {
		if p.inlineParsers[tc] == nil {
			p.inlineParsers[tc] = []InlineParser{}
		}
		p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
	}
}

// addParagraphTransformer registers v.Value as a paragraph transformer.
// It panics if the value is not a ParagraphTransformer. If the value
// implements SetOptioner, every entry of options is passed to it.
func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
	pt, ok := v.Value.(ParagraphTransformer)
	if !ok {
		panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
	}
	so, ok := v.Value.(SetOptioner)
	if ok {
		for oname, ovalue := range options {
			so.SetOption(oname, ovalue)
		}
	}
	p.paragraphTransformers = append(p.paragraphTransformers, pt)
}

// addASTTransformer registers v.Value as an AST transformer.
// It panics if the value is not an ASTTransformer. If the value
// implements SetOptioner, every entry of options is passed to it.
func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
	at, ok := v.Value.(ASTTransformer)
	if !ok {
		panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
	}
	so, ok := v.Value.(SetOptioner)
	if ok {
		for oname, ovalue := range options {
			so.SetOption(oname, ovalue)
		}
	}
	p.astTransformers = append(p.astTransformers, at)
}

// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct {
	Context Context
}

// A ParseOption is a functional option type for the Parser.Parse.
type ParseOption func(c *ParseConfig)

// WithContext is a functional option that allows you to override
// a default context.
func WithContext(context Context) ParseOption {
	return func(c *ParseConfig) {
		c.Context = context
	}
}

// Parse parses the Markdown text behind reader and returns the document
// node.
//
// The first call builds the parser tables from the collected Config
// (guarded by initSync) and then drops the Config. Every call then parses
// the block structure, parses the inline content of each block, and
// finally runs the AST transformers in order. A fresh Context is created
// unless one was supplied through opts.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
	p.initSync.Do(func() {
		p.config.BlockParsers.Sort()
		for _, v := range p.config.BlockParsers {
			p.addBlockParser(v, p.config.Options)
		}
		// Every triggered list also ends with the trigger-less parsers,
		// so a single table lookup yields all candidates for a line.
		for i := range p.blockParsers {
			if p.blockParsers[i] != nil {
				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
			}
		}

		p.config.InlineParsers.Sort()
		for _, v := range p.config.InlineParsers {
			p.addInlineParser(v, p.config.Options)
		}
		p.config.ParagraphTransformers.Sort()
		for _, v := range p.config.ParagraphTransformers {
			p.addParagraphTransformer(v, p.config.Options)
		}
		p.config.ASTTransformers.Sort()
		for _, v := range p.config.ASTTransformers {
			p.addASTTransformer(v, p.config.Options)
		}
		p.escapedSpace = p.config.EscapedSpace
		p.config = nil
	})
	c := &ParseConfig{}
	for _, opt := range opts {
		opt(c)
	}
	if c.Context == nil {
		c.Context = NewContext()
	}
	pc := c.Context
	root := ast.NewDocument()
	p.parseBlocks(root, reader, pc)

	blockReader := text.NewBlockReader(reader.Source(), nil)
	p.walkBlock(root, func(node ast.Node) {
		p.parseBlock(blockReader, node, pc)
	})
	for _, at := range p.astTransformers {
		at.Transform(root, reader, pc)
	}
	// root.Dump(reader.Source(), 0)
	return root
}

// transformParagraph runs the paragraph transformers on node in order.
// It stops and returns true as soon as node no longer has a parent after
// a transformer ran; it returns false if node is still attached after all
// of them.
func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
	for _, pt := range p.paragraphTransformers {
		pt.Transform(node, reader, pc)
		if node.Parent() == nil {
			return true
		}
	}
	return false
}

// closeBlocks closes the opened blocks with indexes from down to to
// (both inclusive) and removes them from the context's opened block list.
// Paragraphs that are still attached are passed through the paragraph
// transformers first.
func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
	blocks := pc.OpenedBlocks()
	for i := from; i >= to; i-- {
		node := blocks[i].Node
		paragraph, ok := node.(*ast.Paragraph)
		if ok && node.Parent() != nil {
			p.transformParagraph(paragraph, reader, pc)
		}
		if node.Parent() != nil { // closes only if node has not been transformed
			blocks[i].Parser.Close(blocks[i].Node, reader, pc)
		}
	}
	if from == len(blocks)-1 {
		blocks = blocks[0:to]
	} else {
		blocks = append(blocks[0:to], blocks[from+1:]...)
	}
	pc.SetOpenedBlocks(blocks)
}

// blockOpenResult is the outcome of openBlocks.
type blockOpenResult int

const (
	paragraphContinuation blockOpenResult = iota + 1
	newBlocksOpened
	noBlocksOpened
)

// openBlocks tries to open new blocks under parent on the current line.
//
// It returns newBlocksOpened if at least one block was opened,
// paragraphContinuation if none was opened but the last opened block is a
// paragraph whose parser accepted the line, and noBlocksOpened otherwise.
// blankLine is recorded on every opened node through
// SetBlankPreviousLines.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Candidates: the list for the first non-space byte, falling back to
	// the trigger-less parsers.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}

// lineStat records whether a line was blank as seen at one nesting level
// of the opened blocks.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether the line lineNum was recorded as blank for
// the given level. stats is scanned backwards, starting level entries
// before its end. For an entry of that line, a blank entry of a shallower
// level yields true and an entry of the same level yields its own flag.
// The scan gives up with false once it reaches an entry of an earlier
// line. If there is nothing to scan at all the result is true.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	ret := true
	for i := len(stats) - 1 - level; i >= 0; i-- {
		ret = false
		s := stats[i]
		if s.lineNum == lineNum {
			if s.level < level && s.isBlank {
				return true
			} else if s.level == level {
				return s.isBlank
			}
		}
		if s.lineNum < lineNum {
			return ret
		}
	}
	return ret
}

// parseBlocks builds the block structure under parent from reader.
//
// The outer loop skips blank lines and opens the first blocks of a run of
// lines; the inner loop then feeds each following line to the opened
// blocks in order, opening child blocks or closing blocks as their
// parsers report. blankLines collects per-line, per-level blank flags
// that isBlankLine consults to tell openBlocks whether the previous line
// was blank.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	blankLines := make([]lineStat, 0, 128)
	isBlank := false
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// No more input: close everything that is still open.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}

// walkBlock calls cb for every node of the tree rooted at block, visiting
// all children of a node before the node itself.
func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
	for c := block.FirstChild(); c != nil; c = c.NextSibling() {
		p.walkBlock(c, cb)
	}
	cb(block)
}

const (
	lineBreakHard uint8 = 1 << iota
	lineBreakSoft
	lineBreakVisible
)

func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		var lineBreakFlags uint8 = 0
		hasNewLine := line[lineLength-1] == '\n'
		if ((lineLength >= 3 && line[lineLength-2] == '\\' && line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
lineBreakFlags |= lineBreakHard | lineBreakVisible 1160 } else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) && hasNewLine { // ends with \\r\n 1161 lineLength -= 3 1162 lineBreakFlags |= lineBreakHard | lineBreakVisible 1163 } else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && hasNewLine { // ends with [space][space]\n 1164 lineLength -= 3 1165 lineBreakFlags |= lineBreakHard 1166 } else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n 1167 lineLength -= 4 1168 lineBreakFlags |= lineBreakHard 1169 } else if hasNewLine { 1170 // If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak 1171 // If the line ends with a hardlineBreak, then it cannot end with a softLinebreak 1172 // See https://spec.commonmark.org/0.30/#soft-line-breaks 1173 lineBreakFlags |= lineBreakSoft 1174 } 1175 1176 l, startPosition := block.Position() 1177 n := 0 1178 for i := 0; i < lineLength; i++ { 1179 c := line[i] 1180 if c == '\n' { 1181 break 1182 } 1183 isSpace := util.IsSpace(c) && c != '\r' && c != '\n' 1184 isPunct := util.IsPunct(c) 1185 if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 { 1186 parserChar := c 1187 if isSpace || (i == 0 && !isPunct) { 1188 parserChar = ' ' 1189 } 1190 ips := p.inlineParsers[parserChar] 1191 if ips != nil { 1192 block.Advance(n) 1193 n = 0 1194 savedLine, savedPosition := block.Position() 1195 if i != 0 { 1196 _, currentPosition := block.Position() 1197 ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition)) 1198 _, startPosition = block.Position() 1199 } 1200 var inlineNode ast.Node 1201 for _, ip := range ips { 1202 inlineNode = ip.Parse(parent, block, pc) 1203 if inlineNode 
!= nil { 1204 break 1205 } 1206 block.SetPosition(savedLine, savedPosition) 1207 } 1208 if inlineNode != nil { 1209 parent.AppendChild(parent, inlineNode) 1210 goto retry 1211 } 1212 } 1213 } 1214 if escaped { 1215 escaped = false 1216 n++ 1217 continue 1218 } 1219 1220 if c == '\\' { 1221 escaped = true 1222 n++ 1223 continue 1224 } 1225 1226 escaped = false 1227 n++ 1228 } 1229 if n != 0 { 1230 block.Advance(n) 1231 } 1232 currentL, currentPosition := block.Position() 1233 if l != currentL { 1234 continue 1235 } 1236 diff := startPosition.Between(currentPosition) 1237 var text *ast.Text 1238 if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible { 1239 text = ast.NewTextSegment(diff) 1240 } else { 1241 text = ast.NewTextSegment(diff.TrimRightSpace(source)) 1242 } 1243 text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0) 1244 text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0) 1245 parent.AppendChild(parent, text) 1246 block.AdvanceLine() 1247 } 1248 1249 ProcessDelimiters(nil, pc) 1250 for _, ip := range p.closeBlockers { 1251 ip.CloseBlock(parent, block, pc) 1252 } 1253 }