raw_html.go (4381B)
1 package parser 2 3 import ( 4 "bytes" 5 "regexp" 6 7 "github.com/yuin/goldmark/ast" 8 "github.com/yuin/goldmark/text" 9 "github.com/yuin/goldmark/util" 10 ) 11 12 type rawHTMLParser struct { 13 } 14 15 var defaultRawHTMLParser = &rawHTMLParser{} 16 17 // NewRawHTMLParser return a new InlineParser that can parse 18 // inline htmls 19 func NewRawHTMLParser() InlineParser { 20 return defaultRawHTMLParser 21 } 22 23 func (s *rawHTMLParser) Trigger() []byte { 24 return []byte{'<'} 25 } 26 27 func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node { 28 line, _ := block.PeekLine() 29 if len(line) > 1 && util.IsAlphaNumeric(line[1]) { 30 return s.parseMultiLineRegexp(openTagRegexp, block, pc) 31 } 32 if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) { 33 return s.parseMultiLineRegexp(closeTagRegexp, block, pc) 34 } 35 if bytes.HasPrefix(line, openComment) { 36 return s.parseComment(block, pc) 37 } 38 if bytes.HasPrefix(line, openProcessingInstruction) { 39 return s.parseUntil(block, closeProcessingInstruction, pc) 40 } 41 if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' { 42 return s.parseUntil(block, closeDecl, pc) 43 } 44 if bytes.HasPrefix(line, openCDATA) { 45 return s.parseUntil(block, closeCDATA, pc) 46 } 47 return nil 48 } 49 50 var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)` 51 52 var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` 53 var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*[ \t]*/?>`) 54 var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`) 55 56 var openProcessingInstruction = []byte("<?") 57 var closeProcessingInstruction = []byte("?>") 58 var openCDATA = []byte("<![CDATA[") 59 var closeCDATA = []byte("]]>") 60 var closeDecl = []byte(">") 61 var emptyComment = []byte("<!---->") 62 var invalidComment1 = []byte("<!-->") 63 var invalidComment2 = []byte("<!--->") 64 var openComment = []byte("<!--") 65 var closeComment = []byte("-->") 66 var doubleHyphen = []byte("--") 67 68 func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node { 69 savedLine, savedSegment := block.Position() 70 node := ast.NewRawHTML() 71 line, segment := block.PeekLine() 72 if bytes.HasPrefix(line, emptyComment) { 73 node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment))) 74 block.Advance(len(emptyComment)) 75 return node 76 } 77 if bytes.HasPrefix(line, invalidComment1) || bytes.HasPrefix(line, invalidComment2) { 78 return nil 79 } 80 offset := len(openComment) 81 line = line[offset:] 82 for { 83 hindex := bytes.Index(line, doubleHyphen) 84 if hindex > -1 { 85 hindex += offset 86 } 87 index := bytes.Index(line, closeComment) + offset 88 if index > -1 && hindex == index { 89 if index == 0 || len(line) < 2 || line[index-offset-1] != '-' { 90 node.Segments.Append(segment.WithStop(segment.Start + index + len(closeComment))) 91 block.Advance(index + len(closeComment)) 92 return node 93 } 94 } 95 if hindex > 0 { 96 break 97 } 98 node.Segments.Append(segment) 99 block.AdvanceLine() 100 line, segment = block.PeekLine() 101 offset = 0 102 if line == nil { 103 break 104 } 105 } 106 block.SetPosition(savedLine, savedSegment) 107 return nil 108 } 109 110 func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node { 111 savedLine, savedSegment := block.Position() 112 node := ast.NewRawHTML() 113 for { 114 line, segment := block.PeekLine() 115 if line == nil { 116 break 117 } 118 index := bytes.Index(line, closer) 119 if index > -1 { 120 node.Segments.Append(segment.WithStop(segment.Start + index + len(closer))) 121 block.Advance(index + len(closer)) 122 return node 123 } 124 node.Segments.Append(segment) 125 block.AdvanceLine() 126 } 127 block.SetPosition(savedLine, savedSegment) 128 return nil 129 } 130 131 func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node { 132 sline, ssegment := block.Position() 133 if block.Match(reg) { 134 node := ast.NewRawHTML() 135 eline, esegment := block.Position() 136 block.SetPosition(sline, ssegment) 137 for { 138 line, segment := block.PeekLine() 139 if line == nil { 140 break 141 } 142 l, _ := block.Position() 143 start := segment.Start 144 if l == sline { 145 start = ssegment.Start 146 } 147 end := segment.Stop 148 if l == eline { 149 end = esegment.Start 150 } 151 152 node.Segments.Append(text.NewSegment(start, end)) 153 if l == eline { 154 block.Advance(end - start) 155 break 156 } else { 157 block.AdvanceLine() 158 } 159 } 160 return node 161 } 162 return nil 163 }