html_block.go (6356B)
1 package parser 2 3 import ( 4 "bytes" 5 "regexp" 6 "strings" 7 8 "github.com/yuin/goldmark/ast" 9 "github.com/yuin/goldmark/text" 10 "github.com/yuin/goldmark/util" 11 ) 12 13 var allowedBlockTags = map[string]bool{ 14 "address": true, 15 "article": true, 16 "aside": true, 17 "base": true, 18 "basefont": true, 19 "blockquote": true, 20 "body": true, 21 "caption": true, 22 "center": true, 23 "col": true, 24 "colgroup": true, 25 "dd": true, 26 "details": true, 27 "dialog": true, 28 "dir": true, 29 "div": true, 30 "dl": true, 31 "dt": true, 32 "fieldset": true, 33 "figcaption": true, 34 "figure": true, 35 "footer": true, 36 "form": true, 37 "frame": true, 38 "frameset": true, 39 "h1": true, 40 "h2": true, 41 "h3": true, 42 "h4": true, 43 "h5": true, 44 "h6": true, 45 "head": true, 46 "header": true, 47 "hr": true, 48 "html": true, 49 "iframe": true, 50 "legend": true, 51 "li": true, 52 "link": true, 53 "main": true, 54 "menu": true, 55 "menuitem": true, 56 "meta": true, 57 "nav": true, 58 "noframes": true, 59 "ol": true, 60 "optgroup": true, 61 "option": true, 62 "p": true, 63 "param": true, 64 "section": true, 65 "source": true, 66 "summary": true, 67 "table": true, 68 "tbody": true, 69 "td": true, 70 "tfoot": true, 71 "th": true, 72 "thead": true, 73 "title": true, 74 "tr": true, 75 "track": true, 76 "ul": true, 77 } 78 79 var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style|textarea)(?:\s.*|>.*|/>.*|)(?:\r\n|\n)?$`) 80 var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style|textarea)>.*`) 81 82 var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`) 83 var htmlBlockType2Close = []byte{'-', '-', '>'} 84 85 var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`) 86 var htmlBlockType3Close = []byte{'?', '>'} 87 88 var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*(?:\r\n|\n)?$`) 89 var htmlBlockType4Close = []byte{'>'} 90 91 var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`) 92 var htmlBlockType5Close = []byte{']', ']', '>'} 93 94 var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}<(?:/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(?:[ ].*|>.*|/>.*|)(?:\r\n|\n)?$`) 95 96 var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(` + attributePattern + `*)[ ]*(?:>|/>)[ ]*(?:\r\n|\n)?$`) 97 98 type htmlBlockParser struct { 99 } 100 101 var defaultHTMLBlockParser = &htmlBlockParser{} 102 103 // NewHTMLBlockParser return a new BlockParser that can parse html 104 // blocks. 105 func NewHTMLBlockParser() BlockParser { 106 return defaultHTMLBlockParser 107 } 108 109 func (b *htmlBlockParser) Trigger() []byte { 110 return []byte{'<'} 111 } 112 113 func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { 114 var node *ast.HTMLBlock 115 line, segment := reader.PeekLine() 116 last := pc.LastOpenedBlock().Node 117 if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' { 118 return nil, NoChildren 119 } 120 121 if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil { 122 node = ast.NewHTMLBlock(ast.HTMLBlockType1) 123 } else if htmlBlockType2OpenRegexp.Match(line) { 124 node = ast.NewHTMLBlock(ast.HTMLBlockType2) 125 } else if htmlBlockType3OpenRegexp.Match(line) { 126 node = ast.NewHTMLBlock(ast.HTMLBlockType3) 127 } else if htmlBlockType4OpenRegexp.Match(line) { 128 node = ast.NewHTMLBlock(ast.HTMLBlockType4) 129 } else if htmlBlockType5OpenRegexp.Match(line) { 130 node = ast.NewHTMLBlock(ast.HTMLBlockType5) 131 } else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil { 132 isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/")) 133 hasAttr := match[6] != match[7] 134 tagName := strings.ToLower(string(line[match[4]:match[5]])) 135 _, ok := allowedBlockTags[tagName] 136 if ok { 137 node = ast.NewHTMLBlock(ast.HTMLBlockType6) 138 } else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph 139 node = ast.NewHTMLBlock(ast.HTMLBlockType7) 140 } 141 } 142 if node == nil { 143 if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil { 144 tagName := string(line[match[2]:match[3]]) 145 _, ok := allowedBlockTags[strings.ToLower(tagName)] 146 if ok { 147 node = ast.NewHTMLBlock(ast.HTMLBlockType6) 148 } 149 } 150 } 151 if node != nil { 152 reader.Advance(segment.Len() - util.TrimRightSpaceLength(line)) 153 node.Lines().Append(segment) 154 return node, NoChildren 155 } 156 return nil, NoChildren 157 } 158 159 func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State { 160 htmlBlock := node.(*ast.HTMLBlock) 161 lines := htmlBlock.Lines() 162 line, segment := reader.PeekLine() 163 var closurePattern []byte 164 165 switch htmlBlock.HTMLBlockType { 166 case ast.HTMLBlockType1: 167 if lines.Len() == 1 { 168 firstLine := lines.At(0) 169 if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) { 170 return Close 171 } 172 } 173 if htmlBlockType1CloseRegexp.Match(line) { 174 htmlBlock.ClosureLine = segment 175 reader.Advance(segment.Len() - util.TrimRightSpaceLength(line)) 176 return Close 177 } 178 case ast.HTMLBlockType2: 179 closurePattern = htmlBlockType2Close 180 fallthrough 181 case ast.HTMLBlockType3: 182 if closurePattern == nil { 183 closurePattern = htmlBlockType3Close 184 } 185 fallthrough 186 case ast.HTMLBlockType4: 187 if closurePattern == nil { 188 closurePattern = htmlBlockType4Close 189 } 190 fallthrough 191 case ast.HTMLBlockType5: 192 if closurePattern == nil { 193 closurePattern = htmlBlockType5Close 194 } 195 196 if lines.Len() == 1 { 197 firstLine := lines.At(0) 198 if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) { 199 return Close 200 } 201 } 202 if bytes.Contains(line, closurePattern) { 203 htmlBlock.ClosureLine = segment 204 reader.Advance(segment.Len()) 205 return Close 206 } 207 208 case ast.HTMLBlockType6, ast.HTMLBlockType7: 209 if util.IsBlank(line) { 210 return Close 211 } 212 } 213 node.Lines().Append(segment) 214 reader.Advance(segment.Len() - util.TrimRightSpaceLength(line)) 215 return Continue | NoChildren 216 } 217 218 func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) { 219 // nothing to do 220 } 221 222 func (b *htmlBlockParser) CanInterruptParagraph() bool { 223 return true 224 } 225 226 func (b *htmlBlockParser) CanAcceptIndentedLine() bool { 227 return false 228 }