gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

raw_html.go (4381B)


      1 package parser
      2 
      3 import (
      4 	"bytes"
      5 	"regexp"
      6 
      7 	"github.com/yuin/goldmark/ast"
      8 	"github.com/yuin/goldmark/text"
      9 	"github.com/yuin/goldmark/util"
     10 )
     11 
     12 type rawHTMLParser struct {
     13 }
     14 
     15 var defaultRawHTMLParser = &rawHTMLParser{}
     16 
     17 // NewRawHTMLParser return a new InlineParser that can parse
     18 // inline htmls
     19 func NewRawHTMLParser() InlineParser {
     20 	return defaultRawHTMLParser
     21 }
     22 
     23 func (s *rawHTMLParser) Trigger() []byte {
     24 	return []byte{'<'}
     25 }
     26 
     27 func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
     28 	line, _ := block.PeekLine()
     29 	if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
     30 		return s.parseMultiLineRegexp(openTagRegexp, block, pc)
     31 	}
     32 	if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
     33 		return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
     34 	}
     35 	if bytes.HasPrefix(line, openComment) {
     36 		return s.parseComment(block, pc)
     37 	}
     38 	if bytes.HasPrefix(line, openProcessingInstruction) {
     39 		return s.parseUntil(block, closeProcessingInstruction, pc)
     40 	}
     41 	if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
     42 		return s.parseUntil(block, closeDecl, pc)
     43 	}
     44 	if bytes.HasPrefix(line, openCDATA) {
     45 		return s.parseUntil(block, closeCDATA, pc)
     46 	}
     47 	return nil
     48 }
     49 
     50 var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
     51 
     52 var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
     53 var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*[ \t]*/?>`)
     54 var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`)
     55 
     56 var openProcessingInstruction = []byte("<?")
     57 var closeProcessingInstruction = []byte("?>")
     58 var openCDATA = []byte("<![CDATA[")
     59 var closeCDATA = []byte("]]>")
     60 var closeDecl = []byte(">")
     61 var emptyComment = []byte("<!---->")
     62 var invalidComment1 = []byte("<!-->")
     63 var invalidComment2 = []byte("<!--->")
     64 var openComment = []byte("<!--")
     65 var closeComment = []byte("-->")
     66 var doubleHyphen = []byte("--")
     67 
     68 func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
     69 	savedLine, savedSegment := block.Position()
     70 	node := ast.NewRawHTML()
     71 	line, segment := block.PeekLine()
     72 	if bytes.HasPrefix(line, emptyComment) {
     73 		node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment)))
     74 		block.Advance(len(emptyComment))
     75 		return node
     76 	}
     77 	if bytes.HasPrefix(line, invalidComment1) || bytes.HasPrefix(line, invalidComment2) {
     78 		return nil
     79 	}
     80 	offset := len(openComment)
     81 	line = line[offset:]
     82 	for {
     83 		hindex := bytes.Index(line, doubleHyphen)
     84 		if hindex > -1 {
     85 			hindex += offset
     86 		}
     87 		index := bytes.Index(line, closeComment) + offset
     88 		if index > -1 && hindex == index {
     89 			if index == 0 || len(line) < 2 || line[index-offset-1] != '-' {
     90 				node.Segments.Append(segment.WithStop(segment.Start + index + len(closeComment)))
     91 				block.Advance(index + len(closeComment))
     92 				return node
     93 			}
     94 		}
     95 		if hindex > 0 {
     96 			break
     97 		}
     98 		node.Segments.Append(segment)
     99 		block.AdvanceLine()
    100 		line, segment = block.PeekLine()
    101 		offset = 0
    102 		if line == nil {
    103 			break
    104 		}
    105 	}
    106 	block.SetPosition(savedLine, savedSegment)
    107 	return nil
    108 }
    109 
    110 func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
    111 	savedLine, savedSegment := block.Position()
    112 	node := ast.NewRawHTML()
    113 	for {
    114 		line, segment := block.PeekLine()
    115 		if line == nil {
    116 			break
    117 		}
    118 		index := bytes.Index(line, closer)
    119 		if index > -1 {
    120 			node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
    121 			block.Advance(index + len(closer))
    122 			return node
    123 		}
    124 		node.Segments.Append(segment)
    125 		block.AdvanceLine()
    126 	}
    127 	block.SetPosition(savedLine, savedSegment)
    128 	return nil
    129 }
    130 
    131 func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
    132 	sline, ssegment := block.Position()
    133 	if block.Match(reg) {
    134 		node := ast.NewRawHTML()
    135 		eline, esegment := block.Position()
    136 		block.SetPosition(sline, ssegment)
    137 		for {
    138 			line, segment := block.PeekLine()
    139 			if line == nil {
    140 				break
    141 			}
    142 			l, _ := block.Position()
    143 			start := segment.Start
    144 			if l == sline {
    145 				start = ssegment.Start
    146 			}
    147 			end := segment.Stop
    148 			if l == eline {
    149 				end = esegment.Start
    150 			}
    151 
    152 			node.Segments.Append(text.NewSegment(start, end))
    153 			if l == eline {
    154 				block.Advance(end - start)
    155 				break
    156 			} else {
    157 				block.AdvanceLine()
    158 			}
    159 		}
    160 		return node
    161 	}
    162 	return nil
    163 }