buffer.go - gtsocial-umbx - Unnamed repository; edit this file 'description' to name the repository.

buffer.go (2997B)
      1 package html
      2 
      3 import (
      4 	"github.com/tdewolff/parse/v2"
      5 	"github.com/tdewolff/parse/v2/html"
      6 )
      7 
// Token is a single token unit with an attribute value (if given) and hash of the data.
type Token struct {
	// TokenType is the lexer's classification of this token.
	html.TokenType
	// Hash is ToHash(Text) for attribute, start-tag and end-tag tokens, and 0 for all other tokens.
	Hash    Hash
	// Data is the byte slice returned by the lexer's Next call for this token.
	Data    []byte
	// Text is the byte slice returned by the lexer's Text call for this token.
	Text    []byte
	// AttrVal is the attribute value with one pair of surrounding quotes removed; nil for non-attribute tokens.
	AttrVal []byte
	// Traits is looked up in attrMap (attributes) or tagMap (tags) by Hash; zero otherwise.
	Traits  traits
	// Offset is the input offset sampled just before the token was lexed; for
	// attributes it is advanced further (presumably to the start of the value).
	Offset  int
}
     18 
// TokenBuffer is a buffer that allows for token look-ahead.
type TokenBuffer struct {
	// r is the input whose Offset is recorded on every token that is read.
	r *parse.Input
	// l is the lexer that produces the tokens.
	l *html.Lexer

	// buf holds the tokens read so far; entries before pos have been consumed.
	buf []Token
	// pos is the index in buf of the next token that Shift will hand out.
	pos int

	// attrBuffer is scratch storage reused by Attributes for its result slice.
	attrBuffer []*Token
}
     29 
     30 // NewTokenBuffer returns a new TokenBuffer.
     31 func NewTokenBuffer(r *parse.Input, l *html.Lexer) *TokenBuffer {
     32 	return &TokenBuffer{
     33 		r:   r,
     34 		l:   l,
     35 		buf: make([]Token, 0, 8),
     36 	}
     37 }
     38 
     39 func (z *TokenBuffer) read(t *Token) {
     40 	t.Offset = z.r.Offset()
     41 	t.TokenType, t.Data = z.l.Next()
     42 	t.Text = z.l.Text()
     43 	if t.TokenType == html.AttributeToken {
     44 		t.Offset += 1 + len(t.Text) + 1
     45 		t.AttrVal = z.l.AttrVal()
     46 		if len(t.AttrVal) > 1 && (t.AttrVal[0] == '"' || t.AttrVal[0] == '\'') {
     47 			t.Offset++
     48 			t.AttrVal = t.AttrVal[1 : len(t.AttrVal)-1] // quotes will be readded in attribute loop if necessary
     49 		}
     50 		t.Hash = ToHash(t.Text)
     51 		t.Traits = attrMap[t.Hash]
     52 	} else if t.TokenType == html.StartTagToken || t.TokenType == html.EndTagToken {
     53 		t.AttrVal = nil
     54 		t.Hash = ToHash(t.Text)
     55 		t.Traits = tagMap[t.Hash] // zero if not exist
     56 	} else {
     57 		t.AttrVal = nil
     58 		t.Hash = 0
     59 		t.Traits = 0
     60 	}
     61 }
     62 
     63 // Peek returns the ith element and possibly does an allocation.
     64 // Peeking past an error will panic.
     65 func (z *TokenBuffer) Peek(pos int) *Token {
     66 	pos += z.pos
     67 	if pos >= len(z.buf) {
     68 		if len(z.buf) > 0 && z.buf[len(z.buf)-1].TokenType == html.ErrorToken {
     69 			return &z.buf[len(z.buf)-1]
     70 		}
     71 
     72 		c := cap(z.buf)
     73 		d := len(z.buf) - z.pos
     74 		p := pos - z.pos + 1 // required peek length
     75 		var buf []Token
     76 		if 2*p > c {
     77 			buf = make([]Token, 0, 2*c+p)
     78 		} else {
     79 			buf = z.buf
     80 		}
     81 		copy(buf[:d], z.buf[z.pos:])
     82 
     83 		buf = buf[:p]
     84 		pos -= z.pos
     85 		for i := d; i < p; i++ {
     86 			z.read(&buf[i])
     87 			if buf[i].TokenType == html.ErrorToken {
     88 				buf = buf[:i+1]
     89 				pos = i
     90 				break
     91 			}
     92 		}
     93 		z.pos, z.buf = 0, buf
     94 	}
     95 	return &z.buf[pos]
     96 }
     97 
     98 // Shift returns the first element and advances position.
     99 func (z *TokenBuffer) Shift() *Token {
    100 	if z.pos >= len(z.buf) {
    101 		t := &z.buf[:1][0]
    102 		z.read(t)
    103 		return t
    104 	}
    105 	t := &z.buf[z.pos]
    106 	z.pos++
    107 	return t
    108 }
    109 
    110 // Attributes extracts the gives attribute hashes from a tag.
    111 // It returns in the same order pointers to the requested token data or nil.
    112 func (z *TokenBuffer) Attributes(hashes ...Hash) []*Token {
    113 	n := 0
    114 	for {
    115 		if t := z.Peek(n); t.TokenType != html.AttributeToken {
    116 			break
    117 		}
    118 		n++
    119 	}
    120 	if len(hashes) > cap(z.attrBuffer) {
    121 		z.attrBuffer = make([]*Token, len(hashes))
    122 	} else {
    123 		z.attrBuffer = z.attrBuffer[:len(hashes)]
    124 		for i := range z.attrBuffer {
    125 			z.attrBuffer[i] = nil
    126 		}
    127 	}
    128 	for i := z.pos; i < z.pos+n; i++ {
    129 		attr := &z.buf[i]
    130 		for j, hash := range hashes {
    131 			if hash == attr.Hash {
    132 				z.attrBuffer[j] = attr
    133 			}
    134 		}
    135 	}
    136 	return z.attrBuffer
    137 }
	gtsocial-umbx Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE