gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

util.go (28789B)


      1 // Package util provides utility functions for the goldmark.
      2 package util
      3 
      4 import (
      5 	"bytes"
      6 	"io"
      7 	"net/url"
      8 	"regexp"
      9 	"sort"
     10 	"strconv"
     11 	"unicode"
     12 	"unicode/utf8"
     13 )
     14 
     15 // A CopyOnWriteBuffer is a byte buffer that copies buffer when
     16 // it need to be changed.
     17 type CopyOnWriteBuffer struct {
     18 	buffer []byte
     19 	copied bool
     20 }
     21 
     22 // NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer.
     23 func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
     24 	return CopyOnWriteBuffer{
     25 		buffer: buffer,
     26 		copied: false,
     27 	}
     28 }
     29 
     30 // Write writes given bytes to the buffer.
     31 // Write allocate new buffer and clears it at the first time.
     32 func (b *CopyOnWriteBuffer) Write(value []byte) {
     33 	if !b.copied {
     34 		b.buffer = make([]byte, 0, len(b.buffer)+20)
     35 		b.copied = true
     36 	}
     37 	b.buffer = append(b.buffer, value...)
     38 }
     39 
     40 // WriteString writes given string to the buffer.
     41 // WriteString allocate new buffer and clears it at the first time.
     42 func (b *CopyOnWriteBuffer) WriteString(value string) {
     43 	b.Write(StringToReadOnlyBytes(value))
     44 }
     45 
     46 // Append appends given bytes to the buffer.
     47 // Append copy buffer at the first time.
     48 func (b *CopyOnWriteBuffer) Append(value []byte) {
     49 	if !b.copied {
     50 		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
     51 		copy(tmp, b.buffer)
     52 		b.buffer = tmp
     53 		b.copied = true
     54 	}
     55 	b.buffer = append(b.buffer, value...)
     56 }
     57 
     58 // AppendString appends given string to the buffer.
     59 // AppendString copy buffer at the first time.
     60 func (b *CopyOnWriteBuffer) AppendString(value string) {
     61 	b.Append(StringToReadOnlyBytes(value))
     62 }
     63 
     64 // WriteByte writes the given byte to the buffer.
     65 // WriteByte allocate new buffer and clears it at the first time.
     66 func (b *CopyOnWriteBuffer) WriteByte(c byte) {
     67 	if !b.copied {
     68 		b.buffer = make([]byte, 0, len(b.buffer)+20)
     69 		b.copied = true
     70 	}
     71 	b.buffer = append(b.buffer, c)
     72 }
     73 
     74 // AppendByte appends given bytes to the buffer.
     75 // AppendByte copy buffer at the first time.
     76 func (b *CopyOnWriteBuffer) AppendByte(c byte) {
     77 	if !b.copied {
     78 		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
     79 		copy(tmp, b.buffer)
     80 		b.buffer = tmp
     81 		b.copied = true
     82 	}
     83 	b.buffer = append(b.buffer, c)
     84 }
     85 
     86 // Bytes returns bytes of this buffer.
     87 func (b *CopyOnWriteBuffer) Bytes() []byte {
     88 	return b.buffer
     89 }
     90 
     91 // IsCopied returns true if buffer has been copied, otherwise false.
     92 func (b *CopyOnWriteBuffer) IsCopied() bool {
     93 	return b.copied
     94 }
     95 
     96 // IsEscapedPunctuation returns true if character at a given index i
     97 // is an escaped punctuation, otherwise false.
     98 func IsEscapedPunctuation(source []byte, i int) bool {
     99 	return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
    100 }
    101 
    102 // ReadWhile read the given source while pred is true.
    103 func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
    104 	j := index[0]
    105 	ok := false
    106 	for ; j < index[1]; j++ {
    107 		c1 := source[j]
    108 		if pred(c1) {
    109 			ok = true
    110 			continue
    111 		}
    112 		break
    113 	}
    114 	return j, ok
    115 }
    116 
    117 // IsBlank returns true if the given string is all space characters.
    118 func IsBlank(bs []byte) bool {
    119 	for _, b := range bs {
    120 		if !IsSpace(b) {
    121 			return false
    122 		}
    123 	}
    124 	return true
    125 }
    126 
    127 // VisualizeSpaces visualize invisible space characters.
    128 func VisualizeSpaces(bs []byte) []byte {
    129 	bs = bytes.Replace(bs, []byte(" "), []byte("[SPACE]"), -1)
    130 	bs = bytes.Replace(bs, []byte("\t"), []byte("[TAB]"), -1)
    131 	bs = bytes.Replace(bs, []byte("\n"), []byte("[NEWLINE]\n"), -1)
    132 	bs = bytes.Replace(bs, []byte("\r"), []byte("[CR]"), -1)
    133 	bs = bytes.Replace(bs, []byte("\v"), []byte("[VTAB]"), -1)
    134 	bs = bytes.Replace(bs, []byte("\x00"), []byte("[NUL]"), -1)
    135 	bs = bytes.Replace(bs, []byte("\ufffd"), []byte("[U+FFFD]"), -1)
    136 	return bs
    137 }
    138 
    139 // TabWidth calculates actual width of a tab at the given position.
    140 func TabWidth(currentPos int) int {
    141 	return 4 - currentPos%4
    142 }
    143 
    144 // IndentPosition searches an indent position with the given width for the given line.
    145 // If the line contains tab characters, paddings may be not zero.
    146 // currentPos==0 and width==2:
    147 //
    148 //     position: 0    1
    149 //               [TAB]aaaa
    150 //     width:    1234 5678
    151 //
    152 // width=2 is in the tab character. In this case, IndentPosition returns
    153 // (pos=1, padding=2)
    154 func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
    155 	return IndentPositionPadding(bs, currentPos, 0, width)
    156 }
    157 
    158 // IndentPositionPadding searches an indent position with the given width for the given line.
    159 // This function is mostly same as IndentPosition except this function
    160 // takes account into additional paddings.
    161 func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
    162 	if width == 0 {
    163 		return 0, paddingv
    164 	}
    165 	w := 0
    166 	i := 0
    167 	l := len(bs)
    168 	for ; i < l; i++ {
    169 		if bs[i] == '\t' && w < width {
    170 			w += TabWidth(currentPos + w)
    171 		} else if bs[i] == ' ' && w < width {
    172 			w++
    173 		} else {
    174 			break
    175 		}
    176 	}
    177 	if w >= width {
    178 		return i - paddingv, w - width
    179 	}
    180 	return -1, -1
    181 }
    182 
    183 // DedentPosition dedents lines by the given width.
    184 //
    185 // Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
    186 func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
    187 	if width == 0 {
    188 		return 0, 0
    189 	}
    190 	w := 0
    191 	l := len(bs)
    192 	i := 0
    193 	for ; i < l; i++ {
    194 		if bs[i] == '\t' {
    195 			w += TabWidth(currentPos + w)
    196 		} else if bs[i] == ' ' {
    197 			w++
    198 		} else {
    199 			break
    200 		}
    201 	}
    202 	if w >= width {
    203 		return i, w - width
    204 	}
    205 	return i, 0
    206 }
    207 
    208 // DedentPositionPadding dedents lines by the given width.
    209 // This function is mostly same as DedentPosition except this function
    210 // takes account into additional paddings.
    211 //
    212 // Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
    213 func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
    214 	if width == 0 {
    215 		return 0, paddingv
    216 	}
    217 
    218 	w := 0
    219 	i := 0
    220 	l := len(bs)
    221 	for ; i < l; i++ {
    222 		if bs[i] == '\t' {
    223 			w += TabWidth(currentPos + w)
    224 		} else if bs[i] == ' ' {
    225 			w++
    226 		} else {
    227 			break
    228 		}
    229 	}
    230 	if w >= width {
    231 		return i - paddingv, w - width
    232 	}
    233 	return i - paddingv, 0
    234 }
    235 
    236 // IndentWidth calculate an indent width for the given line.
    237 func IndentWidth(bs []byte, currentPos int) (width, pos int) {
    238 	l := len(bs)
    239 	for i := 0; i < l; i++ {
    240 		b := bs[i]
    241 		if b == ' ' {
    242 			width++
    243 			pos++
    244 		} else if b == '\t' {
    245 			width += TabWidth(currentPos + width)
    246 			pos++
    247 		} else {
    248 			break
    249 		}
    250 	}
    251 	return
    252 }
    253 
    254 // FirstNonSpacePosition returns a position line that is a first nonspace
    255 // character.
    256 func FirstNonSpacePosition(bs []byte) int {
    257 	i := 0
    258 	for ; i < len(bs); i++ {
    259 		c := bs[i]
    260 		if c == ' ' || c == '\t' {
    261 			continue
    262 		}
    263 		if c == '\n' {
    264 			return -1
    265 		}
    266 		return i
    267 	}
    268 	return -1
    269 }
    270 
    271 // FindClosure returns a position that closes the given opener.
    272 // If codeSpan is set true, it ignores characters in code spans.
    273 // If allowNesting is set true, closures correspond to nested opener will be
    274 // ignored.
    275 //
    276 // Deprecated: This function can not handle newlines. Many elements
    277 // can be existed over multiple lines(e.g. link labels).
    278 // Use text.Reader.FindClosure.
    279 func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
    280 	i := 0
    281 	opened := 1
    282 	codeSpanOpener := 0
    283 	for i < len(bs) {
    284 		c := bs[i]
    285 		if codeSpan && codeSpanOpener != 0 && c == '`' {
    286 			codeSpanCloser := 0
    287 			for ; i < len(bs); i++ {
    288 				if bs[i] == '`' {
    289 					codeSpanCloser++
    290 				} else {
    291 					i--
    292 					break
    293 				}
    294 			}
    295 			if codeSpanCloser == codeSpanOpener {
    296 				codeSpanOpener = 0
    297 			}
    298 		} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
    299 			i += 2
    300 			continue
    301 		} else if codeSpan && codeSpanOpener == 0 && c == '`' {
    302 			for ; i < len(bs); i++ {
    303 				if bs[i] == '`' {
    304 					codeSpanOpener++
    305 				} else {
    306 					i--
    307 					break
    308 				}
    309 			}
    310 		} else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
    311 			if c == closure {
    312 				opened--
    313 				if opened == 0 {
    314 					return i
    315 				}
    316 			} else if c == opener {
    317 				if !allowNesting {
    318 					return -1
    319 				}
    320 				opened++
    321 			}
    322 		}
    323 		i++
    324 	}
    325 	return -1
    326 }
    327 
    328 // TrimLeft trims characters in the given s from head of the source.
    329 // bytes.TrimLeft offers same functionalities, but bytes.TrimLeft
    330 // allocates new buffer for the result.
    331 func TrimLeft(source, b []byte) []byte {
    332 	i := 0
    333 	for ; i < len(source); i++ {
    334 		c := source[i]
    335 		found := false
    336 		for j := 0; j < len(b); j++ {
    337 			if c == b[j] {
    338 				found = true
    339 				break
    340 			}
    341 		}
    342 		if !found {
    343 			break
    344 		}
    345 	}
    346 	return source[i:]
    347 }
    348 
    349 // TrimRight trims characters in the given s from tail of the source.
    350 func TrimRight(source, b []byte) []byte {
    351 	i := len(source) - 1
    352 	for ; i >= 0; i-- {
    353 		c := source[i]
    354 		found := false
    355 		for j := 0; j < len(b); j++ {
    356 			if c == b[j] {
    357 				found = true
    358 				break
    359 			}
    360 		}
    361 		if !found {
    362 			break
    363 		}
    364 	}
    365 	return source[:i+1]
    366 }
    367 
    368 // TrimLeftLength returns a length of leading specified characters.
    369 func TrimLeftLength(source, s []byte) int {
    370 	return len(source) - len(TrimLeft(source, s))
    371 }
    372 
    373 // TrimRightLength returns a length of trailing specified characters.
    374 func TrimRightLength(source, s []byte) int {
    375 	return len(source) - len(TrimRight(source, s))
    376 }
    377 
    378 // TrimLeftSpaceLength returns a length of leading space characters.
    379 func TrimLeftSpaceLength(source []byte) int {
    380 	i := 0
    381 	for ; i < len(source); i++ {
    382 		if !IsSpace(source[i]) {
    383 			break
    384 		}
    385 	}
    386 	return i
    387 }
    388 
    389 // TrimRightSpaceLength returns a length of trailing space characters.
    390 func TrimRightSpaceLength(source []byte) int {
    391 	l := len(source)
    392 	i := l - 1
    393 	for ; i >= 0; i-- {
    394 		if !IsSpace(source[i]) {
    395 			break
    396 		}
    397 	}
    398 	if i < 0 {
    399 		return l
    400 	}
    401 	return l - 1 - i
    402 }
    403 
    404 // TrimLeftSpace returns a subslice of the given string by slicing off all leading
    405 // space characters.
    406 func TrimLeftSpace(source []byte) []byte {
    407 	return TrimLeft(source, spaces)
    408 }
    409 
    410 // TrimRightSpace returns a subslice of the given string by slicing off all trailing
    411 // space characters.
    412 func TrimRightSpace(source []byte) []byte {
    413 	return TrimRight(source, spaces)
    414 }
    415 
    416 // DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
    417 func DoFullUnicodeCaseFolding(v []byte) []byte {
    418 	var rbuf []byte
    419 	cob := NewCopyOnWriteBuffer(v)
    420 	n := 0
    421 	for i := 0; i < len(v); i++ {
    422 		c := v[i]
    423 		if c < 0xb5 {
    424 			if c >= 0x41 && c <= 0x5a {
    425 				// A-Z to a-z
    426 				cob.Write(v[n:i])
    427 				cob.WriteByte(c + 32)
    428 				n = i + 1
    429 			}
    430 			continue
    431 		}
    432 
    433 		if !utf8.RuneStart(c) {
    434 			continue
    435 		}
    436 		r, length := utf8.DecodeRune(v[i:])
    437 		if r == utf8.RuneError {
    438 			continue
    439 		}
    440 		folded, ok := unicodeCaseFoldings[r]
    441 		if !ok {
    442 			continue
    443 		}
    444 
    445 		cob.Write(v[n:i])
    446 		if rbuf == nil {
    447 			rbuf = make([]byte, 4)
    448 		}
    449 		for _, f := range folded {
    450 			l := utf8.EncodeRune(rbuf, f)
    451 			cob.Write(rbuf[:l])
    452 		}
    453 		i += length - 1
    454 		n = i + 1
    455 	}
    456 	if cob.IsCopied() {
    457 		cob.Write(v[n:])
    458 	}
    459 	return cob.Bytes()
    460 }
    461 
    462 // ReplaceSpaces replaces sequence of spaces with the given repl.
    463 func ReplaceSpaces(source []byte, repl byte) []byte {
    464 	var ret []byte
    465 	start := -1
    466 	for i, c := range source {
    467 		iss := IsSpace(c)
    468 		if start < 0 && iss {
    469 			start = i
    470 			continue
    471 		} else if start >= 0 && iss {
    472 			continue
    473 		} else if start >= 0 {
    474 			if ret == nil {
    475 				ret = make([]byte, 0, len(source))
    476 				ret = append(ret, source[:start]...)
    477 			}
    478 			ret = append(ret, repl)
    479 			start = -1
    480 		}
    481 		if ret != nil {
    482 			ret = append(ret, c)
    483 		}
    484 	}
    485 	if start >= 0 && ret != nil {
    486 		ret = append(ret, repl)
    487 	}
    488 	if ret == nil {
    489 		return source
    490 	}
    491 	return ret
    492 }
    493 
    494 // ToRune decode given bytes start at pos and returns a rune.
    495 func ToRune(source []byte, pos int) rune {
    496 	i := pos
    497 	for ; i >= 0; i-- {
    498 		if utf8.RuneStart(source[i]) {
    499 			break
    500 		}
    501 	}
    502 	r, _ := utf8.DecodeRune(source[i:])
    503 	return r
    504 }
    505 
    506 // ToValidRune returns 0xFFFD if the given rune is invalid, otherwise v.
    507 func ToValidRune(v rune) rune {
    508 	if v == 0 || !utf8.ValidRune(v) {
    509 		return rune(0xFFFD)
    510 	}
    511 	return v
    512 }
    513 
    514 // ToLinkReference converts given bytes into a valid link reference string.
    515 // ToLinkReference performs unicode case folding, trims leading and trailing spaces,  converts into lower
    516 // case and replace spaces with a single space character.
    517 func ToLinkReference(v []byte) string {
    518 	v = TrimLeftSpace(v)
    519 	v = TrimRightSpace(v)
    520 	v = DoFullUnicodeCaseFolding(v)
    521 	return string(ReplaceSpaces(v, ' '))
    522 }
    523 
    524 var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}
    525 
    526 // EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped,
    527 // otherwise nil.
    528 func EscapeHTMLByte(b byte) []byte {
    529 	return htmlEscapeTable[b]
    530 }
    531 
    532 // EscapeHTML escapes characters that should be escaped in HTML text.
    533 func EscapeHTML(v []byte) []byte {
    534 	cob := NewCopyOnWriteBuffer(v)
    535 	n := 0
    536 	for i := 0; i < len(v); i++ {
    537 		c := v[i]
    538 		escaped := htmlEscapeTable[c]
    539 		if escaped != nil {
    540 			cob.Write(v[n:i])
    541 			cob.Write(escaped)
    542 			n = i + 1
    543 		}
    544 	}
    545 	if cob.IsCopied() {
    546 		cob.Write(v[n:])
    547 	}
    548 	return cob.Bytes()
    549 }
    550 
    551 // UnescapePunctuations unescapes blackslash escaped punctuations.
    552 func UnescapePunctuations(source []byte) []byte {
    553 	cob := NewCopyOnWriteBuffer(source)
    554 	limit := len(source)
    555 	n := 0
    556 	for i := 0; i < limit; {
    557 		c := source[i]
    558 		if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
    559 			cob.Write(source[n:i])
    560 			cob.WriteByte(source[i+1])
    561 			i += 2
    562 			n = i
    563 			continue
    564 		}
    565 		i++
    566 	}
    567 	if cob.IsCopied() {
    568 		cob.Write(source[n:])
    569 	}
    570 	return cob.Bytes()
    571 }
    572 
    573 // ResolveNumericReferences resolve numeric references like '&#1234;" .
    574 func ResolveNumericReferences(source []byte) []byte {
    575 	cob := NewCopyOnWriteBuffer(source)
    576 	buf := make([]byte, 6, 6)
    577 	limit := len(source)
    578 	ok := false
    579 	n := 0
    580 	for i := 0; i < limit; i++ {
    581 		if source[i] == '&' {
    582 			pos := i
    583 			next := i + 1
    584 			if next < limit && source[next] == '#' {
    585 				nnext := next + 1
    586 				if nnext < limit {
    587 					nc := source[nnext]
    588 					// code point like #x22;
    589 					if nnext < limit && nc == 'x' || nc == 'X' {
    590 						start := nnext + 1
    591 						i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
    592 						if ok && i < limit && source[i] == ';' {
    593 							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
    594 							cob.Write(source[n:pos])
    595 							n = i + 1
    596 							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
    597 							cob.Write(buf[:runeSize])
    598 							continue
    599 						}
    600 						// code point like #1234;
    601 					} else if nc >= '0' && nc <= '9' {
    602 						start := nnext
    603 						i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
    604 						if ok && i < limit && i-start < 8 && source[i] == ';' {
    605 							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
    606 							cob.Write(source[n:pos])
    607 							n = i + 1
    608 							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
    609 							cob.Write(buf[:runeSize])
    610 							continue
    611 						}
    612 					}
    613 				}
    614 			}
    615 			i = next - 1
    616 		}
    617 	}
    618 	if cob.IsCopied() {
    619 		cob.Write(source[n:])
    620 	}
    621 	return cob.Bytes()
    622 }
    623 
    624 // ResolveEntityNames resolve entity references like '&ouml;" .
    625 func ResolveEntityNames(source []byte) []byte {
    626 	cob := NewCopyOnWriteBuffer(source)
    627 	limit := len(source)
    628 	ok := false
    629 	n := 0
    630 	for i := 0; i < limit; i++ {
    631 		if source[i] == '&' {
    632 			pos := i
    633 			next := i + 1
    634 			if !(next < limit && source[next] == '#') {
    635 				start := next
    636 				i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
    637 				if ok && i < limit && source[i] == ';' {
    638 					name := BytesToReadOnlyString(source[start:i])
    639 					entity, ok := LookUpHTML5EntityByName(name)
    640 					if ok {
    641 						cob.Write(source[n:pos])
    642 						n = i + 1
    643 						cob.Write(entity.Characters)
    644 						continue
    645 					}
    646 				}
    647 			}
    648 			i = next - 1
    649 		}
    650 	}
    651 	if cob.IsCopied() {
    652 		cob.Write(source[n:])
    653 	}
    654 	return cob.Bytes()
    655 }
    656 
    657 var htmlSpace = []byte("%20")
    658 
    659 // URLEscape escape the given URL.
    660 // If resolveReference is set true:
    661 //   1. unescape punctuations
    662 //   2. resolve numeric references
    663 //   3. resolve entity references
    664 //
    665 // URL encoded values (%xx) are kept as is.
    666 func URLEscape(v []byte, resolveReference bool) []byte {
    667 	if resolveReference {
    668 		v = UnescapePunctuations(v)
    669 		v = ResolveNumericReferences(v)
    670 		v = ResolveEntityNames(v)
    671 	}
    672 	cob := NewCopyOnWriteBuffer(v)
    673 	limit := len(v)
    674 	n := 0
    675 
    676 	for i := 0; i < limit; {
    677 		c := v[i]
    678 		if urlEscapeTable[c] == 1 {
    679 			i++
    680 			continue
    681 		}
    682 		if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
    683 			i += 3
    684 			continue
    685 		}
    686 		u8len := utf8lenTable[c]
    687 		if u8len == 99 { // invalid utf8 leading byte, skip it
    688 			i++
    689 			continue
    690 		}
    691 		if c == ' ' {
    692 			cob.Write(v[n:i])
    693 			cob.Write(htmlSpace)
    694 			i++
    695 			n = i
    696 			continue
    697 		}
    698 		if int(u8len) > len(v) {
    699 			u8len = int8(len(v) - 1)
    700 		}
    701 		if u8len == 0 {
    702 			i++
    703 			n = i
    704 			continue
    705 		}
    706 		cob.Write(v[n:i])
    707 		stop := i + int(u8len)
    708 		if stop > len(v) {
    709 			i++
    710 			n = i
    711 			continue
    712 		}
    713 		cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
    714 		i += int(u8len)
    715 		n = i
    716 	}
    717 	if cob.IsCopied() && n < limit {
    718 		cob.Write(v[n:])
    719 	}
    720 	return cob.Bytes()
    721 }
    722 
    723 // FindURLIndex returns a stop index value if the given bytes seem an URL.
    724 // This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
    725 func FindURLIndex(b []byte) int {
    726 	i := 0
    727 	if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
    728 		return -1
    729 	}
    730 	i++
    731 	for ; i < len(b); i++ {
    732 		c := b[i]
    733 		if urlTable[c]&4 != 4 {
    734 			break
    735 		}
    736 	}
    737 	if i == 1 || i > 33 || i >= len(b) {
    738 		return -1
    739 	}
    740 	if b[i] != ':' {
    741 		return -1
    742 	}
    743 	i++
    744 	for ; i < len(b); i++ {
    745 		c := b[i]
    746 		if urlTable[c]&1 != 1 {
    747 			break
    748 		}
    749 	}
    750 	return i
    751 }
    752 
    753 var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`)
    754 
    755 // FindEmailIndex returns a stop index value if the given bytes seem an email address.
    756 func FindEmailIndex(b []byte) int {
    757 	// TODO: eliminate regexps
    758 	i := 0
    759 	for ; i < len(b); i++ {
    760 		c := b[i]
    761 		if emailTable[c]&1 != 1 {
    762 			break
    763 		}
    764 	}
    765 	if i == 0 {
    766 		return -1
    767 	}
    768 	if i >= len(b) || b[i] != '@' {
    769 		return -1
    770 	}
    771 	i++
    772 	if i >= len(b) {
    773 		return -1
    774 	}
    775 	match := emailDomainRegexp.FindSubmatchIndex(b[i:])
    776 	if match == nil {
    777 		return -1
    778 	}
    779 	return i + match[1]
    780 }
    781 
    782 var spaces = []byte(" \t\n\x0b\x0c\x0d")
    783 
    784 var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
    785 
    786 var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
    787 
    788 // a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()#
    789 var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
    790 
    791 var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99}
    792 
    793 var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
    794 
    795 var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
    796 
    797 // UTF8Len returns a byte length of the utf-8 character.
    798 func UTF8Len(b byte) int8 {
    799 	return utf8lenTable[b]
    800 }
    801 
    802 // IsPunct returns true if the given character is a punctuation, otherwise false.
    803 func IsPunct(c byte) bool {
    804 	return punctTable[c] == 1
    805 }
    806 
    807 // IsPunctRune returns true if the given rune is a punctuation, otherwise false.
    808 func IsPunctRune(r rune) bool {
    809 	return int32(r) <= 256 && IsPunct(byte(r)) || unicode.IsPunct(r)
    810 }
    811 
    812 // IsSpace returns true if the given character is a space, otherwise false.
    813 func IsSpace(c byte) bool {
    814 	return spaceTable[c] == 1
    815 }
    816 
    817 // IsSpaceRune returns true if the given rune is a space, otherwise false.
    818 func IsSpaceRune(r rune) bool {
    819 	return int32(r) <= 256 && IsSpace(byte(r)) || unicode.IsSpace(r)
    820 }
    821 
    822 // IsNumeric returns true if the given character is a numeric, otherwise false.
    823 func IsNumeric(c byte) bool {
    824 	return c >= '0' && c <= '9'
    825 }
    826 
    827 // IsHexDecimal returns true if the given character is a hexdecimal, otherwise false.
    828 func IsHexDecimal(c byte) bool {
    829 	return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
    830 }
    831 
    832 // IsAlphaNumeric returns true if the given character is a alphabet or a numeric, otherwise false.
    833 func IsAlphaNumeric(c byte) bool {
    834 	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
    835 }
    836 
    837 // IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false.
    838 func IsEastAsianWideRune(r rune) bool {
    839 	return unicode.Is(unicode.Hiragana, r) ||
    840 		unicode.Is(unicode.Katakana, r) ||
    841 		unicode.Is(unicode.Han, r) ||
    842 		unicode.Is(unicode.Lm, r) ||
    843 		unicode.Is(unicode.Hangul, r)
    844 }
    845 
    846 // A BufWriter is a subset of the bufio.Writer .
    847 type BufWriter interface {
    848 	io.Writer
    849 	Available() int
    850 	Buffered() int
    851 	Flush() error
    852 	WriteByte(c byte) error
    853 	WriteRune(r rune) (size int, err error)
    854 	WriteString(s string) (int, error)
    855 }
    856 
    857 // A PrioritizedValue struct holds pair of an arbitrary value and a priority.
    858 type PrioritizedValue struct {
    859 	// Value is an arbitrary value that you want to prioritize.
    860 	Value interface{}
    861 	// Priority is a priority of the value.
    862 	Priority int
    863 }
    864 
    865 // PrioritizedSlice is a slice of the PrioritizedValues
    866 type PrioritizedSlice []PrioritizedValue
    867 
    868 // Sort sorts the PrioritizedSlice in ascending order.
    869 func (s PrioritizedSlice) Sort() {
    870 	sort.Slice(s, func(i, j int) bool {
    871 		return s[i].Priority < s[j].Priority
    872 	})
    873 }
    874 
    875 // Remove removes the given value from this slice.
    876 func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
    877 	i := 0
    878 	found := false
    879 	for ; i < len(s); i++ {
    880 		if s[i].Value == v {
    881 			found = true
    882 			break
    883 		}
    884 	}
    885 	if !found {
    886 		return s
    887 	}
    888 	return append(s[:i], s[i+1:]...)
    889 }
    890 
    891 // Prioritized returns a new PrioritizedValue.
    892 func Prioritized(v interface{}, priority int) PrioritizedValue {
    893 	return PrioritizedValue{v, priority}
    894 }
    895 
    896 func bytesHash(b []byte) uint64 {
    897 	var hash uint64 = 5381
    898 	for _, c := range b {
    899 		hash = ((hash << 5) + hash) + uint64(c)
    900 	}
    901 	return hash
    902 }
    903 
    904 // BytesFilter is a efficient data structure for checking whether bytes exist or not.
    905 // BytesFilter is thread-safe.
    906 type BytesFilter interface {
    907 	// Add adds given bytes to this set.
    908 	Add([]byte)
    909 
    910 	// Contains return true if this set contains given bytes, otherwise false.
    911 	Contains([]byte) bool
    912 
    913 	// Extend copies this filter and adds given bytes to new filter.
    914 	Extend(...[]byte) BytesFilter
    915 }
    916 
    917 type bytesFilter struct {
    918 	chars     [256]uint8
    919 	threshold int
    920 	slots     [][][]byte
    921 }
    922 
    923 // NewBytesFilter returns a new BytesFilter.
    924 func NewBytesFilter(elements ...[]byte) BytesFilter {
    925 	s := &bytesFilter{
    926 		threshold: 3,
    927 		slots:     make([][][]byte, 64),
    928 	}
    929 	for _, element := range elements {
    930 		s.Add(element)
    931 	}
    932 	return s
    933 }
    934 
    935 func (s *bytesFilter) Add(b []byte) {
    936 	l := len(b)
    937 	m := s.threshold
    938 	if l < s.threshold {
    939 		m = l
    940 	}
    941 	for i := 0; i < m; i++ {
    942 		s.chars[b[i]] |= 1 << uint8(i)
    943 	}
    944 	h := bytesHash(b) % uint64(len(s.slots))
    945 	slot := s.slots[h]
    946 	if slot == nil {
    947 		slot = [][]byte{}
    948 	}
    949 	s.slots[h] = append(slot, b)
    950 }
    951 
    952 func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
    953 	newFilter := NewBytesFilter().(*bytesFilter)
    954 	newFilter.chars = s.chars
    955 	newFilter.threshold = s.threshold
    956 	for k, v := range s.slots {
    957 		newSlot := make([][]byte, len(v))
    958 		copy(newSlot, v)
    959 		newFilter.slots[k] = v
    960 	}
    961 	for _, b := range bs {
    962 		newFilter.Add(b)
    963 	}
    964 	return newFilter
    965 }
    966 
    967 func (s *bytesFilter) Contains(b []byte) bool {
    968 	l := len(b)
    969 	m := s.threshold
    970 	if l < s.threshold {
    971 		m = l
    972 	}
    973 	for i := 0; i < m; i++ {
    974 		if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
    975 			return false
    976 		}
    977 	}
    978 	h := bytesHash(b) % uint64(len(s.slots))
    979 	slot := s.slots[h]
    980 	if slot == nil || len(slot) == 0 {
    981 		return false
    982 	}
    983 	for _, element := range slot {
    984 		if bytes.Equal(element, b) {
    985 			return true
    986 		}
    987 	}
    988 	return false
    989 }