gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

reader.go (13529B)


      1 package text
      2 
      3 import (
      4 	"io"
      5 	"regexp"
      6 	"unicode/utf8"
      7 
      8 	"github.com/yuin/goldmark/util"
      9 )
     10 
     11 const invalidValue = -1
     12 
     13 // EOF indicates the end of file.
     14 const EOF = byte(0xff)
     15 
     16 // A Reader interface provides abstracted method for reading text.
     17 type Reader interface {
     18 	io.RuneReader
     19 
     20 	// Source returns a source of the reader.
     21 	Source() []byte
     22 
     23 	// ResetPosition resets positions.
     24 	ResetPosition()
     25 
     26 	// Peek returns a byte at current position without advancing the internal pointer.
     27 	Peek() byte
     28 
     29 	// PeekLine returns the current line without advancing the internal pointer.
     30 	PeekLine() ([]byte, Segment)
     31 
     32 	// PrecendingCharacter returns a character just before current internal pointer.
     33 	PrecendingCharacter() rune
     34 
     35 	// Value returns a value of the given segment.
     36 	Value(Segment) []byte
     37 
     38 	// LineOffset returns a distance from the line head to current position.
     39 	LineOffset() int
     40 
     41 	// Position returns current line number and position.
     42 	Position() (int, Segment)
     43 
     44 	// SetPosition sets current line number and position.
     45 	SetPosition(int, Segment)
     46 
     47 	// SetPadding sets padding to the reader.
     48 	SetPadding(int)
     49 
     50 	// Advance advances the internal pointer.
     51 	Advance(int)
     52 
     53 	// AdvanceAndSetPadding advances the internal pointer and add padding to the
     54 	// reader.
     55 	AdvanceAndSetPadding(int, int)
     56 
     57 	// AdvanceLine advances the internal pointer to the next line head.
     58 	AdvanceLine()
     59 
     60 	// SkipSpaces skips space characters and returns a non-blank line.
     61 	// If it reaches EOF, returns false.
     62 	SkipSpaces() (Segment, int, bool)
     63 
     64 	// SkipSpaces skips blank lines and returns a non-blank line.
     65 	// If it reaches EOF, returns false.
     66 	SkipBlankLines() (Segment, int, bool)
     67 
     68 	// Match performs regular expression matching to current line.
     69 	Match(reg *regexp.Regexp) bool
     70 
     71 	// Match performs regular expression searching to current line.
     72 	FindSubMatch(reg *regexp.Regexp) [][]byte
     73 
     74 	// FindClosure finds corresponding closure.
     75 	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
     76 }
     77 
     78 // FindClosureOptions is options for Reader.FindClosure
     79 type FindClosureOptions struct {
     80 	// CodeSpan is a flag for the FindClosure. If this is set to true,
     81 	// FindClosure ignores closers in codespans.
     82 	CodeSpan bool
     83 
     84 	// Nesting is a flag for the FindClosure. If this is set to true,
     85 	// FindClosure allows nesting.
     86 	Nesting bool
     87 
     88 	// Newline is a flag for the FindClosure. If this is set to true,
     89 	// FindClosure searches for a closer over multiple lines.
     90 	Newline bool
     91 
     92 	// Advance is a flag for the FindClosure. If this is set to true,
     93 	// FindClosure advances pointers when closer is found.
     94 	Advance bool
     95 }
     96 
     97 type reader struct {
     98 	source       []byte
     99 	sourceLength int
    100 	line         int
    101 	peekedLine   []byte
    102 	pos          Segment
    103 	head         int
    104 	lineOffset   int
    105 }
    106 
    107 // NewReader return a new Reader that can read UTF-8 bytes .
    108 func NewReader(source []byte) Reader {
    109 	r := &reader{
    110 		source:       source,
    111 		sourceLength: len(source),
    112 	}
    113 	r.ResetPosition()
    114 	return r
    115 }
    116 
    117 func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
    118 	return findClosureReader(r, opener, closer, options)
    119 }
    120 
    121 func (r *reader) ResetPosition() {
    122 	r.line = -1
    123 	r.head = 0
    124 	r.lineOffset = -1
    125 	r.AdvanceLine()
    126 }
    127 
    128 func (r *reader) Source() []byte {
    129 	return r.source
    130 }
    131 
    132 func (r *reader) Value(seg Segment) []byte {
    133 	return seg.Value(r.source)
    134 }
    135 
    136 func (r *reader) Peek() byte {
    137 	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
    138 		if r.pos.Padding != 0 {
    139 			return space[0]
    140 		}
    141 		return r.source[r.pos.Start]
    142 	}
    143 	return EOF
    144 }
    145 
    146 func (r *reader) PeekLine() ([]byte, Segment) {
    147 	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
    148 		if r.peekedLine == nil {
    149 			r.peekedLine = r.pos.Value(r.Source())
    150 		}
    151 		return r.peekedLine, r.pos
    152 	}
    153 	return nil, r.pos
    154 }
    155 
    156 // io.RuneReader interface
    157 func (r *reader) ReadRune() (rune, int, error) {
    158 	return readRuneReader(r)
    159 }
    160 
    161 func (r *reader) LineOffset() int {
    162 	if r.lineOffset < 0 {
    163 		v := 0
    164 		for i := r.head; i < r.pos.Start; i++ {
    165 			if r.source[i] == '\t' {
    166 				v += util.TabWidth(v)
    167 			} else {
    168 				v++
    169 			}
    170 		}
    171 		r.lineOffset = v - r.pos.Padding
    172 	}
    173 	return r.lineOffset
    174 }
    175 
    176 func (r *reader) PrecendingCharacter() rune {
    177 	if r.pos.Start <= 0 {
    178 		if r.pos.Padding != 0 {
    179 			return rune(' ')
    180 		}
    181 		return rune('\n')
    182 	}
    183 	i := r.pos.Start - 1
    184 	for ; i >= 0; i-- {
    185 		if utf8.RuneStart(r.source[i]) {
    186 			break
    187 		}
    188 	}
    189 	rn, _ := utf8.DecodeRune(r.source[i:])
    190 	return rn
    191 }
    192 
    193 func (r *reader) Advance(n int) {
    194 	r.lineOffset = -1
    195 	if n < len(r.peekedLine) && r.pos.Padding == 0 {
    196 		r.pos.Start += n
    197 		r.peekedLine = nil
    198 		return
    199 	}
    200 	r.peekedLine = nil
    201 	l := r.sourceLength
    202 	for ; n > 0 && r.pos.Start < l; n-- {
    203 		if r.pos.Padding != 0 {
    204 			r.pos.Padding--
    205 			continue
    206 		}
    207 		if r.source[r.pos.Start] == '\n' {
    208 			r.AdvanceLine()
    209 			continue
    210 		}
    211 		r.pos.Start++
    212 	}
    213 }
    214 
    215 func (r *reader) AdvanceAndSetPadding(n, padding int) {
    216 	r.Advance(n)
    217 	if padding > r.pos.Padding {
    218 		r.SetPadding(padding)
    219 	}
    220 }
    221 
    222 func (r *reader) AdvanceLine() {
    223 	r.lineOffset = -1
    224 	r.peekedLine = nil
    225 	r.pos.Start = r.pos.Stop
    226 	r.head = r.pos.Start
    227 	if r.pos.Start < 0 {
    228 		return
    229 	}
    230 	r.pos.Stop = r.sourceLength
    231 	for i := r.pos.Start; i < r.sourceLength; i++ {
    232 		c := r.source[i]
    233 		if c == '\n' {
    234 			r.pos.Stop = i + 1
    235 			break
    236 		}
    237 	}
    238 	r.line++
    239 	r.pos.Padding = 0
    240 }
    241 
    242 func (r *reader) Position() (int, Segment) {
    243 	return r.line, r.pos
    244 }
    245 
    246 func (r *reader) SetPosition(line int, pos Segment) {
    247 	r.lineOffset = -1
    248 	r.line = line
    249 	r.pos = pos
    250 }
    251 
    252 func (r *reader) SetPadding(v int) {
    253 	r.pos.Padding = v
    254 }
    255 
    256 func (r *reader) SkipSpaces() (Segment, int, bool) {
    257 	return skipSpacesReader(r)
    258 }
    259 
    260 func (r *reader) SkipBlankLines() (Segment, int, bool) {
    261 	return skipBlankLinesReader(r)
    262 }
    263 
    264 func (r *reader) Match(reg *regexp.Regexp) bool {
    265 	return matchReader(r, reg)
    266 }
    267 
    268 func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
    269 	return findSubMatchReader(r, reg)
    270 }
    271 
    272 // A BlockReader interface is a reader that is optimized for Blocks.
    273 type BlockReader interface {
    274 	Reader
    275 	// Reset resets current state and sets new segments to the reader.
    276 	Reset(segment *Segments)
    277 }
    278 
    279 type blockReader struct {
    280 	source         []byte
    281 	segments       *Segments
    282 	segmentsLength int
    283 	line           int
    284 	pos            Segment
    285 	head           int
    286 	last           int
    287 	lineOffset     int
    288 }
    289 
    290 // NewBlockReader returns a new BlockReader.
    291 func NewBlockReader(source []byte, segments *Segments) BlockReader {
    292 	r := &blockReader{
    293 		source: source,
    294 	}
    295 	if segments != nil {
    296 		r.Reset(segments)
    297 	}
    298 	return r
    299 }
    300 
    301 func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
    302 	return findClosureReader(r, opener, closer, options)
    303 }
    304 
    305 func (r *blockReader) ResetPosition() {
    306 	r.line = -1
    307 	r.head = 0
    308 	r.last = 0
    309 	r.lineOffset = -1
    310 	r.pos.Start = -1
    311 	r.pos.Stop = -1
    312 	r.pos.Padding = 0
    313 	if r.segmentsLength > 0 {
    314 		last := r.segments.At(r.segmentsLength - 1)
    315 		r.last = last.Stop
    316 	}
    317 	r.AdvanceLine()
    318 }
    319 
    320 func (r *blockReader) Reset(segments *Segments) {
    321 	r.segments = segments
    322 	r.segmentsLength = segments.Len()
    323 	r.ResetPosition()
    324 }
    325 
    326 func (r *blockReader) Source() []byte {
    327 	return r.source
    328 }
    329 
    330 func (r *blockReader) Value(seg Segment) []byte {
    331 	line := r.segmentsLength - 1
    332 	ret := make([]byte, 0, seg.Stop-seg.Start+1)
    333 	for ; line >= 0; line-- {
    334 		if seg.Start >= r.segments.At(line).Start {
    335 			break
    336 		}
    337 	}
    338 	i := seg.Start
    339 	for ; line < r.segmentsLength; line++ {
    340 		s := r.segments.At(line)
    341 		if i < 0 {
    342 			i = s.Start
    343 		}
    344 		ret = s.ConcatPadding(ret)
    345 		for ; i < seg.Stop && i < s.Stop; i++ {
    346 			ret = append(ret, r.source[i])
    347 		}
    348 		i = -1
    349 		if s.Stop > seg.Stop {
    350 			break
    351 		}
    352 	}
    353 	return ret
    354 }
    355 
    356 // io.RuneReader interface
    357 func (r *blockReader) ReadRune() (rune, int, error) {
    358 	return readRuneReader(r)
    359 }
    360 
    361 func (r *blockReader) PrecendingCharacter() rune {
    362 	if r.pos.Padding != 0 {
    363 		return rune(' ')
    364 	}
    365 	if r.segments.Len() < 1 {
    366 		return rune('\n')
    367 	}
    368 	firstSegment := r.segments.At(0)
    369 	if r.line == 0 && r.pos.Start <= firstSegment.Start {
    370 		return rune('\n')
    371 	}
    372 	l := len(r.source)
    373 	i := r.pos.Start - 1
    374 	for ; i < l && i >= 0; i-- {
    375 		if utf8.RuneStart(r.source[i]) {
    376 			break
    377 		}
    378 	}
    379 	if i < 0 || i >= l {
    380 		return rune('\n')
    381 	}
    382 	rn, _ := utf8.DecodeRune(r.source[i:])
    383 	return rn
    384 }
    385 
    386 func (r *blockReader) LineOffset() int {
    387 	if r.lineOffset < 0 {
    388 		v := 0
    389 		for i := r.head; i < r.pos.Start; i++ {
    390 			if r.source[i] == '\t' {
    391 				v += util.TabWidth(v)
    392 			} else {
    393 				v++
    394 			}
    395 		}
    396 		r.lineOffset = v - r.pos.Padding
    397 	}
    398 	return r.lineOffset
    399 }
    400 
    401 func (r *blockReader) Peek() byte {
    402 	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
    403 		if r.pos.Padding != 0 {
    404 			return space[0]
    405 		}
    406 		return r.source[r.pos.Start]
    407 	}
    408 	return EOF
    409 }
    410 
    411 func (r *blockReader) PeekLine() ([]byte, Segment) {
    412 	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
    413 		return r.pos.Value(r.source), r.pos
    414 	}
    415 	return nil, r.pos
    416 }
    417 
    418 func (r *blockReader) Advance(n int) {
    419 	r.lineOffset = -1
    420 
    421 	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
    422 		r.pos.Start += n
    423 		return
    424 	}
    425 
    426 	for ; n > 0; n-- {
    427 		if r.pos.Padding != 0 {
    428 			r.pos.Padding--
    429 			continue
    430 		}
    431 		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
    432 			r.AdvanceLine()
    433 			continue
    434 		}
    435 		r.pos.Start++
    436 	}
    437 }
    438 
    439 func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
    440 	r.Advance(n)
    441 	if padding > r.pos.Padding {
    442 		r.SetPadding(padding)
    443 	}
    444 }
    445 
    446 func (r *blockReader) AdvanceLine() {
    447 	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
    448 	r.head = r.pos.Start
    449 }
    450 
    451 func (r *blockReader) Position() (int, Segment) {
    452 	return r.line, r.pos
    453 }
    454 
    455 func (r *blockReader) SetPosition(line int, pos Segment) {
    456 	r.lineOffset = -1
    457 	r.line = line
    458 	if pos.Start == invalidValue {
    459 		if r.line < r.segmentsLength {
    460 			s := r.segments.At(line)
    461 			r.head = s.Start
    462 			r.pos = s
    463 		}
    464 	} else {
    465 		r.pos = pos
    466 		if r.line < r.segmentsLength {
    467 			s := r.segments.At(line)
    468 			r.head = s.Start
    469 		}
    470 	}
    471 }
    472 
    473 func (r *blockReader) SetPadding(v int) {
    474 	r.lineOffset = -1
    475 	r.pos.Padding = v
    476 }
    477 
    478 func (r *blockReader) SkipSpaces() (Segment, int, bool) {
    479 	return skipSpacesReader(r)
    480 }
    481 
    482 func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
    483 	return skipBlankLinesReader(r)
    484 }
    485 
    486 func (r *blockReader) Match(reg *regexp.Regexp) bool {
    487 	return matchReader(r, reg)
    488 }
    489 
    490 func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
    491 	return findSubMatchReader(r, reg)
    492 }
    493 
    494 func skipBlankLinesReader(r Reader) (Segment, int, bool) {
    495 	lines := 0
    496 	for {
    497 		line, seg := r.PeekLine()
    498 		if line == nil {
    499 			return seg, lines, false
    500 		}
    501 		if util.IsBlank(line) {
    502 			lines++
    503 			r.AdvanceLine()
    504 		} else {
    505 			return seg, lines, true
    506 		}
    507 	}
    508 }
    509 
    510 func skipSpacesReader(r Reader) (Segment, int, bool) {
    511 	chars := 0
    512 	for {
    513 		line, segment := r.PeekLine()
    514 		if line == nil {
    515 			return segment, chars, false
    516 		}
    517 		for i, c := range line {
    518 			if util.IsSpace(c) {
    519 				chars++
    520 				r.Advance(1)
    521 				continue
    522 			}
    523 			return segment.WithStart(segment.Start + i + 1), chars, true
    524 		}
    525 	}
    526 }
    527 
    528 func matchReader(r Reader, reg *regexp.Regexp) bool {
    529 	oldline, oldseg := r.Position()
    530 	match := reg.FindReaderSubmatchIndex(r)
    531 	r.SetPosition(oldline, oldseg)
    532 	if match == nil {
    533 		return false
    534 	}
    535 	r.Advance(match[1] - match[0])
    536 	return true
    537 }
    538 
    539 func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
    540 	oldline, oldseg := r.Position()
    541 	match := reg.FindReaderSubmatchIndex(r)
    542 	r.SetPosition(oldline, oldseg)
    543 	if match == nil {
    544 		return nil
    545 	}
    546 	runes := make([]rune, 0, match[1]-match[0])
    547 	for i := 0; i < match[1]; {
    548 		r, size, _ := readRuneReader(r)
    549 		i += size
    550 		runes = append(runes, r)
    551 	}
    552 	result := [][]byte{}
    553 	for i := 0; i < len(match); i += 2 {
    554 		result = append(result, []byte(string(runes[match[i]:match[i+1]])))
    555 	}
    556 
    557 	r.SetPosition(oldline, oldseg)
    558 	r.Advance(match[1] - match[0])
    559 	return result
    560 }
    561 
    562 func readRuneReader(r Reader) (rune, int, error) {
    563 	line, _ := r.PeekLine()
    564 	if line == nil {
    565 		return 0, 0, io.EOF
    566 	}
    567 	rn, size := utf8.DecodeRune(line)
    568 	if rn == utf8.RuneError {
    569 		return 0, 0, io.EOF
    570 	}
    571 	r.Advance(size)
    572 	return rn, size, nil
    573 }
    574 
    575 func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
    576 	opened := 1
    577 	codeSpanOpener := 0
    578 	closed := false
    579 	orgline, orgpos := r.Position()
    580 	var ret *Segments
    581 
    582 	for {
    583 		bs, seg := r.PeekLine()
    584 		if bs == nil {
    585 			goto end
    586 		}
    587 		i := 0
    588 		for i < len(bs) {
    589 			c := bs[i]
    590 			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
    591 				codeSpanCloser := 0
    592 				for ; i < len(bs); i++ {
    593 					if bs[i] == '`' {
    594 						codeSpanCloser++
    595 					} else {
    596 						i--
    597 						break
    598 					}
    599 				}
    600 				if codeSpanCloser == codeSpanOpener {
    601 					codeSpanOpener = 0
    602 				}
    603 			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
    604 				i += 2
    605 				continue
    606 			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
    607 				for ; i < len(bs); i++ {
    608 					if bs[i] == '`' {
    609 						codeSpanOpener++
    610 					} else {
    611 						i--
    612 						break
    613 					}
    614 				}
    615 			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
    616 				if c == closer {
    617 					opened--
    618 					if opened == 0 {
    619 						if ret == nil {
    620 							ret = NewSegments()
    621 						}
    622 						ret.Append(seg.WithStop(seg.Start + i))
    623 						r.Advance(i + 1)
    624 						closed = true
    625 						goto end
    626 					}
    627 				} else if c == opener {
    628 					if !opts.Nesting {
    629 						goto end
    630 					}
    631 					opened++
    632 				}
    633 			}
    634 			i++
    635 		}
    636 		if !opts.Newline {
    637 			goto end
    638 		}
    639 		r.AdvanceLine()
    640 		if ret == nil {
    641 			ret = NewSegments()
    642 		}
    643 		ret.Append(seg)
    644 	}
    645 end:
    646 	if !opts.Advance {
    647 		r.SetPosition(orgline, orgpos)
    648 	}
    649 	if closed {
    650 		return ret, true
    651 	}
    652 	return nil, false
    653 }