reader.go (13529B)
package text

import (
	"io"
	"regexp"
	"unicode/utf8"

	"github.com/yuin/goldmark/util"
)

// invalidValue marks a segment offset as "not set". blockReader.AdvanceLine
// passes a segment built from it to SetPosition to request the stored segment
// of the target line.
const invalidValue = -1

// EOF is the byte returned by Peek when there is nothing left to read.
const EOF = byte(0xff)

// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer. It returns EOF when there is nothing left to read.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	// The returned bytes are nil when there is nothing left to read.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns the resulting segment and
	// the number of characters skipped.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns the segment of the first
	// non-blank line and the number of lines skipped.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching against the text at the
	// current position and advances the reader when the expression matches.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching against the text at
	// the current position and returns the submatches, or nil when the
	// expression does not match.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds corresponding closure.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}

// FindClosureOptions is options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for the FindClosure. If this is set to true,
	// FindClosure ignores closers in codespans.
	CodeSpan bool

	// Nesting is a flag for the FindClosure. If this is set to true,
	// FindClosure allows nesting.
	Nesting bool

	// Newline is a flag for the FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for the FindClosure. If this is set to true,
	// FindClosure advances pointers when closer is found.
	Advance bool
}

// reader is the Reader implementation that walks a whole source buffer line
// by line.
type reader struct {
	source       []byte  // the full input
	sourceLength int     // len(source), captured by NewReader
	line         int     // current line number; -1 before the first AdvanceLine
	peekedLine   []byte  // memoized result of PeekLine; nil when not computed
	pos          Segment // current position within the current line
	head         int     // offset in source where the current line starts
	lineOffset   int     // memoized result of LineOffset; negative when not computed
}

// NewReader returns a new Reader that can read UTF-8 bytes.
108 func NewReader(source []byte) Reader { 109 r := &reader{ 110 source: source, 111 sourceLength: len(source), 112 } 113 r.ResetPosition() 114 return r 115 } 116 117 func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) { 118 return findClosureReader(r, opener, closer, options) 119 } 120 121 func (r *reader) ResetPosition() { 122 r.line = -1 123 r.head = 0 124 r.lineOffset = -1 125 r.AdvanceLine() 126 } 127 128 func (r *reader) Source() []byte { 129 return r.source 130 } 131 132 func (r *reader) Value(seg Segment) []byte { 133 return seg.Value(r.source) 134 } 135 136 func (r *reader) Peek() byte { 137 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { 138 if r.pos.Padding != 0 { 139 return space[0] 140 } 141 return r.source[r.pos.Start] 142 } 143 return EOF 144 } 145 146 func (r *reader) PeekLine() ([]byte, Segment) { 147 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { 148 if r.peekedLine == nil { 149 r.peekedLine = r.pos.Value(r.Source()) 150 } 151 return r.peekedLine, r.pos 152 } 153 return nil, r.pos 154 } 155 156 // io.RuneReader interface 157 func (r *reader) ReadRune() (rune, int, error) { 158 return readRuneReader(r) 159 } 160 161 func (r *reader) LineOffset() int { 162 if r.lineOffset < 0 { 163 v := 0 164 for i := r.head; i < r.pos.Start; i++ { 165 if r.source[i] == '\t' { 166 v += util.TabWidth(v) 167 } else { 168 v++ 169 } 170 } 171 r.lineOffset = v - r.pos.Padding 172 } 173 return r.lineOffset 174 } 175 176 func (r *reader) PrecendingCharacter() rune { 177 if r.pos.Start <= 0 { 178 if r.pos.Padding != 0 { 179 return rune(' ') 180 } 181 return rune('\n') 182 } 183 i := r.pos.Start - 1 184 for ; i >= 0; i-- { 185 if utf8.RuneStart(r.source[i]) { 186 break 187 } 188 } 189 rn, _ := utf8.DecodeRune(r.source[i:]) 190 return rn 191 } 192 193 func (r *reader) Advance(n int) { 194 r.lineOffset = -1 195 if n < len(r.peekedLine) && r.pos.Padding == 0 { 196 r.pos.Start += n 197 r.peekedLine = nil 198 return 199 } 
200 r.peekedLine = nil 201 l := r.sourceLength 202 for ; n > 0 && r.pos.Start < l; n-- { 203 if r.pos.Padding != 0 { 204 r.pos.Padding-- 205 continue 206 } 207 if r.source[r.pos.Start] == '\n' { 208 r.AdvanceLine() 209 continue 210 } 211 r.pos.Start++ 212 } 213 } 214 215 func (r *reader) AdvanceAndSetPadding(n, padding int) { 216 r.Advance(n) 217 if padding > r.pos.Padding { 218 r.SetPadding(padding) 219 } 220 } 221 222 func (r *reader) AdvanceLine() { 223 r.lineOffset = -1 224 r.peekedLine = nil 225 r.pos.Start = r.pos.Stop 226 r.head = r.pos.Start 227 if r.pos.Start < 0 { 228 return 229 } 230 r.pos.Stop = r.sourceLength 231 for i := r.pos.Start; i < r.sourceLength; i++ { 232 c := r.source[i] 233 if c == '\n' { 234 r.pos.Stop = i + 1 235 break 236 } 237 } 238 r.line++ 239 r.pos.Padding = 0 240 } 241 242 func (r *reader) Position() (int, Segment) { 243 return r.line, r.pos 244 } 245 246 func (r *reader) SetPosition(line int, pos Segment) { 247 r.lineOffset = -1 248 r.line = line 249 r.pos = pos 250 } 251 252 func (r *reader) SetPadding(v int) { 253 r.pos.Padding = v 254 } 255 256 func (r *reader) SkipSpaces() (Segment, int, bool) { 257 return skipSpacesReader(r) 258 } 259 260 func (r *reader) SkipBlankLines() (Segment, int, bool) { 261 return skipBlankLinesReader(r) 262 } 263 264 func (r *reader) Match(reg *regexp.Regexp) bool { 265 return matchReader(r, reg) 266 } 267 268 func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte { 269 return findSubMatchReader(r, reg) 270 } 271 272 // A BlockReader interface is a reader that is optimized for Blocks. 273 type BlockReader interface { 274 Reader 275 // Reset resets current state and sets new segments to the reader. 276 Reset(segment *Segments) 277 } 278 279 type blockReader struct { 280 source []byte 281 segments *Segments 282 segmentsLength int 283 line int 284 pos Segment 285 head int 286 last int 287 lineOffset int 288 } 289 290 // NewBlockReader returns a new BlockReader. 
291 func NewBlockReader(source []byte, segments *Segments) BlockReader { 292 r := &blockReader{ 293 source: source, 294 } 295 if segments != nil { 296 r.Reset(segments) 297 } 298 return r 299 } 300 301 func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) { 302 return findClosureReader(r, opener, closer, options) 303 } 304 305 func (r *blockReader) ResetPosition() { 306 r.line = -1 307 r.head = 0 308 r.last = 0 309 r.lineOffset = -1 310 r.pos.Start = -1 311 r.pos.Stop = -1 312 r.pos.Padding = 0 313 if r.segmentsLength > 0 { 314 last := r.segments.At(r.segmentsLength - 1) 315 r.last = last.Stop 316 } 317 r.AdvanceLine() 318 } 319 320 func (r *blockReader) Reset(segments *Segments) { 321 r.segments = segments 322 r.segmentsLength = segments.Len() 323 r.ResetPosition() 324 } 325 326 func (r *blockReader) Source() []byte { 327 return r.source 328 } 329 330 func (r *blockReader) Value(seg Segment) []byte { 331 line := r.segmentsLength - 1 332 ret := make([]byte, 0, seg.Stop-seg.Start+1) 333 for ; line >= 0; line-- { 334 if seg.Start >= r.segments.At(line).Start { 335 break 336 } 337 } 338 i := seg.Start 339 for ; line < r.segmentsLength; line++ { 340 s := r.segments.At(line) 341 if i < 0 { 342 i = s.Start 343 } 344 ret = s.ConcatPadding(ret) 345 for ; i < seg.Stop && i < s.Stop; i++ { 346 ret = append(ret, r.source[i]) 347 } 348 i = -1 349 if s.Stop > seg.Stop { 350 break 351 } 352 } 353 return ret 354 } 355 356 // io.RuneReader interface 357 func (r *blockReader) ReadRune() (rune, int, error) { 358 return readRuneReader(r) 359 } 360 361 func (r *blockReader) PrecendingCharacter() rune { 362 if r.pos.Padding != 0 { 363 return rune(' ') 364 } 365 if r.segments.Len() < 1 { 366 return rune('\n') 367 } 368 firstSegment := r.segments.At(0) 369 if r.line == 0 && r.pos.Start <= firstSegment.Start { 370 return rune('\n') 371 } 372 l := len(r.source) 373 i := r.pos.Start - 1 374 for ; i < l && i >= 0; i-- { 375 if 
utf8.RuneStart(r.source[i]) { 376 break 377 } 378 } 379 if i < 0 || i >= l { 380 return rune('\n') 381 } 382 rn, _ := utf8.DecodeRune(r.source[i:]) 383 return rn 384 } 385 386 func (r *blockReader) LineOffset() int { 387 if r.lineOffset < 0 { 388 v := 0 389 for i := r.head; i < r.pos.Start; i++ { 390 if r.source[i] == '\t' { 391 v += util.TabWidth(v) 392 } else { 393 v++ 394 } 395 } 396 r.lineOffset = v - r.pos.Padding 397 } 398 return r.lineOffset 399 } 400 401 func (r *blockReader) Peek() byte { 402 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { 403 if r.pos.Padding != 0 { 404 return space[0] 405 } 406 return r.source[r.pos.Start] 407 } 408 return EOF 409 } 410 411 func (r *blockReader) PeekLine() ([]byte, Segment) { 412 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { 413 return r.pos.Value(r.source), r.pos 414 } 415 return nil, r.pos 416 } 417 418 func (r *blockReader) Advance(n int) { 419 r.lineOffset = -1 420 421 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 { 422 r.pos.Start += n 423 return 424 } 425 426 for ; n > 0; n-- { 427 if r.pos.Padding != 0 { 428 r.pos.Padding-- 429 continue 430 } 431 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last { 432 r.AdvanceLine() 433 continue 434 } 435 r.pos.Start++ 436 } 437 } 438 439 func (r *blockReader) AdvanceAndSetPadding(n, padding int) { 440 r.Advance(n) 441 if padding > r.pos.Padding { 442 r.SetPadding(padding) 443 } 444 } 445 446 func (r *blockReader) AdvanceLine() { 447 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue)) 448 r.head = r.pos.Start 449 } 450 451 func (r *blockReader) Position() (int, Segment) { 452 return r.line, r.pos 453 } 454 455 func (r *blockReader) SetPosition(line int, pos Segment) { 456 r.lineOffset = -1 457 r.line = line 458 if pos.Start == invalidValue { 459 if r.line < r.segmentsLength { 460 s := r.segments.At(line) 461 r.head = s.Start 462 r.pos = s 463 } 464 } else { 465 r.pos = pos 466 if r.line < 
r.segmentsLength { 467 s := r.segments.At(line) 468 r.head = s.Start 469 } 470 } 471 } 472 473 func (r *blockReader) SetPadding(v int) { 474 r.lineOffset = -1 475 r.pos.Padding = v 476 } 477 478 func (r *blockReader) SkipSpaces() (Segment, int, bool) { 479 return skipSpacesReader(r) 480 } 481 482 func (r *blockReader) SkipBlankLines() (Segment, int, bool) { 483 return skipBlankLinesReader(r) 484 } 485 486 func (r *blockReader) Match(reg *regexp.Regexp) bool { 487 return matchReader(r, reg) 488 } 489 490 func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte { 491 return findSubMatchReader(r, reg) 492 } 493 494 func skipBlankLinesReader(r Reader) (Segment, int, bool) { 495 lines := 0 496 for { 497 line, seg := r.PeekLine() 498 if line == nil { 499 return seg, lines, false 500 } 501 if util.IsBlank(line) { 502 lines++ 503 r.AdvanceLine() 504 } else { 505 return seg, lines, true 506 } 507 } 508 } 509 510 func skipSpacesReader(r Reader) (Segment, int, bool) { 511 chars := 0 512 for { 513 line, segment := r.PeekLine() 514 if line == nil { 515 return segment, chars, false 516 } 517 for i, c := range line { 518 if util.IsSpace(c) { 519 chars++ 520 r.Advance(1) 521 continue 522 } 523 return segment.WithStart(segment.Start + i + 1), chars, true 524 } 525 } 526 } 527 528 func matchReader(r Reader, reg *regexp.Regexp) bool { 529 oldline, oldseg := r.Position() 530 match := reg.FindReaderSubmatchIndex(r) 531 r.SetPosition(oldline, oldseg) 532 if match == nil { 533 return false 534 } 535 r.Advance(match[1] - match[0]) 536 return true 537 } 538 539 func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte { 540 oldline, oldseg := r.Position() 541 match := reg.FindReaderSubmatchIndex(r) 542 r.SetPosition(oldline, oldseg) 543 if match == nil { 544 return nil 545 } 546 runes := make([]rune, 0, match[1]-match[0]) 547 for i := 0; i < match[1]; { 548 r, size, _ := readRuneReader(r) 549 i += size 550 runes = append(runes, r) 551 } 552 result := [][]byte{} 553 for i := 0; 
i < len(match); i += 2 { 554 result = append(result, []byte(string(runes[match[i]:match[i+1]]))) 555 } 556 557 r.SetPosition(oldline, oldseg) 558 r.Advance(match[1] - match[0]) 559 return result 560 } 561 562 func readRuneReader(r Reader) (rune, int, error) { 563 line, _ := r.PeekLine() 564 if line == nil { 565 return 0, 0, io.EOF 566 } 567 rn, size := utf8.DecodeRune(line) 568 if rn == utf8.RuneError { 569 return 0, 0, io.EOF 570 } 571 r.Advance(size) 572 return rn, size, nil 573 } 574 575 func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) { 576 opened := 1 577 codeSpanOpener := 0 578 closed := false 579 orgline, orgpos := r.Position() 580 var ret *Segments 581 582 for { 583 bs, seg := r.PeekLine() 584 if bs == nil { 585 goto end 586 } 587 i := 0 588 for i < len(bs) { 589 c := bs[i] 590 if opts.CodeSpan && codeSpanOpener != 0 && c == '`' { 591 codeSpanCloser := 0 592 for ; i < len(bs); i++ { 593 if bs[i] == '`' { 594 codeSpanCloser++ 595 } else { 596 i-- 597 break 598 } 599 } 600 if codeSpanCloser == codeSpanOpener { 601 codeSpanOpener = 0 602 } 603 } else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) { 604 i += 2 605 continue 606 } else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' { 607 for ; i < len(bs); i++ { 608 if bs[i] == '`' { 609 codeSpanOpener++ 610 } else { 611 i-- 612 break 613 } 614 } 615 } else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan { 616 if c == closer { 617 opened-- 618 if opened == 0 { 619 if ret == nil { 620 ret = NewSegments() 621 } 622 ret.Append(seg.WithStop(seg.Start + i)) 623 r.Advance(i + 1) 624 closed = true 625 goto end 626 } 627 } else if c == opener { 628 if !opts.Nesting { 629 goto end 630 } 631 opened++ 632 } 633 } 634 i++ 635 } 636 if !opts.Newline { 637 goto end 638 } 639 r.AdvanceLine() 640 if ret == nil { 641 ret = NewSegments() 642 } 643 ret.Append(seg) 644 } 645 end: 646 if !opts.Advance { 647 r.SetPosition(orgline, orgpos) 
648 } 649 if closed { 650 return ret, true 651 } 652 return nil, false 653 }