scanner.go (29903B)
// Copyright 2019 The CC Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cc // import "modernc.org/cc/v3"

import (
	"bufio"
	"bytes"
	"fmt"
	goscanner "go/scanner"
	"io"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"unicode/utf8"

	"modernc.org/mathutil"
	"modernc.org/token"
)

// Character classes returned by scanner.class. They start above maxASCII so
// they can never collide with a literal ASCII byte value.
const (
	clsEOF = iota + 0x80
	clsOther
)

const maxASCII = 0x7f

var (
	// UTF-8 byte order mark; discarded from the start of a source file.
	bom = []byte{0xEF, 0xBB, 0xBF}

	// Interned names of the preprocessing directives and a few special
	// tokens, resolved once at package init.
	idDefine      = dict.sid("define")
	idElif        = dict.sid("elif")
	idElse        = dict.sid("else")
	idEndif       = dict.sid("endif")
	idError       = dict.sid("error")
	idIf          = dict.sid("if")
	idIfdef       = dict.sid("ifdef")
	idIfndef      = dict.sid("ifndef")
	idInclude     = dict.sid("include")
	idIncludeNext = dict.sid("include_next")
	idLine        = dict.sid("line")
	idPragma      = dict.sid("pragma")
	idPragmaOp    = dict.sid("_Pragma")
	idSpace       = dict.sid(" ")
	idUndef       = dict.sid("undef")

	// Trigraph sequences and their single-character replacements,
	// per [0], 5.1.1.2, 1.1.
	trigraphPrefix = []byte("??")
	trigraphs      = []struct{ from, to []byte }{
		{[]byte("??="), []byte{'#'}},
		{[]byte("??("), []byte{'['}},
		{[]byte("??/"), []byte{'\\'}},
		{[]byte("??)"), []byte{']'}},
		{[]byte("??'"), []byte{'^'}},
		{[]byte("??<"), []byte{'{'}},
		{[]byte("??!"), []byte{'|'}},
		{[]byte("??>"), []byte{'}'}},
		{[]byte("??-"), []byte{'~'}},
	}
)

// tokenFile wraps token.File with a mutex so position queries and line
// registration can be issued from multiple goroutines.
type tokenFile struct {
	*token.File
	sync.RWMutex
}

func tokenNewFile(name string, sz int) *tokenFile { return &tokenFile{File: token.NewFile(name, sz)} }

// Position returns the Position value for pos, under a read lock.
func (f *tokenFile) Position(pos token.Pos) (r token.Position) {
	f.RLock()
	r = f.File.Position(pos)
	f.RUnlock()
	return r
}

// PositionFor is like Position; adjusted selects whether #line-style
// adjustments recorded via AddLineInfo are applied.
func (f *tokenFile) PositionFor(pos token.Pos, adjusted bool) (r token.Position) {
	f.RLock()
	r = f.File.PositionFor(pos, adjusted)
	f.RUnlock()
	return r
}

// AddLine records off as the offset of the first byte of a new line.
func (f *tokenFile) AddLine(off int) {
	f.Lock()
	f.File.AddLine(off)
	f.Unlock()
}

// AddLineInfo records an alternative file name and line number for the
// position at off (used when processing #line directives).
func (f *tokenFile) AddLineInfo(off int, fn string, line int) {
	f.Lock()
	f.File.AddLineInfo(off, fn, line)
	f.Unlock()
}

// node is anything that knows its position within a token file.
type node interface {
	Pos() token.Pos
}

// dictionary interns strings as dense StringID values. IDs 0..127 are
// reserved for the single ASCII bytes; see newDictionary.
type dictionary struct {
	mu      sync.RWMutex
	m       map[string]StringID
	strings []string
}

// newDictionary pre-populates ids 0..127 so a one-byte ASCII string is its
// own StringID and needs no map lookup.
func newDictionary() (r *dictionary) {
	r = &dictionary{m: map[string]StringID{}}
	b := make([]byte, 1)
	for i := 0; i < 128; i++ {
		var s string
		if i != 0 {
			b[0] = byte(i)
			s = string(b)
		}
		r.m[s] = StringID(i)
		r.strings = append(r.strings, s)
		dictStrings[i] = s
	}
	return r
}

// id returns the StringID of key, interning it on first use.
func (d *dictionary) id(key []byte) StringID {
	switch len(key) {
	case 0:
		return 0
	case 1:
		// Single non-zero ASCII bytes map to themselves; no lock needed.
		if c := key[0]; c != 0 && c < 128 {
			return StringID(c)
		}
	}

	d.mu.Lock()
	// The string(key) conversion in a map index does not allocate.
	if n, ok := d.m[string(key)]; ok {
		d.mu.Unlock()
		return n
	}

	n := StringID(len(d.strings))
	s := string(key)
	if int(n) < 256 {
		dictStrings[n] = s
	}
	d.strings = append(d.strings, s)
	d.m[s] = n
	d.mu.Unlock()
	return n
}

// sid is the string flavor of id.
func (d *dictionary) sid(key string) StringID {
	switch len(key) {
	case 0:
		return 0
	case 1:
		if c := key[0]; c != 0 && c < 128 {
			return StringID(c)
		}
	}

	d.mu.Lock()
	if n, ok := d.m[key]; ok {
		d.mu.Unlock()
		return n
	}

	n := StringID(len(d.strings))
	if int(n) < 256 {
		dictStrings[n] = key
	}
	d.strings = append(d.strings, key)
	d.m[key] = n
	d.mu.Unlock()
	return n
}

// char is a single source byte together with its file position. pos == 0
// marks an invalid/absent char (see scanner.unget, scanner.next).
type char struct {
	pos int32
	c   byte
}

// token3 is produced by translation phase 3.
type token3 struct {
	char  rune
	pos   int32
	value StringID
	src   StringID
	macro StringID
}

func (t token3) Pos() token.Pos { return token.Pos(t.pos) }
func (t token3) String() string { return t.value.String() }

// scanner performs translation phases 1-3: byte decoding, trigraph
// replacement, line splicing and tokenization of a single source file.
type scanner struct {
	bomFix        int
	bytesBuf      []byte
	charBuf       []char
	ctx           *context
	file          *tokenFile
	fileOffset    int
	firstPos      token.Pos
	lineBuf       []byte
	lookaheadChar char
	lookaheadLine ppLine
	mark          int
	pos           token.Pos
	r             *bufio.Reader
	srcBuf        []byte
	tokenBuf      []token3
	ungetBuf      []char

	tok token3

	closed             bool
	preserveWhiteSpace bool
}

func newScanner0(ctx *context, r io.Reader, file *tokenFile, bufSize int) *scanner {
	s := &scanner{
		ctx:  ctx,
		file: file,
		r:    bufio.NewReaderSize(r, bufSize),
	}
	if r != nil {
		s.init()
	}
	return s
}

// newScanner picks a read-buffer size: 128 KiB by default, or
// cfg.MaxSourceLine when that is larger than 4096.
func newScanner(ctx *context, r io.Reader, file *tokenFile) *scanner {
	bufSize := 1 << 17 // emulate gcc
	if n := ctx.cfg.MaxSourceLine; n > 4096 {
		bufSize = n
	}
	return newScanner0(ctx, r, file, bufSize)
}

// abort backtracks the scanner. With an active mark (s.mark >= 0) it pushes
// the consumed chars back and reports failure; without one it commits the
// first buffered char and reports its class. The bracketed comments show the
// charBuf contents and the lookahead char z.
func (s *scanner) abort() (r byte, b bool) {
	if s.mark >= 0 {
		if len(s.charBuf) > s.mark {
			s.unget(s.lookaheadChar)
			for i := len(s.charBuf) - 1; i >= s.mark; i-- {
				s.unget(s.charBuf[i])
			}
		}
		s.charBuf = s.charBuf[:s.mark]
		return 0, false
	}

	switch n := len(s.charBuf); n {
	case 0: // [] z
		c := s.lookaheadChar
		s.next()
		return s.class(c.c), true
	case 1: // [a] z
		return s.class(s.charBuf[0].c), true
	default: // [a, b, ...], z
		c := s.charBuf[0]        // a
		s.unget(s.lookaheadChar) // z
		for i := n - 1; i > 1; i-- {
			s.unget(s.charBuf[i]) // ...
		}
		s.lookaheadChar = s.charBuf[1] // b
		s.charBuf = s.charBuf[:1]
		return s.class(c.c), true
	}
}

// class maps a byte to its scanner character class: itself for ASCII,
// clsEOF for the zero byte, clsOther for non-ASCII bytes.
func (s *scanner) class(b byte) byte {
	switch {
	case b == 0:
		return clsEOF
	case b > maxASCII:
		return clsOther
	default:
		return b
	}
}

func (s *scanner) err(n node, msg string, args ...interface{}) { s.errPos(n.Pos(), msg, args...) }

// errLine reports an error for a whole pp line, prefixing msg with the
// line's reconstructed text. x is nil or a ppLine.
func (s *scanner) errLine(x interface{}, msg string, args ...interface{}) {
	var toks []token3
	switch x := x.(type) {
	case nil:
		toks = []token3{{}}
	case ppLine:
		toks = x.getToks()
	default:
		panic(internalError())
	}
	var b strings.Builder
	for _, v := range toks {
		switch v.char {
		case '\n':
			// nop
		case ' ':
			b.WriteByte(' ')
		default:
			b.WriteString(v.String())
		}
	}
	s.err(toks[0], "%s"+msg, append([]interface{}{b.String()}, args...)...)
}

// errPos reports an error at pos; when the context signals the error limit
// was reached, the scanner shuts down and emits EOF from then on.
func (s *scanner) errPos(pos token.Pos, msg string, args ...interface{}) {
	if s.ctx.err(s.file.Position(pos), msg, args...) {
		s.r.Reset(nil)
		s.closed = true
	}
}

// init strips a leading UTF-8 BOM, remembering its width in bomFix so file
// offsets stay aligned with the underlying reader.
func (s *scanner) init() *scanner {
	if s.r == nil {
		return s
	}

	b, err := s.r.Peek(3)
	if err == nil && bytes.Equal(b, bom) {
		s.bomFix, _ = s.r.Discard(3)
	}
	s.tokenBuf = nil
	return s
}

// initScan prepares per-token state (first position, cleared buffers) and
// returns the class of the lookahead char. Oversized buffers are dropped
// rather than kept alive. NOTE: srcBuf deliberately shares bytesBuf's
// backing array; lex consumes srcBuf (via dict.id) before writing bytesBuf.
func (s *scanner) initScan() (r byte) {
	if s.lookaheadChar.pos == 0 {
		s.next()
	}
	s.firstPos = token.Pos(s.lookaheadChar.pos)
	s.mark = -1
	if len(s.charBuf) > 1<<18 { //DONE benchmark tuned
		s.bytesBuf = nil
		s.charBuf = nil
		s.srcBuf = nil
	} else {
		s.bytesBuf = s.bytesBuf[:0]
		s.charBuf = s.charBuf[:0]
		s.srcBuf = s.bytesBuf[:0]
	}
	return s.class(s.lookaheadChar.c)
}

// lex scans one token into s.tok: char (token kind), pos, src (exact source
// spelling) and value. For identifiers, \uXXXX/\UXXXXXXXX universal
// character names are decoded into UTF-8 so the value is the canonical
// spelling. s.scan is the generated tokenizer (defined elsewhere).
func (s *scanner) lex() {
	s.tok.char = s.scan()
	s.tok.pos = int32(s.firstPos)
	for _, v := range s.charBuf {
		s.srcBuf = append(s.srcBuf, v.c)
	}
	s.tok.src = dict.id(s.srcBuf)
	switch {
	case s.tok.char == ' ' && !s.preserveWhiteSpace && !s.ctx.cfg.PreserveWhiteSpace:
		// Collapse white space runs to a single canonical space.
		s.tok.value = idSpace
	case s.tok.char == IDENTIFIER:
		for i := 0; i < len(s.charBuf); {
			c := s.charBuf[i].c
			if c != '\\' {
				s.bytesBuf = append(s.bytesBuf, c)
				i++
				continue
			}

			i++ // Skip '\\'
			var n int
			switch s.charBuf[i].c {
			case 'u':
				n = 4
			case 'U':
				n = 8
			default:
				panic(internalError())
			}
			i++ // Skip 'u' or 'U'
			l := len(s.bytesBuf)
			for i0 := i; i < i0+n; i++ {
				s.bytesBuf = append(s.bytesBuf, s.charBuf[i].c)
			}
			r, err := strconv.ParseUint(string(s.bytesBuf[l:l+n]), 16, 32)
			if err != nil {
				// The tokenizer only accepts hex digits here.
				panic(internalError())
			}

			// Overwrite the hex digits with the encoded rune.
			n2 := utf8.EncodeRune(s.bytesBuf[l:], rune(r))
			s.bytesBuf = s.bytesBuf[:l+n2]
		}
		s.tok.value = dict.id(s.bytesBuf)
	default:
		s.tok.value = s.tok.src
	}
	switch s.tok.char {
	case clsEOF:
		s.tok.char = -1
		s.tok.pos = int32(s.file.Pos(s.file.Size()))
	}
	// dbg("lex %q %q", tokName(s.tok.char), s.tok.value)
}

// next advances the lookahead char: first from ungetBuf, then from the
// current line buffer, refilling it a physical line at a time. Refilling
// performs translation phases 1 and 2: trigraph replacement and
// backslash-newline splicing (including \r\n endings).
func (s *scanner) next() (r byte) {
	if s.lookaheadChar.pos > 0 {
		s.charBuf = append(s.charBuf, s.lookaheadChar)
	}
	if n := len(s.ungetBuf); n != 0 {
		s.lookaheadChar = s.ungetBuf[n-1]
		s.ungetBuf = s.ungetBuf[:n-1]
		return s.class(s.lookaheadChar.c)
	}

	if len(s.lineBuf) == 0 {
	more:
		if s.closed || s.fileOffset == s.file.Size() {
			s.lookaheadChar.c = 0
			s.lookaheadChar.pos = 0
			return clsEOF
		}

		b, err := s.r.ReadSlice('\n')
		if err != nil {
			if err != io.EOF {
				s.errPos(s.pos, "error while reading %s: %s", s.file.Name(), err)
			}
			if len(b) == 0 {
				return clsEOF
			}
		}

		s.file.AddLine(s.fileOffset)
		s.fileOffset += s.bomFix
		s.bomFix = 0
		s.pos = token.Pos(s.fileOffset)
		s.fileOffset += len(b)

		// [0], 5.1.1.2, 1.1
		//
		// Physical source file multibyte characters are mapped, in an
		// implementation- defined manner, to the source character set
		// (introducing new-line characters for end-of-line indicators)
		// if necessary. Trigraph sequences are replaced by
		// corresponding single-character internal representations.
		if !s.ctx.cfg.DisableTrigraphs && bytes.Contains(b, trigraphPrefix) {
			for _, v := range trigraphs {
				b = bytes.Replace(b, v.from, v.to, -1)
			}
		}

		// [0], 5.1.1.2, 2
		//
		// Each instance of a backslash character (\) immediately
		// followed by a new-line character is deleted, splicing
		// physical source lines to form logical source lines. Only
		// the last backslash on any physical source line shall be
		// eligible for being part of such a splice. A source file that
		// is not empty shall end in a new-line character, which shall
		// not be immediately preceded by a backslash character before
		// any such splicing takes place.
		s.lineBuf = b
		n := len(b)
		switch {
		case b[n-1] != '\n':
			if s.ctx.cfg.RejectMissingFinalNewline {
				s.errPos(s.pos+token.Pos(n), "non empty source file shall end in a new-line character")
			}
			b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
		case n > 1 && b[n-2] == '\\':
			if n == 2 {
				// Line was just "\\\n": splice with the next line.
				goto more
			}

			b = b[:n-2]
			n = len(b)
			if s.fileOffset == s.file.Size() {
				if s.ctx.cfg.RejectFinalBackslash {
					s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
				}
				b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
			}
		case n > 2 && b[n-3] == '\\' && b[n-2] == '\r':
			// we've got a windows source that has \r\n line endings.
			if n == 3 {
				goto more
			}

			b = b[:n-3]
			n = len(b)
			if s.fileOffset == s.file.Size() {
				if s.ctx.cfg.RejectFinalBackslash {
					s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
				}
				b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
			}
		}
		s.lineBuf = b
	}
	s.pos++
	s.lookaheadChar = char{int32(s.pos), s.lineBuf[0]}
	s.lineBuf = s.lineBuf[1:]
	return s.class(s.lookaheadChar.c)
}

// unget pushes chars back (LIFO) to be re-delivered by next.
func (s *scanner) unget(c ...char) {
	s.ungetBuf = append(s.ungetBuf, c...)
	s.lookaheadChar.pos = 0 // Must invalidate lookahead.
}

// unterminatedComment reports an unterminated /* comment and yields a single
// space token, pushing the trailing newline back for re-scanning.
func (s *scanner) unterminatedComment() rune {
	s.errPos(token.Pos(s.file.Size()), "unterminated comment")
	n := len(s.charBuf)
	s.unget(s.charBuf[n-1]) // \n
	s.charBuf = s.charBuf[:n-1]
	return ' '
}

// -------------------------------------------------------- Translation phase 3
A source file shall not end in 516 // a partial preprocessing token or in a partial comment. Each comment is 517 // replaced by one space character. New-line characters are retained. Whether 518 // each nonempty sequence of white-space characters other than new-line is 519 // retained or replaced by one space character is implementation-defined. 520 func (s *scanner) translationPhase3() *ppFile { 521 r := &ppFile{file: s.file} 522 if s.file.Size() == 0 { 523 s.r.Reset(nil) 524 return r 525 } 526 527 s.nextLine() 528 r.groups = s.parseGroup() 529 return r 530 } 531 532 func (s *scanner) parseGroup() (r []ppGroup) { 533 for { 534 switch x := s.lookaheadLine.(type) { 535 case ppGroup: 536 r = append(r, x) 537 s.nextLine() 538 case ppIfGroupDirective: 539 r = append(r, s.parseIfSection()) 540 default: 541 return r 542 } 543 } 544 } 545 546 func (s *scanner) parseIfSection() *ppIfSection { 547 return &ppIfSection{ 548 ifGroup: s.parseIfGroup(), 549 elifGroups: s.parseElifGroup(), 550 elseGroup: s.parseElseGroup(), 551 endifLine: s.parseEndifLine(), 552 } 553 } 554 555 func (s *scanner) parseEndifLine() *ppEndifDirective { 556 switch x := s.lookaheadLine.(type) { 557 case *ppEndifDirective: 558 s.nextLine() 559 return x 560 default: 561 s.errLine(x, fmt.Sprintf(": expected #endif (unexpected %T)", x)) 562 s.nextLine() 563 return nil 564 } 565 } 566 567 func (s *scanner) parseElseGroup() *ppElseGroup { 568 switch x := s.lookaheadLine.(type) { 569 case *ppElseDirective: 570 r := &ppElseGroup{elseLine: x} 571 s.nextLine() 572 r.groups = s.parseGroup() 573 return r 574 default: 575 return nil 576 } 577 } 578 579 func (s *scanner) parseElifGroup() (r []*ppElifGroup) { 580 for { 581 var g ppElifGroup 582 switch x := s.lookaheadLine.(type) { 583 case *ppElifDirective: 584 g.elif = x 585 s.nextLine() 586 g.groups = s.parseGroup() 587 r = append(r, &g) 588 default: 589 return r 590 } 591 } 592 } 593 594 func (s *scanner) parseIfGroup() *ppIfGroup { 595 r := &ppIfGroup{} 596 
switch x := s.lookaheadLine.(type) { 597 case ppIfGroupDirective: 598 r.directive = x 599 default: 600 s.errLine(x, fmt.Sprintf(": expected if-group (unexpected %T)", x)) 601 } 602 s.nextLine() 603 r.groups = s.parseGroup() 604 return r 605 } 606 607 func (s *scanner) nextLine() { 608 s.tokenBuf = nil 609 s.lookaheadLine = s.scanLine() 610 } 611 612 func (s *scanner) scanLine() (r ppLine) { 613 again: 614 toks := s.scanToNonBlankToken(nil) 615 if len(toks) == 0 { 616 return nil 617 } 618 619 includeNext := false 620 switch tok := toks[len(toks)-1]; tok.char { 621 case '#': 622 toks = s.scanToNonBlankToken(toks) 623 switch tok := toks[len(toks)-1]; tok.char { 624 case '\n': 625 return &ppEmptyDirective{toks: toks} 626 case IDENTIFIER: 627 switch tok.value { 628 case idDefine: 629 return s.parseDefine(toks) 630 case idElif: 631 return s.parseElif(toks) 632 case idElse: 633 return s.parseElse(toks) 634 case idEndif: 635 return s.parseEndif(toks) 636 case idIf: 637 return s.parseIf(toks) 638 case idIfdef: 639 return s.parseIfdef(toks) 640 case idIfndef: 641 return s.parseIfndef(toks) 642 case idIncludeNext: 643 includeNext = true 644 fallthrough 645 case idInclude: 646 // # include pp-tokens new-line 647 // 648 // Prevent aliasing of eg. <foo bar.h> and <foo bar.h>. 
649 save := s.preserveWhiteSpace 650 s.preserveWhiteSpace = true 651 n := len(toks) 652 toks := s.scanLineToEOL(toks) 653 r := &ppIncludeDirective{arg: toks[n : len(toks)-1], toks: toks, includeNext: includeNext} 654 s.preserveWhiteSpace = save 655 return r 656 case idUndef: 657 return s.parseUndef(toks) 658 case idLine: 659 return s.parseLine(toks) 660 case idError: 661 // # error pp-tokens_opt new-line 662 n := len(toks) 663 toks := s.scanLineToEOL(toks) 664 msg := toks[n : len(toks)-1] 665 if len(msg) != 0 && msg[0].char == ' ' { 666 msg = msg[1:] 667 } 668 return &ppErrorDirective{toks: toks, msg: msg} 669 case idPragma: 670 return s.parsePragma(toks) 671 } 672 } 673 674 // # non-directive 675 return &ppNonDirective{toks: s.scanLineToEOL(toks)} 676 case '\n': 677 return &ppTextLine{toks: toks} 678 case IDENTIFIER: 679 if tok.value == idPragmaOp { 680 toks = s.scanToNonBlankToken(toks) 681 switch tok = toks[len(toks)-1]; tok.char { 682 case '(': 683 // ok 684 default: 685 s.err(tok, "expected (") 686 return &ppTextLine{toks: toks} 687 } 688 689 var lit string 690 toks = s.scanToNonBlankToken(toks) 691 switch tok = toks[len(toks)-1]; tok.char { 692 case STRINGLITERAL: 693 lit = tok.String() 694 case LONGSTRINGLITERAL: 695 lit = tok.String()[1:] // [0], 6.9.10, 1 696 default: 697 s.err(tok, "expected string literal") 698 return &ppTextLine{toks: toks} 699 } 700 701 pos := tok.pos 702 toks = s.scanToNonBlankToken(toks) 703 switch tok = toks[len(toks)-1]; tok.char { 704 case ')': 705 // ok 706 default: 707 s.err(tok, "expected )") 708 return &ppTextLine{toks: toks} 709 } 710 711 s.unget(s.lookaheadChar) 712 // [0], 6.9.10, 1 713 lit = lit[1 : len(lit)-1] 714 lit = strings.ReplaceAll(lit, `\"`, `"`) 715 lit = strings.ReplaceAll(lit, `\\`, `\`) 716 lit = "#pragma " + lit + "\n" 717 for i := len(lit) - 1; i >= 0; i-- { 718 s.unget(char{pos, lit[i]}) 719 } 720 goto again 721 } 722 723 fallthrough 724 default: 725 return &ppTextLine{toks: s.scanLineToEOL(toks)} 726 } 727 
} 728 729 func (s *scanner) parsePragma(toks []token3) *ppPragmaDirective { 730 toks = s.scanToNonBlankToken(toks) 731 n := len(toks) 732 if toks[n-1].char != '\n' { 733 toks = s.scanLineToEOL(toks) 734 } 735 return &ppPragmaDirective{toks: toks, args: toks[n-1:]} 736 } 737 738 // # line pp-tokens new-line 739 func (s *scanner) parseLine(toks []token3) *ppLineDirective { 740 toks = s.scanToNonBlankToken(toks) 741 switch tok := toks[len(toks)-1]; tok.char { 742 case '\n': 743 s.err(tok, "unexpected new-line") 744 return &ppLineDirective{toks: toks} 745 default: 746 toks := s.scanLineToEOL(toks) 747 last := toks[len(toks)-1] 748 r := &ppLineDirective{toks: toks, nextPos: int(last.pos) + len(last.src.String())} 749 toks = toks[:len(toks)-1] // sans new-line 750 toks = ltrim3(toks) 751 toks = toks[1:] // Skip '#' 752 toks = ltrim3(toks) 753 toks = toks[1:] // Skip "line" 754 r.args = ltrim3(toks) 755 return r 756 } 757 } 758 759 func ltrim3(toks []token3) []token3 { 760 for len(toks) != 0 && toks[0].char == ' ' { 761 toks = toks[1:] 762 } 763 return toks 764 } 765 766 // # undef identifier new-line 767 func (s *scanner) parseUndef(toks []token3) *ppUndefDirective { 768 toks = s.scanToNonBlankToken(toks) 769 switch tok := toks[len(toks)-1]; tok.char { 770 case '\n': 771 s.err(&tok, "expected identifier") 772 return &ppUndefDirective{toks: toks} 773 case IDENTIFIER: 774 name := tok 775 toks = s.scanToNonBlankToken(toks) 776 switch tok := toks[len(toks)-1]; tok.char { 777 case '\n': 778 return &ppUndefDirective{name: name, toks: toks} 779 default: 780 if s.ctx.cfg.RejectUndefExtraTokens { 781 s.err(&tok, "extra tokens after #undef") 782 } 783 return &ppUndefDirective{name: name, toks: s.scanLineToEOL(toks)} 784 } 785 default: 786 s.err(&tok, "expected identifier") 787 return &ppUndefDirective{toks: s.scanLineToEOL(toks)} 788 } 789 } 790 791 func (s *scanner) scanLineToEOL(toks []token3) []token3 { 792 n := len(s.tokenBuf) - len(toks) 793 for { 794 s.lex() 795 s.tokenBuf = 
append(s.tokenBuf, s.tok) 796 if s.tok.char == '\n' { 797 return s.tokenBuf[n:] 798 } 799 } 800 } 801 802 // # ifndef identifier new-line 803 func (s *scanner) parseIfndef(toks []token3) *ppIfndefDirective { 804 var name StringID 805 toks = s.scanToNonBlankToken(toks) 806 switch tok := toks[len(toks)-1]; tok.char { 807 case IDENTIFIER: 808 name = tok.value 809 toks = s.scanToNonBlankToken(toks) 810 switch tok := toks[len(toks)-1]; tok.char { 811 case '\n': 812 return &ppIfndefDirective{name: name, toks: toks} 813 default: 814 if s.ctx.cfg.RejectIfndefExtraTokens { 815 s.err(&tok, "extra tokens after #ifndef") 816 } 817 return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)} 818 } 819 case '\n': 820 s.err(tok, "expected identifier") 821 return &ppIfndefDirective{name: name, toks: toks} 822 default: 823 s.err(tok, "expected identifier") 824 return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)} 825 } 826 } 827 828 // # ifdef identifier new-line 829 func (s *scanner) parseIfdef(toks []token3) *ppIfdefDirective { 830 var name StringID 831 toks = s.scanToNonBlankToken(toks) 832 switch tok := toks[len(toks)-1]; tok.char { 833 case IDENTIFIER: 834 name = tok.value 835 toks = s.scanToNonBlankToken(toks) 836 switch tok := toks[len(toks)-1]; tok.char { 837 case '\n': 838 return &ppIfdefDirective{name: name, toks: toks} 839 default: 840 if s.ctx.cfg.RejectIfdefExtraTokens { 841 s.err(&tok, "extra tokens after #ifdef") 842 } 843 return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)} 844 } 845 case '\n': 846 s.err(tok, "expected identifier") 847 return &ppIfdefDirective{name: name, toks: toks} 848 default: 849 s.err(tok, "expected identifier") 850 return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)} 851 } 852 } 853 854 // # if constant-expression new-line 855 func (s *scanner) parseIf(toks []token3) *ppIfDirective { 856 n := len(toks) 857 toks = s.scanToNonBlankToken(toks) 858 switch tok := toks[len(toks)-1]; tok.char { 859 case 
'\n': 860 s.err(tok, "expected expression") 861 return &ppIfDirective{toks: toks} 862 default: 863 toks = s.scanLineToEOL(toks) 864 expr := toks[n:] 865 if expr[0].char == ' ' { // sans leading space 866 expr = expr[1:] 867 } 868 expr = expr[:len(expr)-1] // sans '\n' 869 return &ppIfDirective{toks: toks, expr: expr} 870 } 871 } 872 873 // # endif new-line 874 func (s *scanner) parseEndif(toks []token3) *ppEndifDirective { 875 toks = s.scanToNonBlankToken(toks) 876 switch tok := toks[len(toks)-1]; tok.char { 877 case '\n': 878 return &ppEndifDirective{toks} 879 default: 880 if s.ctx.cfg.RejectEndifExtraTokens { 881 s.err(&tok, "extra tokens after #else") 882 } 883 return &ppEndifDirective{s.scanLineToEOL(toks)} 884 } 885 } 886 887 // # else new-line 888 func (s *scanner) parseElse(toks []token3) *ppElseDirective { 889 toks = s.scanToNonBlankToken(toks) 890 switch tok := toks[len(toks)-1]; tok.char { 891 case '\n': 892 return &ppElseDirective{toks} 893 default: 894 if s.ctx.cfg.RejectElseExtraTokens { 895 s.err(&tok, "extra tokens after #else") 896 } 897 return &ppElseDirective{s.scanLineToEOL(toks)} 898 } 899 } 900 901 // # elif constant-expression new-line 902 func (s *scanner) parseElif(toks []token3) *ppElifDirective { 903 n := len(toks) 904 toks = s.scanToNonBlankToken(toks) 905 switch tok := toks[len(toks)-1]; tok.char { 906 case '\n': 907 s.err(tok, "expected expression") 908 return &ppElifDirective{toks, nil} 909 default: 910 toks = s.scanLineToEOL(toks) 911 expr := toks[n:] 912 if expr[0].char == ' ' { // sans leading space 913 expr = expr[1:] 914 } 915 expr = expr[:len(expr)-1] // sans '\n' 916 return &ppElifDirective{toks, expr} 917 } 918 } 919 920 func (s *scanner) parseDefine(toks []token3) ppLine { 921 toks = s.scanToNonBlankToken(toks) 922 switch tok := toks[len(toks)-1]; tok.char { 923 case IDENTIFIER: 924 name := tok 925 n := len(toks) 926 toks = s.scanToNonBlankToken(toks) 927 switch tok := toks[len(toks)-1]; tok.char { 928 case '\n': 929 return 
&ppDefineObjectMacroDirective{name: name, toks: toks} 930 case '(': 931 if toks[n].char == ' ' { 932 return s.parseDefineObjectMacro(n, name, toks) 933 } 934 935 return s.parseDefineFunctionMacro(name, toks) 936 default: 937 return s.parseDefineObjectMacro(n, name, toks) 938 } 939 case '\n': 940 s.err(tok, "expected identifier") 941 return &ppDefineObjectMacroDirective{toks: toks} 942 default: 943 s.err(tok, "expected identifier") 944 return &ppDefineObjectMacroDirective{toks: s.scanLineToEOL(toks)} 945 } 946 } 947 948 // # define identifier lparen identifier-list_opt ) replacement-list new-line 949 // # define identifier lparen ... ) replacement-list new-line 950 // # define identifier lparen identifier-list , ... ) replacement-list new-line 951 func (s *scanner) parseDefineFunctionMacro(name token3, toks []token3) *ppDefineFunctionMacroDirective { 952 // Parse parameters after "#define name(". 953 var list []token3 954 variadic := false 955 namedVariadic := false 956 again: 957 toks = s.scanToNonBlankToken(toks) 958 switch tok := toks[len(toks)-1]; tok.char { 959 case IDENTIFIER: 960 more: 961 list = append(list, tok) 962 toks = s.scanToNonBlankToken(toks) 963 switch tok = toks[len(toks)-1]; tok.char { 964 case ',': 965 toks = s.scanToNonBlankToken(toks) 966 switch tok = toks[len(toks)-1]; tok.char { 967 case IDENTIFIER: 968 goto more 969 case DDD: 970 if toks, variadic = s.parseDDD(toks); !variadic { 971 goto again 972 } 973 case ')': 974 s.err(tok, "expected parameter name") 975 default: 976 s.err(tok, "unexpected %q", &tok) 977 } 978 case DDD: 979 namedVariadic = true 980 if s.ctx.cfg.RejectInvalidVariadicMacros { 981 s.err(tok, "expected comma") 982 } 983 if toks, variadic = s.parseDDD(toks); !variadic { 984 goto again 985 } 986 case ')': 987 // ok 988 case '\n': 989 s.err(tok, "unexpected new-line") 990 return &ppDefineFunctionMacroDirective{toks: toks} 991 case IDENTIFIER: 992 s.err(tok, "expected comma") 993 goto more 994 default: 995 s.err(tok, 
"unexpected %q", &tok) 996 } 997 case DDD: 998 if toks, variadic = s.parseDDD(toks); !variadic { 999 goto again 1000 } 1001 case ',': 1002 s.err(tok, "expected parameter name") 1003 goto again 1004 case ')': 1005 // ok 1006 default: 1007 s.err(tok, "expected parameter name") 1008 goto again 1009 } 1010 // Parse replacement list. 1011 n := len(toks) 1012 toks = s.scanToNonBlankToken(toks) 1013 switch tok := toks[len(toks)-1]; tok.char { 1014 case '\n': 1015 if s.ctx.cfg.RejectFunctionMacroEmptyReplacementList { 1016 s.err(tok, "expected replacement list") 1017 } 1018 return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, variadic: variadic, namedVariadic: namedVariadic} 1019 default: 1020 toks = s.scanLineToEOL(toks) 1021 repl := toks[n:] // sans #define identifier 1022 repl = repl[:len(repl)-1] // sans '\n' 1023 // 6.10.3, 7 1024 // 1025 // Any white-space characters preceding or following the 1026 // replacement list of preprocessing tokens are not considered 1027 // part of the replacement list for either form of macro. 
1028 repl = trim3(repl) 1029 repl = normalizeHashes(repl) 1030 return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, replacementList: repl, variadic: variadic, namedVariadic: namedVariadic} 1031 } 1032 } 1033 1034 func isWhite(char rune) bool { 1035 switch char { 1036 case ' ', '\t', '\n', '\v', '\f': 1037 return true 1038 } 1039 return false 1040 } 1041 1042 func trim3(toks []token3) []token3 { 1043 for len(toks) != 0 && isWhite(toks[0].char) { 1044 toks = toks[1:] 1045 } 1046 for len(toks) != 0 && isWhite(toks[len(toks)-1].char) { 1047 toks = toks[:len(toks)-1] 1048 } 1049 return toks 1050 } 1051 1052 func normalizeHashes(toks []token3) []token3 { 1053 w := 0 1054 var last rune 1055 for _, v := range toks { 1056 switch { 1057 case v.char == PPPASTE: 1058 if isWhite(last) { 1059 w-- 1060 } 1061 case isWhite(v.char): 1062 if last == '#' || last == PPPASTE { 1063 continue 1064 } 1065 } 1066 last = v.char 1067 toks[w] = v 1068 w++ 1069 } 1070 return toks[:w] 1071 } 1072 1073 func (s *scanner) parseDDD(toks []token3) ([]token3, bool) { 1074 toks = s.scanToNonBlankToken(toks) 1075 switch tok := toks[len(toks)-1]; tok.char { 1076 case ')': 1077 return toks, true 1078 default: 1079 s.err(tok, "expected right parenthesis") 1080 return toks, false 1081 } 1082 } 1083 1084 // # define identifier replacement-list new-line 1085 func (s *scanner) parseDefineObjectMacro(n int, name token3, toks []token3) *ppDefineObjectMacroDirective { 1086 toks = s.scanLineToEOL(toks) 1087 repl := toks[n:] // sans #define identifier 1088 repl = repl[:len(repl)-1] // sans '\n' 1089 // 6.10.3, 7 1090 // 1091 // Any white-space characters preceding or following the replacement 1092 // list of preprocessing tokens are not considered part of the 1093 // replacement list for either form of macro. 
1094 repl = trim3(repl) 1095 repl = normalizeHashes(repl) 1096 return &ppDefineObjectMacroDirective{name: name, toks: toks, replacementList: repl} 1097 } 1098 1099 // Return {}, {x} or {' ', x} 1100 func (s *scanner) scanToNonBlankToken(toks []token3) []token3 { 1101 n := len(s.tokenBuf) - len(toks) 1102 for { 1103 s.lex() 1104 if s.tok.char < 0 { 1105 return s.tokenBuf[n:] 1106 } 1107 1108 s.tokenBuf = append(s.tokenBuf, s.tok) 1109 if s.tok.char != ' ' { 1110 return s.tokenBuf[n:] 1111 } 1112 } 1113 } 1114 1115 // ---------------------------------------------------------------------- Cache 1116 1117 // Translation phase4 source. 1118 type source interface { 1119 ppFile() (*ppFile, error) 1120 } 1121 1122 type cachedPPFile struct { 1123 err error 1124 errs goscanner.ErrorList 1125 modTime int64 // time.Time.UnixNano() 1126 pf *ppFile 1127 readyCh chan struct{} 1128 size int 1129 } 1130 1131 func (c *cachedPPFile) ready() *cachedPPFile { close(c.readyCh); return c } 1132 func (c *cachedPPFile) waitFor() (*cachedPPFile, error) { <-c.readyCh; return c, c.err } 1133 1134 func (c *cachedPPFile) ppFile() (*ppFile, error) { 1135 c.waitFor() 1136 if c.err == nil { 1137 return c.pf, nil 1138 } 1139 1140 return nil, c.err 1141 } 1142 1143 type cacheKey struct { 1144 name StringID 1145 sys bool 1146 value StringID 1147 Config3 1148 } 1149 1150 type ppCache struct { 1151 mu sync.RWMutex 1152 m map[cacheKey]*cachedPPFile 1153 } 1154 1155 func newPPCache() *ppCache { return &ppCache{m: map[cacheKey]*cachedPPFile{}} } 1156 1157 func (c *ppCache) get(ctx *context, src Source) (source, error) { 1158 if src.Value != "" { 1159 return c.getValue(ctx, src.Name, src.Value, false, src.DoNotCache) 1160 } 1161 1162 return c.getFile(ctx, src.Name, false, src.DoNotCache) 1163 } 1164 1165 func (c *ppCache) getFile(ctx *context, name string, sys bool, doNotCache bool) (*cachedPPFile, error) { 1166 fi, err := ctx.statFile(name, sys) 1167 if err != nil { 1168 return nil, err 1169 } 1170 1171 if 
!fi.Mode().IsRegular() { 1172 return nil, fmt.Errorf("%s is not a regular file", name) 1173 } 1174 1175 if fi.Size() > mathutil.MaxInt { 1176 return nil, fmt.Errorf("%s: file too big", name) 1177 } 1178 1179 size := int(fi.Size()) 1180 if !filepath.IsAbs(name) { // Never cache relative paths 1181 f, err := ctx.openFile(name, sys) 1182 if err != nil { 1183 return nil, err 1184 } 1185 1186 defer f.Close() 1187 1188 tf := tokenNewFile(name, size) 1189 ppFile := newScanner(ctx, f, tf).translationPhase3() 1190 cf := &cachedPPFile{pf: ppFile, readyCh: make(chan struct{})} 1191 cf.ready() 1192 return cf, nil 1193 } 1194 1195 modTime := fi.ModTime().UnixNano() 1196 key := cacheKey{dict.sid(name), sys, 0, ctx.cfg.Config3} 1197 c.mu.Lock() 1198 if cf, ok := c.m[key]; ok { 1199 if modTime <= cf.modTime && size == cf.size { 1200 c.mu.Unlock() 1201 if cf.err != nil { 1202 return nil, cf.err 1203 } 1204 1205 r, err := cf.waitFor() 1206 ctx.errs(cf.errs) 1207 return r, err 1208 } 1209 1210 delete(c.m, key) 1211 } 1212 1213 tf := tokenNewFile(name, size) 1214 cf := &cachedPPFile{modTime: modTime, size: size, readyCh: make(chan struct{})} 1215 if !doNotCache { 1216 c.m[key] = cf 1217 } 1218 c.mu.Unlock() 1219 1220 go func() { 1221 defer cf.ready() 1222 1223 f, err := ctx.openFile(name, sys) 1224 if err != nil { 1225 cf.err = err 1226 return 1227 } 1228 1229 defer f.Close() 1230 1231 ctx2 := newContext(ctx.cfg) 1232 cf.pf = newScanner(ctx2, f, tf).translationPhase3() 1233 cf.errs = ctx2.ErrorList 1234 ctx.errs(cf.errs) 1235 }() 1236 1237 return cf.waitFor() 1238 } 1239 1240 func (c *ppCache) getValue(ctx *context, name, value string, sys bool, doNotCache bool) (*cachedPPFile, error) { 1241 key := cacheKey{dict.sid(name), sys, dict.sid(value), ctx.cfg.Config3} 1242 c.mu.Lock() 1243 if cf, ok := c.m[key]; ok { 1244 c.mu.Unlock() 1245 if cf.err != nil { 1246 return nil, cf.err 1247 } 1248 1249 r, err := cf.waitFor() 1250 ctx.errs(cf.errs) 1251 return r, err 1252 } 1253 1254 tf := 
tokenNewFile(name, len(value)) 1255 cf := &cachedPPFile{readyCh: make(chan struct{})} 1256 if !doNotCache { 1257 c.m[key] = cf 1258 } 1259 c.mu.Unlock() 1260 ctx2 := newContext(ctx.cfg) 1261 cf.pf = newScanner(ctx2, strings.NewReader(value), tf).translationPhase3() 1262 cf.errs = ctx2.ErrorList 1263 ctx.errs(cf.errs) 1264 cf.ready() 1265 return cf.waitFor() 1266 }