gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

scanner.go (29903B)


      1 // Copyright 2019 The CC Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package cc // import "modernc.org/cc/v3"
      6 
      7 import (
      8 	"bufio"
      9 	"bytes"
     10 	"fmt"
     11 	goscanner "go/scanner"
     12 	"io"
     13 	"path/filepath"
     14 	"strconv"
     15 	"strings"
     16 	"sync"
     17 	"unicode/utf8"
     18 
     19 	"modernc.org/mathutil"
     20 	"modernc.org/token"
     21 )
     22 
const (
	// Character classes returned by scanner.class for bytes that do not
	// classify as themselves; placed above maxASCII so they cannot
	// collide with literal ASCII characters.
	clsEOF = iota + 0x80 // end of input
	clsOther             // any non-ASCII byte
)

// maxASCII is the highest byte value that classifies as itself.
const maxASCII = 0x7f
     29 
var (
	// bom is the UTF-8 byte order mark, skipped at the start of a source
	// file by scanner.init.
	bom = []byte{0xEF, 0xBB, 0xBF}

	// Interned names of the preprocessing directives and related tokens,
	// compared against identifier token values in scanLine.
	idDefine      = dict.sid("define")
	idElif        = dict.sid("elif")
	idElse        = dict.sid("else")
	idEndif       = dict.sid("endif")
	idError       = dict.sid("error")
	idIf          = dict.sid("if")
	idIfdef       = dict.sid("ifdef")
	idIfndef      = dict.sid("ifndef")
	idInclude     = dict.sid("include")
	idIncludeNext = dict.sid("include_next")
	idLine        = dict.sid("line")
	idPragma      = dict.sid("pragma")
	idPragmaOp    = dict.sid("_Pragma")
	idSpace       = dict.sid(" ")
	idUndef       = dict.sid("undef")

	// Trigraph sequences and their single-character replacements,
	// applied in next() unless cfg.DisableTrigraphs is set. All
	// trigraphs start with "??", so trigraphPrefix is used as a cheap
	// pre-filter before attempting the replacements.
	trigraphPrefix = []byte("??")
	trigraphs      = []struct{ from, to []byte }{
		{[]byte("??="), []byte{'#'}},
		{[]byte("??("), []byte{'['}},
		{[]byte("??/"), []byte{'\\'}},
		{[]byte("??)"), []byte{']'}},
		{[]byte("??'"), []byte{'^'}},
		{[]byte("??<"), []byte{'{'}},
		{[]byte("??!"), []byte{'|'}},
		{[]byte("??>"), []byte{'}'}},
		{[]byte("??-"), []byte{'~'}},
	}
)
     62 
     63 type tokenFile struct {
     64 	*token.File
     65 	sync.RWMutex
     66 }
     67 
     68 func tokenNewFile(name string, sz int) *tokenFile { return &tokenFile{File: token.NewFile(name, sz)} }
     69 
     70 func (f *tokenFile) Position(pos token.Pos) (r token.Position) {
     71 	f.RLock()
     72 	r = f.File.Position(pos)
     73 	f.RUnlock()
     74 	return r
     75 }
     76 
     77 func (f *tokenFile) PositionFor(pos token.Pos, adjusted bool) (r token.Position) {
     78 	f.RLock()
     79 	r = f.File.PositionFor(pos, adjusted)
     80 	f.RUnlock()
     81 	return r
     82 }
     83 
     84 func (f *tokenFile) AddLine(off int) {
     85 	f.Lock()
     86 	f.File.AddLine(off)
     87 	f.Unlock()
     88 }
     89 
     90 func (f *tokenFile) AddLineInfo(off int, fn string, line int) {
     91 	f.Lock()
     92 	f.File.AddLineInfo(off, fn, line)
     93 	f.Unlock()
     94 }
     95 
// node is implemented by anything that has a source position.
type node interface {
	Pos() token.Pos
}
     99 
// dictionary interns strings, assigning each distinct string a unique
// StringID. It is safe for concurrent use.
type dictionary struct {
	mu      sync.RWMutex
	m       map[string]StringID // string -> its id
	strings []string            // indexed by StringID
}
    105 
    106 func newDictionary() (r *dictionary) {
    107 	r = &dictionary{m: map[string]StringID{}}
    108 	b := make([]byte, 1)
    109 	for i := 0; i < 128; i++ {
    110 		var s string
    111 		if i != 0 {
    112 			b[0] = byte(i)
    113 			s = string(b)
    114 		}
    115 		r.m[s] = StringID(i)
    116 		r.strings = append(r.strings, s)
    117 		dictStrings[i] = s
    118 	}
    119 	return r
    120 }
    121 
    122 func (d *dictionary) id(key []byte) StringID {
    123 	switch len(key) {
    124 	case 0:
    125 		return 0
    126 	case 1:
    127 		if c := key[0]; c != 0 && c < 128 {
    128 			return StringID(c)
    129 		}
    130 	}
    131 
    132 	d.mu.Lock()
    133 	if n, ok := d.m[string(key)]; ok {
    134 		d.mu.Unlock()
    135 		return n
    136 	}
    137 
    138 	n := StringID(len(d.strings))
    139 	s := string(key)
    140 	if int(n) < 256 {
    141 		dictStrings[n] = s
    142 	}
    143 	d.strings = append(d.strings, s)
    144 	d.m[s] = n
    145 	d.mu.Unlock()
    146 	return n
    147 }
    148 
    149 func (d *dictionary) sid(key string) StringID {
    150 	switch len(key) {
    151 	case 0:
    152 		return 0
    153 	case 1:
    154 		if c := key[0]; c != 0 && c < 128 {
    155 			return StringID(c)
    156 		}
    157 	}
    158 
    159 	d.mu.Lock()
    160 	if n, ok := d.m[key]; ok {
    161 		d.mu.Unlock()
    162 		return n
    163 	}
    164 
    165 	n := StringID(len(d.strings))
    166 	if int(n) < 256 {
    167 		dictStrings[n] = key
    168 	}
    169 	d.strings = append(d.strings, key)
    170 	d.m[key] = n
    171 	d.mu.Unlock()
    172 	return n
    173 }
    174 
// char is a single source character tagged with its position. A zero
// pos marks an invalid/absent character (see scanner.unget).
type char struct {
	pos int32
	c   byte
}

// token3 is produced by translation phase 3.
type token3 struct {
	char  rune     // token kind: a literal character or a token constant (IDENTIFIER, ...)
	pos   int32    // position within the token file
	value StringID // normalized value (UCN-decoded identifiers, collapsed white space)
	src   StringID // exact source spelling
	macro StringID // NOTE(review): presumably the macro this token was expanded from; set elsewhere — confirm
}

func (t token3) Pos() token.Pos { return token.Pos(t.pos) }
func (t token3) String() string { return t.value.String() }
    191 
// scanner implements translation phases 1-3: reading source bytes,
// trigraph replacement and line splicing, and decomposition into
// preprocessing tokens and lines.
type scanner struct {
	bomFix        int    // bytes of a skipped UTF-8 BOM, pending file-offset adjustment
	bytesBuf      []byte // scratch for decoding identifier UCNs
	charBuf       []char // characters consumed for the current token
	ctx           *context
	file          *tokenFile
	fileOffset    int       // offset within file of the next physical line to read
	firstPos      token.Pos // position of the current token's first character
	lineBuf       []byte    // unconsumed remainder of the current logical line
	lookaheadChar char
	lookaheadLine ppLine
	mark          int // charBuf index for backtracking; -1 when unset
	pos           token.Pos
	r             *bufio.Reader
	srcBuf        []byte   // source bytes of the current token
	tokenBuf      []token3 // tokens of the current logical line
	ungetBuf      []char   // pushed-back characters, replayed LIFO

	tok token3 // last token produced by lex

	closed             bool // set when the error limit was reached
	preserveWhiteSpace bool // keep exact white space (used for #include arguments)
}
    215 
    216 func newScanner0(ctx *context, r io.Reader, file *tokenFile, bufSize int) *scanner {
    217 	s := &scanner{
    218 		ctx:  ctx,
    219 		file: file,
    220 		r:    bufio.NewReaderSize(r, bufSize),
    221 	}
    222 	if r != nil {
    223 		s.init()
    224 	}
    225 	return s
    226 }
    227 
    228 func newScanner(ctx *context, r io.Reader, file *tokenFile) *scanner {
    229 	bufSize := 1 << 17 // emulate gcc
    230 	if n := ctx.cfg.MaxSourceLine; n > 4096 {
    231 		bufSize = n
    232 	}
    233 	return newScanner0(ctx, r, file, bufSize)
    234 }
    235 
// abort unwinds a failed multi-character scan attempt. With an active
// mark it ungets everything consumed since the mark and reports
// failure (b == false). Without a mark it accepts the first consumed
// character as a one-character token (returning its class with
// b == true) and pushes any further consumed characters back so they
// are rescanned.
func (s *scanner) abort() (r byte, b bool) {
	if s.mark >= 0 {
		if len(s.charBuf) > s.mark {
			// Unget in reverse so characters replay in original order
			// (ungetBuf is LIFO).
			s.unget(s.lookaheadChar)
			for i := len(s.charBuf) - 1; i >= s.mark; i-- {
				s.unget(s.charBuf[i])
			}
		}
		s.charBuf = s.charBuf[:s.mark]
		return 0, false
	}

	switch n := len(s.charBuf); n {
	case 0: // [] z
		c := s.lookaheadChar
		s.next()
		return s.class(c.c), true
	case 1: // [a] z
		return s.class(s.charBuf[0].c), true
	default: // [a, b, ...], z
		c := s.charBuf[0]        // a
		s.unget(s.lookaheadChar) // z
		for i := n - 1; i > 1; i-- {
			s.unget(s.charBuf[i]) // ...
		}
		s.lookaheadChar = s.charBuf[1] // b
		s.charBuf = s.charBuf[:1]
		return s.class(c.c), true
	}
}
    266 
    267 func (s *scanner) class(b byte) byte {
    268 	switch {
    269 	case b == 0:
    270 		return clsEOF
    271 	case b > maxASCII:
    272 		return clsOther
    273 	default:
    274 		return b
    275 	}
    276 }
    277 
    278 func (s *scanner) err(n node, msg string, args ...interface{}) { s.errPos(n.Pos(), msg, args...) }
    279 
// errLine reports an error for a whole preprocessing line. x is nil or
// a ppLine; the line's tokens (new-lines dropped, spaces collapsed to
// one byte) are rendered and prefixed to msg. The error is positioned
// at the line's first token.
//
// NOTE(review): assumes a non-nil ppLine yields at least one token
// from getToks(); an empty slice would panic on toks[0] — confirm.
func (s *scanner) errLine(x interface{}, msg string, args ...interface{}) {
	var toks []token3
	switch x := x.(type) {
	case nil:
		// Synthesize a zero token so there is a position to report.
		toks = []token3{{}}
	case ppLine:
		toks = x.getToks()
	default:
		panic(internalError())
	}
	var b strings.Builder
	for _, v := range toks {
		switch v.char {
		case '\n':
			// nop
		case ' ':
			b.WriteByte(' ')
		default:
			b.WriteString(v.String())
		}
	}
	s.err(toks[0], "%s"+msg, append([]interface{}{b.String()}, args...)...)
}
    303 
// errPos reports an error at pos. When ctx.err signals that the error
// limit was reached, the scanner shuts down: the reader is released
// and the scanner marked closed so next() returns EOF.
func (s *scanner) errPos(pos token.Pos, msg string, args ...interface{}) {
	if s.ctx.err(s.file.Position(pos), msg, args...) {
		s.r.Reset(nil)
		s.closed = true
	}
}
    310 
    311 func (s *scanner) init() *scanner {
    312 	if s.r == nil {
    313 		return s
    314 	}
    315 
    316 	b, err := s.r.Peek(3)
    317 	if err == nil && bytes.Equal(b, bom) {
    318 		s.bomFix, _ = s.r.Discard(3)
    319 	}
    320 	s.tokenBuf = nil
    321 	return s
    322 }
    323 
// initScan prepares for scanning the next token: it primes the
// lookahead character if absent, records the token start position,
// clears the backtracking mark and resets the per-token buffers.
// Returns the class of the lookahead character.
func (s *scanner) initScan() (r byte) {
	if s.lookaheadChar.pos == 0 {
		s.next()
	}
	s.firstPos = token.Pos(s.lookaheadChar.pos)
	s.mark = -1
	if len(s.charBuf) > 1<<18 { //DONE benchmark tuned
		// Drop oversized buffers so one huge token does not pin memory
		// for the rest of the scan.
		s.bytesBuf = nil
		s.charBuf = nil
		s.srcBuf = nil
	} else {
		s.bytesBuf = s.bytesBuf[:0]
		s.charBuf = s.charBuf[:0]
		// NOTE(review): srcBuf is reset to bytesBuf[:0], aliasing the two
		// buffers; lex consumes srcBuf (via dict.id) before it writes
		// bytesBuf, so this appears safe — confirm it is intentional and
		// not meant to be s.srcBuf[:0].
		s.srcBuf = s.bytesBuf[:0]
	}
	return s.class(s.lookaheadChar.c)
}
    341 
// lex scans the next token into s.tok. The token's exact source
// spelling is interned into tok.src; tok.value is the normalized form:
// a single interned space for collapsed white space, the UCN-decoded
// UTF-8 spelling for identifiers, and tok.src otherwise. At end of
// input tok.char becomes -1, positioned at the end of the file.
func (s *scanner) lex() {
	s.tok.char = s.scan()
	s.tok.pos = int32(s.firstPos)
	for _, v := range s.charBuf {
		s.srcBuf = append(s.srcBuf, v.c)
	}
	s.tok.src = dict.id(s.srcBuf)
	switch {
	case s.tok.char == ' ' && !s.preserveWhiteSpace && !s.ctx.cfg.PreserveWhiteSpace:
		s.tok.value = idSpace
	case s.tok.char == IDENTIFIER:
		// Decode universal character names (\uXXXX, \UXXXXXXXX) so the
		// value holds the identifier's UTF-8 spelling while tok.src
		// keeps the original source spelling.
		for i := 0; i < len(s.charBuf); {
			c := s.charBuf[i].c
			if c != '\\' {
				s.bytesBuf = append(s.bytesBuf, c)
				i++
				continue
			}

			i++ // Skip '\\'
			var n int
			switch s.charBuf[i].c {
			case 'u':
				n = 4
			case 'U':
				n = 8
			default:
				panic(internalError())
			}
			i++ // Skip 'u' or 'U'
			l := len(s.bytesBuf)
			for i0 := i; i < i0+n; i++ {
				s.bytesBuf = append(s.bytesBuf, s.charBuf[i].c)
			}
			r, err := strconv.ParseUint(string(s.bytesBuf[l:l+n]), 16, 32)
			if err != nil {
				panic(internalError())
			}

			// Overwrite the hex digits in place with the encoded rune.
			n2 := utf8.EncodeRune(s.bytesBuf[l:], rune(r))
			s.bytesBuf = s.bytesBuf[:l+n2]
		}
		s.tok.value = dict.id(s.bytesBuf)
	default:
		s.tok.value = s.tok.src
	}
	switch s.tok.char {
	case clsEOF:
		s.tok.char = -1
		s.tok.pos = int32(s.file.Pos(s.file.Size()))
	}
	// dbg("lex %q %q", tokName(s.tok.char), s.tok.value)
}
    395 
// next advances the lookahead character, appending the previous
// lookahead to charBuf. Pushed-back characters (ungetBuf) are replayed
// first; otherwise characters come from the current logical line
// buffer, and when that is empty a new physical line is read and
// preprocessed: trigraph replacement ([0], 5.1.1.2, 1.1) and
// backslash-new-line splicing ([0], 5.1.1.2, 2), including the \r\n
// variant. Returns the class of the new lookahead character.
func (s *scanner) next() (r byte) {
	if s.lookaheadChar.pos > 0 {
		s.charBuf = append(s.charBuf, s.lookaheadChar)
	}
	if n := len(s.ungetBuf); n != 0 {
		// Replay pushed-back characters, most recent first.
		s.lookaheadChar = s.ungetBuf[n-1]
		s.ungetBuf = s.ungetBuf[:n-1]
		return s.class(s.lookaheadChar.c)
	}

	if len(s.lineBuf) == 0 {
	more:
		if s.closed || s.fileOffset == s.file.Size() {
			s.lookaheadChar.c = 0
			s.lookaheadChar.pos = 0
			return clsEOF
		}

		b, err := s.r.ReadSlice('\n')
		if err != nil {
			if err != io.EOF {
				s.errPos(s.pos, "error while reading %s: %s", s.file.Name(), err)
			}
			if len(b) == 0 {
				return clsEOF
			}
		}

		s.file.AddLine(s.fileOffset)
		// Account for a skipped BOM (once) so positions match the file.
		s.fileOffset += s.bomFix
		s.bomFix = 0
		s.pos = token.Pos(s.fileOffset)
		s.fileOffset += len(b)

		// [0], 5.1.1.2, 1.1
		//
		// Physical source file multibyte characters are mapped, in an
		// implementation- defined manner, to the source character set
		// (introducing new-line characters for end-of-line indicators)
		// if necessary. Trigraph sequences are replaced by
		// corresponding single-character internal representations.
		if !s.ctx.cfg.DisableTrigraphs && bytes.Contains(b, trigraphPrefix) {
			for _, v := range trigraphs {
				b = bytes.Replace(b, v.from, v.to, -1)
			}
		}

		// [0], 5.1.1.2, 2
		//
		// Each instance of a backslash character (\) immediately
		// followed by a new-line character is deleted, splicing
		// physical source lines to form logical source lines.  Only
		// the last backslash on any physical source line shall be
		// eligible for being part of such a splice. A source file that
		// is not empty shall end in a new-line character, which shall
		// not be immediately preceded by a backslash character before
		// any such splicing takes place.
		s.lineBuf = b
		n := len(b)
		switch {
		case b[n-1] != '\n':
			// Last line of the file lacks a final new-line; supply one.
			if s.ctx.cfg.RejectMissingFinalNewline {
				s.errPos(s.pos+token.Pos(n), "non empty source file shall end in a new-line character")
			}
			b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
		case n > 1 && b[n-2] == '\\':
			// Splice: drop "\<newline>" and continue with the next
			// physical line.
			if n == 2 {
				goto more
			}

			b = b[:n-2]
			n = len(b)
			if s.fileOffset == s.file.Size() {
				if s.ctx.cfg.RejectFinalBackslash {
					s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
				}
				b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
			}
		case n > 2 && b[n-3] == '\\' && b[n-2] == '\r':
			// we've got a windows source that has \r\n line endings.
			if n == 3 {
				goto more
			}

			b = b[:n-3]
			n = len(b)
			if s.fileOffset == s.file.Size() {
				if s.ctx.cfg.RejectFinalBackslash {
					s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
				}
				b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
			}
		}
		s.lineBuf = b
	}
	s.pos++
	s.lookaheadChar = char{int32(s.pos), s.lineBuf[0]}
	s.lineBuf = s.lineBuf[1:]
	return s.class(s.lookaheadChar.c)
}
    496 
// unget pushes characters back onto the input; the last pushed is the
// first read back by next(). The current lookahead is invalidated so
// next() refills it from the unget buffer.
func (s *scanner) unget(c ...char) {
	s.ungetBuf = append(s.ungetBuf, c...)
	s.lookaheadChar.pos = 0 // Must invalidate lookahead.
}
    501 
// unterminatedComment reports an unterminated comment at end of file
// and yields a single space in its place (each comment is replaced by
// one space, [0] 5.1.1.2, 3). The trailing new-line in charBuf is
// pushed back so the final line token is still produced.
func (s *scanner) unterminatedComment() rune {
	s.errPos(token.Pos(s.file.Size()), "unterminated comment")
	n := len(s.charBuf)
	s.unget(s.charBuf[n-1]) // \n
	s.charBuf = s.charBuf[:n-1]
	return ' '
}
    509 
    510 // -------------------------------------------------------- Translation phase 3
    511 
// [0], 5.1.1.2, 3
//
// The source file is decomposed into preprocessing tokens and sequences of
// white-space characters (including comments). A source file shall not end in
// a partial preprocessing token or in a partial comment. Each comment is
// replaced by one space character. New-line characters are retained. Whether
// each nonempty sequence of white-space characters other than new-line is
// retained or replaced by one space character is implementation-defined.
//
// translationPhase3 parses the whole file into a ppFile of groups.
func (s *scanner) translationPhase3() *ppFile {
	r := &ppFile{file: s.file}
	if s.file.Size() == 0 {
		// Nothing to scan; release the reader's buffer immediately.
		s.r.Reset(nil)
		return r
	}

	s.nextLine()
	r.groups = s.parseGroup()
	return r
}
    531 
    532 func (s *scanner) parseGroup() (r []ppGroup) {
    533 	for {
    534 		switch x := s.lookaheadLine.(type) {
    535 		case ppGroup:
    536 			r = append(r, x)
    537 			s.nextLine()
    538 		case ppIfGroupDirective:
    539 			r = append(r, s.parseIfSection())
    540 		default:
    541 			return r
    542 		}
    543 	}
    544 }
    545 
    546 func (s *scanner) parseIfSection() *ppIfSection {
    547 	return &ppIfSection{
    548 		ifGroup:    s.parseIfGroup(),
    549 		elifGroups: s.parseElifGroup(),
    550 		elseGroup:  s.parseElseGroup(),
    551 		endifLine:  s.parseEndifLine(),
    552 	}
    553 }
    554 
// parseEndifLine consumes the "# endif" line that closes an
// if-section. Any other line is reported as an error and skipped,
// returning nil.
func (s *scanner) parseEndifLine() *ppEndifDirective {
	switch x := s.lookaheadLine.(type) {
	case *ppEndifDirective:
		s.nextLine()
		return x
	default:
		s.errLine(x, fmt.Sprintf(": expected #endif (unexpected %T)", x))
		s.nextLine()
		return nil
	}
}
    566 
    567 func (s *scanner) parseElseGroup() *ppElseGroup {
    568 	switch x := s.lookaheadLine.(type) {
    569 	case *ppElseDirective:
    570 		r := &ppElseGroup{elseLine: x}
    571 		s.nextLine()
    572 		r.groups = s.parseGroup()
    573 		return r
    574 	default:
    575 		return nil
    576 	}
    577 }
    578 
    579 func (s *scanner) parseElifGroup() (r []*ppElifGroup) {
    580 	for {
    581 		var g ppElifGroup
    582 		switch x := s.lookaheadLine.(type) {
    583 		case *ppElifDirective:
    584 			g.elif = x
    585 			s.nextLine()
    586 			g.groups = s.parseGroup()
    587 			r = append(r, &g)
    588 		default:
    589 			return r
    590 		}
    591 	}
    592 }
    593 
// parseIfGroup parses "# if/ifdef/ifndef ..." followed by its nested
// groups. A lookahead that is not an if-group directive is reported as
// an error; the line is consumed either way so parsing can continue.
func (s *scanner) parseIfGroup() *ppIfGroup {
	r := &ppIfGroup{}
	switch x := s.lookaheadLine.(type) {
	case ppIfGroupDirective:
		r.directive = x
	default:
		s.errLine(x, fmt.Sprintf(": expected if-group (unexpected %T)", x))
	}
	s.nextLine()
	r.groups = s.parseGroup()
	return r
}
    606 
// nextLine resets the per-line token buffer and scans the next logical
// line into the lookahead.
func (s *scanner) nextLine() {
	s.tokenBuf = nil
	s.lookaheadLine = s.scanLine()
}
    611 
// scanLine scans one logical line and classifies it: a directive (a
// line whose first non-blank token is '#'), a text line, or nil at end
// of input. The _Pragma("...") operator ([0], 6.10.9) is rewritten
// into a "#pragma ..." line pushed back onto the input and rescanned.
func (s *scanner) scanLine() (r ppLine) {
again:
	toks := s.scanToNonBlankToken(nil)
	if len(toks) == 0 {
		return nil
	}

	includeNext := false
	switch tok := toks[len(toks)-1]; tok.char {
	case '#':
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppEmptyDirective{toks: toks}
		case IDENTIFIER:
			// Dispatch on the directive name.
			switch tok.value {
			case idDefine:
				return s.parseDefine(toks)
			case idElif:
				return s.parseElif(toks)
			case idElse:
				return s.parseElse(toks)
			case idEndif:
				return s.parseEndif(toks)
			case idIf:
				return s.parseIf(toks)
			case idIfdef:
				return s.parseIfdef(toks)
			case idIfndef:
				return s.parseIfndef(toks)
			case idIncludeNext:
				includeNext = true
				fallthrough
			case idInclude:
				// # include pp-tokens new-line
				//
				// Prevent aliasing of eg. <foo  bar.h> and <foo bar.h>.
				save := s.preserveWhiteSpace
				s.preserveWhiteSpace = true
				n := len(toks)
				toks := s.scanLineToEOL(toks)
				r := &ppIncludeDirective{arg: toks[n : len(toks)-1], toks: toks, includeNext: includeNext}
				s.preserveWhiteSpace = save
				return r
			case idUndef:
				return s.parseUndef(toks)
			case idLine:
				return s.parseLine(toks)
			case idError:
				// # error pp-tokens_opt new-line
				n := len(toks)
				toks := s.scanLineToEOL(toks)
				msg := toks[n : len(toks)-1]
				if len(msg) != 0 && msg[0].char == ' ' {
					msg = msg[1:]
				}
				return &ppErrorDirective{toks: toks, msg: msg}
			case idPragma:
				return s.parsePragma(toks)
			}
		}

		// # non-directive
		return &ppNonDirective{toks: s.scanLineToEOL(toks)}
	case '\n':
		return &ppTextLine{toks: toks}
	case IDENTIFIER:
		if tok.value == idPragmaOp {
			// _Pragma ( string-literal )
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case '(':
				// ok
			default:
				s.err(tok, "expected (")
				return &ppTextLine{toks: toks}
			}

			var lit string
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case STRINGLITERAL:
				lit = tok.String()
			case LONGSTRINGLITERAL:
				lit = tok.String()[1:] // [0], 6.9.10, 1
			default:
				s.err(tok, "expected string literal")
				return &ppTextLine{toks: toks}
			}

			pos := tok.pos
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case ')':
				// ok
			default:
				s.err(tok, "expected )")
				return &ppTextLine{toks: toks}
			}

			s.unget(s.lookaheadChar)
			// [0], 6.9.10, 1
			//
			// Destringize: strip the quotes and undo \" and \\ escapes,
			// then push "#pragma <content>\n" back onto the input and
			// rescan it as a directive line. Characters are ungot in
			// reverse because ungetBuf replays LIFO.
			lit = lit[1 : len(lit)-1]
			lit = strings.ReplaceAll(lit, `\"`, `"`)
			lit = strings.ReplaceAll(lit, `\\`, `\`)
			lit = "#pragma " + lit + "\n"
			for i := len(lit) - 1; i >= 0; i-- {
				s.unget(char{pos, lit[i]})
			}
			goto again
		}

		fallthrough
	default:
		return &ppTextLine{toks: s.scanLineToEOL(toks)}
	}
}
    728 
// parsePragma parses "# pragma pp-tokens_opt new-line". args holds the
// tokens from the first non-blank token after "pragma" through the
// terminating new-line.
func (s *scanner) parsePragma(toks []token3) *ppPragmaDirective {
	toks = s.scanToNonBlankToken(toks)
	n := len(toks)
	if toks[n-1].char != '\n' {
		toks = s.scanLineToEOL(toks)
	}
	return &ppPragmaDirective{toks: toks, args: toks[n-1:]}
}
    737 
// # line pp-tokens new-line
//
// parseLine parses the directive. nextPos records the position just
// past the directive's final token (where the renumbering takes
// effect); args holds the tokens after "line", space-trimmed.
func (s *scanner) parseLine(toks []token3) *ppLineDirective {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(tok, "unexpected new-line")
		return &ppLineDirective{toks: toks}
	default:
		toks := s.scanLineToEOL(toks)
		last := toks[len(toks)-1]
		r := &ppLineDirective{toks: toks, nextPos: int(last.pos) + len(last.src.String())}
		toks = toks[:len(toks)-1] // sans new-line
		toks = ltrim3(toks)
		toks = toks[1:] // Skip '#'
		toks = ltrim3(toks)
		toks = toks[1:] // Skip "line"
		r.args = ltrim3(toks)
		return r
	}
}
    758 
    759 func ltrim3(toks []token3) []token3 {
    760 	for len(toks) != 0 && toks[0].char == ' ' {
    761 		toks = toks[1:]
    762 	}
    763 	return toks
    764 }
    765 
// # undef identifier new-line
//
// parseUndef parses the directive; name is the undefined identifier
// token. Extra tokens after the identifier are an error only when
// cfg.RejectUndefExtraTokens is set.
func (s *scanner) parseUndef(toks []token3) *ppUndefDirective {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(&tok, "expected identifier")
		return &ppUndefDirective{toks: toks}
	case IDENTIFIER:
		name := tok
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppUndefDirective{name: name, toks: toks}
		default:
			if s.ctx.cfg.RejectUndefExtraTokens {
				s.err(&tok, "extra tokens after #undef")
			}
			return &ppUndefDirective{name: name, toks: s.scanLineToEOL(toks)}
		}
	default:
		s.err(&tok, "expected identifier")
		return &ppUndefDirective{toks: s.scanLineToEOL(toks)}
	}
}
    790 
// scanLineToEOL appends tokens to toks up to and including the next
// new-line token. toks must be the current tail of s.tokenBuf; the
// extended tail is returned (re-sliced, since appending to tokenBuf
// may reallocate it).
func (s *scanner) scanLineToEOL(toks []token3) []token3 {
	n := len(s.tokenBuf) - len(toks) // start index of toks within tokenBuf
	for {
		s.lex()
		s.tokenBuf = append(s.tokenBuf, s.tok)
		if s.tok.char == '\n' {
			return s.tokenBuf[n:]
		}
	}
}
    801 
// # ifndef identifier new-line
//
// parseIfndef parses the directive; name is the tested identifier.
// Extra tokens after the identifier are an error only when
// cfg.RejectIfndefExtraTokens is set.
func (s *scanner) parseIfndef(toks []token3) *ppIfndefDirective {
	var name StringID
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
		name = tok.value
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppIfndefDirective{name: name, toks: toks}
		default:
			if s.ctx.cfg.RejectIfndefExtraTokens {
				s.err(&tok, "extra tokens after #ifndef")
			}
			return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
		}
	case '\n':
		s.err(tok, "expected identifier")
		return &ppIfndefDirective{name: name, toks: toks}
	default:
		s.err(tok, "expected identifier")
		return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
	}
}
    827 
// # ifdef identifier new-line
//
// parseIfdef parses the directive; name is the tested identifier.
// Extra tokens after the identifier are an error only when
// cfg.RejectIfdefExtraTokens is set.
func (s *scanner) parseIfdef(toks []token3) *ppIfdefDirective {
	var name StringID
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
		name = tok.value
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppIfdefDirective{name: name, toks: toks}
		default:
			if s.ctx.cfg.RejectIfdefExtraTokens {
				s.err(&tok, "extra tokens after #ifdef")
			}
			return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
		}
	case '\n':
		s.err(tok, "expected identifier")
		return &ppIfdefDirective{name: name, toks: toks}
	default:
		s.err(tok, "expected identifier")
		return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
	}
}
    853 
// # if constant-expression new-line
//
// parseIf parses the directive. expr holds the constant-expression
// tokens, without a leading space token or the trailing new-line.
func (s *scanner) parseIf(toks []token3) *ppIfDirective {
	n := len(toks)
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(tok, "expected expression")
		return &ppIfDirective{toks: toks}
	default:
		toks = s.scanLineToEOL(toks)
		expr := toks[n:]
		if expr[0].char == ' ' { // sans leading space
			expr = expr[1:]
		}
		expr = expr[:len(expr)-1] // sans '\n'
		return &ppIfDirective{toks: toks, expr: expr}
	}
}
    872 
    873 // # endif new-line
    874 func (s *scanner) parseEndif(toks []token3) *ppEndifDirective {
    875 	toks = s.scanToNonBlankToken(toks)
    876 	switch tok := toks[len(toks)-1]; tok.char {
    877 	case '\n':
    878 		return &ppEndifDirective{toks}
    879 	default:
    880 		if s.ctx.cfg.RejectEndifExtraTokens {
    881 			s.err(&tok, "extra tokens after #else")
    882 		}
    883 		return &ppEndifDirective{s.scanLineToEOL(toks)}
    884 	}
    885 }
    886 
// # else new-line
//
// parseElse parses the directive. Extra tokens after #else are an
// error only when cfg.RejectElseExtraTokens is set.
func (s *scanner) parseElse(toks []token3) *ppElseDirective {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		return &ppElseDirective{toks}
	default:
		if s.ctx.cfg.RejectElseExtraTokens {
			s.err(&tok, "extra tokens after #else")
		}
		return &ppElseDirective{s.scanLineToEOL(toks)}
	}
}
    900 
// # elif constant-expression new-line
//
// parseElif parses the directive. expr holds the constant-expression
// tokens, without a leading space token or the trailing new-line.
func (s *scanner) parseElif(toks []token3) *ppElifDirective {
	n := len(toks)
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(tok, "expected expression")
		return &ppElifDirective{toks, nil}
	default:
		toks = s.scanLineToEOL(toks)
		expr := toks[n:]
		if expr[0].char == ' ' { // sans leading space
			expr = expr[1:]
		}
		expr = expr[:len(expr)-1] // sans '\n'
		return &ppElifDirective{toks, expr}
	}
}
    919 
// parseDefine parses "# define ...". A '(' immediately following the
// macro name — with no intervening white space — starts a
// function-like macro definition ([0], 6.10.3, 3); otherwise the
// definition is an object-like macro.
func (s *scanner) parseDefine(toks []token3) ppLine {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
		name := tok
		n := len(toks)
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppDefineObjectMacroDirective{name: name, toks: toks}
		case '(':
			if toks[n].char == ' ' {
				// Space before '(' => the '(' belongs to the
				// replacement list of an object-like macro.
				return s.parseDefineObjectMacro(n, name, toks)
			}

			return s.parseDefineFunctionMacro(name, toks)
		default:
			return s.parseDefineObjectMacro(n, name, toks)
		}
	case '\n':
		s.err(tok, "expected identifier")
		return &ppDefineObjectMacroDirective{toks: toks}
	default:
		s.err(tok, "expected identifier")
		return &ppDefineObjectMacroDirective{toks: s.scanLineToEOL(toks)}
	}
}
    947 
// # define identifier lparen identifier-list_opt ) replacement-list new-line
// # define identifier lparen ... ) replacement-list new-line
// # define identifier lparen identifier-list , ... ) replacement-list new-line
//
// parseDefineFunctionMacro parses the parameter list and replacement
// list of a function-like macro. list collects the named parameters,
// variadic records a "..." parameter, and namedVariadic records the
// GCC-style "name..." form (rejected when
// cfg.RejectInvalidVariadicMacros is set). On parameter-list errors it
// recovers by continuing to scan for more parameters.
func (s *scanner) parseDefineFunctionMacro(name token3, toks []token3) *ppDefineFunctionMacroDirective {
	// Parse parameters after "#define name(".
	var list []token3
	variadic := false
	namedVariadic := false
again:
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
	more:
		list = append(list, tok)
		toks = s.scanToNonBlankToken(toks)
		switch tok = toks[len(toks)-1]; tok.char {
		case ',':
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case IDENTIFIER:
				goto more
			case DDD:
				if toks, variadic = s.parseDDD(toks); !variadic {
					goto again
				}
			case ')':
				s.err(tok, "expected parameter name")
			default:
				s.err(tok, "unexpected %q", &tok)
			}
		case DDD:
			// "name..." — a named variadic parameter (GCC extension).
			namedVariadic = true
			if s.ctx.cfg.RejectInvalidVariadicMacros {
				s.err(tok, "expected comma")
			}
			if toks, variadic = s.parseDDD(toks); !variadic {
				goto again
			}
		case ')':
			// ok
		case '\n':
			s.err(tok, "unexpected new-line")
			return &ppDefineFunctionMacroDirective{toks: toks}
		case IDENTIFIER:
			s.err(tok, "expected comma")
			goto more
		default:
			s.err(tok, "unexpected %q", &tok)
		}
	case DDD:
		if toks, variadic = s.parseDDD(toks); !variadic {
			goto again
		}
	case ',':
		s.err(tok, "expected parameter name")
		goto again
	case ')':
		// ok
	default:
		s.err(tok, "expected parameter name")
		goto again
	}
	// Parse replacement list.
	n := len(toks)
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		if s.ctx.cfg.RejectFunctionMacroEmptyReplacementList {
			s.err(tok, "expected replacement list")
		}
		return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, variadic: variadic, namedVariadic: namedVariadic}
	default:
		toks = s.scanLineToEOL(toks)
		repl := toks[n:]          // sans #define identifier
		repl = repl[:len(repl)-1] // sans '\n'
		// 6.10.3, 7
		//
		// Any white-space characters preceding or following the
		// replacement list of preprocessing tokens are not considered
		// part of the replacement list for either form of macro.
		repl = trim3(repl)
		repl = normalizeHashes(repl)
		return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, replacementList: repl, variadic: variadic, namedVariadic: namedVariadic}
	}
}
   1033 
   1034 func isWhite(char rune) bool {
   1035 	switch char {
   1036 	case ' ', '\t', '\n', '\v', '\f':
   1037 		return true
   1038 	}
   1039 	return false
   1040 }
   1041 
   1042 func trim3(toks []token3) []token3 {
   1043 	for len(toks) != 0 && isWhite(toks[0].char) {
   1044 		toks = toks[1:]
   1045 	}
   1046 	for len(toks) != 0 && isWhite(toks[len(toks)-1].char) {
   1047 		toks = toks[:len(toks)-1]
   1048 	}
   1049 	return toks
   1050 }
   1051 
   1052 func normalizeHashes(toks []token3) []token3 {
   1053 	w := 0
   1054 	var last rune
   1055 	for _, v := range toks {
   1056 		switch {
   1057 		case v.char == PPPASTE:
   1058 			if isWhite(last) {
   1059 				w--
   1060 			}
   1061 		case isWhite(v.char):
   1062 			if last == '#' || last == PPPASTE {
   1063 				continue
   1064 			}
   1065 		}
   1066 		last = v.char
   1067 		toks[w] = v
   1068 		w++
   1069 	}
   1070 	return toks[:w]
   1071 }
   1072 
   1073 func (s *scanner) parseDDD(toks []token3) ([]token3, bool) {
   1074 	toks = s.scanToNonBlankToken(toks)
   1075 	switch tok := toks[len(toks)-1]; tok.char {
   1076 	case ')':
   1077 		return toks, true
   1078 	default:
   1079 		s.err(tok, "expected right parenthesis")
   1080 		return toks, false
   1081 	}
   1082 }
   1083 
   1084 // # define identifier replacement-list new-line
   1085 func (s *scanner) parseDefineObjectMacro(n int, name token3, toks []token3) *ppDefineObjectMacroDirective {
   1086 	toks = s.scanLineToEOL(toks)
   1087 	repl := toks[n:]          // sans #define identifier
   1088 	repl = repl[:len(repl)-1] // sans '\n'
   1089 	// 6.10.3, 7
   1090 	//
   1091 	// Any white-space characters preceding or following the replacement
   1092 	// list of preprocessing tokens are not considered part of the
   1093 	// replacement list for either form of macro.
   1094 	repl = trim3(repl)
   1095 	repl = normalizeHashes(repl)
   1096 	return &ppDefineObjectMacroDirective{name: name, toks: toks, replacementList: repl}
   1097 }
   1098 
   1099 // Return {}, {x} or {' ', x}
   1100 func (s *scanner) scanToNonBlankToken(toks []token3) []token3 {
   1101 	n := len(s.tokenBuf) - len(toks)
   1102 	for {
   1103 		s.lex()
   1104 		if s.tok.char < 0 {
   1105 			return s.tokenBuf[n:]
   1106 		}
   1107 
   1108 		s.tokenBuf = append(s.tokenBuf, s.tok)
   1109 		if s.tok.char != ' ' {
   1110 			return s.tokenBuf[n:]
   1111 		}
   1112 	}
   1113 }
   1114 
   1115 // ---------------------------------------------------------------------- Cache
   1116 
// Translation phase4 source.
//
// source abstracts anything that can yield a preprocessed file, eg. a
// *cachedPPFile.
type source interface {
	ppFile() (*ppFile, error)
}
   1121 
// cachedPPFile holds the result of preprocessing one file or value.
// readyCh is closed (via ready) once the other fields are final;
// consumers must receive from it (via waitFor) before reading them.
type cachedPPFile struct {
	err     error               // fatal error that aborted preprocessing, if any
	errs    goscanner.ErrorList // errors collected while preprocessing
	modTime int64               // time.Time.UnixNano()
	pf      *ppFile             // the preprocessed file
	readyCh chan struct{}       // closed when the entry is complete
	size    int                 // file size, used together with modTime for staleness checks
}
   1130 
   1131 func (c *cachedPPFile) ready() *cachedPPFile            { close(c.readyCh); return c }
   1132 func (c *cachedPPFile) waitFor() (*cachedPPFile, error) { <-c.readyCh; return c, c.err }
   1133 
   1134 func (c *cachedPPFile) ppFile() (*ppFile, error) {
   1135 	c.waitFor()
   1136 	if c.err == nil {
   1137 		return c.pf, nil
   1138 	}
   1139 
   1140 	return nil, c.err
   1141 }
   1142 
// cacheKey identifies one cached preprocessing result. value is zero for
// file-backed sources. Config3 is part of the key because a different
// configuration can produce a different token stream for the same input.
type cacheKey struct {
	name  StringID
	sys   bool
	value StringID
	Config3
}
   1149 
// ppCache caches preprocessed files keyed by cacheKey. mu guards m only;
// per-entry completion is synchronized separately via
// cachedPPFile.readyCh.
type ppCache struct {
	mu sync.RWMutex
	m  map[cacheKey]*cachedPPFile
}
   1154 
   1155 func newPPCache() *ppCache { return &ppCache{m: map[cacheKey]*cachedPPFile{}} }
   1156 
   1157 func (c *ppCache) get(ctx *context, src Source) (source, error) {
   1158 	if src.Value != "" {
   1159 		return c.getValue(ctx, src.Name, src.Value, false, src.DoNotCache)
   1160 	}
   1161 
   1162 	return c.getFile(ctx, src.Name, false, src.DoNotCache)
   1163 }
   1164 
   1165 func (c *ppCache) getFile(ctx *context, name string, sys bool, doNotCache bool) (*cachedPPFile, error) {
   1166 	fi, err := ctx.statFile(name, sys)
   1167 	if err != nil {
   1168 		return nil, err
   1169 	}
   1170 
   1171 	if !fi.Mode().IsRegular() {
   1172 		return nil, fmt.Errorf("%s is not a regular file", name)
   1173 	}
   1174 
   1175 	if fi.Size() > mathutil.MaxInt {
   1176 		return nil, fmt.Errorf("%s: file too big", name)
   1177 	}
   1178 
   1179 	size := int(fi.Size())
   1180 	if !filepath.IsAbs(name) { // Never cache relative paths
   1181 		f, err := ctx.openFile(name, sys)
   1182 		if err != nil {
   1183 			return nil, err
   1184 		}
   1185 
   1186 		defer f.Close()
   1187 
   1188 		tf := tokenNewFile(name, size)
   1189 		ppFile := newScanner(ctx, f, tf).translationPhase3()
   1190 		cf := &cachedPPFile{pf: ppFile, readyCh: make(chan struct{})}
   1191 		cf.ready()
   1192 		return cf, nil
   1193 	}
   1194 
   1195 	modTime := fi.ModTime().UnixNano()
   1196 	key := cacheKey{dict.sid(name), sys, 0, ctx.cfg.Config3}
   1197 	c.mu.Lock()
   1198 	if cf, ok := c.m[key]; ok {
   1199 		if modTime <= cf.modTime && size == cf.size {
   1200 			c.mu.Unlock()
   1201 			if cf.err != nil {
   1202 				return nil, cf.err
   1203 			}
   1204 
   1205 			r, err := cf.waitFor()
   1206 			ctx.errs(cf.errs)
   1207 			return r, err
   1208 		}
   1209 
   1210 		delete(c.m, key)
   1211 	}
   1212 
   1213 	tf := tokenNewFile(name, size)
   1214 	cf := &cachedPPFile{modTime: modTime, size: size, readyCh: make(chan struct{})}
   1215 	if !doNotCache {
   1216 		c.m[key] = cf
   1217 	}
   1218 	c.mu.Unlock()
   1219 
   1220 	go func() {
   1221 		defer cf.ready()
   1222 
   1223 		f, err := ctx.openFile(name, sys)
   1224 		if err != nil {
   1225 			cf.err = err
   1226 			return
   1227 		}
   1228 
   1229 		defer f.Close()
   1230 
   1231 		ctx2 := newContext(ctx.cfg)
   1232 		cf.pf = newScanner(ctx2, f, tf).translationPhase3()
   1233 		cf.errs = ctx2.ErrorList
   1234 		ctx.errs(cf.errs)
   1235 	}()
   1236 
   1237 	return cf.waitFor()
   1238 }
   1239 
   1240 func (c *ppCache) getValue(ctx *context, name, value string, sys bool, doNotCache bool) (*cachedPPFile, error) {
   1241 	key := cacheKey{dict.sid(name), sys, dict.sid(value), ctx.cfg.Config3}
   1242 	c.mu.Lock()
   1243 	if cf, ok := c.m[key]; ok {
   1244 		c.mu.Unlock()
   1245 		if cf.err != nil {
   1246 			return nil, cf.err
   1247 		}
   1248 
   1249 		r, err := cf.waitFor()
   1250 		ctx.errs(cf.errs)
   1251 		return r, err
   1252 	}
   1253 
   1254 	tf := tokenNewFile(name, len(value))
   1255 	cf := &cachedPPFile{readyCh: make(chan struct{})}
   1256 	if !doNotCache {
   1257 		c.m[key] = cf
   1258 	}
   1259 	c.mu.Unlock()
   1260 	ctx2 := newContext(ctx.cfg)
   1261 	cf.pf = newScanner(ctx2, strings.NewReader(value), tf).translationPhase3()
   1262 	cf.errs = ctx2.ErrorList
   1263 	ctx.errs(cf.errs)
   1264 	cf.ready()
   1265 	return cf.waitFor()
   1266 }