gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

scanner.go (7157B)


      1 package unstable
      2 
      3 import "github.com/pelletier/go-toml/v2/internal/characters"
      4 
      5 func scanFollows(b []byte, pattern string) bool {
      6 	n := len(pattern)
      7 
      8 	return len(b) >= n && string(b[:n]) == pattern
      9 }
     10 
     11 func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
     12 	return scanFollows(b, `"""`)
     13 }
     14 
     15 func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
     16 	return scanFollows(b, `'''`)
     17 }
     18 
     19 func scanFollowsTrue(b []byte) bool {
     20 	return scanFollows(b, `true`)
     21 }
     22 
     23 func scanFollowsFalse(b []byte) bool {
     24 	return scanFollows(b, `false`)
     25 }
     26 
     27 func scanFollowsInf(b []byte) bool {
     28 	return scanFollows(b, `inf`)
     29 }
     30 
     31 func scanFollowsNan(b []byte) bool {
     32 	return scanFollows(b, `nan`)
     33 }
     34 
     35 func scanUnquotedKey(b []byte) ([]byte, []byte) {
     36 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
     37 	for i := 0; i < len(b); i++ {
     38 		if !isUnquotedKeyChar(b[i]) {
     39 			return b[:i], b[i:]
     40 		}
     41 	}
     42 
     43 	return b, b[len(b):]
     44 }
     45 
     46 func isUnquotedKeyChar(r byte) bool {
     47 	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
     48 }
     49 
     50 func scanLiteralString(b []byte) ([]byte, []byte, error) {
     51 	// literal-string = apostrophe *literal-char apostrophe
     52 	// apostrophe = %x27 ; ' apostrophe
     53 	// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
     54 	for i := 1; i < len(b); {
     55 		switch b[i] {
     56 		case '\'':
     57 			return b[:i+1], b[i+1:], nil
     58 		case '\n', '\r':
     59 			return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")
     60 		}
     61 		size := characters.Utf8ValidNext(b[i:])
     62 		if size == 0 {
     63 			return nil, nil, NewParserError(b[i:i+1], "invalid character")
     64 		}
     65 		i += size
     66 	}
     67 
     68 	return nil, nil, NewParserError(b[len(b):], "unterminated literal string")
     69 }
     70 
     71 func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
     72 	// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
     73 	// ml-literal-string-delim
     74 	// ml-literal-string-delim = 3apostrophe
     75 	// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
     76 	//
     77 	// mll-content = mll-char / newline
     78 	// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
     79 	// mll-quotes = 1*2apostrophe
     80 	for i := 3; i < len(b); {
     81 		switch b[i] {
     82 		case '\'':
     83 			if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
     84 				i += 3
     85 
     86 				// At that point we found 3 apostrophe, and i is the
     87 				// index of the byte after the third one. The scanner
     88 				// needs to be eager, because there can be an extra 2
     89 				// apostrophe that can be accepted at the end of the
     90 				// string.
     91 
     92 				if i >= len(b) || b[i] != '\'' {
     93 					return b[:i], b[i:], nil
     94 				}
     95 				i++
     96 
     97 				if i >= len(b) || b[i] != '\'' {
     98 					return b[:i], b[i:], nil
     99 				}
    100 				i++
    101 
    102 				if i < len(b) && b[i] == '\'' {
    103 					return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")
    104 				}
    105 
    106 				return b[:i], b[i:], nil
    107 			}
    108 		case '\r':
    109 			if len(b) < i+2 {
    110 				return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)
    111 			}
    112 			if b[i+1] != '\n' {
    113 				return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)
    114 			}
    115 			i += 2 // skip the \n
    116 			continue
    117 		}
    118 		size := characters.Utf8ValidNext(b[i:])
    119 		if size == 0 {
    120 			return nil, nil, NewParserError(b[i:i+1], "invalid character")
    121 		}
    122 		i += size
    123 	}
    124 
    125 	return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)
    126 }
    127 
    128 func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
    129 	const lenCRLF = 2
    130 	if len(b) < lenCRLF {
    131 		return nil, nil, NewParserError(b, "windows new line expected")
    132 	}
    133 
    134 	if b[1] != '\n' {
    135 		return nil, nil, NewParserError(b, `windows new line should be \r\n`)
    136 	}
    137 
    138 	return b[:lenCRLF], b[lenCRLF:], nil
    139 }
    140 
    141 func scanWhitespace(b []byte) ([]byte, []byte) {
    142 	for i := 0; i < len(b); i++ {
    143 		switch b[i] {
    144 		case ' ', '\t':
    145 			continue
    146 		default:
    147 			return b[:i], b[i:]
    148 		}
    149 	}
    150 
    151 	return b, b[len(b):]
    152 }
    153 
    154 func scanComment(b []byte) ([]byte, []byte, error) {
    155 	// comment-start-symbol = %x23 ; #
    156 	// non-ascii = %x80-D7FF / %xE000-10FFFF
    157 	// non-eol = %x09 / %x20-7F / non-ascii
    158 	//
    159 	// comment = comment-start-symbol *non-eol
    160 
    161 	for i := 1; i < len(b); {
    162 		if b[i] == '\n' {
    163 			return b[:i], b[i:], nil
    164 		}
    165 		if b[i] == '\r' {
    166 			if i+1 < len(b) && b[i+1] == '\n' {
    167 				return b[:i+1], b[i+1:], nil
    168 			}
    169 			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
    170 		}
    171 		size := characters.Utf8ValidNext(b[i:])
    172 		if size == 0 {
    173 			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
    174 		}
    175 
    176 		i += size
    177 	}
    178 
    179 	return b, b[len(b):], nil
    180 }
    181 
    182 func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
    183 	// basic-string = quotation-mark *basic-char quotation-mark
    184 	// quotation-mark = %x22            ; "
    185 	// basic-char = basic-unescaped / escaped
    186 	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
    187 	// escaped = escape escape-seq-char
    188 	escaped := false
    189 	i := 1
    190 
    191 	for ; i < len(b); i++ {
    192 		switch b[i] {
    193 		case '"':
    194 			return b[:i+1], escaped, b[i+1:], nil
    195 		case '\n', '\r':
    196 			return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")
    197 		case '\\':
    198 			if len(b) < i+2 {
    199 				return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")
    200 			}
    201 			escaped = true
    202 			i++ // skip the next character
    203 		}
    204 	}
    205 
    206 	return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)
    207 }
    208 
    209 func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
    210 	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
    211 	// ml-basic-string-delim
    212 	// ml-basic-string-delim = 3quotation-mark
    213 	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
    214 	//
    215 	// mlb-content = mlb-char / newline / mlb-escaped-nl
    216 	// mlb-char = mlb-unescaped / escaped
    217 	// mlb-quotes = 1*2quotation-mark
    218 	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
    219 	// mlb-escaped-nl = escape ws newline *( wschar / newline )
    220 
    221 	escaped := false
    222 	i := 3
    223 
    224 	for ; i < len(b); i++ {
    225 		switch b[i] {
    226 		case '"':
    227 			if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
    228 				i += 3
    229 
    230 				// At that point we found 3 apostrophe, and i is the
    231 				// index of the byte after the third one. The scanner
    232 				// needs to be eager, because there can be an extra 2
    233 				// apostrophe that can be accepted at the end of the
    234 				// string.
    235 
    236 				if i >= len(b) || b[i] != '"' {
    237 					return b[:i], escaped, b[i:], nil
    238 				}
    239 				i++
    240 
    241 				if i >= len(b) || b[i] != '"' {
    242 					return b[:i], escaped, b[i:], nil
    243 				}
    244 				i++
    245 
    246 				if i < len(b) && b[i] == '"' {
    247 					return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)
    248 				}
    249 
    250 				return b[:i], escaped, b[i:], nil
    251 			}
    252 		case '\\':
    253 			if len(b) < i+2 {
    254 				return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")
    255 			}
    256 			escaped = true
    257 			i++ // skip the next character
    258 		case '\r':
    259 			if len(b) < i+2 {
    260 				return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)
    261 			}
    262 			if b[i+1] != '\n' {
    263 				return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)
    264 			}
    265 			i++ // skip the \n
    266 		}
    267 	}
    268 
    269 	return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)
    270 }