scanner.go (7157B)
1 package unstable 2 3 import "github.com/pelletier/go-toml/v2/internal/characters" 4 5 func scanFollows(b []byte, pattern string) bool { 6 n := len(pattern) 7 8 return len(b) >= n && string(b[:n]) == pattern 9 } 10 11 func scanFollowsMultilineBasicStringDelimiter(b []byte) bool { 12 return scanFollows(b, `"""`) 13 } 14 15 func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool { 16 return scanFollows(b, `'''`) 17 } 18 19 func scanFollowsTrue(b []byte) bool { 20 return scanFollows(b, `true`) 21 } 22 23 func scanFollowsFalse(b []byte) bool { 24 return scanFollows(b, `false`) 25 } 26 27 func scanFollowsInf(b []byte) bool { 28 return scanFollows(b, `inf`) 29 } 30 31 func scanFollowsNan(b []byte) bool { 32 return scanFollows(b, `nan`) 33 } 34 35 func scanUnquotedKey(b []byte) ([]byte, []byte) { 36 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ 37 for i := 0; i < len(b); i++ { 38 if !isUnquotedKeyChar(b[i]) { 39 return b[:i], b[i:] 40 } 41 } 42 43 return b, b[len(b):] 44 } 45 46 func isUnquotedKeyChar(r byte) bool { 47 return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' 48 } 49 50 func scanLiteralString(b []byte) ([]byte, []byte, error) { 51 // literal-string = apostrophe *literal-char apostrophe 52 // apostrophe = %x27 ; ' apostrophe 53 // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii 54 for i := 1; i < len(b); { 55 switch b[i] { 56 case '\'': 57 return b[:i+1], b[i+1:], nil 58 case '\n', '\r': 59 return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines") 60 } 61 size := characters.Utf8ValidNext(b[i:]) 62 if size == 0 { 63 return nil, nil, NewParserError(b[i:i+1], "invalid character") 64 } 65 i += size 66 } 67 68 return nil, nil, NewParserError(b[len(b):], "unterminated literal string") 69 } 70 71 func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { 72 // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body 73 // ml-literal-string-delim 74 // ml-literal-string-delim = 3apostrophe 75 // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] 76 // 77 // mll-content = mll-char / newline 78 // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii 79 // mll-quotes = 1*2apostrophe 80 for i := 3; i < len(b); { 81 switch b[i] { 82 case '\'': 83 if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { 84 i += 3 85 86 // At that point we found 3 apostrophe, and i is the 87 // index of the byte after the third one. The scanner 88 // needs to be eager, because there can be an extra 2 89 // apostrophe that can be accepted at the end of the 90 // string. 91 92 if i >= len(b) || b[i] != '\'' { 93 return b[:i], b[i:], nil 94 } 95 i++ 96 97 if i >= len(b) || b[i] != '\'' { 98 return b[:i], b[i:], nil 99 } 100 i++ 101 102 if i < len(b) && b[i] == '\'' { 103 return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string") 104 } 105 106 return b[:i], b[i:], nil 107 } 108 case '\r': 109 if len(b) < i+2 { 110 return nil, nil, NewParserError(b[len(b):], `need a \n after \r`) 111 } 112 if b[i+1] != '\n' { 113 return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`) 114 } 115 i += 2 // skip the \n 116 continue 117 } 118 size := characters.Utf8ValidNext(b[i:]) 119 if size == 0 { 120 return nil, nil, NewParserError(b[i:i+1], "invalid character") 121 } 122 i += size 123 } 124 125 return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`) 126 } 127 128 func scanWindowsNewline(b []byte) ([]byte, []byte, error) { 129 const lenCRLF = 2 130 if len(b) < lenCRLF { 131 return nil, nil, NewParserError(b, "windows new line expected") 132 } 133 134 if b[1] != '\n' { 135 return nil, nil, NewParserError(b, `windows new line should be \r\n`) 136 } 137 138 return b[:lenCRLF], b[lenCRLF:], nil 139 } 140 141 func scanWhitespace(b []byte) ([]byte, []byte) { 142 for i := 0; i < len(b); i++ { 143 switch b[i] { 144 case ' ', '\t': 145 continue 146 default: 147 return b[:i], b[i:] 148 } 149 } 150 151 return b, b[len(b):] 152 } 153 154 func scanComment(b []byte) ([]byte, []byte, error) { 155 // comment-start-symbol = %x23 ; # 156 // non-ascii = %x80-D7FF / %xE000-10FFFF 157 // non-eol = %x09 / %x20-7F / non-ascii 158 // 159 // comment = comment-start-symbol *non-eol 160 161 for i := 1; i < len(b); { 162 if b[i] == '\n' { 163 return b[:i], b[i:], nil 164 } 165 if b[i] == '\r' { 166 if i+1 < len(b) && b[i+1] == '\n' { 167 return b[:i+1], b[i+1:], nil 168 } 169 return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") 170 } 171 size := characters.Utf8ValidNext(b[i:]) 172 if size == 0 { 173 return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") 174 } 175 176 i += size 177 } 178 179 return b, b[len(b):], nil 180 } 181 182 func scanBasicString(b []byte) ([]byte, bool, []byte, error) { 183 // basic-string = quotation-mark *basic-char quotation-mark 184 // quotation-mark = %x22 ; " 185 // basic-char = basic-unescaped / escaped 186 // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii 187 // escaped = escape escape-seq-char 188 escaped := false 189 i := 1 190 191 for ; i < len(b); i++ { 192 switch b[i] { 193 case '"': 194 return b[:i+1], escaped, b[i+1:], nil 195 case '\n', '\r': 196 return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines") 197 case '\\': 198 if len(b) < i+2 { 199 return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\") 200 } 201 escaped = true 202 i++ // skip the next character 203 } 204 } 205 206 return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`) 207 } 208 209 func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { 210 // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body 211 // ml-basic-string-delim 212 // ml-basic-string-delim = 3quotation-mark 213 // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] 214 // 215 // mlb-content = mlb-char / newline / mlb-escaped-nl 216 // mlb-char = mlb-unescaped / escaped 217 // mlb-quotes = 1*2quotation-mark 218 // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii 219 // mlb-escaped-nl = escape ws newline *( wschar / newline ) 220 221 escaped := false 222 i := 3 223 224 for ; i < len(b); i++ { 225 switch b[i] { 226 case '"': 227 if scanFollowsMultilineBasicStringDelimiter(b[i:]) { 228 i += 3 229 230 // At that point we found 3 apostrophe, and i is the 231 // index of the byte after the third one. The scanner 232 // needs to be eager, because there can be an extra 2 233 // apostrophe that can be accepted at the end of the 234 // string. 235 236 if i >= len(b) || b[i] != '"' { 237 return b[:i], escaped, b[i:], nil 238 } 239 i++ 240 241 if i >= len(b) || b[i] != '"' { 242 return b[:i], escaped, b[i:], nil 243 } 244 i++ 245 246 if i < len(b) && b[i] == '"' { 247 return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`) 248 } 249 250 return b[:i], escaped, b[i:], nil 251 } 252 case '\\': 253 if len(b) < i+2 { 254 return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\") 255 } 256 escaped = true 257 i++ // skip the next character 258 case '\r': 259 if len(b) < i+2 { 260 return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`) 261 } 262 if b[i+1] != '\n' { 263 return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`) 264 } 265 i++ // skip the \n 266 } 267 } 268 269 return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`) 270 }