parser.go (13060B)
1 // Copyright 2015 Unknwon 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"): you may 4 // not use this file except in compliance with the License. You may obtain 5 // a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 // License for the specific language governing permissions and limitations 13 // under the License. 14 15 package ini 16 17 import ( 18 "bufio" 19 "bytes" 20 "fmt" 21 "io" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26 ) 27 28 const minReaderBufferSize = 4096 29 30 var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`) 31 32 type parserOptions struct { 33 IgnoreContinuation bool 34 IgnoreInlineComment bool 35 AllowPythonMultilineValues bool 36 SpaceBeforeInlineComment bool 37 UnescapeValueDoubleQuotes bool 38 UnescapeValueCommentSymbols bool 39 PreserveSurroundedQuote bool 40 DebugFunc DebugFunc 41 ReaderBufferSize int 42 } 43 44 type parser struct { 45 buf *bufio.Reader 46 options parserOptions 47 48 isEOF bool 49 count int 50 comment *bytes.Buffer 51 } 52 53 func (p *parser) debug(format string, args ...interface{}) { 54 if p.options.DebugFunc != nil { 55 p.options.DebugFunc(fmt.Sprintf(format, args...)) 56 } 57 } 58 59 func newParser(r io.Reader, opts parserOptions) *parser { 60 size := opts.ReaderBufferSize 61 if size < minReaderBufferSize { 62 size = minReaderBufferSize 63 } 64 65 return &parser{ 66 buf: bufio.NewReaderSize(r, size), 67 options: opts, 68 count: 1, 69 comment: &bytes.Buffer{}, 70 } 71 } 72 73 // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format. 74 // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding 75 func (p *parser) BOM() error { 76 mask, err := p.buf.Peek(2) 77 if err != nil && err != io.EOF { 78 return err 79 } else if len(mask) < 2 { 80 return nil 81 } 82 83 switch { 84 case mask[0] == 254 && mask[1] == 255: 85 fallthrough 86 case mask[0] == 255 && mask[1] == 254: 87 _, err = p.buf.Read(mask) 88 if err != nil { 89 return err 90 } 91 case mask[0] == 239 && mask[1] == 187: 92 mask, err := p.buf.Peek(3) 93 if err != nil && err != io.EOF { 94 return err 95 } else if len(mask) < 3 { 96 return nil 97 } 98 if mask[2] == 191 { 99 _, err = p.buf.Read(mask) 100 if err != nil { 101 return err 102 } 103 } 104 } 105 return nil 106 } 107 108 func (p *parser) readUntil(delim byte) ([]byte, error) { 109 data, err := p.buf.ReadBytes(delim) 110 if err != nil { 111 if err == io.EOF { 112 p.isEOF = true 113 } else { 114 return nil, err 115 } 116 } 117 return data, nil 118 } 119 120 func cleanComment(in []byte) ([]byte, bool) { 121 i := bytes.IndexAny(in, "#;") 122 if i == -1 { 123 return nil, false 124 } 125 return in[i:], true 126 } 127 128 func readKeyName(delimiters string, in []byte) (string, int, error) { 129 line := string(in) 130 131 // Check if key name surrounded by quotes. 132 var keyQuote string 133 if line[0] == '"' { 134 if len(line) > 6 && line[0:3] == `"""` { 135 keyQuote = `"""` 136 } else { 137 keyQuote = `"` 138 } 139 } else if line[0] == '`' { 140 keyQuote = "`" 141 } 142 143 // Get out key name 144 var endIdx int 145 if len(keyQuote) > 0 { 146 startIdx := len(keyQuote) 147 // FIXME: fail case -> """"""name"""=value 148 pos := strings.Index(line[startIdx:], keyQuote) 149 if pos == -1 { 150 return "", -1, fmt.Errorf("missing closing key quote: %s", line) 151 } 152 pos += startIdx 153 154 // Find key-value delimiter 155 i := strings.IndexAny(line[pos+startIdx:], delimiters) 156 if i < 0 { 157 return "", -1, ErrDelimiterNotFound{line} 158 } 159 endIdx = pos + i 160 return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil 161 } 162 163 endIdx = strings.IndexAny(line, delimiters) 164 if endIdx < 0 { 165 return "", -1, ErrDelimiterNotFound{line} 166 } 167 if endIdx == 0 { 168 return "", -1, ErrEmptyKeyName{line} 169 } 170 171 return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil 172 } 173 174 func (p *parser) readMultilines(line, val, valQuote string) (string, error) { 175 for { 176 data, err := p.readUntil('\n') 177 if err != nil { 178 return "", err 179 } 180 next := string(data) 181 182 pos := strings.LastIndex(next, valQuote) 183 if pos > -1 { 184 val += next[:pos] 185 186 comment, has := cleanComment([]byte(next[pos:])) 187 if has { 188 p.comment.Write(bytes.TrimSpace(comment)) 189 } 190 break 191 } 192 val += next 193 if p.isEOF { 194 return "", fmt.Errorf("missing closing key quote from %q to %q", line, next) 195 } 196 } 197 return val, nil 198 } 199 200 func (p *parser) readContinuationLines(val string) (string, error) { 201 for { 202 data, err := p.readUntil('\n') 203 if err != nil { 204 return "", err 205 } 206 next := strings.TrimSpace(string(data)) 207 208 if len(next) == 0 { 209 break 210 } 211 val += next 212 if val[len(val)-1] != '\\' { 213 break 214 } 215 val = val[:len(val)-1] 216 } 217 return val, nil 218 } 219 220 // hasSurroundedQuote check if and only if the first and last characters 221 // are quotes \" or \'. 222 // It returns false if any other parts also contain same kind of quotes. 223 func hasSurroundedQuote(in string, quote byte) bool { 224 return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote && 225 strings.IndexByte(in[1:], quote) == len(in)-2 226 } 227 228 func (p *parser) readValue(in []byte, bufferSize int) (string, error) { 229 230 line := strings.TrimLeftFunc(string(in), unicode.IsSpace) 231 if len(line) == 0 { 232 if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' { 233 return p.readPythonMultilines(line, bufferSize) 234 } 235 return "", nil 236 } 237 238 var valQuote string 239 if len(line) > 3 && line[0:3] == `"""` { 240 valQuote = `"""` 241 } else if line[0] == '`' { 242 valQuote = "`" 243 } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' { 244 valQuote = `"` 245 } 246 247 if len(valQuote) > 0 { 248 startIdx := len(valQuote) 249 pos := strings.LastIndex(line[startIdx:], valQuote) 250 // Check for multi-line value 251 if pos == -1 { 252 return p.readMultilines(line, line[startIdx:], valQuote) 253 } 254 255 if p.options.UnescapeValueDoubleQuotes && valQuote == `"` { 256 return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil 257 } 258 return line[startIdx : pos+startIdx], nil 259 } 260 261 lastChar := line[len(line)-1] 262 // Won't be able to reach here if value only contains whitespace 263 line = strings.TrimSpace(line) 264 trimmedLastChar := line[len(line)-1] 265 266 // Check continuation lines when desired 267 if !p.options.IgnoreContinuation && trimmedLastChar == '\\' { 268 return p.readContinuationLines(line[:len(line)-1]) 269 } 270 271 // Check if ignore inline comment 272 if !p.options.IgnoreInlineComment { 273 var i int 274 if p.options.SpaceBeforeInlineComment { 275 i = strings.Index(line, " #") 276 if i == -1 { 277 i = strings.Index(line, " ;") 278 } 279 280 } else { 281 i = strings.IndexAny(line, "#;") 282 } 283 284 if i > -1 { 285 p.comment.WriteString(line[i:]) 286 line = strings.TrimSpace(line[:i]) 287 } 288 289 } 290 291 // Trim single and double quotes 292 if (hasSurroundedQuote(line, '\'') || 293 hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote { 294 line = line[1 : len(line)-1] 295 } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols { 296 line = strings.ReplaceAll(line, `\;`, ";") 297 line = strings.ReplaceAll(line, `\#`, "#") 298 } else if p.options.AllowPythonMultilineValues && lastChar == '\n' { 299 return p.readPythonMultilines(line, bufferSize) 300 } 301 302 return line, nil 303 } 304 305 func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) { 306 parserBufferPeekResult, _ := p.buf.Peek(bufferSize) 307 peekBuffer := bytes.NewBuffer(parserBufferPeekResult) 308 309 for { 310 peekData, peekErr := peekBuffer.ReadBytes('\n') 311 if peekErr != nil && peekErr != io.EOF { 312 p.debug("readPythonMultilines: failed to peek with error: %v", peekErr) 313 return "", peekErr 314 } 315 316 p.debug("readPythonMultilines: parsing %q", string(peekData)) 317 318 peekMatches := pythonMultiline.FindStringSubmatch(string(peekData)) 319 p.debug("readPythonMultilines: matched %d parts", len(peekMatches)) 320 for n, v := range peekMatches { 321 p.debug(" %d: %q", n, v) 322 } 323 324 // Return if not a Python multiline value. 325 if len(peekMatches) != 3 { 326 p.debug("readPythonMultilines: end of value, got: %q", line) 327 return line, nil 328 } 329 330 // Advance the parser reader (buffer) in-sync with the peek buffer. 331 _, err := p.buf.Discard(len(peekData)) 332 if err != nil { 333 p.debug("readPythonMultilines: failed to skip to the end, returning error") 334 return "", err 335 } 336 337 line += "\n" + peekMatches[0] 338 } 339 } 340 341 // parse parses data through an io.Reader. 342 func (f *File) parse(reader io.Reader) (err error) { 343 p := newParser(reader, parserOptions{ 344 IgnoreContinuation: f.options.IgnoreContinuation, 345 IgnoreInlineComment: f.options.IgnoreInlineComment, 346 AllowPythonMultilineValues: f.options.AllowPythonMultilineValues, 347 SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment, 348 UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes, 349 UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols, 350 PreserveSurroundedQuote: f.options.PreserveSurroundedQuote, 351 DebugFunc: f.options.DebugFunc, 352 ReaderBufferSize: f.options.ReaderBufferSize, 353 }) 354 if err = p.BOM(); err != nil { 355 return fmt.Errorf("BOM: %v", err) 356 } 357 358 // Ignore error because default section name is never empty string. 359 name := DefaultSection 360 if f.options.Insensitive || f.options.InsensitiveSections { 361 name = strings.ToLower(DefaultSection) 362 } 363 section, _ := f.NewSection(name) 364 365 // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key 366 var isLastValueEmpty bool 367 var lastRegularKey *Key 368 369 var line []byte 370 var inUnparseableSection bool 371 372 // NOTE: Iterate and increase `currentPeekSize` until 373 // the size of the parser buffer is found. 374 // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. 375 parserBufferSize := 0 376 // NOTE: Peek 4kb at a time. 377 currentPeekSize := minReaderBufferSize 378 379 if f.options.AllowPythonMultilineValues { 380 for { 381 peekBytes, _ := p.buf.Peek(currentPeekSize) 382 peekBytesLength := len(peekBytes) 383 384 if parserBufferSize >= peekBytesLength { 385 break 386 } 387 388 currentPeekSize *= 2 389 parserBufferSize = peekBytesLength 390 } 391 } 392 393 for !p.isEOF { 394 line, err = p.readUntil('\n') 395 if err != nil { 396 return err 397 } 398 399 if f.options.AllowNestedValues && 400 isLastValueEmpty && len(line) > 0 { 401 if line[0] == ' ' || line[0] == '\t' { 402 err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) 403 if err != nil { 404 return err 405 } 406 continue 407 } 408 } 409 410 line = bytes.TrimLeftFunc(line, unicode.IsSpace) 411 if len(line) == 0 { 412 continue 413 } 414 415 // Comments 416 if line[0] == '#' || line[0] == ';' { 417 // Note: we do not care ending line break, 418 // it is needed for adding second line, 419 // so just clean it once at the end when set to value. 420 p.comment.Write(line) 421 continue 422 } 423 424 // Section 425 if line[0] == '[' { 426 // Read to the next ']' (TODO: support quoted strings) 427 closeIdx := bytes.LastIndexByte(line, ']') 428 if closeIdx == -1 { 429 return fmt.Errorf("unclosed section: %s", line) 430 } 431 432 name := string(line[1:closeIdx]) 433 section, err = f.NewSection(name) 434 if err != nil { 435 return err 436 } 437 438 comment, has := cleanComment(line[closeIdx+1:]) 439 if has { 440 p.comment.Write(comment) 441 } 442 443 section.Comment = strings.TrimSpace(p.comment.String()) 444 445 // Reset auto-counter and comments 446 p.comment.Reset() 447 p.count = 1 448 // Nested values can't span sections 449 isLastValueEmpty = false 450 451 inUnparseableSection = false 452 for i := range f.options.UnparseableSections { 453 if f.options.UnparseableSections[i] == name || 454 ((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) { 455 inUnparseableSection = true 456 continue 457 } 458 } 459 continue 460 } 461 462 if inUnparseableSection { 463 section.isRawSection = true 464 section.rawBody += string(line) 465 continue 466 } 467 468 kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line) 469 if err != nil { 470 switch { 471 // Treat as boolean key when desired, and whole line is key name. 472 case IsErrDelimiterNotFound(err): 473 switch { 474 case f.options.AllowBooleanKeys: 475 kname, err := p.readValue(line, parserBufferSize) 476 if err != nil { 477 return err 478 } 479 key, err := section.NewBooleanKey(kname) 480 if err != nil { 481 return err 482 } 483 key.Comment = strings.TrimSpace(p.comment.String()) 484 p.comment.Reset() 485 continue 486 487 case f.options.SkipUnrecognizableLines: 488 continue 489 } 490 case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines: 491 continue 492 } 493 return err 494 } 495 496 // Auto increment. 497 isAutoIncr := false 498 if kname == "-" { 499 isAutoIncr = true 500 kname = "#" + strconv.Itoa(p.count) 501 p.count++ 502 } 503 504 value, err := p.readValue(line[offset:], parserBufferSize) 505 if err != nil { 506 return err 507 } 508 isLastValueEmpty = len(value) == 0 509 510 key, err := section.NewKey(kname, value) 511 if err != nil { 512 return err 513 } 514 key.isAutoIncrement = isAutoIncr 515 key.Comment = strings.TrimSpace(p.comment.String()) 516 p.comment.Reset() 517 lastRegularKey = key 518 } 519 return nil 520 }