leobot

Simple Telegram Logging Bot
git clone git://bsandro.tech/leobot
Log | Files | Refs | README | LICENSE

parser.go (13060B)


      1 // Copyright 2015 Unknwon
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License"): you may
      4 // not use this file except in compliance with the License. You may obtain
      5 // a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     12 // License for the specific language governing permissions and limitations
     13 // under the License.
     14 
     15 package ini
     16 
     17 import (
     18 	"bufio"
     19 	"bytes"
     20 	"fmt"
     21 	"io"
     22 	"regexp"
     23 	"strconv"
     24 	"strings"
     25 	"unicode"
     26 )
     27 
// minReaderBufferSize is the lower bound, in bytes, that newParser applies to
// the size of its buffered reader.
const minReaderBufferSize = 4096

// pythonMultiline matches a line that starts with at least one tab, form feed
// or space. readPythonMultilines uses it to decide whether the next input
// line continues the current value.
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
     31 
// parserOptions carries the settings that control how a parser reads values.
//
//   - IgnoreContinuation: do not treat a trailing backslash as a request to
//     join the following line(s) onto the value.
//   - IgnoreInlineComment: keep '#' and ';' inside a value instead of
//     splitting the rest of the line off as a comment.
//   - AllowPythonMultilineValues: let a value continue on following lines
//     that start with whitespace.
//   - SpaceBeforeInlineComment: only recognise an inline comment when the
//     '#' or ';' is preceded by a space.
//   - UnescapeValueDoubleQuotes: treat a leading '"' as a value quote and
//     replace \" with " inside the quoted value.
//   - UnescapeValueCommentSymbols: replace \; and \# with ; and # in unquoted
//     values.
//   - PreserveSurroundedQuote: keep a surrounding pair of single or double
//     quotes instead of stripping it.
//   - DebugFunc: optional sink for parser debug messages; nil disables them.
//   - ReaderBufferSize: requested buffered-reader size; values below
//     minReaderBufferSize are raised to it by newParser.
type parserOptions struct {
	IgnoreContinuation          bool
	IgnoreInlineComment         bool
	AllowPythonMultilineValues  bool
	SpaceBeforeInlineComment    bool
	UnescapeValueDoubleQuotes   bool
	UnescapeValueCommentSymbols bool
	PreserveSurroundedQuote     bool
	DebugFunc                   DebugFunc
	ReaderBufferSize            int
}
     43 
// parser holds the state of a single parsing run.
//
//   - buf is the buffered reader the input is consumed from.
//   - options are the settings supplied to newParser.
//   - isEOF is set by readUntil once the underlying reader reports io.EOF.
//   - count is the next number handed out to an auto-increment ("-") key; it
//     starts at 1 and is reset to 1 whenever a new section header is read.
//   - comment accumulates comment text until it is attached to the next
//     section or key.
type parser struct {
	buf     *bufio.Reader
	options parserOptions

	isEOF   bool
	count   int
	comment *bytes.Buffer
}
     52 
     53 func (p *parser) debug(format string, args ...interface{}) {
     54 	if p.options.DebugFunc != nil {
     55 		p.options.DebugFunc(fmt.Sprintf(format, args...))
     56 	}
     57 }
     58 
     59 func newParser(r io.Reader, opts parserOptions) *parser {
     60 	size := opts.ReaderBufferSize
     61 	if size < minReaderBufferSize {
     62 		size = minReaderBufferSize
     63 	}
     64 
     65 	return &parser{
     66 		buf:     bufio.NewReaderSize(r, size),
     67 		options: opts,
     68 		count:   1,
     69 		comment: &bytes.Buffer{},
     70 	}
     71 }
     72 
     73 // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
     74 // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
     75 func (p *parser) BOM() error {
     76 	mask, err := p.buf.Peek(2)
     77 	if err != nil && err != io.EOF {
     78 		return err
     79 	} else if len(mask) < 2 {
     80 		return nil
     81 	}
     82 
     83 	switch {
     84 	case mask[0] == 254 && mask[1] == 255:
     85 		fallthrough
     86 	case mask[0] == 255 && mask[1] == 254:
     87 		_, err = p.buf.Read(mask)
     88 		if err != nil {
     89 			return err
     90 		}
     91 	case mask[0] == 239 && mask[1] == 187:
     92 		mask, err := p.buf.Peek(3)
     93 		if err != nil && err != io.EOF {
     94 			return err
     95 		} else if len(mask) < 3 {
     96 			return nil
     97 		}
     98 		if mask[2] == 191 {
     99 			_, err = p.buf.Read(mask)
    100 			if err != nil {
    101 				return err
    102 			}
    103 		}
    104 	}
    105 	return nil
    106 }
    107 
    108 func (p *parser) readUntil(delim byte) ([]byte, error) {
    109 	data, err := p.buf.ReadBytes(delim)
    110 	if err != nil {
    111 		if err == io.EOF {
    112 			p.isEOF = true
    113 		} else {
    114 			return nil, err
    115 		}
    116 	}
    117 	return data, nil
    118 }
    119 
    120 func cleanComment(in []byte) ([]byte, bool) {
    121 	i := bytes.IndexAny(in, "#;")
    122 	if i == -1 {
    123 		return nil, false
    124 	}
    125 	return in[i:], true
    126 }
    127 
    128 func readKeyName(delimiters string, in []byte) (string, int, error) {
    129 	line := string(in)
    130 
    131 	// Check if key name surrounded by quotes.
    132 	var keyQuote string
    133 	if line[0] == '"' {
    134 		if len(line) > 6 && line[0:3] == `"""` {
    135 			keyQuote = `"""`
    136 		} else {
    137 			keyQuote = `"`
    138 		}
    139 	} else if line[0] == '`' {
    140 		keyQuote = "`"
    141 	}
    142 
    143 	// Get out key name
    144 	var endIdx int
    145 	if len(keyQuote) > 0 {
    146 		startIdx := len(keyQuote)
    147 		// FIXME: fail case -> """"""name"""=value
    148 		pos := strings.Index(line[startIdx:], keyQuote)
    149 		if pos == -1 {
    150 			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
    151 		}
    152 		pos += startIdx
    153 
    154 		// Find key-value delimiter
    155 		i := strings.IndexAny(line[pos+startIdx:], delimiters)
    156 		if i < 0 {
    157 			return "", -1, ErrDelimiterNotFound{line}
    158 		}
    159 		endIdx = pos + i
    160 		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
    161 	}
    162 
    163 	endIdx = strings.IndexAny(line, delimiters)
    164 	if endIdx < 0 {
    165 		return "", -1, ErrDelimiterNotFound{line}
    166 	}
    167 	if endIdx == 0 {
    168 		return "", -1, ErrEmptyKeyName{line}
    169 	}
    170 
    171 	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
    172 }
    173 
    174 func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
    175 	for {
    176 		data, err := p.readUntil('\n')
    177 		if err != nil {
    178 			return "", err
    179 		}
    180 		next := string(data)
    181 
    182 		pos := strings.LastIndex(next, valQuote)
    183 		if pos > -1 {
    184 			val += next[:pos]
    185 
    186 			comment, has := cleanComment([]byte(next[pos:]))
    187 			if has {
    188 				p.comment.Write(bytes.TrimSpace(comment))
    189 			}
    190 			break
    191 		}
    192 		val += next
    193 		if p.isEOF {
    194 			return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
    195 		}
    196 	}
    197 	return val, nil
    198 }
    199 
    200 func (p *parser) readContinuationLines(val string) (string, error) {
    201 	for {
    202 		data, err := p.readUntil('\n')
    203 		if err != nil {
    204 			return "", err
    205 		}
    206 		next := strings.TrimSpace(string(data))
    207 
    208 		if len(next) == 0 {
    209 			break
    210 		}
    211 		val += next
    212 		if val[len(val)-1] != '\\' {
    213 			break
    214 		}
    215 		val = val[:len(val)-1]
    216 	}
    217 	return val, nil
    218 }
    219 
    220 // hasSurroundedQuote check if and only if the first and last characters
    221 // are quotes \" or \'.
    222 // It returns false if any other parts also contain same kind of quotes.
    223 func hasSurroundedQuote(in string, quote byte) bool {
    224 	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
    225 		strings.IndexByte(in[1:], quote) == len(in)-2
    226 }
    227 
    228 func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
    229 
    230 	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
    231 	if len(line) == 0 {
    232 		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
    233 			return p.readPythonMultilines(line, bufferSize)
    234 		}
    235 		return "", nil
    236 	}
    237 
    238 	var valQuote string
    239 	if len(line) > 3 && line[0:3] == `"""` {
    240 		valQuote = `"""`
    241 	} else if line[0] == '`' {
    242 		valQuote = "`"
    243 	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
    244 		valQuote = `"`
    245 	}
    246 
    247 	if len(valQuote) > 0 {
    248 		startIdx := len(valQuote)
    249 		pos := strings.LastIndex(line[startIdx:], valQuote)
    250 		// Check for multi-line value
    251 		if pos == -1 {
    252 			return p.readMultilines(line, line[startIdx:], valQuote)
    253 		}
    254 
    255 		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
    256 			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
    257 		}
    258 		return line[startIdx : pos+startIdx], nil
    259 	}
    260 
    261 	lastChar := line[len(line)-1]
    262 	// Won't be able to reach here if value only contains whitespace
    263 	line = strings.TrimSpace(line)
    264 	trimmedLastChar := line[len(line)-1]
    265 
    266 	// Check continuation lines when desired
    267 	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
    268 		return p.readContinuationLines(line[:len(line)-1])
    269 	}
    270 
    271 	// Check if ignore inline comment
    272 	if !p.options.IgnoreInlineComment {
    273 		var i int
    274 		if p.options.SpaceBeforeInlineComment {
    275 			i = strings.Index(line, " #")
    276 			if i == -1 {
    277 				i = strings.Index(line, " ;")
    278 			}
    279 
    280 		} else {
    281 			i = strings.IndexAny(line, "#;")
    282 		}
    283 
    284 		if i > -1 {
    285 			p.comment.WriteString(line[i:])
    286 			line = strings.TrimSpace(line[:i])
    287 		}
    288 
    289 	}
    290 
    291 	// Trim single and double quotes
    292 	if (hasSurroundedQuote(line, '\'') ||
    293 		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
    294 		line = line[1 : len(line)-1]
    295 	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
    296 		line = strings.ReplaceAll(line, `\;`, ";")
    297 		line = strings.ReplaceAll(line, `\#`, "#")
    298 	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
    299 		return p.readPythonMultilines(line, bufferSize)
    300 	}
    301 
    302 	return line, nil
    303 }
    304 
    305 func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
    306 	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
    307 	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
    308 
    309 	for {
    310 		peekData, peekErr := peekBuffer.ReadBytes('\n')
    311 		if peekErr != nil && peekErr != io.EOF {
    312 			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
    313 			return "", peekErr
    314 		}
    315 
    316 		p.debug("readPythonMultilines: parsing %q", string(peekData))
    317 
    318 		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
    319 		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
    320 		for n, v := range peekMatches {
    321 			p.debug("   %d: %q", n, v)
    322 		}
    323 
    324 		// Return if not a Python multiline value.
    325 		if len(peekMatches) != 3 {
    326 			p.debug("readPythonMultilines: end of value, got: %q", line)
    327 			return line, nil
    328 		}
    329 
    330 		// Advance the parser reader (buffer) in-sync with the peek buffer.
    331 		_, err := p.buf.Discard(len(peekData))
    332 		if err != nil {
    333 			p.debug("readPythonMultilines: failed to skip to the end, returning error")
    334 			return "", err
    335 		}
    336 
    337 		line += "\n" + peekMatches[0]
    338 	}
    339 }
    340 
    341 // parse parses data through an io.Reader.
    342 func (f *File) parse(reader io.Reader) (err error) {
    343 	p := newParser(reader, parserOptions{
    344 		IgnoreContinuation:          f.options.IgnoreContinuation,
    345 		IgnoreInlineComment:         f.options.IgnoreInlineComment,
    346 		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
    347 		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
    348 		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
    349 		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
    350 		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
    351 		DebugFunc:                   f.options.DebugFunc,
    352 		ReaderBufferSize:            f.options.ReaderBufferSize,
    353 	})
    354 	if err = p.BOM(); err != nil {
    355 		return fmt.Errorf("BOM: %v", err)
    356 	}
    357 
    358 	// Ignore error because default section name is never empty string.
    359 	name := DefaultSection
    360 	if f.options.Insensitive || f.options.InsensitiveSections {
    361 		name = strings.ToLower(DefaultSection)
    362 	}
    363 	section, _ := f.NewSection(name)
    364 
    365 	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
    366 	var isLastValueEmpty bool
    367 	var lastRegularKey *Key
    368 
    369 	var line []byte
    370 	var inUnparseableSection bool
    371 
    372 	// NOTE: Iterate and increase `currentPeekSize` until
    373 	// the size of the parser buffer is found.
    374 	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
    375 	parserBufferSize := 0
    376 	// NOTE: Peek 4kb at a time.
    377 	currentPeekSize := minReaderBufferSize
    378 
    379 	if f.options.AllowPythonMultilineValues {
    380 		for {
    381 			peekBytes, _ := p.buf.Peek(currentPeekSize)
    382 			peekBytesLength := len(peekBytes)
    383 
    384 			if parserBufferSize >= peekBytesLength {
    385 				break
    386 			}
    387 
    388 			currentPeekSize *= 2
    389 			parserBufferSize = peekBytesLength
    390 		}
    391 	}
    392 
    393 	for !p.isEOF {
    394 		line, err = p.readUntil('\n')
    395 		if err != nil {
    396 			return err
    397 		}
    398 
    399 		if f.options.AllowNestedValues &&
    400 			isLastValueEmpty && len(line) > 0 {
    401 			if line[0] == ' ' || line[0] == '\t' {
    402 				err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
    403 				if err != nil {
    404 					return err
    405 				}
    406 				continue
    407 			}
    408 		}
    409 
    410 		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
    411 		if len(line) == 0 {
    412 			continue
    413 		}
    414 
    415 		// Comments
    416 		if line[0] == '#' || line[0] == ';' {
    417 			// Note: we do not care ending line break,
    418 			// it is needed for adding second line,
    419 			// so just clean it once at the end when set to value.
    420 			p.comment.Write(line)
    421 			continue
    422 		}
    423 
    424 		// Section
    425 		if line[0] == '[' {
    426 			// Read to the next ']' (TODO: support quoted strings)
    427 			closeIdx := bytes.LastIndexByte(line, ']')
    428 			if closeIdx == -1 {
    429 				return fmt.Errorf("unclosed section: %s", line)
    430 			}
    431 
    432 			name := string(line[1:closeIdx])
    433 			section, err = f.NewSection(name)
    434 			if err != nil {
    435 				return err
    436 			}
    437 
    438 			comment, has := cleanComment(line[closeIdx+1:])
    439 			if has {
    440 				p.comment.Write(comment)
    441 			}
    442 
    443 			section.Comment = strings.TrimSpace(p.comment.String())
    444 
    445 			// Reset auto-counter and comments
    446 			p.comment.Reset()
    447 			p.count = 1
    448 			// Nested values can't span sections
    449 			isLastValueEmpty = false
    450 
    451 			inUnparseableSection = false
    452 			for i := range f.options.UnparseableSections {
    453 				if f.options.UnparseableSections[i] == name ||
    454 					((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
    455 					inUnparseableSection = true
    456 					continue
    457 				}
    458 			}
    459 			continue
    460 		}
    461 
    462 		if inUnparseableSection {
    463 			section.isRawSection = true
    464 			section.rawBody += string(line)
    465 			continue
    466 		}
    467 
    468 		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
    469 		if err != nil {
    470 			switch {
    471 			// Treat as boolean key when desired, and whole line is key name.
    472 			case IsErrDelimiterNotFound(err):
    473 				switch {
    474 				case f.options.AllowBooleanKeys:
    475 					kname, err := p.readValue(line, parserBufferSize)
    476 					if err != nil {
    477 						return err
    478 					}
    479 					key, err := section.NewBooleanKey(kname)
    480 					if err != nil {
    481 						return err
    482 					}
    483 					key.Comment = strings.TrimSpace(p.comment.String())
    484 					p.comment.Reset()
    485 					continue
    486 
    487 				case f.options.SkipUnrecognizableLines:
    488 					continue
    489 				}
    490 			case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines:
    491 				continue
    492 			}
    493 			return err
    494 		}
    495 
    496 		// Auto increment.
    497 		isAutoIncr := false
    498 		if kname == "-" {
    499 			isAutoIncr = true
    500 			kname = "#" + strconv.Itoa(p.count)
    501 			p.count++
    502 		}
    503 
    504 		value, err := p.readValue(line[offset:], parserBufferSize)
    505 		if err != nil {
    506 			return err
    507 		}
    508 		isLastValueEmpty = len(value) == 0
    509 
    510 		key, err := section.NewKey(kname, value)
    511 		if err != nil {
    512 			return err
    513 		}
    514 		key.isAutoIncrement = isAutoIncr
    515 		key.Comment = strings.TrimSpace(p.comment.String())
    516 		p.comment.Reset()
    517 		lastRegularKey = key
    518 	}
    519 	return nil
    520 }