parse.go - mirror - Mirror free and open-source projects you like with minimal effort

parse.go (22278B)
      1 package toml
      2 
      3 import (
      4 	"fmt"
      5 	"math"
      6 	"os"
      7 	"strconv"
      8 	"strings"
      9 	"time"
     10 	"unicode/utf8"
     11 
     12 	"github.com/BurntSushi/toml/internal"
     13 )
     14 
        // parser holds the state used while turning lexer items into the decoded
        // key/value mapping plus the per-key metadata recorded alongside it.
     15 type parser struct {
     16 	lx         *lexer   // Source of items; read through next().
     17 	context    Key      // Full key for the current hash in scope.
     18 	currentKey string   // Base key name for everything except hashes.
     19 	pos        Position // Current position in the TOML file.
     20 	tomlNext   bool     // True when the BURNTSUSHI_TOML_110 environment variable is set (see parse).
     21 
     22 	ordered []Key // List of keys in the order that they appear in the TOML data.
     23 
     24 	keyInfo   map[string]keyInfo  // Map keyname → info about the TOML key.
     25 	mapping   map[string]any      // Map keyname → key value.
     26 	implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names").
     27 }
     28 
        // keyInfo is the metadata stored per key by setType: where the value was
        // found and which TOML type it has.
     29 type keyInfo struct {
     30 	pos      Position
     31 	tomlType tomlType
     32 }
     33 
     34 func parse(data string) (p *parser, err error) {
     35 	_, tomlNext := os.LookupEnv("BURNTSUSHI_TOML_110")
     36 
     37 	defer func() {
     38 		if r := recover(); r != nil {
     39 			if pErr, ok := r.(ParseError); ok {
     40 				pErr.input = data
     41 				err = pErr
     42 				return
     43 			}
     44 			panic(r)
     45 		}
     46 	}()
     47 
     48 	// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
     49 	// which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
     50 	// it anyway.
     51 	if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
     52 		data = data[2:]
     53 		//lint:ignore S1017 https://github.com/dominikh/go-tools/issues/1447
     54 	} else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
     55 		data = data[3:]
     56 	}
     57 
     58 	// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
     59 	// file (second byte in surrogate pair being NULL). Again, do this here to
     60 	// avoid having to deal with UTF-8/16 stuff in the lexer.
     61 	ex := 6
     62 	if len(data) < 6 {
     63 		ex = len(data)
     64 	}
     65 	if i := strings.IndexRune(data[:ex], 0); i > -1 {
     66 		return nil, ParseError{
     67 			Message:  "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
     68 			Position: Position{Line: 1, Start: i, Len: 1},
     69 			Line:     1,
     70 			input:    data,
     71 		}
     72 	}
     73 
     74 	p = &parser{
     75 		keyInfo:   make(map[string]keyInfo),
     76 		mapping:   make(map[string]any),
     77 		lx:        lex(data, tomlNext),
     78 		ordered:   make([]Key, 0),
     79 		implicits: make(map[string]struct{}),
     80 		tomlNext:  tomlNext,
     81 	}
     82 	for {
     83 		item := p.next()
     84 		if item.typ == itemEOF {
     85 			break
     86 		}
     87 		p.topLevel(item)
     88 	}
     89 
     90 	return p, nil
     91 }
     92 
     93 func (p *parser) panicErr(it item, err error) {
     94 	panic(ParseError{
     95 		err:      err,
     96 		Position: it.pos,
     97 		Line:     it.pos.Len,
     98 		LastKey:  p.current(),
     99 	})
    100 }
    101 
    102 func (p *parser) panicItemf(it item, format string, v ...any) {
    103 	panic(ParseError{
    104 		Message:  fmt.Sprintf(format, v...),
    105 		Position: it.pos,
    106 		Line:     it.pos.Len,
    107 		LastKey:  p.current(),
    108 	})
    109 }
    110 
    111 func (p *parser) panicf(format string, v ...any) {
    112 	panic(ParseError{
    113 		Message:  fmt.Sprintf(format, v...),
    114 		Position: p.pos,
    115 		Line:     p.pos.Line,
    116 		LastKey:  p.current(),
    117 	})
    118 }
    119 
    120 func (p *parser) next() item {
    121 	it := p.lx.nextItem()
    122 	//fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val)
    123 	if it.typ == itemError {
    124 		if it.err != nil {
    125 			panic(ParseError{
    126 				Position: it.pos,
    127 				Line:     it.pos.Line,
    128 				LastKey:  p.current(),
    129 				err:      it.err,
    130 			})
    131 		}
    132 
    133 		p.panicItemf(it, "%s", it.val)
    134 	}
    135 	return it
    136 }
    137 
    138 func (p *parser) nextPos() item {
    139 	it := p.next()
    140 	p.pos = it.pos
    141 	return it
    142 }
    143 
    144 func (p *parser) bug(format string, v ...any) {
    145 	panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
    146 }
    147 
    148 func (p *parser) expect(typ itemType) item {
    149 	it := p.next()
    150 	p.assertEqual(typ, it.typ)
    151 	return it
    152 }
    153 
    154 func (p *parser) assertEqual(expected, got itemType) {
    155 	if expected != got {
    156 		p.bug("Expected '%s' but got '%s'.", expected, got)
    157 	}
    158 }
    159 
        // topLevel handles one item that opens a top-level construct: a comment, a
        // [table] header, an [[array of tables]] header, or a key/value pair. Any
        // other item type is reported through p.bug.
    160 func (p *parser) topLevel(item item) {
    161 	switch item.typ {
    162 	case itemCommentStart: // # ..
    163 		p.expect(itemText)
    164 	case itemTableStart: // [ .. ]
    165 		name := p.nextPos()
    166 
        		// Collect every part of the (possibly dotted) table name.
    167 		var key Key
    168 		for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
    169 			key = append(key, p.keyString(name))
    170 		}
    171 		p.assertEqual(itemTableEnd, name.typ)
    172 
        		// Make the table the current context, then record its type (the
        		// empty key makes setType apply to the context itself) and order.
    173 		p.addContext(key, false)
    174 		p.setType("", tomlHash, item.pos)
    175 		p.ordered = append(p.ordered, key)
    176 	case itemArrayTableStart: // [[ .. ]]
    177 		name := p.nextPos()
    178 
        		// Collect every part of the (possibly dotted) table name.
    179 		var key Key
    180 		for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
    181 			key = append(key, p.keyString(name))
    182 		}
    183 		p.assertEqual(itemArrayTableEnd, name.typ)
    184 
        		// Same as for [table], but addContext appends a new table to the
        		// array stored under the key.
    185 		p.addContext(key, true)
    186 		p.setType("", tomlArrayHash, item.pos)
    187 		p.ordered = append(p.ordered, key)
    188 	case itemKeyStart: // key = ..
        		// Remember the context so it can be restored after a dotted key
        		// has temporarily extended it.
    189 		outerContext := p.context
    190 		/// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
    191 		k := p.nextPos()
    192 		var key Key
    193 		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
    194 			key = append(key, p.keyString(k))
    195 		}
    196 		p.assertEqual(itemKeyEnd, k.typ)
    197 
    198 		/// The current key is the last part.
    199 		p.currentKey = key.last()
    200 
    201 		/// All the other parts (if any) are the context; need to set each part
    202 		/// as implicit.
    203 		context := key.parent()
    204 		for i := range context {
        			// addImplicitContext updates p.context, so each iteration
        			// extends the context by one more key part.
    205 			p.addImplicitContext(append(p.context, context[i:i+1]...))
    206 		}
    207 		p.ordered = append(p.ordered, p.context.add(p.currentKey))
    208 
    209 		/// Set value.
    210 		vItem := p.next()
    211 		val, typ := p.value(vItem, false)
    212 		p.setValue(p.currentKey, val)
    213 		p.setType(p.currentKey, typ, vItem.pos)
    214 
    215 		/// Remove the context we added (preserving any context from [tbl] lines).
    216 		p.context = outerContext
    217 		p.currentKey = ""
    218 	default:
    219 		p.bug("Unexpected type at top level: %s", item.typ)
    220 	}
    221 }
    222 
    223 // Gets a string for a key (or part of a key in a table name).
    224 func (p *parser) keyString(it item) string {
    225 	switch it.typ {
    226 	case itemText:
    227 		return it.val
    228 	case itemString, itemStringEsc, itemMultilineString,
    229 		itemRawString, itemRawMultilineString:
    230 		s, _ := p.value(it, false)
    231 		return s.(string)
    232 	default:
    233 		p.bug("Unexpected key type: %s", it.typ)
    234 	}
    235 	panic("unreachable")
    236 }
    237 
        // datetimeRepl normalizes a datetime string before it is matched against the
        // layouts in dtTypes: lower-case "z" and "t" become upper-case, and a space
        // becomes "T".
    238 var datetimeRepl = strings.NewReplacer(
    239 	"z", "Z",
    240 	"t", "T",
    241 	" ", "T")
    242 
    243 // value translates an expected value from the lexer into a Go value wrapped
    244 // as an empty interface.
    245 func (p *parser) value(it item, parentIsArray bool) (any, tomlType) {
    246 	switch it.typ {
    247 	case itemString:
    248 		return it.val, p.typeOfPrimitive(it)
    249 	case itemStringEsc:
    250 		return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
    251 	case itemMultilineString:
    252 		return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
    253 	case itemRawString:
    254 		return it.val, p.typeOfPrimitive(it)
    255 	case itemRawMultilineString:
    256 		return stripFirstNewline(it.val), p.typeOfPrimitive(it)
    257 	case itemInteger:
    258 		return p.valueInteger(it)
    259 	case itemFloat:
    260 		return p.valueFloat(it)
    261 	case itemBool:
    262 		switch it.val {
    263 		case "true":
    264 			return true, p.typeOfPrimitive(it)
    265 		case "false":
    266 			return false, p.typeOfPrimitive(it)
    267 		default:
    268 			p.bug("Expected boolean value, but got '%s'.", it.val)
    269 		}
    270 	case itemDatetime:
    271 		return p.valueDatetime(it)
    272 	case itemArray:
    273 		return p.valueArray(it)
    274 	case itemInlineTableStart:
    275 		return p.valueInlineTable(it, parentIsArray)
    276 	default:
    277 		p.bug("Unexpected value type: %s", it.typ)
    278 	}
    279 	panic("unreachable")
    280 }
    281 
    282 func (p *parser) valueInteger(it item) (any, tomlType) {
    283 	if !numUnderscoresOK(it.val) {
    284 		p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
    285 	}
    286 	if numHasLeadingZero(it.val) {
    287 		p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
    288 	}
    289 
    290 	num, err := strconv.ParseInt(it.val, 0, 64)
    291 	if err != nil {
    292 		// Distinguish integer values. Normally, it'd be a bug if the lexer
    293 		// provides an invalid integer, but it's possible that the number is
    294 		// out of range of valid values (which the lexer cannot determine).
    295 		// So mark the former as a bug but the latter as a legitimate user
    296 		// error.
    297 		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
    298 			p.panicErr(it, errParseRange{i: it.val, size: "int64"})
    299 		} else {
    300 			p.bug("Expected integer value, but got '%s'.", it.val)
    301 		}
    302 	}
    303 	return num, p.typeOfPrimitive(it)
    304 }
    305 
    306 func (p *parser) valueFloat(it item) (any, tomlType) {
    307 	parts := strings.FieldsFunc(it.val, func(r rune) bool {
    308 		switch r {
    309 		case '.', 'e', 'E':
    310 			return true
    311 		}
    312 		return false
    313 	})
    314 	for _, part := range parts {
    315 		if !numUnderscoresOK(part) {
    316 			p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
    317 		}
    318 	}
    319 	if len(parts) > 0 && numHasLeadingZero(parts[0]) {
    320 		p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
    321 	}
    322 	if !numPeriodsOK(it.val) {
    323 		// As a special case, numbers like '123.' or '1.e2',
    324 		// which are valid as far as Go/strconv are concerned,
    325 		// must be rejected because TOML says that a fractional
    326 		// part consists of '.' followed by 1+ digits.
    327 		p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
    328 	}
    329 	val := strings.Replace(it.val, "_", "", -1)
    330 	signbit := false
    331 	if val == "+nan" || val == "-nan" {
    332 		signbit = val == "-nan"
    333 		val = "nan"
    334 	}
    335 	num, err := strconv.ParseFloat(val, 64)
    336 	if err != nil {
    337 		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
    338 			p.panicErr(it, errParseRange{i: it.val, size: "float64"})
    339 		} else {
    340 			p.panicItemf(it, "Invalid float value: %q", it.val)
    341 		}
    342 	}
    343 	if signbit {
    344 		num = math.Copysign(num, -1)
    345 	}
    346 	return num, p.typeOfPrimitive(it)
    347 }
    348 
        // dtTypes lists the datetime layouts that valueDatetime tries, in order.
        //
        // fmt is the layout and zone the location, both handed to
        // time.ParseInLocation. Entries with next set are skipped unless the parser's
        // tomlNext flag is on.
    349 var dtTypes = []struct {
    350 	fmt  string
    351 	zone *time.Location
    352 	next bool
    353 }{
    354 	{time.RFC3339Nano, time.Local, false},
    355 	{"2006-01-02T15:04:05.999999999", internal.LocalDatetime, false},
    356 	{"2006-01-02", internal.LocalDate, false},
    357 	{"15:04:05.999999999", internal.LocalTime, false},
    358 
    359 	// tomlNext
    360 	{"2006-01-02T15:04Z07:00", time.Local, true},
    361 	{"2006-01-02T15:04", internal.LocalDatetime, true},
    362 	{"15:04", internal.LocalTime, true},
    363 }
    364 
    365 func (p *parser) valueDatetime(it item) (any, tomlType) {
    366 	it.val = datetimeRepl.Replace(it.val)
    367 	var (
    368 		t   time.Time
    369 		ok  bool
    370 		err error
    371 	)
    372 	for _, dt := range dtTypes {
    373 		if dt.next && !p.tomlNext {
    374 			continue
    375 		}
    376 		t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
    377 		if err == nil {
    378 			if missingLeadingZero(it.val, dt.fmt) {
    379 				p.panicErr(it, errParseDate{it.val})
    380 			}
    381 			ok = true
    382 			break
    383 		}
    384 	}
    385 	if !ok {
    386 		p.panicErr(it, errParseDate{it.val})
    387 	}
    388 	return t, p.typeOfPrimitive(it)
    389 }
    390 
    391 // Go's time.Parse() will accept numbers without a leading zero; there isn't any
    392 // way to require it. https://github.com/golang/go/issues/29911
    393 //
    394 // Depend on the fact that the separators (- and :) should always be at the same
    395 // location.
    396 func missingLeadingZero(d, l string) bool {
    397 	for i, c := range []byte(l) {
    398 		if c == '.' || c == 'Z' {
    399 			return false
    400 		}
    401 		if (c < '0' || c > '9') && d[i] != c {
    402 			return true
    403 		}
    404 	}
    405 	return false
    406 }
    407 
    408 func (p *parser) valueArray(it item) (any, tomlType) {
    409 	p.setType(p.currentKey, tomlArray, it.pos)
    410 
    411 	var (
    412 		// Initialize to a non-nil slice to make it consistent with how S = []
    413 		// decodes into a non-nil slice inside something like struct { S
    414 		// []string }. See #338
    415 		array = make([]any, 0, 2)
    416 	)
    417 	for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
    418 		if it.typ == itemCommentStart {
    419 			p.expect(itemText)
    420 			continue
    421 		}
    422 
    423 		val, typ := p.value(it, true)
    424 		array = append(array, val)
    425 
    426 		// XXX: type isn't used here, we need it to record the accurate type
    427 		// information.
    428 		//
    429 		// Not entirely sure how to best store this; could use "key[0]",
    430 		// "key[1]" notation, or maybe store it on the Array type?
    431 		_ = typ
    432 	}
    433 	return array, tomlArray
    434 }
    435 
        // valueInlineTable reads the key/value pairs of an inline table up to
        // itemInlineTableEnd and returns them as a map[string]any with type tomlHash.
        //
        // While reading, the key the table is assigned to is pushed onto the parser
        // context, so nested keys are also registered through setValue/setType. The
        // caller's context and current key are restored before returning.
        // parentIsArray is passed on to addContext as its array flag.
    436 func (p *parser) valueInlineTable(it item, parentIsArray bool) (any, tomlType) {
    437 	var (
    438 		topHash      = make(map[string]any)
    439 		outerContext = p.context
    440 		outerKey     = p.currentKey
    441 	)
    442 
        	// The table's own key becomes part of the context for its members.
    443 	p.context = append(p.context, p.currentKey)
    444 	prevContext := p.context
    445 	p.currentKey = ""
    446 
    447 	p.addImplicit(p.context)
    448 	p.addContext(p.context, parentIsArray)
    449 
    450 	/// Loop over all table key/value pairs.
    451 	for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
    452 		if it.typ == itemCommentStart {
    453 			p.expect(itemText)
    454 			continue
    455 		}
    456 
    457 		/// Read all key parts.
    458 		k := p.nextPos()
    459 		var key Key
    460 		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
    461 			key = append(key, p.keyString(k))
    462 		}
    463 		p.assertEqual(itemKeyEnd, k.typ)
    464 
    465 		/// The current key is the last part.
    466 		p.currentKey = key.last()
    467 
    468 		/// All the other parts (if any) are the context; need to set each part
    469 		/// as implicit.
    470 		context := key.parent()
    471 		for i := range context {
    472 			p.addImplicitContext(append(p.context, context[i:i+1]...))
    473 		}
    474 		p.ordered = append(p.ordered, p.context.add(p.currentKey))
    475 
    476 		/// Set the value.
        		// Note that the position recorded by setType is that of `it`, the
        		// item that opened this pair, not that of the value item.
    477 		val, typ := p.value(p.next(), false)
    478 		p.setValue(p.currentKey, val)
    479 		p.setType(p.currentKey, typ, it.pos)
    480 
        		// Mirror the value into the returned map, creating one nested map
        		// per dotted key part on the way down.
    481 		hash := topHash
    482 		for _, c := range context {
    483 			h, ok := hash[c]
    484 			if !ok {
    485 				h = make(map[string]any)
    486 				hash[c] = h
    487 			}
    488 			hash, ok = h.(map[string]any)
    489 			if !ok {
    490 				p.panicf("%q is not a table", p.context)
    491 			}
    492 		}
    493 		hash[p.currentKey] = val
    494 
    495 		/// Restore context.
    496 		p.context = prevContext
    497 	}
    498 	p.context = outerContext
    499 	p.currentKey = outerKey
    500 	return topHash, tomlHash
    501 }
    502 
    503 // numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
    504 // +/- signs, and base prefixes.
    505 func numHasLeadingZero(s string) bool {
    506 	if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x
    507 		return true
    508 	}
    509 	if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
    510 		return true
    511 	}
    512 	return false
    513 }
    514 
    515 // numUnderscoresOK checks whether each underscore in s is surrounded by
    516 // characters that are not underscores.
    517 func numUnderscoresOK(s string) bool {
    518 	switch s {
    519 	case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
    520 		return true
    521 	}
    522 	accept := false
    523 	for _, r := range s {
    524 		if r == '_' {
    525 			if !accept {
    526 				return false
    527 			}
    528 		}
    529 
    530 		// isHexis a superset of all the permissable characters surrounding an
    531 		// underscore.
    532 		accept = isHex(r)
    533 	}
    534 	return accept
    535 }
    536 
    537 // numPeriodsOK checks whether every period in s is followed by a digit.
    538 func numPeriodsOK(s string) bool {
    539 	period := false
    540 	for _, r := range s {
    541 		if period && !isDigit(r) {
    542 			return false
    543 		}
    544 		period = r == '.'
    545 	}
    546 	return !period
    547 }
    548 
    549 // addContext sets the current context of the parser to key, where the context
    550 // is either a hash or an array of hashes, depending on the `array` parameter.
    551 //
    552 // Establishing the context also makes sure that the key isn't a duplicate, and
    553 // will create implicit hashes automatically.
        //
        // On return p.context holds the full key: the parent parts followed by
        // key.last().
    554 func (p *parser) addContext(key Key, array bool) {
    555 	/// Always start at the top level and drill down for our context.
    556 	hashContext := p.mapping
    557 	keyContext := make(Key, 0, len(key)-1)
    558 
    559 	/// We only need implicit hashes for the parents.
    560 	for _, k := range key.parent() {
    561 		_, ok := hashContext[k]
    562 		keyContext = append(keyContext, k)
    563 
    564 		// No key? Make an implicit hash and move on.
    565 		if !ok {
    566 			p.addImplicit(keyContext)
    567 			hashContext[k] = make(map[string]any)
    568 		}
    569 
    570 		// If the hash context is actually an array of tables, then set
    571 		// the hash context to the last element in that array.
    572 		//
    573 		// Otherwise, it better be a table, since this MUST be a key group (by
    574 		// virtue of it not being the last element in a key).
    575 		switch t := hashContext[k].(type) {
    576 		case []map[string]any:
    577 			hashContext = t[len(t)-1]
    578 		case map[string]any:
    579 			hashContext = t
    580 		default:
        			// NOTE(review): this branch is taken when the existing value is
        			// neither a table nor an array of tables, which does not match
        			// the wording of the message below.
    581 			p.panicf("Key '%s' was already created as a hash.", keyContext)
    582 		}
    583 	}
    584 
        	// The context is set to the parent first, because setValue below
        	// resolves the key relative to p.context.
    585 	p.context = keyContext
    586 	if array {
    587 		// If this is the first element for this array, then allocate a new
    588 		// list of tables for it.
    589 		k := key.last()
    590 		if _, ok := hashContext[k]; !ok {
    591 			hashContext[k] = make([]map[string]any, 0, 4)
    592 		}
    593 
    594 		// Add a new table. But make sure the key hasn't already been used
    595 		// for something else.
    596 		if hash, ok := hashContext[k].([]map[string]any); ok {
    597 			hashContext[k] = append(hash, make(map[string]any))
    598 		} else {
    599 			p.panicf("Key '%s' was already created and cannot be used as an array.", key)
    600 		}
    601 	} else {
        		// setValue performs the duplicate-key check for plain tables.
    602 		p.setValue(key.last(), make(map[string]any))
    603 	}
    604 	p.context = append(p.context, key.last())
    605 }
    606 
    607 // setValue sets the given key to the given value in the current context.
    608 // It will make sure that the key hasn't already been defined, accounting for
    609 // implicit key groups.
        //
        // It is a bug for part of the current context to be missing from the
        // mapping; a context part that is neither a table nor an array of tables, or
        // a concrete redefinition of the key, is reported as a parse error.
    610 func (p *parser) setValue(key string, value any) {
    611 	var (
    612 		tmpHash    any
    613 		ok         bool
    614 		hash       = p.mapping
    615 		keyContext = make(Key, 0, len(p.context)+1)
    616 	)
        	// Walk from the top-level mapping down to the table named by p.context.
    617 	for _, k := range p.context {
    618 		keyContext = append(keyContext, k)
    619 		if tmpHash, ok = hash[k]; !ok {
    620 			p.bug("Context for key '%s' has not been established.", keyContext)
    621 		}
    622 		switch t := tmpHash.(type) {
    623 		case []map[string]any:
    624 			// The context is a table of hashes. Pick the most recent table
    625 			// defined as the current hash.
    626 			hash = t[len(t)-1]
    627 		case map[string]any:
    628 			hash = t
    629 		default:
    630 			p.panicf("Key '%s' has already been defined.", keyContext)
    631 		}
    632 	}
    633 	keyContext = append(keyContext, key)
    634 
    635 	if _, ok := hash[key]; ok {
    636 		// Normally redefining keys isn't allowed, but the key could have been
    637 		// defined implicitly and it's allowed to be redefined concretely. (See
    638 		// the `valid/implicit-and-explicit-after.toml` in toml-test)
    639 		//
    640 		// But we have to make sure to stop marking it as an implicit. (So that
    641 		// another redefinition provokes an error.)
    642 		//
    643 		// Note that since it has already been defined (as a hash), we don't
    644 		// want to overwrite it. So our business is done.
        		//
        		// The exception is a key whose recorded type is tomlArray: that one
        		// IS overwritten with the new value.
    645 		if p.isArray(keyContext) {
    646 			p.removeImplicit(keyContext)
    647 			hash[key] = value
    648 			return
    649 		}
    650 		if p.isImplicit(keyContext) {
    651 			p.removeImplicit(keyContext)
    652 			return
    653 		}
    654 		// Otherwise, we have a concrete key trying to override a previous key,
    655 		// which is *always* wrong.
    656 		p.panicf("Key '%s' has already been defined.", keyContext)
    657 	}
    658 
    659 	hash[key] = value
    660 }
    661 
    662 // setType sets the type of a particular value at a given key. It should be
    663 // called immediately AFTER setValue.
    664 //
    665 // Note that if `key` is empty, then the type given will be applied to the
    666 // current context (which is either a table or an array of tables).
    667 func (p *parser) setType(key string, typ tomlType, pos Position) {
    668 	keyContext := make(Key, 0, len(p.context)+1)
    669 	keyContext = append(keyContext, p.context...)
    670 	if len(key) > 0 { // allow type setting for hashes
    671 		keyContext = append(keyContext, key)
    672 	}
    673 	// Special case to make empty keys ("" = 1) work.
    674 	// Without it it will set "" rather than `""`.
    675 	// TODO: why is this needed? And why is this only needed here?
    676 	if len(keyContext) == 0 {
    677 		keyContext = Key{""}
    678 	}
    679 	p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
    680 }
    681 
    682 // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
    683 // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
    684 func (p *parser) addImplicit(key Key)        { p.implicits[key.String()] = struct{}{} }
    685 func (p *parser) removeImplicit(key Key)     { delete(p.implicits, key.String()) }
    686 func (p *parser) isImplicit(key Key) bool    { _, ok := p.implicits[key.String()]; return ok }
    687 func (p *parser) isArray(key Key) bool       { return p.keyInfo[key.String()].tomlType == tomlArray }
    688 func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) }
    689 
    690 // current returns the full key name of the current context.
    691 func (p *parser) current() string {
    692 	if len(p.currentKey) == 0 {
    693 		return p.context.String()
    694 	}
    695 	if len(p.context) == 0 {
    696 		return p.currentKey
    697 	}
    698 	return fmt.Sprintf("%s.%s", p.context, p.currentKey)
    699 }
    700 
    701 func stripFirstNewline(s string) string {
    702 	if len(s) > 0 && s[0] == '\n' {
    703 		return s[1:]
    704 	}
    705 	if len(s) > 1 && s[0] == '\r' && s[1] == '\n' {
    706 		return s[2:]
    707 	}
    708 	return s
    709 }
    710 
    711 // stripEscapedNewlines removes whitespace after line-ending backslashes in
    712 // multiline strings.
    713 //
    714 // A line-ending backslash is an unescaped \ followed only by whitespace until
    715 // the next newline. After a line-ending backslash, all whitespace is removed
    716 // until the next non-whitespace character.
        //
        // The backslash itself is removed as well. All other escape sequences are
        // left in place.
    717 func (p *parser) stripEscapedNewlines(s string) string {
    718 	var (
    719 		b strings.Builder
    720 		i int
    721 	)
    722 	b.Grow(len(s))
    723 	for {
        		// Find the next backslash at or after offset i in what is left of s.
    724 		ix := strings.Index(s[i:], `\`)
    725 		if ix < 0 {
        			// No more backslashes: the remainder is copied verbatim.
    726 			b.WriteString(s)
    727 			return b.String()
    728 		}
    729 		i += ix
    730 
    731 		if len(s) > i+1 && s[i+1] == '\\' {
    732 			// Escaped backslash.
    733 			i += 2
    734 			continue
    735 		}
    736 		// Scan until the next non-whitespace.
    737 		j := i + 1
    738 	whitespaceLoop:
    739 		for ; j < len(s); j++ {
    740 			switch s[j] {
    741 			case ' ', '\t', '\r', '\n':
    742 			default:
    743 				break whitespaceLoop
    744 			}
    745 		}
    746 		if j == i+1 {
    747 			// Not a whitespace escape.
    748 			i++
    749 			continue
    750 		}
    751 		if !strings.Contains(s[i:j], "\n") {
    752 			// This is not a line-ending backslash. (It's a bad escape sequence,
    753 			// but we can let replaceEscapes catch it.)
    754 			i++
    755 			continue
    756 		}
        		// Keep everything before the backslash, then continue with the
        		// text after the whitespace run (dropping backslash and whitespace).
    757 		b.WriteString(s[:i])
    758 		s = s[j:]
    759 		i = 0
    760 	}
    761 }
    762 
    763 func (p *parser) replaceEscapes(it item, str string) string {
    764 	var (
    765 		b    strings.Builder
    766 		skip = 0
    767 	)
    768 	b.Grow(len(str))
    769 	for i, c := range str {
    770 		if skip > 0 {
    771 			skip--
    772 			continue
    773 		}
    774 		if c != '\\' {
    775 			b.WriteRune(c)
    776 			continue
    777 		}
    778 
    779 		if i >= len(str) {
    780 			p.bug("Escape sequence at end of string.")
    781 			return ""
    782 		}
    783 		switch str[i+1] {
    784 		default:
    785 			p.bug("Expected valid escape code after \\, but got %q.", str[i+1])
    786 		case ' ', '\t':
    787 			p.panicItemf(it, "invalid escape: '\\%c'", str[i+1])
    788 		case 'b':
    789 			b.WriteByte(0x08)
    790 			skip = 1
    791 		case 't':
    792 			b.WriteByte(0x09)
    793 			skip = 1
    794 		case 'n':
    795 			b.WriteByte(0x0a)
    796 			skip = 1
    797 		case 'f':
    798 			b.WriteByte(0x0c)
    799 			skip = 1
    800 		case 'r':
    801 			b.WriteByte(0x0d)
    802 			skip = 1
    803 		case 'e':
    804 			if p.tomlNext {
    805 				b.WriteByte(0x1b)
    806 				skip = 1
    807 			}
    808 		case '"':
    809 			b.WriteByte(0x22)
    810 			skip = 1
    811 		case '\\':
    812 			b.WriteByte(0x5c)
    813 			skip = 1
    814 		// The lexer guarantees the correct number of characters are present;
    815 		// don't need to check here.
    816 		case 'x':
    817 			if p.tomlNext {
    818 				escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4])
    819 				b.WriteRune(escaped)
    820 				skip = 3
    821 			}
    822 		case 'u':
    823 			escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6])
    824 			b.WriteRune(escaped)
    825 			skip = 5
    826 		case 'U':
    827 			escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10])
    828 			b.WriteRune(escaped)
    829 			skip = 9
    830 		}
    831 	}
    832 	return b.String()
    833 }
    834 
    835 func (p *parser) asciiEscapeToUnicode(it item, s string) rune {
    836 	hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
    837 	if err != nil {
    838 		p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
    839 	}
    840 	if !utf8.ValidRune(rune(hex)) {
    841 		p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
    842 	}
    843 	return rune(hex)
    844 }
	mirror Mirror free and open-source projects you like with minimal effort
	git clone git://git.server.ky/slackcoder/mirror
	Log \| Files \| Refs \| README