parse.go (22278B)
1 package toml 2 3 import ( 4 "fmt" 5 "math" 6 "os" 7 "strconv" 8 "strings" 9 "time" 10 "unicode/utf8" 11 12 "github.com/BurntSushi/toml/internal" 13 ) 14 15 type parser struct { 16 lx *lexer 17 context Key // Full key for the current hash in scope. 18 currentKey string // Base key name for everything except hashes. 19 pos Position // Current position in the TOML file. 20 tomlNext bool 21 22 ordered []Key // List of keys in the order that they appear in the TOML data. 23 24 keyInfo map[string]keyInfo // Map keyname → info about the TOML key. 25 mapping map[string]any // Map keyname → key value. 26 implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names"). 27 } 28 29 type keyInfo struct { 30 pos Position 31 tomlType tomlType 32 } 33 34 func parse(data string) (p *parser, err error) { 35 _, tomlNext := os.LookupEnv("BURNTSUSHI_TOML_110") 36 37 defer func() { 38 if r := recover(); r != nil { 39 if pErr, ok := r.(ParseError); ok { 40 pErr.input = data 41 err = pErr 42 return 43 } 44 panic(r) 45 } 46 }() 47 48 // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString() 49 // which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add 50 // it anyway. 51 if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16 52 data = data[2:] 53 //lint:ignore S1017 https://github.com/dominikh/go-tools/issues/1447 54 } else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8 55 data = data[3:] 56 } 57 58 // Examine first few bytes for NULL bytes; this probably means it's a UTF-16 59 // file (second byte in surrogate pair being NULL). Again, do this here to 60 // avoid having to deal with UTF-8/16 stuff in the lexer. 61 ex := 6 62 if len(data) < 6 { 63 ex = len(data) 64 } 65 if i := strings.IndexRune(data[:ex], 0); i > -1 { 66 return nil, ParseError{ 67 Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8", 68 Position: Position{Line: 1, Start: i, Len: 1}, 69 Line: 1, 70 input: data, 71 } 72 } 73 74 p = &parser{ 75 keyInfo: make(map[string]keyInfo), 76 mapping: make(map[string]any), 77 lx: lex(data, tomlNext), 78 ordered: make([]Key, 0), 79 implicits: make(map[string]struct{}), 80 tomlNext: tomlNext, 81 } 82 for { 83 item := p.next() 84 if item.typ == itemEOF { 85 break 86 } 87 p.topLevel(item) 88 } 89 90 return p, nil 91 } 92 93 func (p *parser) panicErr(it item, err error) { 94 panic(ParseError{ 95 err: err, 96 Position: it.pos, 97 Line: it.pos.Len, 98 LastKey: p.current(), 99 }) 100 } 101 102 func (p *parser) panicItemf(it item, format string, v ...any) { 103 panic(ParseError{ 104 Message: fmt.Sprintf(format, v...), 105 Position: it.pos, 106 Line: it.pos.Len, 107 LastKey: p.current(), 108 }) 109 } 110 111 func (p *parser) panicf(format string, v ...any) { 112 panic(ParseError{ 113 Message: fmt.Sprintf(format, v...), 114 Position: p.pos, 115 Line: p.pos.Line, 116 LastKey: p.current(), 117 }) 118 } 119 120 func (p *parser) next() item { 121 it := p.lx.nextItem() 122 //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val) 123 if it.typ == itemError { 124 if it.err != nil { 125 panic(ParseError{ 126 Position: it.pos, 127 Line: it.pos.Line, 128 LastKey: p.current(), 129 err: it.err, 130 }) 131 } 132 133 p.panicItemf(it, "%s", it.val) 134 } 135 return it 136 } 137 138 func (p *parser) nextPos() item { 139 it := p.next() 140 p.pos = it.pos 141 return it 142 } 143 144 func (p *parser) bug(format string, v ...any) { 145 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) 146 } 147 148 func (p *parser) expect(typ itemType) item { 149 it := p.next() 150 p.assertEqual(typ, it.typ) 151 return it 152 } 153 154 func (p *parser) assertEqual(expected, got itemType) { 155 if expected != got { 156 p.bug("Expected '%s' but got '%s'.", expected, got) 157 } 158 } 159 160 func (p *parser) topLevel(item item) { 161 switch item.typ { 162 case itemCommentStart: // # .. 163 p.expect(itemText) 164 case itemTableStart: // [ .. ] 165 name := p.nextPos() 166 167 var key Key 168 for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() { 169 key = append(key, p.keyString(name)) 170 } 171 p.assertEqual(itemTableEnd, name.typ) 172 173 p.addContext(key, false) 174 p.setType("", tomlHash, item.pos) 175 p.ordered = append(p.ordered, key) 176 case itemArrayTableStart: // [[ .. ]] 177 name := p.nextPos() 178 179 var key Key 180 for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() { 181 key = append(key, p.keyString(name)) 182 } 183 p.assertEqual(itemArrayTableEnd, name.typ) 184 185 p.addContext(key, true) 186 p.setType("", tomlArrayHash, item.pos) 187 p.ordered = append(p.ordered, key) 188 case itemKeyStart: // key = .. 189 outerContext := p.context 190 /// Read all the key parts (e.g. 'a' and 'b' in 'a.b') 191 k := p.nextPos() 192 var key Key 193 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 194 key = append(key, p.keyString(k)) 195 } 196 p.assertEqual(itemKeyEnd, k.typ) 197 198 /// The current key is the last part. 199 p.currentKey = key.last() 200 201 /// All the other parts (if any) are the context; need to set each part 202 /// as implicit. 203 context := key.parent() 204 for i := range context { 205 p.addImplicitContext(append(p.context, context[i:i+1]...)) 206 } 207 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 208 209 /// Set value. 210 vItem := p.next() 211 val, typ := p.value(vItem, false) 212 p.setValue(p.currentKey, val) 213 p.setType(p.currentKey, typ, vItem.pos) 214 215 /// Remove the context we added (preserving any context from [tbl] lines). 216 p.context = outerContext 217 p.currentKey = "" 218 default: 219 p.bug("Unexpected type at top level: %s", item.typ) 220 } 221 } 222 223 // Gets a string for a key (or part of a key in a table name). 224 func (p *parser) keyString(it item) string { 225 switch it.typ { 226 case itemText: 227 return it.val 228 case itemString, itemStringEsc, itemMultilineString, 229 itemRawString, itemRawMultilineString: 230 s, _ := p.value(it, false) 231 return s.(string) 232 default: 233 p.bug("Unexpected key type: %s", it.typ) 234 } 235 panic("unreachable") 236 } 237 238 var datetimeRepl = strings.NewReplacer( 239 "z", "Z", 240 "t", "T", 241 " ", "T") 242 243 // value translates an expected value from the lexer into a Go value wrapped 244 // as an empty interface. 245 func (p *parser) value(it item, parentIsArray bool) (any, tomlType) { 246 switch it.typ { 247 case itemString: 248 return it.val, p.typeOfPrimitive(it) 249 case itemStringEsc: 250 return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it) 251 case itemMultilineString: 252 return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it) 253 case itemRawString: 254 return it.val, p.typeOfPrimitive(it) 255 case itemRawMultilineString: 256 return stripFirstNewline(it.val), p.typeOfPrimitive(it) 257 case itemInteger: 258 return p.valueInteger(it) 259 case itemFloat: 260 return p.valueFloat(it) 261 case itemBool: 262 switch it.val { 263 case "true": 264 return true, p.typeOfPrimitive(it) 265 case "false": 266 return false, p.typeOfPrimitive(it) 267 default: 268 p.bug("Expected boolean value, but got '%s'.", it.val) 269 } 270 case itemDatetime: 271 return p.valueDatetime(it) 272 case itemArray: 273 return p.valueArray(it) 274 case itemInlineTableStart: 275 return p.valueInlineTable(it, parentIsArray) 276 default: 277 p.bug("Unexpected value type: %s", it.typ) 278 } 279 panic("unreachable") 280 } 281 282 func (p *parser) valueInteger(it item) (any, tomlType) { 283 if !numUnderscoresOK(it.val) { 284 p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val) 285 } 286 if numHasLeadingZero(it.val) { 287 p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val) 288 } 289 290 num, err := strconv.ParseInt(it.val, 0, 64) 291 if err != nil { 292 // Distinguish integer values. Normally, it'd be a bug if the lexer 293 // provides an invalid integer, but it's possible that the number is 294 // out of range of valid values (which the lexer cannot determine). 295 // So mark the former as a bug but the latter as a legitimate user 296 // error. 297 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 298 p.panicErr(it, errParseRange{i: it.val, size: "int64"}) 299 } else { 300 p.bug("Expected integer value, but got '%s'.", it.val) 301 } 302 } 303 return num, p.typeOfPrimitive(it) 304 } 305 306 func (p *parser) valueFloat(it item) (any, tomlType) { 307 parts := strings.FieldsFunc(it.val, func(r rune) bool { 308 switch r { 309 case '.', 'e', 'E': 310 return true 311 } 312 return false 313 }) 314 for _, part := range parts { 315 if !numUnderscoresOK(part) { 316 p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val) 317 } 318 } 319 if len(parts) > 0 && numHasLeadingZero(parts[0]) { 320 p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val) 321 } 322 if !numPeriodsOK(it.val) { 323 // As a special case, numbers like '123.' or '1.e2', 324 // which are valid as far as Go/strconv are concerned, 325 // must be rejected because TOML says that a fractional 326 // part consists of '.' followed by 1+ digits. 327 p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val) 328 } 329 val := strings.Replace(it.val, "_", "", -1) 330 signbit := false 331 if val == "+nan" || val == "-nan" { 332 signbit = val == "-nan" 333 val = "nan" 334 } 335 num, err := strconv.ParseFloat(val, 64) 336 if err != nil { 337 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 338 p.panicErr(it, errParseRange{i: it.val, size: "float64"}) 339 } else { 340 p.panicItemf(it, "Invalid float value: %q", it.val) 341 } 342 } 343 if signbit { 344 num = math.Copysign(num, -1) 345 } 346 return num, p.typeOfPrimitive(it) 347 } 348 349 var dtTypes = []struct { 350 fmt string 351 zone *time.Location 352 next bool 353 }{ 354 {time.RFC3339Nano, time.Local, false}, 355 {"2006-01-02T15:04:05.999999999", internal.LocalDatetime, false}, 356 {"2006-01-02", internal.LocalDate, false}, 357 {"15:04:05.999999999", internal.LocalTime, false}, 358 359 // tomlNext 360 {"2006-01-02T15:04Z07:00", time.Local, true}, 361 {"2006-01-02T15:04", internal.LocalDatetime, true}, 362 {"15:04", internal.LocalTime, true}, 363 } 364 365 func (p *parser) valueDatetime(it item) (any, tomlType) { 366 it.val = datetimeRepl.Replace(it.val) 367 var ( 368 t time.Time 369 ok bool 370 err error 371 ) 372 for _, dt := range dtTypes { 373 if dt.next && !p.tomlNext { 374 continue 375 } 376 t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone) 377 if err == nil { 378 if missingLeadingZero(it.val, dt.fmt) { 379 p.panicErr(it, errParseDate{it.val}) 380 } 381 ok = true 382 break 383 } 384 } 385 if !ok { 386 p.panicErr(it, errParseDate{it.val}) 387 } 388 return t, p.typeOfPrimitive(it) 389 } 390 391 // Go's time.Parse() will accept numbers without a leading zero; there isn't any 392 // way to require it. https://github.com/golang/go/issues/29911 393 // 394 // Depend on the fact that the separators (- and :) should always be at the same 395 // location. 396 func missingLeadingZero(d, l string) bool { 397 for i, c := range []byte(l) { 398 if c == '.' || c == 'Z' { 399 return false 400 } 401 if (c < '0' || c > '9') && d[i] != c { 402 return true 403 } 404 } 405 return false 406 } 407 408 func (p *parser) valueArray(it item) (any, tomlType) { 409 p.setType(p.currentKey, tomlArray, it.pos) 410 411 var ( 412 // Initialize to a non-nil slice to make it consistent with how S = [] 413 // decodes into a non-nil slice inside something like struct { S 414 // []string }. See #338 415 array = make([]any, 0, 2) 416 ) 417 for it = p.next(); it.typ != itemArrayEnd; it = p.next() { 418 if it.typ == itemCommentStart { 419 p.expect(itemText) 420 continue 421 } 422 423 val, typ := p.value(it, true) 424 array = append(array, val) 425 426 // XXX: type isn't used here, we need it to record the accurate type 427 // information. 428 // 429 // Not entirely sure how to best store this; could use "key[0]", 430 // "key[1]" notation, or maybe store it on the Array type? 431 _ = typ 432 } 433 return array, tomlArray 434 } 435 436 func (p *parser) valueInlineTable(it item, parentIsArray bool) (any, tomlType) { 437 var ( 438 topHash = make(map[string]any) 439 outerContext = p.context 440 outerKey = p.currentKey 441 ) 442 443 p.context = append(p.context, p.currentKey) 444 prevContext := p.context 445 p.currentKey = "" 446 447 p.addImplicit(p.context) 448 p.addContext(p.context, parentIsArray) 449 450 /// Loop over all table key/value pairs. 451 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { 452 if it.typ == itemCommentStart { 453 p.expect(itemText) 454 continue 455 } 456 457 /// Read all key parts. 458 k := p.nextPos() 459 var key Key 460 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 461 key = append(key, p.keyString(k)) 462 } 463 p.assertEqual(itemKeyEnd, k.typ) 464 465 /// The current key is the last part. 466 p.currentKey = key.last() 467 468 /// All the other parts (if any) are the context; need to set each part 469 /// as implicit. 470 context := key.parent() 471 for i := range context { 472 p.addImplicitContext(append(p.context, context[i:i+1]...)) 473 } 474 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 475 476 /// Set the value. 477 val, typ := p.value(p.next(), false) 478 p.setValue(p.currentKey, val) 479 p.setType(p.currentKey, typ, it.pos) 480 481 hash := topHash 482 for _, c := range context { 483 h, ok := hash[c] 484 if !ok { 485 h = make(map[string]any) 486 hash[c] = h 487 } 488 hash, ok = h.(map[string]any) 489 if !ok { 490 p.panicf("%q is not a table", p.context) 491 } 492 } 493 hash[p.currentKey] = val 494 495 /// Restore context. 496 p.context = prevContext 497 } 498 p.context = outerContext 499 p.currentKey = outerKey 500 return topHash, tomlHash 501 } 502 503 // numHasLeadingZero checks if this number has leading zeroes, allowing for '0', 504 // +/- signs, and base prefixes. 505 func numHasLeadingZero(s string) bool { 506 if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x 507 return true 508 } 509 if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' { 510 return true 511 } 512 return false 513 } 514 515 // numUnderscoresOK checks whether each underscore in s is surrounded by 516 // characters that are not underscores. 517 func numUnderscoresOK(s string) bool { 518 switch s { 519 case "nan", "+nan", "-nan", "inf", "-inf", "+inf": 520 return true 521 } 522 accept := false 523 for _, r := range s { 524 if r == '_' { 525 if !accept { 526 return false 527 } 528 } 529 530 // isHexis a superset of all the permissable characters surrounding an 531 // underscore. 532 accept = isHex(r) 533 } 534 return accept 535 } 536 537 // numPeriodsOK checks whether every period in s is followed by a digit. 538 func numPeriodsOK(s string) bool { 539 period := false 540 for _, r := range s { 541 if period && !isDigit(r) { 542 return false 543 } 544 period = r == '.' 545 } 546 return !period 547 } 548 549 // Set the current context of the parser, where the context is either a hash or 550 // an array of hashes, depending on the value of the `array` parameter. 551 // 552 // Establishing the context also makes sure that the key isn't a duplicate, and 553 // will create implicit hashes automatically. 554 func (p *parser) addContext(key Key, array bool) { 555 /// Always start at the top level and drill down for our context. 556 hashContext := p.mapping 557 keyContext := make(Key, 0, len(key)-1) 558 559 /// We only need implicit hashes for the parents. 560 for _, k := range key.parent() { 561 _, ok := hashContext[k] 562 keyContext = append(keyContext, k) 563 564 // No key? Make an implicit hash and move on. 565 if !ok { 566 p.addImplicit(keyContext) 567 hashContext[k] = make(map[string]any) 568 } 569 570 // If the hash context is actually an array of tables, then set 571 // the hash context to the last element in that array. 572 // 573 // Otherwise, it better be a table, since this MUST be a key group (by 574 // virtue of it not being the last element in a key). 575 switch t := hashContext[k].(type) { 576 case []map[string]any: 577 hashContext = t[len(t)-1] 578 case map[string]any: 579 hashContext = t 580 default: 581 p.panicf("Key '%s' was already created as a hash.", keyContext) 582 } 583 } 584 585 p.context = keyContext 586 if array { 587 // If this is the first element for this array, then allocate a new 588 // list of tables for it. 589 k := key.last() 590 if _, ok := hashContext[k]; !ok { 591 hashContext[k] = make([]map[string]any, 0, 4) 592 } 593 594 // Add a new table. But make sure the key hasn't already been used 595 // for something else. 596 if hash, ok := hashContext[k].([]map[string]any); ok { 597 hashContext[k] = append(hash, make(map[string]any)) 598 } else { 599 p.panicf("Key '%s' was already created and cannot be used as an array.", key) 600 } 601 } else { 602 p.setValue(key.last(), make(map[string]any)) 603 } 604 p.context = append(p.context, key.last()) 605 } 606 607 // setValue sets the given key to the given value in the current context. 608 // It will make sure that the key hasn't already been defined, account for 609 // implicit key groups. 610 func (p *parser) setValue(key string, value any) { 611 var ( 612 tmpHash any 613 ok bool 614 hash = p.mapping 615 keyContext = make(Key, 0, len(p.context)+1) 616 ) 617 for _, k := range p.context { 618 keyContext = append(keyContext, k) 619 if tmpHash, ok = hash[k]; !ok { 620 p.bug("Context for key '%s' has not been established.", keyContext) 621 } 622 switch t := tmpHash.(type) { 623 case []map[string]any: 624 // The context is a table of hashes. Pick the most recent table 625 // defined as the current hash. 626 hash = t[len(t)-1] 627 case map[string]any: 628 hash = t 629 default: 630 p.panicf("Key '%s' has already been defined.", keyContext) 631 } 632 } 633 keyContext = append(keyContext, key) 634 635 if _, ok := hash[key]; ok { 636 // Normally redefining keys isn't allowed, but the key could have been 637 // defined implicitly and it's allowed to be redefined concretely. (See 638 // the `valid/implicit-and-explicit-after.toml` in toml-test) 639 // 640 // But we have to make sure to stop marking it as an implicit. (So that 641 // another redefinition provokes an error.) 642 // 643 // Note that since it has already been defined (as a hash), we don't 644 // want to overwrite it. So our business is done. 645 if p.isArray(keyContext) { 646 p.removeImplicit(keyContext) 647 hash[key] = value 648 return 649 } 650 if p.isImplicit(keyContext) { 651 p.removeImplicit(keyContext) 652 return 653 } 654 // Otherwise, we have a concrete key trying to override a previous key, 655 // which is *always* wrong. 656 p.panicf("Key '%s' has already been defined.", keyContext) 657 } 658 659 hash[key] = value 660 } 661 662 // setType sets the type of a particular value at a given key. It should be 663 // called immediately AFTER setValue. 664 // 665 // Note that if `key` is empty, then the type given will be applied to the 666 // current context (which is either a table or an array of tables). 667 func (p *parser) setType(key string, typ tomlType, pos Position) { 668 keyContext := make(Key, 0, len(p.context)+1) 669 keyContext = append(keyContext, p.context...) 670 if len(key) > 0 { // allow type setting for hashes 671 keyContext = append(keyContext, key) 672 } 673 // Special case to make empty keys ("" = 1) work. 674 // Without it it will set "" rather than `""`. 675 // TODO: why is this needed? And why is this only needed here? 676 if len(keyContext) == 0 { 677 keyContext = Key{""} 678 } 679 p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos} 680 } 681 682 // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and 683 // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly). 684 func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} } 685 func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) } 686 func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok } 687 func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray } 688 func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) } 689 690 // current returns the full key name of the current context. 691 func (p *parser) current() string { 692 if len(p.currentKey) == 0 { 693 return p.context.String() 694 } 695 if len(p.context) == 0 { 696 return p.currentKey 697 } 698 return fmt.Sprintf("%s.%s", p.context, p.currentKey) 699 } 700 701 func stripFirstNewline(s string) string { 702 if len(s) > 0 && s[0] == '\n' { 703 return s[1:] 704 } 705 if len(s) > 1 && s[0] == '\r' && s[1] == '\n' { 706 return s[2:] 707 } 708 return s 709 } 710 711 // stripEscapedNewlines removes whitespace after line-ending backslashes in 712 // multiline strings. 713 // 714 // A line-ending backslash is an unescaped \ followed only by whitespace until 715 // the next newline. After a line-ending backslash, all whitespace is removed 716 // until the next non-whitespace character. 717 func (p *parser) stripEscapedNewlines(s string) string { 718 var ( 719 b strings.Builder 720 i int 721 ) 722 b.Grow(len(s)) 723 for { 724 ix := strings.Index(s[i:], `\`) 725 if ix < 0 { 726 b.WriteString(s) 727 return b.String() 728 } 729 i += ix 730 731 if len(s) > i+1 && s[i+1] == '\\' { 732 // Escaped backslash. 733 i += 2 734 continue 735 } 736 // Scan until the next non-whitespace. 737 j := i + 1 738 whitespaceLoop: 739 for ; j < len(s); j++ { 740 switch s[j] { 741 case ' ', '\t', '\r', '\n': 742 default: 743 break whitespaceLoop 744 } 745 } 746 if j == i+1 { 747 // Not a whitespace escape. 748 i++ 749 continue 750 } 751 if !strings.Contains(s[i:j], "\n") { 752 // This is not a line-ending backslash. (It's a bad escape sequence, 753 // but we can let replaceEscapes catch it.) 754 i++ 755 continue 756 } 757 b.WriteString(s[:i]) 758 s = s[j:] 759 i = 0 760 } 761 } 762 763 func (p *parser) replaceEscapes(it item, str string) string { 764 var ( 765 b strings.Builder 766 skip = 0 767 ) 768 b.Grow(len(str)) 769 for i, c := range str { 770 if skip > 0 { 771 skip-- 772 continue 773 } 774 if c != '\\' { 775 b.WriteRune(c) 776 continue 777 } 778 779 if i >= len(str) { 780 p.bug("Escape sequence at end of string.") 781 return "" 782 } 783 switch str[i+1] { 784 default: 785 p.bug("Expected valid escape code after \\, but got %q.", str[i+1]) 786 case ' ', '\t': 787 p.panicItemf(it, "invalid escape: '\\%c'", str[i+1]) 788 case 'b': 789 b.WriteByte(0x08) 790 skip = 1 791 case 't': 792 b.WriteByte(0x09) 793 skip = 1 794 case 'n': 795 b.WriteByte(0x0a) 796 skip = 1 797 case 'f': 798 b.WriteByte(0x0c) 799 skip = 1 800 case 'r': 801 b.WriteByte(0x0d) 802 skip = 1 803 case 'e': 804 if p.tomlNext { 805 b.WriteByte(0x1b) 806 skip = 1 807 } 808 case '"': 809 b.WriteByte(0x22) 810 skip = 1 811 case '\\': 812 b.WriteByte(0x5c) 813 skip = 1 814 // The lexer guarantees the correct number of characters are present; 815 // don't need to check here. 816 case 'x': 817 if p.tomlNext { 818 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4]) 819 b.WriteRune(escaped) 820 skip = 3 821 } 822 case 'u': 823 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6]) 824 b.WriteRune(escaped) 825 skip = 5 826 case 'U': 827 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10]) 828 b.WriteRune(escaped) 829 skip = 9 830 } 831 } 832 return b.String() 833 } 834 835 func (p *parser) asciiEscapeToUnicode(it item, s string) rune { 836 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) 837 if err != nil { 838 p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err) 839 } 840 if !utf8.ValidRune(rune(hex)) { 841 p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s) 842 } 843 return rune(hex) 844 }