123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431 |
- // TOML Parser.
-
- package toml
-
- import (
- "errors"
- "fmt"
- "math"
- "reflect"
- "regexp"
- "strconv"
- "strings"
- "time"
- )
-
- type tomlParser struct {
- flowIdx int
- flow []token
- tree *Tree
- currentTable []string
- seenTableKeys []string
- }
-
- type tomlParserStateFn func() tomlParserStateFn
-
- // Formats and panics an error message based on a token
- func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
- panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
- }
-
- func (p *tomlParser) run() {
- for state := p.parseStart; state != nil; {
- state = state()
- }
- }
-
- func (p *tomlParser) peek() *token {
- if p.flowIdx >= len(p.flow) {
- return nil
- }
- return &p.flow[p.flowIdx]
- }
-
- func (p *tomlParser) assume(typ tokenType) {
- tok := p.getToken()
- if tok == nil {
- p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
- }
- if tok.typ != typ {
- p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
- }
- }
-
- func (p *tomlParser) getToken() *token {
- tok := p.peek()
- if tok == nil {
- return nil
- }
- p.flowIdx++
- return tok
- }
-
- func (p *tomlParser) parseStart() tomlParserStateFn {
- tok := p.peek()
-
- // end of stream, parsing is finished
- if tok == nil {
- return nil
- }
-
- switch tok.typ {
- case tokenDoubleLeftBracket:
- return p.parseGroupArray
- case tokenLeftBracket:
- return p.parseGroup
- case tokenKey:
- return p.parseAssign
- case tokenEOF:
- return nil
- default:
- p.raiseError(tok, "unexpected token")
- }
- return nil
- }
-
- func (p *tomlParser) parseGroupArray() tomlParserStateFn {
- startToken := p.getToken() // discard the [[
- key := p.getToken()
- if key.typ != tokenKeyGroupArray {
- p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
- }
-
- // get or create table array element at the indicated part in the path
- keys, err := parseKey(key.val)
- if err != nil {
- p.raiseError(key, "invalid table array key: %s", err)
- }
- p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
- destTree := p.tree.GetPath(keys)
- var array []*Tree
- if destTree == nil {
- array = make([]*Tree, 0)
- } else if target, ok := destTree.([]*Tree); ok && target != nil {
- array = destTree.([]*Tree)
- } else {
- p.raiseError(key, "key %s is already assigned and not of type table array", key)
- }
- p.currentTable = keys
-
- // add a new tree to the end of the table array
- newTree := newTree()
- newTree.position = startToken.Position
- array = append(array, newTree)
- p.tree.SetPath(p.currentTable, array)
-
- // remove all keys that were children of this table array
- prefix := key.val + "."
- found := false
- for ii := 0; ii < len(p.seenTableKeys); {
- tableKey := p.seenTableKeys[ii]
- if strings.HasPrefix(tableKey, prefix) {
- p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
- } else {
- found = (tableKey == key.val)
- ii++
- }
- }
-
- // keep this key name from use by other kinds of assignments
- if !found {
- p.seenTableKeys = append(p.seenTableKeys, key.val)
- }
-
- // move to next parser state
- p.assume(tokenDoubleRightBracket)
- return p.parseStart
- }
-
- func (p *tomlParser) parseGroup() tomlParserStateFn {
- startToken := p.getToken() // discard the [
- key := p.getToken()
- if key.typ != tokenKeyGroup {
- p.raiseError(key, "unexpected token %s, was expecting a table key", key)
- }
- for _, item := range p.seenTableKeys {
- if item == key.val {
- p.raiseError(key, "duplicated tables")
- }
- }
-
- p.seenTableKeys = append(p.seenTableKeys, key.val)
- keys, err := parseKey(key.val)
- if err != nil {
- p.raiseError(key, "invalid table array key: %s", err)
- }
- if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
- p.raiseError(key, "%s", err)
- }
- p.assume(tokenRightBracket)
- p.currentTable = keys
- return p.parseStart
- }
-
- func (p *tomlParser) parseAssign() tomlParserStateFn {
- key := p.getToken()
- p.assume(tokenEqual)
-
- value := p.parseRvalue()
- var tableKey []string
- if len(p.currentTable) > 0 {
- tableKey = p.currentTable
- } else {
- tableKey = []string{}
- }
-
- // find the table to assign, looking out for arrays of tables
- var targetNode *Tree
- switch node := p.tree.GetPath(tableKey).(type) {
- case []*Tree:
- targetNode = node[len(node)-1]
- case *Tree:
- targetNode = node
- default:
- p.raiseError(key, "Unknown table type for path: %s",
- strings.Join(tableKey, "."))
- }
-
- // assign value to the found table
- keyVals := []string{key.val}
- if len(keyVals) != 1 {
- p.raiseError(key, "Invalid key")
- }
- keyVal := keyVals[0]
- localKey := []string{keyVal}
- finalKey := append(tableKey, keyVal)
- if targetNode.GetPath(localKey) != nil {
- p.raiseError(key, "The following key was defined twice: %s",
- strings.Join(finalKey, "."))
- }
- var toInsert interface{}
-
- switch value.(type) {
- case *Tree, []*Tree:
- toInsert = value
- default:
- toInsert = &tomlValue{value: value, position: key.Position}
- }
- targetNode.values[keyVal] = toInsert
- return p.parseStart
- }
-
- var numberUnderscoreInvalidRegexp *regexp.Regexp
- var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
-
- func numberContainsInvalidUnderscore(value string) error {
- if numberUnderscoreInvalidRegexp.MatchString(value) {
- return errors.New("invalid use of _ in number")
- }
- return nil
- }
-
- func hexNumberContainsInvalidUnderscore(value string) error {
- if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
- return errors.New("invalid use of _ in hex number")
- }
- return nil
- }
-
- func cleanupNumberToken(value string) string {
- cleanedVal := strings.Replace(value, "_", "", -1)
- return cleanedVal
- }
-
- func (p *tomlParser) parseRvalue() interface{} {
- tok := p.getToken()
- if tok == nil || tok.typ == tokenEOF {
- p.raiseError(tok, "expecting a value")
- }
-
- switch tok.typ {
- case tokenString:
- return tok.val
- case tokenTrue:
- return true
- case tokenFalse:
- return false
- case tokenInf:
- if tok.val[0] == '-' {
- return math.Inf(-1)
- }
- return math.Inf(1)
- case tokenNan:
- return math.NaN()
- case tokenInteger:
- cleanedVal := cleanupNumberToken(tok.val)
- var err error
- var val int64
- if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
- switch cleanedVal[1] {
- case 'x':
- err = hexNumberContainsInvalidUnderscore(tok.val)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
- case 'o':
- err = numberContainsInvalidUnderscore(tok.val)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
- case 'b':
- err = numberContainsInvalidUnderscore(tok.val)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
- default:
- panic("invalid base") // the lexer should catch this first
- }
- } else {
- err = numberContainsInvalidUnderscore(tok.val)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- val, err = strconv.ParseInt(cleanedVal, 10, 64)
- }
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- return val
- case tokenFloat:
- err := numberContainsInvalidUnderscore(tok.val)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- cleanedVal := cleanupNumberToken(tok.val)
- val, err := strconv.ParseFloat(cleanedVal, 64)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- return val
- case tokenDate:
- val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC)
- if err != nil {
- p.raiseError(tok, "%s", err)
- }
- return val
- case tokenLeftBracket:
- return p.parseArray()
- case tokenLeftCurlyBrace:
- return p.parseInlineTable()
- case tokenEqual:
- p.raiseError(tok, "cannot have multiple equals for the same key")
- case tokenError:
- p.raiseError(tok, "%s", tok)
- }
-
- p.raiseError(tok, "never reached")
-
- return nil
- }
-
- func tokenIsComma(t *token) bool {
- return t != nil && t.typ == tokenComma
- }
-
- func (p *tomlParser) parseInlineTable() *Tree {
- tree := newTree()
- var previous *token
- Loop:
- for {
- follow := p.peek()
- if follow == nil || follow.typ == tokenEOF {
- p.raiseError(follow, "unterminated inline table")
- }
- switch follow.typ {
- case tokenRightCurlyBrace:
- p.getToken()
- break Loop
- case tokenKey:
- if !tokenIsComma(previous) && previous != nil {
- p.raiseError(follow, "comma expected between fields in inline table")
- }
- key := p.getToken()
- p.assume(tokenEqual)
- value := p.parseRvalue()
- tree.Set(key.val, value)
- case tokenComma:
- if previous == nil {
- p.raiseError(follow, "inline table cannot start with a comma")
- }
- if tokenIsComma(previous) {
- p.raiseError(follow, "need field between two commas in inline table")
- }
- p.getToken()
- default:
- p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
- }
- previous = follow
- }
- if tokenIsComma(previous) {
- p.raiseError(previous, "trailing comma at the end of inline table")
- }
- return tree
- }
-
- func (p *tomlParser) parseArray() interface{} {
- var array []interface{}
- arrayType := reflect.TypeOf(nil)
- for {
- follow := p.peek()
- if follow == nil || follow.typ == tokenEOF {
- p.raiseError(follow, "unterminated array")
- }
- if follow.typ == tokenRightBracket {
- p.getToken()
- break
- }
- val := p.parseRvalue()
- if arrayType == nil {
- arrayType = reflect.TypeOf(val)
- }
- if reflect.TypeOf(val) != arrayType {
- p.raiseError(follow, "mixed types in array")
- }
- array = append(array, val)
- follow = p.peek()
- if follow == nil || follow.typ == tokenEOF {
- p.raiseError(follow, "unterminated array")
- }
- if follow.typ != tokenRightBracket && follow.typ != tokenComma {
- p.raiseError(follow, "missing comma")
- }
- if follow.typ == tokenComma {
- p.getToken()
- }
- }
- // An array of Trees is actually an array of inline
- // tables, which is a shorthand for a table array. If the
- // array was not converted from []interface{} to []*Tree,
- // the two notations would not be equivalent.
- if arrayType == reflect.TypeOf(newTree()) {
- tomlArray := make([]*Tree, len(array))
- for i, v := range array {
- tomlArray[i] = v.(*Tree)
- }
- return tomlArray
- }
- return array
- }
-
- func parseToml(flow []token) *Tree {
- result := newTree()
- result.position = Position{1, 1}
- parser := &tomlParser{
- flowIdx: 0,
- flow: flow,
- tree: result,
- currentTable: make([]string, 0),
- seenTableKeys: make([]string, 0),
- }
- parser.run()
- return result
- }
-
- func init() {
- numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
- hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
- }
|