另客网go项目公用的代码库

text_parser.go 22KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881
  1. // Go support for Protocol Buffers - Google's data interchange format
  2. //
  3. // Copyright 2010 The Go Authors. All rights reserved.
  4. // https://github.com/golang/protobuf
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. package proto
  32. // Functions for parsing the Text protocol buffer format.
  33. // TODO: message sets.
  34. import (
  35. "encoding"
  36. "errors"
  37. "fmt"
  38. "reflect"
  39. "strconv"
  40. "strings"
  41. "unicode/utf8"
  42. )
  // anyRepeatedlyUnpacked is the error format reported while reading an
  // expanded Any when its type_url or value field has already been set.
  // The %q verb receives the name of the conflicting field.
  const anyRepeatedlyUnpacked = "Any message unpacked multiple times, or %q already set"
  // ParseError is the error produced when text-format input cannot be parsed.
  type ParseError struct {
  	Message string
  	Line    int // 1-based line number
  	Offset  int // 0-based byte offset from start of input
  }

  // Error formats the parse error. The byte offset is included only for
  // errors on the first line; later lines report just the line number.
  func (p *ParseError) Error() string {
  	if p.Line != 1 {
  		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
  	}
  	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
  }
  57. type token struct {
  58. value string
  59. err *ParseError
  60. line int // line number
  61. offset int // byte number from start of input, not start of line
  62. unquoted string // the unquoted version of value, if it was a quoted string
  63. }
  64. func (t *token) String() string {
  65. if t.err == nil {
  66. return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
  67. }
  68. return fmt.Sprintf("parse error: %v", t.err)
  69. }
  70. type textParser struct {
  71. s string // remaining input
  72. done bool // whether the parsing is finished (success or error)
  73. backed bool // whether back() was called
  74. offset, line int
  75. cur token
  76. }
  77. func newTextParser(s string) *textParser {
  78. p := new(textParser)
  79. p.s = s
  80. p.line = 1
  81. p.cur.line = 1
  82. return p
  83. }
  84. func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
  85. pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
  86. p.cur.err = pe
  87. p.done = true
  88. return pe
  89. }
  // isIdentOrNumberChar reports whether c may appear in an identifier or a
  // number, i.e. whether it matches [-+._A-Za-z0-9].
  func isIdentOrNumberChar(c byte) bool {
  	switch {
  	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
  		return true
  	case c == '-', c == '+', c == '.', c == '_':
  		return true
  	default:
  		return false
  	}
  }
  // isWhitespace reports whether c is a space, tab, newline, or carriage return.
  func isWhitespace(c byte) bool {
  	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
  }
  // isQuote reports whether c is a double or single quote character.
  func isQuote(c byte) bool {
  	return c == '"' || c == '\''
  }
  118. func (p *textParser) skipWhitespace() {
  119. i := 0
  120. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  121. if p.s[i] == '#' {
  122. // comment; skip to end of line or input
  123. for i < len(p.s) && p.s[i] != '\n' {
  124. i++
  125. }
  126. if i == len(p.s) {
  127. break
  128. }
  129. }
  130. if p.s[i] == '\n' {
  131. p.line++
  132. }
  133. i++
  134. }
  135. p.offset += i
  136. p.s = p.s[i:len(p.s)]
  137. if len(p.s) == 0 {
  138. p.done = true
  139. }
  140. }
  141. func (p *textParser) advance() {
  142. // Skip whitespace
  143. p.skipWhitespace()
  144. if p.done {
  145. return
  146. }
  147. // Start of non-whitespace
  148. p.cur.err = nil
  149. p.cur.offset, p.cur.line = p.offset, p.line
  150. p.cur.unquoted = ""
  151. switch p.s[0] {
  152. case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
  153. // Single symbol
  154. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  155. case '"', '\'':
  156. // Quoted string
  157. i := 1
  158. for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
  159. if p.s[i] == '\\' && i+1 < len(p.s) {
  160. // skip escaped char
  161. i++
  162. }
  163. i++
  164. }
  165. if i >= len(p.s) || p.s[i] != p.s[0] {
  166. p.errorf("unmatched quote")
  167. return
  168. }
  169. unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
  170. if err != nil {
  171. p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
  172. return
  173. }
  174. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  175. p.cur.unquoted = unq
  176. default:
  177. i := 0
  178. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  179. i++
  180. }
  181. if i == 0 {
  182. p.errorf("unexpected byte %#x", p.s[0])
  183. return
  184. }
  185. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  186. }
  187. p.offset += len(p.cur.value)
  188. }
  var (
  	// errBadUTF8 is returned when a quoted string contains invalid UTF-8.
  	errBadUTF8 = errors.New("proto: bad UTF-8")
  )

  // unquoteC decodes the body of a quoted string (s excludes the surrounding
  // quotes; quote is the quote character that delimited it).
  //
  // This is based on C++'s tokenizer.cc. Despite its name, this is *not*
  // parsing C syntax. For instance, "\0" is an invalid quoted string.
  //
  // A string containing neither a backslash nor the quote character is
  // returned as-is without further validation. Otherwise every backslash
  // escape is expanded via unescape, and errBadUTF8 is returned if an invalid
  // UTF-8 sequence is encountered.
  func unquoteC(s string, quote rune) (string, error) {
  	// Avoid allocation in trivial cases.
  	simple := true
  	for _, r := range s {
  		if r == '\\' || r == quote {
  			simple = false
  			break
  		}
  	}
  	if simple {
  		return s, nil
  	}

  	buf := make([]byte, 0, 3*len(s)/2)
  	for len(s) > 0 {
  		r, n := utf8.DecodeRuneInString(s)
  		if r == utf8.RuneError && n == 1 {
  			return "", errBadUTF8
  		}
  		s = s[n:]
  		if r != '\\' {
  			if r < utf8.RuneSelf {
  				buf = append(buf, byte(r))
  			} else {
  				buf = append(buf, string(r)...)
  			}
  			continue
  		}

  		ch, tail, err := unescape(s)
  		if err != nil {
  			return "", err
  		}
  		buf = append(buf, ch...)
  		s = tail
  	}
  	return string(buf), nil
  }

  // unescape decodes one escape sequence. s is the text immediately after the
  // backslash. It returns the decoded text in ch and the unconsumed remainder
  // of s in tail.
  //
  // Supported escapes: \a \b \f \n \r \t \v \? \' \" \\, exactly three octal
  // digits, \x or \X with two hex digits (each yielding a single byte), and
  // \u / \U with four / eight hex digits (yielding the UTF-8 encoding of that
  // code point).
  func unescape(s string) (ch string, tail string, err error) {
  	r, n := utf8.DecodeRuneInString(s)
  	if r == utf8.RuneError && n == 1 {
  		return "", "", errBadUTF8
  	}
  	s = s[n:]
  	switch r {
  	case 'a':
  		return "\a", s, nil
  	case 'b':
  		return "\b", s, nil
  	case 'f':
  		return "\f", s, nil
  	case 'n':
  		return "\n", s, nil
  	case 'r':
  		return "\r", s, nil
  	case 't':
  		return "\t", s, nil
  	case 'v':
  		return "\v", s, nil
  	case '?':
  		return "?", s, nil // trigraph workaround
  	case '\'', '"', '\\':
  		return string(r), s, nil
  	case '0', '1', '2', '3', '4', '5', '6', '7':
  		if len(s) < 2 {
  			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
  		}
  		ss := string(r) + s[:2]
  		s = s[2:]
  		i, err := strconv.ParseUint(ss, 8, 8)
  		if err != nil {
  			return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
  		}
  		return string([]byte{byte(i)}), s, nil
  	case 'x', 'X', 'u', 'U':
  		var digits int // number of hex digits this escape requires
  		switch r {
  		case 'x', 'X':
  			digits = 2
  		case 'u':
  			digits = 4
  		case 'U':
  			digits = 8
  		}
  		if len(s) < digits {
  			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, digits)
  		}
  		ss := s[:digits]
  		s = s[digits:]
  		i, err := strconv.ParseUint(ss, 16, 64)
  		if err != nil {
  			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
  		}
  		if r == 'x' || r == 'X' {
  			return string([]byte{byte(i)}), s, nil
  		}
  		if i > utf8.MaxRune {
  			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
  		}
  		// Convert through rune explicitly: a direct string(uint64) conversion
  		// is flagged by go vet's stringintconv check, which go test runs.
  		return string(rune(i)), s, nil
  	}
  	return "", "", fmt.Errorf(`unknown escape \%c`, r)
  }
  296. // Back off the parser by one token. Can only be done between calls to next().
  297. // It makes the next advance() a no-op.
  298. func (p *textParser) back() { p.backed = true }
  299. // Advances the parser and returns the new current token.
  300. func (p *textParser) next() *token {
  301. if p.backed || p.done {
  302. p.backed = false
  303. return &p.cur
  304. }
  305. p.advance()
  306. if p.done {
  307. p.cur.value = ""
  308. } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
  309. // Look for multiple quoted strings separated by whitespace,
  310. // and concatenate them.
  311. cat := p.cur
  312. for {
  313. p.skipWhitespace()
  314. if p.done || !isQuote(p.s[0]) {
  315. break
  316. }
  317. p.advance()
  318. if p.cur.err != nil {
  319. return &p.cur
  320. }
  321. cat.value += " " + p.cur.value
  322. cat.unquoted += p.cur.unquoted
  323. }
  324. p.done = false // parser may have seen EOF, but we want to return cat
  325. p.cur = cat
  326. }
  327. return &p.cur
  328. }
  329. func (p *textParser) consumeToken(s string) error {
  330. tok := p.next()
  331. if tok.err != nil {
  332. return tok.err
  333. }
  334. if tok.value != s {
  335. p.back()
  336. return p.errorf("expected %q, found %q", s, tok.value)
  337. }
  338. return nil
  339. }
  340. // Return a RequiredNotSetError indicating which required field was not set.
  341. func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
  342. st := sv.Type()
  343. sprops := GetProperties(st)
  344. for i := 0; i < st.NumField(); i++ {
  345. if !isNil(sv.Field(i)) {
  346. continue
  347. }
  348. props := sprops.Prop[i]
  349. if props.Required {
  350. return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
  351. }
  352. }
  353. return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
  354. }
  355. // Returns the index in the struct for the named field, as well as the parsed tag properties.
  356. func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
  357. i, ok := sprops.decoderOrigNames[name]
  358. if ok {
  359. return i, sprops.Prop[i], true
  360. }
  361. return -1, nil, false
  362. }
  363. // Consume a ':' from the input stream (if the next token is a colon),
  364. // returning an error if a colon is needed but not present.
  365. func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
  366. tok := p.next()
  367. if tok.err != nil {
  368. return tok.err
  369. }
  370. if tok.value != ":" {
  371. // Colon is optional when the field is a group or message.
  372. needColon := true
  373. switch props.Wire {
  374. case "group":
  375. needColon = false
  376. case "bytes":
  377. // A "bytes" field is either a message, a string, or a repeated field;
  378. // those three become *T, *string and []T respectively, so we can check for
  379. // this field being a pointer to a non-string.
  380. if typ.Kind() == reflect.Ptr {
  381. // *T or *string
  382. if typ.Elem().Kind() == reflect.String {
  383. break
  384. }
  385. } else if typ.Kind() == reflect.Slice {
  386. // []T or []*T
  387. if typ.Elem().Kind() != reflect.Ptr {
  388. break
  389. }
  390. } else if typ.Kind() == reflect.String {
  391. // The proto3 exception is for a string field,
  392. // which requires a colon.
  393. break
  394. }
  395. needColon = false
  396. }
  397. if needColon {
  398. return p.errorf("expected ':', found %q", tok.value)
  399. }
  400. p.back()
  401. }
  402. return nil
  403. }
  404. func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
  405. st := sv.Type()
  406. sprops := GetProperties(st)
  407. reqCount := sprops.reqCount
  408. var reqFieldErr error
  409. fieldSet := make(map[string]bool)
  410. // A struct is a sequence of "name: value", terminated by one of
  411. // '>' or '}', or the end of the input. A name may also be
  412. // "[extension]" or "[type/url]".
  413. //
  414. // The whole struct can also be an expanded Any message, like:
  415. // [type/url] < ... struct contents ... >
  416. for {
  417. tok := p.next()
  418. if tok.err != nil {
  419. return tok.err
  420. }
  421. if tok.value == terminator {
  422. break
  423. }
  424. if tok.value == "[" {
  425. // Looks like an extension or an Any.
  426. //
  427. // TODO: Check whether we need to handle
  428. // namespace rooted names (e.g. ".something.Foo").
  429. extName, err := p.consumeExtName()
  430. if err != nil {
  431. return err
  432. }
  433. if s := strings.LastIndex(extName, "/"); s >= 0 {
  434. // If it contains a slash, it's an Any type URL.
  435. messageName := extName[s+1:]
  436. mt := MessageType(messageName)
  437. if mt == nil {
  438. return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
  439. }
  440. tok = p.next()
  441. if tok.err != nil {
  442. return tok.err
  443. }
  444. // consume an optional colon
  445. if tok.value == ":" {
  446. tok = p.next()
  447. if tok.err != nil {
  448. return tok.err
  449. }
  450. }
  451. var terminator string
  452. switch tok.value {
  453. case "<":
  454. terminator = ">"
  455. case "{":
  456. terminator = "}"
  457. default:
  458. return p.errorf("expected '{' or '<', found %q", tok.value)
  459. }
  460. v := reflect.New(mt.Elem())
  461. if pe := p.readStruct(v.Elem(), terminator); pe != nil {
  462. return pe
  463. }
  464. b, err := Marshal(v.Interface().(Message))
  465. if err != nil {
  466. return p.errorf("failed to marshal message of type %q: %v", messageName, err)
  467. }
  468. if fieldSet["type_url"] {
  469. return p.errorf(anyRepeatedlyUnpacked, "type_url")
  470. }
  471. if fieldSet["value"] {
  472. return p.errorf(anyRepeatedlyUnpacked, "value")
  473. }
  474. sv.FieldByName("TypeUrl").SetString(extName)
  475. sv.FieldByName("Value").SetBytes(b)
  476. fieldSet["type_url"] = true
  477. fieldSet["value"] = true
  478. continue
  479. }
  480. var desc *ExtensionDesc
  481. // This could be faster, but it's functional.
  482. // TODO: Do something smarter than a linear scan.
  483. for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
  484. if d.Name == extName {
  485. desc = d
  486. break
  487. }
  488. }
  489. if desc == nil {
  490. return p.errorf("unrecognized extension %q", extName)
  491. }
  492. props := &Properties{}
  493. props.Parse(desc.Tag)
  494. typ := reflect.TypeOf(desc.ExtensionType)
  495. if err := p.checkForColon(props, typ); err != nil {
  496. return err
  497. }
  498. rep := desc.repeated()
  499. // Read the extension structure, and set it in
  500. // the value we're constructing.
  501. var ext reflect.Value
  502. if !rep {
  503. ext = reflect.New(typ).Elem()
  504. } else {
  505. ext = reflect.New(typ.Elem()).Elem()
  506. }
  507. if err := p.readAny(ext, props); err != nil {
  508. if _, ok := err.(*RequiredNotSetError); !ok {
  509. return err
  510. }
  511. reqFieldErr = err
  512. }
  513. ep := sv.Addr().Interface().(Message)
  514. if !rep {
  515. SetExtension(ep, desc, ext.Interface())
  516. } else {
  517. old, err := GetExtension(ep, desc)
  518. var sl reflect.Value
  519. if err == nil {
  520. sl = reflect.ValueOf(old) // existing slice
  521. } else {
  522. sl = reflect.MakeSlice(typ, 0, 1)
  523. }
  524. sl = reflect.Append(sl, ext)
  525. SetExtension(ep, desc, sl.Interface())
  526. }
  527. if err := p.consumeOptionalSeparator(); err != nil {
  528. return err
  529. }
  530. continue
  531. }
  532. // This is a normal, non-extension field.
  533. name := tok.value
  534. var dst reflect.Value
  535. fi, props, ok := structFieldByName(sprops, name)
  536. if ok {
  537. dst = sv.Field(fi)
  538. } else if oop, ok := sprops.OneofTypes[name]; ok {
  539. // It is a oneof.
  540. props = oop.Prop
  541. nv := reflect.New(oop.Type.Elem())
  542. dst = nv.Elem().Field(0)
  543. field := sv.Field(oop.Field)
  544. if !field.IsNil() {
  545. return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, sv.Type().Field(oop.Field).Name)
  546. }
  547. field.Set(nv)
  548. }
  549. if !dst.IsValid() {
  550. return p.errorf("unknown field name %q in %v", name, st)
  551. }
  552. if dst.Kind() == reflect.Map {
  553. // Consume any colon.
  554. if err := p.checkForColon(props, dst.Type()); err != nil {
  555. return err
  556. }
  557. // Construct the map if it doesn't already exist.
  558. if dst.IsNil() {
  559. dst.Set(reflect.MakeMap(dst.Type()))
  560. }
  561. key := reflect.New(dst.Type().Key()).Elem()
  562. val := reflect.New(dst.Type().Elem()).Elem()
  563. // The map entry should be this sequence of tokens:
  564. // < key : KEY value : VALUE >
  565. // However, implementations may omit key or value, and technically
  566. // we should support them in any order. See b/28924776 for a time
  567. // this went wrong.
  568. tok := p.next()
  569. var terminator string
  570. switch tok.value {
  571. case "<":
  572. terminator = ">"
  573. case "{":
  574. terminator = "}"
  575. default:
  576. return p.errorf("expected '{' or '<', found %q", tok.value)
  577. }
  578. for {
  579. tok := p.next()
  580. if tok.err != nil {
  581. return tok.err
  582. }
  583. if tok.value == terminator {
  584. break
  585. }
  586. switch tok.value {
  587. case "key":
  588. if err := p.consumeToken(":"); err != nil {
  589. return err
  590. }
  591. if err := p.readAny(key, props.MapKeyProp); err != nil {
  592. return err
  593. }
  594. if err := p.consumeOptionalSeparator(); err != nil {
  595. return err
  596. }
  597. case "value":
  598. if err := p.checkForColon(props.MapValProp, dst.Type().Elem()); err != nil {
  599. return err
  600. }
  601. if err := p.readAny(val, props.MapValProp); err != nil {
  602. return err
  603. }
  604. if err := p.consumeOptionalSeparator(); err != nil {
  605. return err
  606. }
  607. default:
  608. p.back()
  609. return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
  610. }
  611. }
  612. dst.SetMapIndex(key, val)
  613. continue
  614. }
  615. // Check that it's not already set if it's not a repeated field.
  616. if !props.Repeated && fieldSet[name] {
  617. return p.errorf("non-repeated field %q was repeated", name)
  618. }
  619. if err := p.checkForColon(props, dst.Type()); err != nil {
  620. return err
  621. }
  622. // Parse into the field.
  623. fieldSet[name] = true
  624. if err := p.readAny(dst, props); err != nil {
  625. if _, ok := err.(*RequiredNotSetError); !ok {
  626. return err
  627. }
  628. reqFieldErr = err
  629. }
  630. if props.Required {
  631. reqCount--
  632. }
  633. if err := p.consumeOptionalSeparator(); err != nil {
  634. return err
  635. }
  636. }
  637. if reqCount > 0 {
  638. return p.missingRequiredFieldError(sv)
  639. }
  640. return reqFieldErr
  641. }
  642. // consumeExtName consumes extension name or expanded Any type URL and the
  643. // following ']'. It returns the name or URL consumed.
  644. func (p *textParser) consumeExtName() (string, error) {
  645. tok := p.next()
  646. if tok.err != nil {
  647. return "", tok.err
  648. }
  649. // If extension name or type url is quoted, it's a single token.
  650. if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
  651. name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
  652. if err != nil {
  653. return "", err
  654. }
  655. return name, p.consumeToken("]")
  656. }
  657. // Consume everything up to "]"
  658. var parts []string
  659. for tok.value != "]" {
  660. parts = append(parts, tok.value)
  661. tok = p.next()
  662. if tok.err != nil {
  663. return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
  664. }
  665. if p.done && tok.value != "]" {
  666. return "", p.errorf("unclosed type_url or extension name")
  667. }
  668. }
  669. return strings.Join(parts, ""), nil
  670. }
  671. // consumeOptionalSeparator consumes an optional semicolon or comma.
  672. // It is used in readStruct to provide backward compatibility.
  673. func (p *textParser) consumeOptionalSeparator() error {
  674. tok := p.next()
  675. if tok.err != nil {
  676. return tok.err
  677. }
  678. if tok.value != ";" && tok.value != "," {
  679. p.back()
  680. }
  681. return nil
  682. }
  683. func (p *textParser) readAny(v reflect.Value, props *Properties) error {
  684. tok := p.next()
  685. if tok.err != nil {
  686. return tok.err
  687. }
  688. if tok.value == "" {
  689. return p.errorf("unexpected EOF")
  690. }
  691. switch fv := v; fv.Kind() {
  692. case reflect.Slice:
  693. at := v.Type()
  694. if at.Elem().Kind() == reflect.Uint8 {
  695. // Special case for []byte
  696. if tok.value[0] != '"' && tok.value[0] != '\'' {
  697. // Deliberately written out here, as the error after
  698. // this switch statement would write "invalid []byte: ...",
  699. // which is not as user-friendly.
  700. return p.errorf("invalid string: %v", tok.value)
  701. }
  702. bytes := []byte(tok.unquoted)
  703. fv.Set(reflect.ValueOf(bytes))
  704. return nil
  705. }
  706. // Repeated field.
  707. if tok.value == "[" {
  708. // Repeated field with list notation, like [1,2,3].
  709. for {
  710. fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
  711. err := p.readAny(fv.Index(fv.Len()-1), props)
  712. if err != nil {
  713. return err
  714. }
  715. tok := p.next()
  716. if tok.err != nil {
  717. return tok.err
  718. }
  719. if tok.value == "]" {
  720. break
  721. }
  722. if tok.value != "," {
  723. return p.errorf("Expected ']' or ',' found %q", tok.value)
  724. }
  725. }
  726. return nil
  727. }
  728. // One value of the repeated field.
  729. p.back()
  730. fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
  731. return p.readAny(fv.Index(fv.Len()-1), props)
  732. case reflect.Bool:
  733. // true/1/t/True or false/f/0/False.
  734. switch tok.value {
  735. case "true", "1", "t", "True":
  736. fv.SetBool(true)
  737. return nil
  738. case "false", "0", "f", "False":
  739. fv.SetBool(false)
  740. return nil
  741. }
  742. case reflect.Float32, reflect.Float64:
  743. v := tok.value
  744. // Ignore 'f' for compatibility with output generated by C++, but don't
  745. // remove 'f' when the value is "-inf" or "inf".
  746. if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
  747. v = v[:len(v)-1]
  748. }
  749. if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
  750. fv.SetFloat(f)
  751. return nil
  752. }
  753. case reflect.Int32:
  754. if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
  755. fv.SetInt(x)
  756. return nil
  757. }
  758. if len(props.Enum) == 0 {
  759. break
  760. }
  761. m, ok := enumValueMaps[props.Enum]
  762. if !ok {
  763. break
  764. }
  765. x, ok := m[tok.value]
  766. if !ok {
  767. break
  768. }
  769. fv.SetInt(int64(x))
  770. return nil
  771. case reflect.Int64:
  772. if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
  773. fv.SetInt(x)
  774. return nil
  775. }
  776. case reflect.Ptr:
  777. // A basic field (indirected through pointer), or a repeated message/group
  778. p.back()
  779. fv.Set(reflect.New(fv.Type().Elem()))
  780. return p.readAny(fv.Elem(), props)
  781. case reflect.String:
  782. if tok.value[0] == '"' || tok.value[0] == '\'' {
  783. fv.SetString(tok.unquoted)
  784. return nil
  785. }
  786. case reflect.Struct:
  787. var terminator string
  788. switch tok.value {
  789. case "{":
  790. terminator = "}"
  791. case "<":
  792. terminator = ">"
  793. default:
  794. return p.errorf("expected '{' or '<', found %q", tok.value)
  795. }
  796. // TODO: Handle nested messages which implement encoding.TextUnmarshaler.
  797. return p.readStruct(fv, terminator)
  798. case reflect.Uint32:
  799. if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
  800. fv.SetUint(uint64(x))
  801. return nil
  802. }
  803. case reflect.Uint64:
  804. if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
  805. fv.SetUint(x)
  806. return nil
  807. }
  808. }
  809. return p.errorf("invalid %v: %v", v.Type(), tok.value)
  810. }
  811. // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
  812. // before starting to unmarshal, so any existing data in pb is always removed.
  813. // If a required field is not set and no other error occurs,
  814. // UnmarshalText returns *RequiredNotSetError.
  815. func UnmarshalText(s string, pb Message) error {
  816. if um, ok := pb.(encoding.TextUnmarshaler); ok {
  817. return um.UnmarshalText([]byte(s))
  818. }
  819. pb.Reset()
  820. v := reflect.ValueOf(pb)
  821. return newTextParser(s).readStruct(v.Elem(), "")
  822. }