http urls monitor.

urlesc.go 4.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package urlesc implements query escaping as per RFC 3986.
// It contains some parts of the net/url package, modified so as to allow
// some reserved characters incorrectly escaped by net/url.
// See https://github.com/golang/go/issues/5684
  8. package urlesc
  9. import (
  10. "bytes"
  11. "net/url"
  12. "strings"
  13. )
  14. type encoding int
  15. const (
  16. encodePath encoding = 1 + iota
  17. encodeUserPassword
  18. encodeQueryComponent
  19. encodeFragment
  20. )
  21. // Return true if the specified character should be escaped when
  22. // appearing in a URL string, according to RFC 3986.
  23. func shouldEscape(c byte, mode encoding) bool {
  24. // §2.3 Unreserved characters (alphanum)
  25. if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
  26. return false
  27. }
  28. switch c {
  29. case '-', '.', '_', '~': // §2.3 Unreserved characters (mark)
  30. return false
  31. // §2.2 Reserved characters (reserved)
  32. case ':', '/', '?', '#', '[', ']', '@', // gen-delims
  33. '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims
  34. // Different sections of the URL allow a few of
  35. // the reserved characters to appear unescaped.
  36. switch mode {
  37. case encodePath: // §3.3
  38. // The RFC allows sub-delims and : @.
  39. // '/', '[' and ']' can be used to assign meaning to individual path
  40. // segments. This package only manipulates the path as a whole,
  41. // so we allow those as well. That leaves only ? and # to escape.
  42. return c == '?' || c == '#'
  43. case encodeUserPassword: // §3.2.1
  44. // The RFC allows : and sub-delims in
  45. // userinfo. The parsing of userinfo treats ':' as special so we must escape
  46. // all the gen-delims.
  47. return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'
  48. case encodeQueryComponent: // §3.4
  49. // The RFC allows / and ?.
  50. return c != '/' && c != '?'
  51. case encodeFragment: // §4.1
  52. // The RFC text is silent but the grammar allows
  53. // everything, so escape nothing but #
  54. return c == '#'
  55. }
  56. }
  57. // Everything else must be escaped.
  58. return true
  59. }
  60. // QueryEscape escapes the string so it can be safely placed
  61. // inside a URL query.
  62. func QueryEscape(s string) string {
  63. return escape(s, encodeQueryComponent)
  64. }
  65. func escape(s string, mode encoding) string {
  66. spaceCount, hexCount := 0, 0
  67. for i := 0; i < len(s); i++ {
  68. c := s[i]
  69. if shouldEscape(c, mode) {
  70. if c == ' ' && mode == encodeQueryComponent {
  71. spaceCount++
  72. } else {
  73. hexCount++
  74. }
  75. }
  76. }
  77. if spaceCount == 0 && hexCount == 0 {
  78. return s
  79. }
  80. t := make([]byte, len(s)+2*hexCount)
  81. j := 0
  82. for i := 0; i < len(s); i++ {
  83. switch c := s[i]; {
  84. case c == ' ' && mode == encodeQueryComponent:
  85. t[j] = '+'
  86. j++
  87. case shouldEscape(c, mode):
  88. t[j] = '%'
  89. t[j+1] = "0123456789ABCDEF"[c>>4]
  90. t[j+2] = "0123456789ABCDEF"[c&15]
  91. j += 3
  92. default:
  93. t[j] = s[i]
  94. j++
  95. }
  96. }
  97. return string(t)
  98. }
  99. var uiReplacer = strings.NewReplacer(
  100. "%21", "!",
  101. "%27", "'",
  102. "%28", "(",
  103. "%29", ")",
  104. "%2A", "*",
  105. )
  106. // unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986.
  107. func unescapeUserinfo(s string) string {
  108. return uiReplacer.Replace(s)
  109. }
  110. // Escape reassembles the URL into a valid URL string.
  111. // The general form of the result is one of:
  112. //
  113. // scheme:opaque
  114. // scheme://userinfo@host/path?query#fragment
  115. //
  116. // If u.Opaque is non-empty, String uses the first form;
  117. // otherwise it uses the second form.
  118. //
  119. // In the second form, the following rules apply:
  120. // - if u.Scheme is empty, scheme: is omitted.
  121. // - if u.User is nil, userinfo@ is omitted.
  122. // - if u.Host is empty, host/ is omitted.
  123. // - if u.Scheme and u.Host are empty and u.User is nil,
  124. // the entire scheme://userinfo@host/ is omitted.
  125. // - if u.Host is non-empty and u.Path begins with a /,
  126. // the form host/path does not add its own /.
  127. // - if u.RawQuery is empty, ?query is omitted.
  128. // - if u.Fragment is empty, #fragment is omitted.
  129. func Escape(u *url.URL) string {
  130. var buf bytes.Buffer
  131. if u.Scheme != "" {
  132. buf.WriteString(u.Scheme)
  133. buf.WriteByte(':')
  134. }
  135. if u.Opaque != "" {
  136. buf.WriteString(u.Opaque)
  137. } else {
  138. if u.Scheme != "" || u.Host != "" || u.User != nil {
  139. buf.WriteString("//")
  140. if ui := u.User; ui != nil {
  141. buf.WriteString(unescapeUserinfo(ui.String()))
  142. buf.WriteByte('@')
  143. }
  144. if h := u.Host; h != "" {
  145. buf.WriteString(h)
  146. }
  147. }
  148. if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
  149. buf.WriteByte('/')
  150. }
  151. buf.WriteString(escape(u.Path, encodePath))
  152. }
  153. if u.RawQuery != "" {
  154. buf.WriteByte('?')
  155. buf.WriteString(u.RawQuery)
  156. }
  157. if u.Fragment != "" {
  158. buf.WriteByte('#')
  159. buf.WriteString(escape(u.Fragment, encodeFragment))
  160. }
  161. return buf.String()
  162. }