httplex.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package httpguts
  5. import (
  6. "net"
  7. "strings"
  8. "unicode/utf8"
  9. "golang.org/x/net/idna"
  10. )
  11. var isTokenTable = [256]bool{
  12. '!': true,
  13. '#': true,
  14. '$': true,
  15. '%': true,
  16. '&': true,
  17. '\'': true,
  18. '*': true,
  19. '+': true,
  20. '-': true,
  21. '.': true,
  22. '0': true,
  23. '1': true,
  24. '2': true,
  25. '3': true,
  26. '4': true,
  27. '5': true,
  28. '6': true,
  29. '7': true,
  30. '8': true,
  31. '9': true,
  32. 'A': true,
  33. 'B': true,
  34. 'C': true,
  35. 'D': true,
  36. 'E': true,
  37. 'F': true,
  38. 'G': true,
  39. 'H': true,
  40. 'I': true,
  41. 'J': true,
  42. 'K': true,
  43. 'L': true,
  44. 'M': true,
  45. 'N': true,
  46. 'O': true,
  47. 'P': true,
  48. 'Q': true,
  49. 'R': true,
  50. 'S': true,
  51. 'T': true,
  52. 'U': true,
  53. 'W': true,
  54. 'V': true,
  55. 'X': true,
  56. 'Y': true,
  57. 'Z': true,
  58. '^': true,
  59. '_': true,
  60. '`': true,
  61. 'a': true,
  62. 'b': true,
  63. 'c': true,
  64. 'd': true,
  65. 'e': true,
  66. 'f': true,
  67. 'g': true,
  68. 'h': true,
  69. 'i': true,
  70. 'j': true,
  71. 'k': true,
  72. 'l': true,
  73. 'm': true,
  74. 'n': true,
  75. 'o': true,
  76. 'p': true,
  77. 'q': true,
  78. 'r': true,
  79. 's': true,
  80. 't': true,
  81. 'u': true,
  82. 'v': true,
  83. 'w': true,
  84. 'x': true,
  85. 'y': true,
  86. 'z': true,
  87. '|': true,
  88. '~': true,
  89. }
  90. func IsTokenRune(r rune) bool {
  91. return r < utf8.RuneSelf && isTokenTable[byte(r)]
  92. }
  93. // HeaderValuesContainsToken reports whether any string in values
  94. // contains the provided token, ASCII case-insensitively.
  95. func HeaderValuesContainsToken(values []string, token string) bool {
  96. for _, v := range values {
  97. if headerValueContainsToken(v, token) {
  98. return true
  99. }
  100. }
  101. return false
  102. }
  103. // isOWS reports whether b is an optional whitespace byte, as defined
  104. // by RFC 7230 section 3.2.3.
  105. func isOWS(b byte) bool { return b == ' ' || b == '\t' }
  106. // trimOWS returns x with all optional whitespace removes from the
  107. // beginning and end.
  108. func trimOWS(x string) string {
  109. // TODO: consider using strings.Trim(x, " \t") instead,
  110. // if and when it's fast enough. See issue 10292.
  111. // But this ASCII-only code will probably always beat UTF-8
  112. // aware code.
  113. for len(x) > 0 && isOWS(x[0]) {
  114. x = x[1:]
  115. }
  116. for len(x) > 0 && isOWS(x[len(x)-1]) {
  117. x = x[:len(x)-1]
  118. }
  119. return x
  120. }
  121. // headerValueContainsToken reports whether v (assumed to be a
  122. // 0#element, in the ABNF extension described in RFC 7230 section 7)
  123. // contains token amongst its comma-separated tokens, ASCII
  124. // case-insensitively.
  125. func headerValueContainsToken(v string, token string) bool {
  126. for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
  127. if tokenEqual(trimOWS(v[:comma]), token) {
  128. return true
  129. }
  130. v = v[comma+1:]
  131. }
  132. return tokenEqual(trimOWS(v), token)
  133. }
  134. // lowerASCII returns the ASCII lowercase version of b.
  135. func lowerASCII(b byte) byte {
  136. if 'A' <= b && b <= 'Z' {
  137. return b + ('a' - 'A')
  138. }
  139. return b
  140. }
  141. // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
  142. func tokenEqual(t1, t2 string) bool {
  143. if len(t1) != len(t2) {
  144. return false
  145. }
  146. for i, b := range t1 {
  147. if b >= utf8.RuneSelf {
  148. // No UTF-8 or non-ASCII allowed in tokens.
  149. return false
  150. }
  151. if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
  152. return false
  153. }
  154. }
  155. return true
  156. }
  157. // isLWS reports whether b is linear white space, according
  158. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  159. //
  160. // LWS = [CRLF] 1*( SP | HT )
  161. func isLWS(b byte) bool { return b == ' ' || b == '\t' }
  162. // isCTL reports whether b is a control byte, according
  163. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  164. //
  165. // CTL = <any US-ASCII control character
  166. // (octets 0 - 31) and DEL (127)>
  167. func isCTL(b byte) bool {
  168. const del = 0x7f // a CTL
  169. return b < ' ' || b == del
  170. }
  171. // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
  172. // HTTP/2 imposes the additional restriction that uppercase ASCII
  173. // letters are not allowed.
  174. //
  175. // RFC 7230 says:
  176. //
  177. // header-field = field-name ":" OWS field-value OWS
  178. // field-name = token
  179. // token = 1*tchar
  180. // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  181. // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  182. func ValidHeaderFieldName(v string) bool {
  183. if len(v) == 0 {
  184. return false
  185. }
  186. for i := 0; i < len(v); i++ {
  187. if !isTokenTable[v[i]] {
  188. return false
  189. }
  190. }
  191. return true
  192. }
  193. // ValidHostHeader reports whether h is a valid host header.
  194. func ValidHostHeader(h string) bool {
  195. // The latest spec is actually this:
  196. //
  197. // http://tools.ietf.org/html/rfc7230#section-5.4
  198. // Host = uri-host [ ":" port ]
  199. //
  200. // Where uri-host is:
  201. // http://tools.ietf.org/html/rfc3986#section-3.2.2
  202. //
  203. // But we're going to be much more lenient for now and just
  204. // search for any byte that's not a valid byte in any of those
  205. // expressions.
  206. for i := 0; i < len(h); i++ {
  207. if !validHostByte[h[i]] {
  208. return false
  209. }
  210. }
  211. return true
  212. }
  213. // See the validHostHeader comment.
  214. var validHostByte = [256]bool{
  215. '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
  216. '8': true, '9': true,
  217. 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
  218. 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
  219. 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  220. 'y': true, 'z': true,
  221. 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
  222. 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
  223. 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  224. 'Y': true, 'Z': true,
  225. '!': true, // sub-delims
  226. '$': true, // sub-delims
  227. '%': true, // pct-encoded (and used in IPv6 zones)
  228. '&': true, // sub-delims
  229. '(': true, // sub-delims
  230. ')': true, // sub-delims
  231. '*': true, // sub-delims
  232. '+': true, // sub-delims
  233. ',': true, // sub-delims
  234. '-': true, // unreserved
  235. '.': true, // unreserved
  236. ':': true, // IPv6address + Host expression's optional port
  237. ';': true, // sub-delims
  238. '=': true, // sub-delims
  239. '[': true,
  240. '\'': true, // sub-delims
  241. ']': true,
  242. '_': true, // unreserved
  243. '~': true, // unreserved
  244. }
  245. // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
  246. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
  247. //
  248. // message-header = field-name ":" [ field-value ]
  249. // field-value = *( field-content | LWS )
  250. // field-content = <the OCTETs making up the field-value
  251. // and consisting of either *TEXT or combinations
  252. // of token, separators, and quoted-string>
  253. //
  254. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
  255. //
  256. // TEXT = <any OCTET except CTLs,
  257. // but including LWS>
  258. // LWS = [CRLF] 1*( SP | HT )
  259. // CTL = <any US-ASCII control character
  260. // (octets 0 - 31) and DEL (127)>
  261. //
  262. // RFC 7230 says:
  263. //
  264. // field-value = *( field-content / obs-fold )
  265. // obj-fold = N/A to http2, and deprecated
  266. // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  267. // field-vchar = VCHAR / obs-text
  268. // obs-text = %x80-FF
  269. // VCHAR = "any visible [USASCII] character"
  270. //
  271. // http2 further says: "Similarly, HTTP/2 allows header field values
  272. // that are not valid. While most of the values that can be encoded
  273. // will not alter header field parsing, carriage return (CR, ASCII
  274. // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
  275. // 0x0) might be exploited by an attacker if they are translated
  276. // verbatim. Any request or response that contains a character not
  277. // permitted in a header field value MUST be treated as malformed
  278. // (Section 8.1.2.6). Valid characters are defined by the
  279. // field-content ABNF rule in Section 3.2 of [RFC7230]."
  280. //
  281. // This function does not (yet?) properly handle the rejection of
  282. // strings that begin or end with SP or HTAB.
  283. func ValidHeaderFieldValue(v string) bool {
  284. for i := 0; i < len(v); i++ {
  285. b := v[i]
  286. if isCTL(b) && !isLWS(b) {
  287. return false
  288. }
  289. }
  290. return true
  291. }
  292. func isASCII(s string) bool {
  293. for i := 0; i < len(s); i++ {
  294. if s[i] >= utf8.RuneSelf {
  295. return false
  296. }
  297. }
  298. return true
  299. }
  300. // PunycodeHostPort returns the IDNA Punycode version
  301. // of the provided "host" or "host:port" string.
  302. func PunycodeHostPort(v string) (string, error) {
  303. if isASCII(v) {
  304. return v, nil
  305. }
  306. host, port, err := net.SplitHostPort(v)
  307. if err != nil {
  308. // The input 'v' argument was just a "host" argument,
  309. // without a port. This error should not be returned
  310. // to the caller.
  311. host = v
  312. port = ""
  313. }
  314. host, err = idna.ToASCII(host)
  315. if err != nil {
  316. // Non-UTF-8? Not representable in Punycode, in any
  317. // case.
  318. return "", err
  319. }
  320. if port == "" {
  321. return host, nil
  322. }
  323. return net.JoinHostPort(host, port), nil
  324. }