You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
351 lines
9.1 KiB
351 lines
9.1 KiB
// Copyright 2016 The Go Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
// Package httplex contains rules around lexical matters of various |
|
// HTTP-related specifications. |
|
// |
|
// This package is shared by the standard library (which vendors it) |
|
// and x/net/http2. It comes with no API stability promise. |
|
package httplex |
|
|
|
import ( |
|
"net" |
|
"strings" |
|
"unicode/utf8" |
|
|
|
"golang.org/x/net/idna" |
|
) |
|
|
|
// isTokenTable is indexed by ASCII code point; an entry is true when
// that character is allowed in an HTTP token (a "tchar" in RFC 7230
// terms). Indexes 0 through 126 are covered, so DEL (127) and anything
// above it fall outside the table.
var isTokenTable = [127]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true,
	'*': true, '+': true, '-': true, '.': true, '^': true, '_': true,
	'`': true, '|': true, '~': true,

	// Digits.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Upper-case letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Lower-case letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}
|
|
|
func IsTokenRune(r rune) bool { |
|
i := int(r) |
|
return i < len(isTokenTable) && isTokenTable[i] |
|
} |
|
|
|
func isNotToken(r rune) bool { |
|
return !IsTokenRune(r) |
|
} |
|
|
|
// HeaderValuesContainsToken reports whether any string in values |
|
// contains the provided token, ASCII case-insensitively. |
|
func HeaderValuesContainsToken(values []string, token string) bool { |
|
for _, v := range values { |
|
if headerValueContainsToken(v, token) { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
// isOWS reports whether b is one of the optional whitespace bytes
// (space or horizontal tab) defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
|
|
|
// trimOWS returns x with all optional whitespace removes from the |
|
// beginning and end. |
|
func trimOWS(x string) string { |
|
// TODO: consider using strings.Trim(x, " \t") instead, |
|
// if and when it's fast enough. See issue 10292. |
|
// But this ASCII-only code will probably always beat UTF-8 |
|
// aware code. |
|
for len(x) > 0 && isOWS(x[0]) { |
|
x = x[1:] |
|
} |
|
for len(x) > 0 && isOWS(x[len(x)-1]) { |
|
x = x[:len(x)-1] |
|
} |
|
return x |
|
} |
|
|
|
// headerValueContainsToken reports whether v (assumed to be a |
|
// 0#element, in the ABNF extension described in RFC 7230 section 7) |
|
// contains token amongst its comma-separated tokens, ASCII |
|
// case-insensitively. |
|
func headerValueContainsToken(v string, token string) bool { |
|
v = trimOWS(v) |
|
if comma := strings.IndexByte(v, ','); comma != -1 { |
|
return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) |
|
} |
|
return tokenEqual(v, token) |
|
} |
|
|
|
// lowerASCII maps the ASCII letters 'A' through 'Z' to 'a' through 'z'
// and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	// Distance between an upper-case ASCII letter and its lower-case form.
	const caseDelta = 'a' - 'A'
	return b + caseDelta
}
|
|
|
// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. |
|
func tokenEqual(t1, t2 string) bool { |
|
if len(t1) != len(t2) { |
|
return false |
|
} |
|
for i, b := range t1 { |
|
if b >= utf8.RuneSelf { |
|
// No UTF-8 or non-ASCII allowed in tokens. |
|
return false |
|
} |
|
if lowerASCII(byte(b)) != lowerASCII(t2[i]) { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// isLWS reports whether b is a linear white space byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
//
// Only the single bytes SP and HT are recognized here; the optional
// CRLF prefix of the grammar is not matched by this byte-level check.
func isLWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
|
|
|
// isCTL reports whether b is a control byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		space = ' '  // first byte after the 0-31 control range
		del   = 0x7f // a CTL
	)
	if b == del {
		return true
	}
	return b < space
}
|
|
|
// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. |
|
// HTTP/2 imposes the additional restriction that uppercase ASCII |
|
// letters are not allowed. |
|
// |
|
// RFC 7230 says: |
|
// header-field = field-name ":" OWS field-value OWS |
|
// field-name = token |
|
// token = 1*tchar |
|
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
|
// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
|
func ValidHeaderFieldName(v string) bool { |
|
if len(v) == 0 { |
|
return false |
|
} |
|
for _, r := range v { |
|
if !IsTokenRune(r) { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// ValidHostHeader reports whether h is a valid host header. |
|
func ValidHostHeader(h string) bool { |
|
// The latest spec is actually this: |
|
// |
|
// http://tools.ietf.org/html/rfc7230#section-5.4 |
|
// Host = uri-host [ ":" port ] |
|
// |
|
// Where uri-host is: |
|
// http://tools.ietf.org/html/rfc3986#section-3.2.2 |
|
// |
|
// But we're going to be much more lenient for now and just |
|
// search for any byte that's not a valid byte in any of those |
|
// expressions. |
|
for i := 0; i < len(h); i++ { |
|
if !validHostByte[h[i]] { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// validHostByte is the lookup table used by ValidHostHeader: an entry
// is true when that byte is accepted in a Host header value. See the
// comment inside ValidHostHeader for why the set is this lenient.
var validHostByte = [256]bool{
	// Digits.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Lower-case letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// Upper-case letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// unreserved punctuation
	'-': true,
	'.': true,
	'_': true,
	'~': true,

	// sub-delims
	'!':  true,
	'$':  true,
	'&':  true,
	'\'': true,
	'(':  true,
	')':  true,
	'*':  true,
	'+':  true,
	',':  true,
	';':  true,
	'=':  true,

	// Everything else.
	'%': true, // pct-encoded (and used in IPv6 zones)
	':': true, // IPv6address + Host expression's optional port
	'[': true, // presumably the opening bracket of an IP literal
	']': true, // presumably the closing bracket of an IP literal
}
|
|
|
// ValidHeaderFieldValue reports whether v is a valid "field-value" according to |
|
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : |
|
// |
|
// message-header = field-name ":" [ field-value ] |
|
// field-value = *( field-content | LWS ) |
|
// field-content = <the OCTETs making up the field-value |
|
// and consisting of either *TEXT or combinations |
|
// of token, separators, and quoted-string> |
|
// |
|
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : |
|
// |
|
// TEXT = <any OCTET except CTLs, |
|
// but including LWS> |
|
// LWS = [CRLF] 1*( SP | HT ) |
|
// CTL = <any US-ASCII control character |
|
// (octets 0 - 31) and DEL (127)> |
|
// |
|
// RFC 7230 says: |
|
// field-value = *( field-content / obs-fold ) |
|
// obj-fold = N/A to http2, and deprecated |
|
// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] |
|
// field-vchar = VCHAR / obs-text |
|
// obs-text = %x80-FF |
|
// VCHAR = "any visible [USASCII] character" |
|
// |
|
// http2 further says: "Similarly, HTTP/2 allows header field values |
|
// that are not valid. While most of the values that can be encoded |
|
// will not alter header field parsing, carriage return (CR, ASCII |
|
// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII |
|
// 0x0) might be exploited by an attacker if they are translated |
|
// verbatim. Any request or response that contains a character not |
|
// permitted in a header field value MUST be treated as malformed |
|
// (Section 8.1.2.6). Valid characters are defined by the |
|
// field-content ABNF rule in Section 3.2 of [RFC7230]." |
|
// |
|
// This function does not (yet?) properly handle the rejection of |
|
// strings that begin or end with SP or HTAB. |
|
func ValidHeaderFieldValue(v string) bool { |
|
for i := 0; i < len(v); i++ { |
|
b := v[i] |
|
if isCTL(b) && !isLWS(b) { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// isASCII reports whether s consists solely of ASCII bytes (each one
// below utf8.RuneSelf). The empty string is ASCII.
func isASCII(s string) bool {
	for _, r := range s {
		// Multi-byte sequences decode to runes >= utf8.RuneSelf, and
		// invalid bytes decode to utf8.RuneError, which is also above it.
		if r >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
|
|
|
// PunycodeHostPort returns the IDNA Punycode version |
|
// of the provided "host" or "host:port" string. |
|
func PunycodeHostPort(v string) (string, error) { |
|
if isASCII(v) { |
|
return v, nil |
|
} |
|
|
|
host, port, err := net.SplitHostPort(v) |
|
if err != nil { |
|
// The input 'v' argument was just a "host" argument, |
|
// without a port. This error should not be returned |
|
// to the caller. |
|
host = v |
|
port = "" |
|
} |
|
host, err = idna.ToASCII(host) |
|
if err != nil { |
|
// Non-UTF-8? Not representable in Punycode, in any |
|
// case. |
|
return "", err |
|
} |
|
if port == "" { |
|
return host, nil |
|
} |
|
return net.JoinHostPort(host, port), nil |
|
}
|
|
|