httplex.go - gtsocial-umbx - Unnamed repository; edit this file 'description' to name the repository.

httplex.go (8990B)
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package httpguts
      6 
      7 import (
      8 	"net"
      9 	"strings"
     10 	"unicode/utf8"
     11 
     12 	"golang.org/x/net/idna"
     13 )
     14 
     15 var isTokenTable = [127]bool{
     16 	'!':  true,
     17 	'#':  true,
     18 	'$':  true,
     19 	'%':  true,
     20 	'&':  true,
     21 	'\'': true,
     22 	'*':  true,
     23 	'+':  true,
     24 	'-':  true,
     25 	'.':  true,
     26 	'0':  true,
     27 	'1':  true,
     28 	'2':  true,
     29 	'3':  true,
     30 	'4':  true,
     31 	'5':  true,
     32 	'6':  true,
     33 	'7':  true,
     34 	'8':  true,
     35 	'9':  true,
     36 	'A':  true,
     37 	'B':  true,
     38 	'C':  true,
     39 	'D':  true,
     40 	'E':  true,
     41 	'F':  true,
     42 	'G':  true,
     43 	'H':  true,
     44 	'I':  true,
     45 	'J':  true,
     46 	'K':  true,
     47 	'L':  true,
     48 	'M':  true,
     49 	'N':  true,
     50 	'O':  true,
     51 	'P':  true,
     52 	'Q':  true,
     53 	'R':  true,
     54 	'S':  true,
     55 	'T':  true,
     56 	'U':  true,
     57 	'W':  true,
     58 	'V':  true,
     59 	'X':  true,
     60 	'Y':  true,
     61 	'Z':  true,
     62 	'^':  true,
     63 	'_':  true,
     64 	'`':  true,
     65 	'a':  true,
     66 	'b':  true,
     67 	'c':  true,
     68 	'd':  true,
     69 	'e':  true,
     70 	'f':  true,
     71 	'g':  true,
     72 	'h':  true,
     73 	'i':  true,
     74 	'j':  true,
     75 	'k':  true,
     76 	'l':  true,
     77 	'm':  true,
     78 	'n':  true,
     79 	'o':  true,
     80 	'p':  true,
     81 	'q':  true,
     82 	'r':  true,
     83 	's':  true,
     84 	't':  true,
     85 	'u':  true,
     86 	'v':  true,
     87 	'w':  true,
     88 	'x':  true,
     89 	'y':  true,
     90 	'z':  true,
     91 	'|':  true,
     92 	'~':  true,
     93 }
     94 
     95 func IsTokenRune(r rune) bool {
     96 	i := int(r)
     97 	return i < len(isTokenTable) && isTokenTable[i]
     98 }
     99 
    100 func isNotToken(r rune) bool {
    101 	return !IsTokenRune(r)
    102 }
    103 
    104 // HeaderValuesContainsToken reports whether any string in values
    105 // contains the provided token, ASCII case-insensitively.
    106 func HeaderValuesContainsToken(values []string, token string) bool {
    107 	for _, v := range values {
    108 		if headerValueContainsToken(v, token) {
    109 			return true
    110 		}
    111 	}
    112 	return false
    113 }
    114 
    115 // isOWS reports whether b is an optional whitespace byte, as defined
    116 // by RFC 7230 section 3.2.3.
    117 func isOWS(b byte) bool { return b == ' ' || b == '\t' }
    118 
    119 // trimOWS returns x with all optional whitespace removes from the
    120 // beginning and end.
    121 func trimOWS(x string) string {
    122 	// TODO: consider using strings.Trim(x, " \t") instead,
    123 	// if and when it's fast enough. See issue 10292.
    124 	// But this ASCII-only code will probably always beat UTF-8
    125 	// aware code.
    126 	for len(x) > 0 && isOWS(x[0]) {
    127 		x = x[1:]
    128 	}
    129 	for len(x) > 0 && isOWS(x[len(x)-1]) {
    130 		x = x[:len(x)-1]
    131 	}
    132 	return x
    133 }
    134 
    135 // headerValueContainsToken reports whether v (assumed to be a
    136 // 0#element, in the ABNF extension described in RFC 7230 section 7)
    137 // contains token amongst its comma-separated tokens, ASCII
    138 // case-insensitively.
    139 func headerValueContainsToken(v string, token string) bool {
    140 	for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
    141 		if tokenEqual(trimOWS(v[:comma]), token) {
    142 			return true
    143 		}
    144 		v = v[comma+1:]
    145 	}
    146 	return tokenEqual(trimOWS(v), token)
    147 }
    148 
    149 // lowerASCII returns the ASCII lowercase version of b.
    150 func lowerASCII(b byte) byte {
    151 	if 'A' <= b && b <= 'Z' {
    152 		return b + ('a' - 'A')
    153 	}
    154 	return b
    155 }
    156 
    157 // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
    158 func tokenEqual(t1, t2 string) bool {
    159 	if len(t1) != len(t2) {
    160 		return false
    161 	}
    162 	for i, b := range t1 {
    163 		if b >= utf8.RuneSelf {
    164 			// No UTF-8 or non-ASCII allowed in tokens.
    165 			return false
    166 		}
    167 		if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
    168 			return false
    169 		}
    170 	}
    171 	return true
    172 }
    173 
    174 // isLWS reports whether b is linear white space, according
    175 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
    176 //
    177 //	LWS            = [CRLF] 1*( SP | HT )
    178 func isLWS(b byte) bool { return b == ' ' || b == '\t' }
    179 
    180 // isCTL reports whether b is a control byte, according
    181 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
    182 //
    183 //	CTL            = <any US-ASCII control character
    184 //	                 (octets 0 - 31) and DEL (127)>
    185 func isCTL(b byte) bool {
    186 	const del = 0x7f // a CTL
    187 	return b < ' ' || b == del
    188 }
    189 
    190 // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
    191 // HTTP/2 imposes the additional restriction that uppercase ASCII
    192 // letters are not allowed.
    193 //
    194 // RFC 7230 says:
    195 //
    196 //	header-field   = field-name ":" OWS field-value OWS
    197 //	field-name     = token
    198 //	token          = 1*tchar
    199 //	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
    200 //	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
    201 func ValidHeaderFieldName(v string) bool {
    202 	if len(v) == 0 {
    203 		return false
    204 	}
    205 	for _, r := range v {
    206 		if !IsTokenRune(r) {
    207 			return false
    208 		}
    209 	}
    210 	return true
    211 }
    212 
    213 // ValidHostHeader reports whether h is a valid host header.
    214 func ValidHostHeader(h string) bool {
    215 	// The latest spec is actually this:
    216 	//
    217 	// http://tools.ietf.org/html/rfc7230#section-5.4
    218 	//     Host = uri-host [ ":" port ]
    219 	//
    220 	// Where uri-host is:
    221 	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
    222 	//
    223 	// But we're going to be much more lenient for now and just
    224 	// search for any byte that's not a valid byte in any of those
    225 	// expressions.
    226 	for i := 0; i < len(h); i++ {
    227 		if !validHostByte[h[i]] {
    228 			return false
    229 		}
    230 	}
    231 	return true
    232 }
    233 
// validHostByte is the lookup table used by ValidHostHeader: the entry
// for a byte value is true when that byte is allowed to appear in a
// Host header. See the comment inside ValidHostHeader for the
// (deliberately lenient) rationale. Bytes not listed here are rejected.
var validHostByte = [256]bool{
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
	'8': true, '9': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	'!':  true, // sub-delims
	'$':  true, // sub-delims
	'%':  true, // pct-encoded (and used in IPv6 zones)
	'&':  true, // sub-delims
	'(':  true, // sub-delims
	')':  true, // sub-delims
	'*':  true, // sub-delims
	'+':  true, // sub-delims
	',':  true, // sub-delims
	'-':  true, // unreserved
	'.':  true, // unreserved
	':':  true, // IPv6address + Host expression's optional port
	';':  true, // sub-delims
	'=':  true, // sub-delims
	'[':  true, // opening delimiter of an IP-literal (RFC 3986 section 3.2.2)
	'\'': true, // sub-delims
	']':  true, // closing delimiter of an IP-literal (RFC 3986 section 3.2.2)
	'_':  true, // unreserved
	'~':  true, // unreserved
}
    269 
    270 // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
    271 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
    272 //
    273 //	message-header = field-name ":" [ field-value ]
    274 //	field-value    = *( field-content | LWS )
    275 //	field-content  = <the OCTETs making up the field-value
    276 //	                 and consisting of either *TEXT or combinations
    277 //	                 of token, separators, and quoted-string>
    278 //
    279 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
    280 //
    281 //	TEXT           = <any OCTET except CTLs,
    282 //	                  but including LWS>
    283 //	LWS            = [CRLF] 1*( SP | HT )
    284 //	CTL            = <any US-ASCII control character
    285 //	                 (octets 0 - 31) and DEL (127)>
    286 //
    287 // RFC 7230 says:
    288 //
    289 //	field-value    = *( field-content / obs-fold )
    290 //	obj-fold       =  N/A to http2, and deprecated
    291 //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
    292 //	field-vchar    = VCHAR / obs-text
    293 //	obs-text       = %x80-FF
    294 //	VCHAR          = "any visible [USASCII] character"
    295 //
    296 // http2 further says: "Similarly, HTTP/2 allows header field values
    297 // that are not valid. While most of the values that can be encoded
    298 // will not alter header field parsing, carriage return (CR, ASCII
    299 // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
    300 // 0x0) might be exploited by an attacker if they are translated
    301 // verbatim. Any request or response that contains a character not
    302 // permitted in a header field value MUST be treated as malformed
    303 // (Section 8.1.2.6). Valid characters are defined by the
    304 // field-content ABNF rule in Section 3.2 of [RFC7230]."
    305 //
    306 // This function does not (yet?) properly handle the rejection of
    307 // strings that begin or end with SP or HTAB.
    308 func ValidHeaderFieldValue(v string) bool {
    309 	for i := 0; i < len(v); i++ {
    310 		b := v[i]
    311 		if isCTL(b) && !isLWS(b) {
    312 			return false
    313 		}
    314 	}
    315 	return true
    316 }
    317 
    318 func isASCII(s string) bool {
    319 	for i := 0; i < len(s); i++ {
    320 		if s[i] >= utf8.RuneSelf {
    321 			return false
    322 		}
    323 	}
    324 	return true
    325 }
    326 
    327 // PunycodeHostPort returns the IDNA Punycode version
    328 // of the provided "host" or "host:port" string.
    329 func PunycodeHostPort(v string) (string, error) {
    330 	if isASCII(v) {
    331 		return v, nil
    332 	}
    333 
    334 	host, port, err := net.SplitHostPort(v)
    335 	if err != nil {
    336 		// The input 'v' argument was just a "host" argument,
    337 		// without a port. This error should not be returned
    338 		// to the caller.
    339 		host = v
    340 		port = ""
    341 	}
    342 	host, err = idna.ToASCII(host)
    343 	if err != nil {
    344 		// Non-UTF-8? Not representable in Punycode, in any
    345 		// case.
    346 		return "", err
    347 	}
    348 	if port == "" {
    349 		return host, nil
    350 	}
    351 	return net.JoinHostPort(host, port), nil
    352 }
	gtsocial-umbx Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE