gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

utils.go (14394B)


      1 /*
      2  * MinIO Go Library for Amazon S3 Compatible Cloud Storage
      3  * Copyright 2015-2020 MinIO, Inc.
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package s3utils
     19 
     20 import (
     21 	"bytes"
     22 	"encoding/hex"
     23 	"errors"
     24 	"net"
     25 	"net/url"
     26 	"regexp"
     27 	"sort"
     28 	"strings"
     29 	"unicode/utf8"
     30 )
     31 
     32 // Sentinel URL is the default url value which is invalid.
     33 var sentinelURL = url.URL{}
     34 
     35 // IsValidDomain validates if input string is a valid domain name.
     36 func IsValidDomain(host string) bool {
     37 	// See RFC 1035, RFC 3696.
     38 	host = strings.TrimSpace(host)
     39 	if len(host) == 0 || len(host) > 255 {
     40 		return false
     41 	}
     42 	// host cannot start or end with "-"
     43 	if host[len(host)-1:] == "-" || host[:1] == "-" {
     44 		return false
     45 	}
     46 	// host cannot start or end with "_"
     47 	if host[len(host)-1:] == "_" || host[:1] == "_" {
     48 		return false
     49 	}
     50 	// host cannot start with a "."
     51 	if host[:1] == "." {
     52 		return false
     53 	}
     54 	// All non alphanumeric characters are invalid.
     55 	if strings.ContainsAny(host, "`~!@#$%^&*()+={}[]|\\\"';:><?/") {
     56 		return false
     57 	}
     58 	// No need to regexp match, since the list is non-exhaustive.
     59 	// We let it valid and fail later.
     60 	return true
     61 }
     62 
     63 // IsValidIP parses input string for ip address validity.
     64 func IsValidIP(ip string) bool {
     65 	return net.ParseIP(ip) != nil
     66 }
     67 
     68 // IsVirtualHostSupported - verifies if bucketName can be part of
     69 // virtual host. Currently only Amazon S3 and Google Cloud Storage
     70 // would support this.
     71 func IsVirtualHostSupported(endpointURL url.URL, bucketName string) bool {
     72 	if endpointURL == sentinelURL {
     73 		return false
     74 	}
     75 	// bucketName can be valid but '.' in the hostname will fail SSL
     76 	// certificate validation. So do not use host-style for such buckets.
     77 	if endpointURL.Scheme == "https" && strings.Contains(bucketName, ".") {
     78 		return false
     79 	}
     80 	// Return true for all other cases
     81 	return IsAmazonEndpoint(endpointURL) || IsGoogleEndpoint(endpointURL) || IsAliyunOSSEndpoint(endpointURL)
     82 }
     83 
     84 // Refer for region styles - https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
     85 
     86 // amazonS3HostHyphen - regular expression used to determine if an arg is s3 host in hyphenated style.
     87 var amazonS3HostHyphen = regexp.MustCompile(`^s3-(.*?).amazonaws.com$`)
     88 
     89 // amazonS3HostDualStack - regular expression used to determine if an arg is s3 host dualstack.
     90 var amazonS3HostDualStack = regexp.MustCompile(`^s3.dualstack.(.*?).amazonaws.com$`)
     91 
     92 // amazonS3HostFIPS - regular expression used to determine if an arg is s3 FIPS host.
     93 var amazonS3HostFIPS = regexp.MustCompile(`^s3-fips.(.*?).amazonaws.com$`)
     94 
     95 // amazonS3HostFIPSDualStack - regular expression used to determine if an arg is s3 FIPS host dualstack.
     96 var amazonS3HostFIPSDualStack = regexp.MustCompile(`^s3-fips.dualstack.(.*?).amazonaws.com$`)
     97 
     98 // amazonS3HostDot - regular expression used to determine if an arg is s3 host in . style.
     99 var amazonS3HostDot = regexp.MustCompile(`^s3.(.*?).amazonaws.com$`)
    100 
    101 // amazonS3ChinaHost - regular expression used to determine if the arg is s3 china host.
    102 var amazonS3ChinaHost = regexp.MustCompile(`^s3.(cn.*?).amazonaws.com.cn$`)
    103 
    104 // amazonS3ChinaHostDualStack - regular expression used to determine if the arg is s3 china host dualstack.
    105 var amazonS3ChinaHostDualStack = regexp.MustCompile(`^s3.dualstack.(cn.*?).amazonaws.com.cn$`)
    106 
    107 // Regular expression used to determine if the arg is elb host.
    108 var elbAmazonRegex = regexp.MustCompile(`elb(.*?).amazonaws.com$`)
    109 
    110 // Regular expression used to determine if the arg is elb host in china.
    111 var elbAmazonCnRegex = regexp.MustCompile(`elb(.*?).amazonaws.com.cn$`)
    112 
    113 // amazonS3HostPrivateLink - regular expression used to determine if an arg is s3 host in AWS PrivateLink interface endpoints style
    114 var amazonS3HostPrivateLink = regexp.MustCompile(`^(?:bucket|accesspoint).vpce-.*?.s3.(.*?).vpce.amazonaws.com$`)
    115 
    116 // GetRegionFromURL - returns a region from url host.
    117 func GetRegionFromURL(endpointURL url.URL) string {
    118 	if endpointURL == sentinelURL {
    119 		return ""
    120 	}
    121 	if endpointURL.Host == "s3-external-1.amazonaws.com" {
    122 		return ""
    123 	}
    124 	if IsAmazonGovCloudEndpoint(endpointURL) {
    125 		return "us-gov-west-1"
    126 	}
    127 	// if elb's are used we cannot calculate which region it may be, just return empty.
    128 	if elbAmazonRegex.MatchString(endpointURL.Host) || elbAmazonCnRegex.MatchString(endpointURL.Host) {
    129 		return ""
    130 	}
    131 	parts := amazonS3HostDualStack.FindStringSubmatch(endpointURL.Host)
    132 	if len(parts) > 1 {
    133 		return parts[1]
    134 	}
    135 	if IsAmazonFIPSUSEastWestEndpoint(endpointURL) {
    136 		// We check for FIPS dualstack matching first to avoid the non-greedy
    137 		// regex for FIPS non-dualstack matching a dualstack URL
    138 		parts = amazonS3HostFIPSDualStack.FindStringSubmatch(endpointURL.Host)
    139 		if len(parts) > 1 {
    140 			return parts[1]
    141 		}
    142 		parts = amazonS3HostFIPS.FindStringSubmatch(endpointURL.Host)
    143 		if len(parts) > 1 {
    144 			return parts[1]
    145 		}
    146 	}
    147 	parts = amazonS3HostHyphen.FindStringSubmatch(endpointURL.Host)
    148 	if len(parts) > 1 {
    149 		return parts[1]
    150 	}
    151 	parts = amazonS3ChinaHost.FindStringSubmatch(endpointURL.Host)
    152 	if len(parts) > 1 {
    153 		return parts[1]
    154 	}
    155 	parts = amazonS3ChinaHostDualStack.FindStringSubmatch(endpointURL.Host)
    156 	if len(parts) > 1 {
    157 		return parts[1]
    158 	}
    159 	parts = amazonS3HostDot.FindStringSubmatch(endpointURL.Host)
    160 	if len(parts) > 1 {
    161 		return parts[1]
    162 	}
    163 	parts = amazonS3HostPrivateLink.FindStringSubmatch(endpointURL.Host)
    164 	if len(parts) > 1 {
    165 		return parts[1]
    166 	}
    167 	return ""
    168 }
    169 
    170 // IsAliyunOSSEndpoint - Match if it is exactly Aliyun OSS endpoint.
    171 func IsAliyunOSSEndpoint(endpointURL url.URL) bool {
    172 	return strings.HasSuffix(endpointURL.Host, "aliyuncs.com")
    173 }
    174 
    175 // IsAmazonEndpoint - Match if it is exactly Amazon S3 endpoint.
    176 func IsAmazonEndpoint(endpointURL url.URL) bool {
    177 	if endpointURL.Host == "s3-external-1.amazonaws.com" || endpointURL.Host == "s3.amazonaws.com" {
    178 		return true
    179 	}
    180 	return GetRegionFromURL(endpointURL) != ""
    181 }
    182 
    183 // IsAmazonGovCloudEndpoint - Match if it is exactly Amazon S3 GovCloud endpoint.
    184 func IsAmazonGovCloudEndpoint(endpointURL url.URL) bool {
    185 	if endpointURL == sentinelURL {
    186 		return false
    187 	}
    188 	return (endpointURL.Host == "s3-us-gov-west-1.amazonaws.com" ||
    189 		IsAmazonFIPSGovCloudEndpoint(endpointURL))
    190 }
    191 
    192 // IsAmazonFIPSGovCloudEndpoint - Match if it is exactly Amazon S3 FIPS GovCloud endpoint.
    193 // See https://aws.amazon.com/compliance/fips.
    194 func IsAmazonFIPSGovCloudEndpoint(endpointURL url.URL) bool {
    195 	if endpointURL == sentinelURL {
    196 		return false
    197 	}
    198 	return endpointURL.Host == "s3-fips-us-gov-west-1.amazonaws.com" ||
    199 		endpointURL.Host == "s3-fips.us-gov-west-1.amazonaws.com" ||
    200 		endpointURL.Host == "s3-fips.dualstack.us-gov-west-1.amazonaws.com"
    201 }
    202 
    203 // IsAmazonFIPSUSEastWestEndpoint - Match if it is exactly Amazon S3 FIPS US East/West endpoint.
    204 // See https://aws.amazon.com/compliance/fips.
    205 func IsAmazonFIPSUSEastWestEndpoint(endpointURL url.URL) bool {
    206 	if endpointURL == sentinelURL {
    207 		return false
    208 	}
    209 	switch endpointURL.Host {
    210 	case "s3-fips.us-east-2.amazonaws.com":
    211 	case "s3-fips.dualstack.us-west-1.amazonaws.com":
    212 	case "s3-fips.dualstack.us-west-2.amazonaws.com":
    213 	case "s3-fips.dualstack.us-east-2.amazonaws.com":
    214 	case "s3-fips.dualstack.us-east-1.amazonaws.com":
    215 	case "s3-fips.us-west-1.amazonaws.com":
    216 	case "s3-fips.us-west-2.amazonaws.com":
    217 	case "s3-fips.us-east-1.amazonaws.com":
    218 	default:
    219 		return false
    220 	}
    221 	return true
    222 }
    223 
    224 // IsAmazonFIPSEndpoint - Match if it is exactly Amazon S3 FIPS endpoint.
    225 // See https://aws.amazon.com/compliance/fips.
    226 func IsAmazonFIPSEndpoint(endpointURL url.URL) bool {
    227 	return IsAmazonFIPSUSEastWestEndpoint(endpointURL) || IsAmazonFIPSGovCloudEndpoint(endpointURL)
    228 }
    229 
    230 // IsAmazonPrivateLinkEndpoint - Match if it is exactly Amazon S3 PrivateLink interface endpoint
    231 // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html.
    232 func IsAmazonPrivateLinkEndpoint(endpointURL url.URL) bool {
    233 	if endpointURL == sentinelURL {
    234 		return false
    235 	}
    236 	return amazonS3HostPrivateLink.MatchString(endpointURL.Host)
    237 }
    238 
    239 // IsGoogleEndpoint - Match if it is exactly Google cloud storage endpoint.
    240 func IsGoogleEndpoint(endpointURL url.URL) bool {
    241 	if endpointURL == sentinelURL {
    242 		return false
    243 	}
    244 	return endpointURL.Host == "storage.googleapis.com"
    245 }
    246 
    247 // Expects ascii encoded strings - from output of urlEncodePath
    248 func percentEncodeSlash(s string) string {
    249 	return strings.ReplaceAll(s, "/", "%2F")
    250 }
    251 
    252 // QueryEncode - encodes query values in their URL encoded form. In
    253 // addition to the percent encoding performed by urlEncodePath() used
    254 // here, it also percent encodes '/' (forward slash)
    255 func QueryEncode(v url.Values) string {
    256 	if v == nil {
    257 		return ""
    258 	}
    259 	var buf bytes.Buffer
    260 	keys := make([]string, 0, len(v))
    261 	for k := range v {
    262 		keys = append(keys, k)
    263 	}
    264 	sort.Strings(keys)
    265 	for _, k := range keys {
    266 		vs := v[k]
    267 		prefix := percentEncodeSlash(EncodePath(k)) + "="
    268 		for _, v := range vs {
    269 			if buf.Len() > 0 {
    270 				buf.WriteByte('&')
    271 			}
    272 			buf.WriteString(prefix)
    273 			buf.WriteString(percentEncodeSlash(EncodePath(v)))
    274 		}
    275 	}
    276 	return buf.String()
    277 }
    278 
    279 // TagDecode - decodes canonical tag into map of key and value.
    280 func TagDecode(ctag string) map[string]string {
    281 	if ctag == "" {
    282 		return map[string]string{}
    283 	}
    284 	tags := strings.Split(ctag, "&")
    285 	tagMap := make(map[string]string, len(tags))
    286 	var err error
    287 	for _, tag := range tags {
    288 		kvs := strings.SplitN(tag, "=", 2)
    289 		if len(kvs) == 0 {
    290 			return map[string]string{}
    291 		}
    292 		if len(kvs) == 1 {
    293 			return map[string]string{}
    294 		}
    295 		tagMap[kvs[0]], err = url.PathUnescape(kvs[1])
    296 		if err != nil {
    297 			continue
    298 		}
    299 	}
    300 	return tagMap
    301 }
    302 
    303 // TagEncode - encodes tag values in their URL encoded form. In
    304 // addition to the percent encoding performed by urlEncodePath() used
    305 // here, it also percent encodes '/' (forward slash)
    306 func TagEncode(tags map[string]string) string {
    307 	if tags == nil {
    308 		return ""
    309 	}
    310 	values := url.Values{}
    311 	for k, v := range tags {
    312 		values[k] = []string{v}
    313 	}
    314 	return QueryEncode(values)
    315 }
    316 
    317 // if object matches reserved string, no need to encode them
    318 var reservedObjectNames = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$")
    319 
    320 // EncodePath encode the strings from UTF-8 byte representations to HTML hex escape sequences
    321 //
    322 // This is necessary since regular url.Parse() and url.Encode() functions do not support UTF-8
    323 // non english characters cannot be parsed due to the nature in which url.Encode() is written
    324 //
    325 // This function on the other hand is a direct replacement for url.Encode() technique to support
    326 // pretty much every UTF-8 character.
    327 func EncodePath(pathName string) string {
    328 	if reservedObjectNames.MatchString(pathName) {
    329 		return pathName
    330 	}
    331 	var encodedPathname strings.Builder
    332 	for _, s := range pathName {
    333 		if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark)
    334 			encodedPathname.WriteRune(s)
    335 			continue
    336 		}
    337 		switch s {
    338 		case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark)
    339 			encodedPathname.WriteRune(s)
    340 			continue
    341 		default:
    342 			l := utf8.RuneLen(s)
    343 			if l < 0 {
    344 				// if utf8 cannot convert return the same string as is
    345 				return pathName
    346 			}
    347 			u := make([]byte, l)
    348 			utf8.EncodeRune(u, s)
    349 			for _, r := range u {
    350 				hex := hex.EncodeToString([]byte{r})
    351 				encodedPathname.WriteString("%" + strings.ToUpper(hex))
    352 			}
    353 		}
    354 	}
    355 	return encodedPathname.String()
    356 }
    357 
    358 // We support '.' with bucket names but we fallback to using path
    359 // style requests instead for such buckets.
    360 var (
    361 	validBucketName       = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9\.\-\_\:]{1,61}[A-Za-z0-9]$`)
    362 	validBucketNameStrict = regexp.MustCompile(`^[a-z0-9][a-z0-9\.\-]{1,61}[a-z0-9]$`)
    363 	ipAddress             = regexp.MustCompile(`^(\d+\.){3}\d+$`)
    364 )
    365 
    366 // Common checker for both stricter and basic validation.
    367 func checkBucketNameCommon(bucketName string, strict bool) (err error) {
    368 	if strings.TrimSpace(bucketName) == "" {
    369 		return errors.New("Bucket name cannot be empty")
    370 	}
    371 	if len(bucketName) < 3 {
    372 		return errors.New("Bucket name cannot be shorter than 3 characters")
    373 	}
    374 	if len(bucketName) > 63 {
    375 		return errors.New("Bucket name cannot be longer than 63 characters")
    376 	}
    377 	if ipAddress.MatchString(bucketName) {
    378 		return errors.New("Bucket name cannot be an ip address")
    379 	}
    380 	if strings.Contains(bucketName, "..") || strings.Contains(bucketName, ".-") || strings.Contains(bucketName, "-.") {
    381 		return errors.New("Bucket name contains invalid characters")
    382 	}
    383 	if strict {
    384 		if !validBucketNameStrict.MatchString(bucketName) {
    385 			err = errors.New("Bucket name contains invalid characters")
    386 		}
    387 		return err
    388 	}
    389 	if !validBucketName.MatchString(bucketName) {
    390 		err = errors.New("Bucket name contains invalid characters")
    391 	}
    392 	return err
    393 }
    394 
    395 // CheckValidBucketName - checks if we have a valid input bucket name.
    396 func CheckValidBucketName(bucketName string) (err error) {
    397 	return checkBucketNameCommon(bucketName, false)
    398 }
    399 
    400 // CheckValidBucketNameStrict - checks if we have a valid input bucket name.
    401 // This is a stricter version.
    402 // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html
    403 func CheckValidBucketNameStrict(bucketName string) (err error) {
    404 	return checkBucketNameCommon(bucketName, true)
    405 }
    406 
    407 // CheckValidObjectNamePrefix - checks if we have a valid input object name prefix.
    408 //   - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
    409 func CheckValidObjectNamePrefix(objectName string) error {
    410 	if len(objectName) > 1024 {
    411 		return errors.New("Object name cannot be longer than 1024 characters")
    412 	}
    413 	if !utf8.ValidString(objectName) {
    414 		return errors.New("Object name with non UTF-8 strings are not supported")
    415 	}
    416 	return nil
    417 }
    418 
    419 // CheckValidObjectName - checks if we have a valid input object name.
    420 //   - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
    421 func CheckValidObjectName(objectName string) error {
    422 	if strings.TrimSpace(objectName) == "" {
    423 		return errors.New("Object name cannot be empty")
    424 	}
    425 	return CheckValidObjectNamePrefix(objectName)
    426 }