utils.go (14394B)
/*
 * MinIO Go Library for Amazon S3 Compatible Cloud Storage
 * Copyright 2015-2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package s3utils

import (
	"bytes"
	"encoding/hex"
	"errors"
	"net"
	"net/url"
	"regexp"
	"sort"
	"strings"
	"unicode/utf8"
)

// sentinelURL is the zero url.URL value; the endpoint helpers in this file
// compare against it and treat it as "no endpoint given".
var sentinelURL = url.URL{}

// IsValidDomain reports whether host passes a set of basic, deliberately
// non-exhaustive sanity checks for a domain name (see RFC 1035, RFC 3696).
//
// Surrounding whitespace is trimmed first. The result is false when the
// trimmed host is empty or longer than 255 bytes, starts or ends with '-'
// or '_', starts with '.', or contains any of the punctuation characters
// listed below. Everything else is accepted.
func IsValidDomain(host string) bool {
	// See RFC 1035, RFC 3696.
	host = strings.TrimSpace(host)
	if len(host) == 0 || len(host) > 255 {
		return false
	}
	// host cannot start or end with "-"
	if host[len(host)-1:] == "-" || host[:1] == "-" {
		return false
	}
	// host cannot start or end with "_"
	if host[len(host)-1:] == "_" || host[:1] == "_" {
		return false
	}
	// host cannot start with a "."
	if host[:1] == "." {
		return false
	}
	// Reject the punctuation characters listed here. Note that '-', '_',
	// '.' and ',' are not in the list and therefore remain allowed inside
	// the name.
	if strings.ContainsAny(host, "`~!@#$%^&*()+={}[]|\\\"';:><?/") {
		return false
	}
	// No need to regexp match, since the list is non-exhaustive.
	// We let it valid and fail later.
	return true
}

// IsValidIP reports whether ip is accepted by net.ParseIP.
func IsValidIP(ip string) bool {
	return net.ParseIP(ip) != nil
}

// IsVirtualHostSupported - verifies if bucketName can be part of
// virtual host. Only endpoints recognized by IsAmazonEndpoint,
// IsGoogleEndpoint or IsAliyunOSSEndpoint are reported as supporting this.
//
// The zero URL is never supported, and neither is the combination of an
// "https" endpoint with a bucket name that contains a '.'.
func IsVirtualHostSupported(endpointURL url.URL, bucketName string) bool {
	if endpointURL == sentinelURL {
		return false
	}
	// bucketName can be valid but '.' in the hostname will fail SSL
	// certificate validation. So do not use host-style for such buckets.
	if endpointURL.Scheme == "https" && strings.Contains(bucketName, ".") {
		return false
	}
	// Return true for all other cases
	return IsAmazonEndpoint(endpointURL) || IsGoogleEndpoint(endpointURL) || IsAliyunOSSEndpoint(endpointURL)
}

// Refer for region styles - https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
//
// NOTE(review): the '.' characters in the patterns below are not escaped, so
// each of them matches any single character rather than only a literal dot.
// The first capture group of every pattern is the region.

// amazonS3HostHyphen - regular expression used to determine if an arg is s3 host in hyphenated style.
var amazonS3HostHyphen = regexp.MustCompile(`^s3-(.*?).amazonaws.com$`)

// amazonS3HostDualStack - regular expression used to determine if an arg is s3 host dualstack.
var amazonS3HostDualStack = regexp.MustCompile(`^s3.dualstack.(.*?).amazonaws.com$`)

// amazonS3HostFIPS - regular expression used to determine if an arg is s3 FIPS host.
var amazonS3HostFIPS = regexp.MustCompile(`^s3-fips.(.*?).amazonaws.com$`)

// amazonS3HostFIPSDualStack - regular expression used to determine if an arg is s3 FIPS host dualstack.
var amazonS3HostFIPSDualStack = regexp.MustCompile(`^s3-fips.dualstack.(.*?).amazonaws.com$`)

// amazonS3HostDot - regular expression used to determine if an arg is s3 host in . style.
var amazonS3HostDot = regexp.MustCompile(`^s3.(.*?).amazonaws.com$`)

// amazonS3ChinaHost - regular expression used to determine if the arg is s3 china host.
var amazonS3ChinaHost = regexp.MustCompile(`^s3.(cn.*?).amazonaws.com.cn$`)

// amazonS3ChinaHostDualStack - regular expression used to determine if the arg is s3 china host dualstack.
105 var amazonS3ChinaHostDualStack = regexp.MustCompile(`^s3.dualstack.(cn.*?).amazonaws.com.cn$`) 106 107 // Regular expression used to determine if the arg is elb host. 108 var elbAmazonRegex = regexp.MustCompile(`elb(.*?).amazonaws.com$`) 109 110 // Regular expression used to determine if the arg is elb host in china. 111 var elbAmazonCnRegex = regexp.MustCompile(`elb(.*?).amazonaws.com.cn$`) 112 113 // amazonS3HostPrivateLink - regular expression used to determine if an arg is s3 host in AWS PrivateLink interface endpoints style 114 var amazonS3HostPrivateLink = regexp.MustCompile(`^(?:bucket|accesspoint).vpce-.*?.s3.(.*?).vpce.amazonaws.com$`) 115 116 // GetRegionFromURL - returns a region from url host. 117 func GetRegionFromURL(endpointURL url.URL) string { 118 if endpointURL == sentinelURL { 119 return "" 120 } 121 if endpointURL.Host == "s3-external-1.amazonaws.com" { 122 return "" 123 } 124 if IsAmazonGovCloudEndpoint(endpointURL) { 125 return "us-gov-west-1" 126 } 127 // if elb's are used we cannot calculate which region it may be, just return empty. 
128 if elbAmazonRegex.MatchString(endpointURL.Host) || elbAmazonCnRegex.MatchString(endpointURL.Host) { 129 return "" 130 } 131 parts := amazonS3HostDualStack.FindStringSubmatch(endpointURL.Host) 132 if len(parts) > 1 { 133 return parts[1] 134 } 135 if IsAmazonFIPSUSEastWestEndpoint(endpointURL) { 136 // We check for FIPS dualstack matching first to avoid the non-greedy 137 // regex for FIPS non-dualstack matching a dualstack URL 138 parts = amazonS3HostFIPSDualStack.FindStringSubmatch(endpointURL.Host) 139 if len(parts) > 1 { 140 return parts[1] 141 } 142 parts = amazonS3HostFIPS.FindStringSubmatch(endpointURL.Host) 143 if len(parts) > 1 { 144 return parts[1] 145 } 146 } 147 parts = amazonS3HostHyphen.FindStringSubmatch(endpointURL.Host) 148 if len(parts) > 1 { 149 return parts[1] 150 } 151 parts = amazonS3ChinaHost.FindStringSubmatch(endpointURL.Host) 152 if len(parts) > 1 { 153 return parts[1] 154 } 155 parts = amazonS3ChinaHostDualStack.FindStringSubmatch(endpointURL.Host) 156 if len(parts) > 1 { 157 return parts[1] 158 } 159 parts = amazonS3HostDot.FindStringSubmatch(endpointURL.Host) 160 if len(parts) > 1 { 161 return parts[1] 162 } 163 parts = amazonS3HostPrivateLink.FindStringSubmatch(endpointURL.Host) 164 if len(parts) > 1 { 165 return parts[1] 166 } 167 return "" 168 } 169 170 // IsAliyunOSSEndpoint - Match if it is exactly Aliyun OSS endpoint. 171 func IsAliyunOSSEndpoint(endpointURL url.URL) bool { 172 return strings.HasSuffix(endpointURL.Host, "aliyuncs.com") 173 } 174 175 // IsAmazonEndpoint - Match if it is exactly Amazon S3 endpoint. 176 func IsAmazonEndpoint(endpointURL url.URL) bool { 177 if endpointURL.Host == "s3-external-1.amazonaws.com" || endpointURL.Host == "s3.amazonaws.com" { 178 return true 179 } 180 return GetRegionFromURL(endpointURL) != "" 181 } 182 183 // IsAmazonGovCloudEndpoint - Match if it is exactly Amazon S3 GovCloud endpoint. 
184 func IsAmazonGovCloudEndpoint(endpointURL url.URL) bool { 185 if endpointURL == sentinelURL { 186 return false 187 } 188 return (endpointURL.Host == "s3-us-gov-west-1.amazonaws.com" || 189 IsAmazonFIPSGovCloudEndpoint(endpointURL)) 190 } 191 192 // IsAmazonFIPSGovCloudEndpoint - Match if it is exactly Amazon S3 FIPS GovCloud endpoint. 193 // See https://aws.amazon.com/compliance/fips. 194 func IsAmazonFIPSGovCloudEndpoint(endpointURL url.URL) bool { 195 if endpointURL == sentinelURL { 196 return false 197 } 198 return endpointURL.Host == "s3-fips-us-gov-west-1.amazonaws.com" || 199 endpointURL.Host == "s3-fips.us-gov-west-1.amazonaws.com" || 200 endpointURL.Host == "s3-fips.dualstack.us-gov-west-1.amazonaws.com" 201 } 202 203 // IsAmazonFIPSUSEastWestEndpoint - Match if it is exactly Amazon S3 FIPS US East/West endpoint. 204 // See https://aws.amazon.com/compliance/fips. 205 func IsAmazonFIPSUSEastWestEndpoint(endpointURL url.URL) bool { 206 if endpointURL == sentinelURL { 207 return false 208 } 209 switch endpointURL.Host { 210 case "s3-fips.us-east-2.amazonaws.com": 211 case "s3-fips.dualstack.us-west-1.amazonaws.com": 212 case "s3-fips.dualstack.us-west-2.amazonaws.com": 213 case "s3-fips.dualstack.us-east-2.amazonaws.com": 214 case "s3-fips.dualstack.us-east-1.amazonaws.com": 215 case "s3-fips.us-west-1.amazonaws.com": 216 case "s3-fips.us-west-2.amazonaws.com": 217 case "s3-fips.us-east-1.amazonaws.com": 218 default: 219 return false 220 } 221 return true 222 } 223 224 // IsAmazonFIPSEndpoint - Match if it is exactly Amazon S3 FIPS endpoint. 225 // See https://aws.amazon.com/compliance/fips. 226 func IsAmazonFIPSEndpoint(endpointURL url.URL) bool { 227 return IsAmazonFIPSUSEastWestEndpoint(endpointURL) || IsAmazonFIPSGovCloudEndpoint(endpointURL) 228 } 229 230 // IsAmazonPrivateLinkEndpoint - Match if it is exactly Amazon S3 PrivateLink interface endpoint 231 // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html. 
232 func IsAmazonPrivateLinkEndpoint(endpointURL url.URL) bool { 233 if endpointURL == sentinelURL { 234 return false 235 } 236 return amazonS3HostPrivateLink.MatchString(endpointURL.Host) 237 } 238 239 // IsGoogleEndpoint - Match if it is exactly Google cloud storage endpoint. 240 func IsGoogleEndpoint(endpointURL url.URL) bool { 241 if endpointURL == sentinelURL { 242 return false 243 } 244 return endpointURL.Host == "storage.googleapis.com" 245 } 246 247 // Expects ascii encoded strings - from output of urlEncodePath 248 func percentEncodeSlash(s string) string { 249 return strings.ReplaceAll(s, "/", "%2F") 250 } 251 252 // QueryEncode - encodes query values in their URL encoded form. In 253 // addition to the percent encoding performed by urlEncodePath() used 254 // here, it also percent encodes '/' (forward slash) 255 func QueryEncode(v url.Values) string { 256 if v == nil { 257 return "" 258 } 259 var buf bytes.Buffer 260 keys := make([]string, 0, len(v)) 261 for k := range v { 262 keys = append(keys, k) 263 } 264 sort.Strings(keys) 265 for _, k := range keys { 266 vs := v[k] 267 prefix := percentEncodeSlash(EncodePath(k)) + "=" 268 for _, v := range vs { 269 if buf.Len() > 0 { 270 buf.WriteByte('&') 271 } 272 buf.WriteString(prefix) 273 buf.WriteString(percentEncodeSlash(EncodePath(v))) 274 } 275 } 276 return buf.String() 277 } 278 279 // TagDecode - decodes canonical tag into map of key and value. 
280 func TagDecode(ctag string) map[string]string { 281 if ctag == "" { 282 return map[string]string{} 283 } 284 tags := strings.Split(ctag, "&") 285 tagMap := make(map[string]string, len(tags)) 286 var err error 287 for _, tag := range tags { 288 kvs := strings.SplitN(tag, "=", 2) 289 if len(kvs) == 0 { 290 return map[string]string{} 291 } 292 if len(kvs) == 1 { 293 return map[string]string{} 294 } 295 tagMap[kvs[0]], err = url.PathUnescape(kvs[1]) 296 if err != nil { 297 continue 298 } 299 } 300 return tagMap 301 } 302 303 // TagEncode - encodes tag values in their URL encoded form. In 304 // addition to the percent encoding performed by urlEncodePath() used 305 // here, it also percent encodes '/' (forward slash) 306 func TagEncode(tags map[string]string) string { 307 if tags == nil { 308 return "" 309 } 310 values := url.Values{} 311 for k, v := range tags { 312 values[k] = []string{v} 313 } 314 return QueryEncode(values) 315 } 316 317 // if object matches reserved string, no need to encode them 318 var reservedObjectNames = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$") 319 320 // EncodePath encode the strings from UTF-8 byte representations to HTML hex escape sequences 321 // 322 // This is necessary since regular url.Parse() and url.Encode() functions do not support UTF-8 323 // non english characters cannot be parsed due to the nature in which url.Encode() is written 324 // 325 // This function on the other hand is a direct replacement for url.Encode() technique to support 326 // pretty much every UTF-8 character. 
327 func EncodePath(pathName string) string { 328 if reservedObjectNames.MatchString(pathName) { 329 return pathName 330 } 331 var encodedPathname strings.Builder 332 for _, s := range pathName { 333 if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark) 334 encodedPathname.WriteRune(s) 335 continue 336 } 337 switch s { 338 case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark) 339 encodedPathname.WriteRune(s) 340 continue 341 default: 342 l := utf8.RuneLen(s) 343 if l < 0 { 344 // if utf8 cannot convert return the same string as is 345 return pathName 346 } 347 u := make([]byte, l) 348 utf8.EncodeRune(u, s) 349 for _, r := range u { 350 hex := hex.EncodeToString([]byte{r}) 351 encodedPathname.WriteString("%" + strings.ToUpper(hex)) 352 } 353 } 354 } 355 return encodedPathname.String() 356 } 357 358 // We support '.' with bucket names but we fallback to using path 359 // style requests instead for such buckets. 360 var ( 361 validBucketName = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9\.\-\_\:]{1,61}[A-Za-z0-9]$`) 362 validBucketNameStrict = regexp.MustCompile(`^[a-z0-9][a-z0-9\.\-]{1,61}[a-z0-9]$`) 363 ipAddress = regexp.MustCompile(`^(\d+\.){3}\d+$`) 364 ) 365 366 // Common checker for both stricter and basic validation. 
367 func checkBucketNameCommon(bucketName string, strict bool) (err error) { 368 if strings.TrimSpace(bucketName) == "" { 369 return errors.New("Bucket name cannot be empty") 370 } 371 if len(bucketName) < 3 { 372 return errors.New("Bucket name cannot be shorter than 3 characters") 373 } 374 if len(bucketName) > 63 { 375 return errors.New("Bucket name cannot be longer than 63 characters") 376 } 377 if ipAddress.MatchString(bucketName) { 378 return errors.New("Bucket name cannot be an ip address") 379 } 380 if strings.Contains(bucketName, "..") || strings.Contains(bucketName, ".-") || strings.Contains(bucketName, "-.") { 381 return errors.New("Bucket name contains invalid characters") 382 } 383 if strict { 384 if !validBucketNameStrict.MatchString(bucketName) { 385 err = errors.New("Bucket name contains invalid characters") 386 } 387 return err 388 } 389 if !validBucketName.MatchString(bucketName) { 390 err = errors.New("Bucket name contains invalid characters") 391 } 392 return err 393 } 394 395 // CheckValidBucketName - checks if we have a valid input bucket name. 396 func CheckValidBucketName(bucketName string) (err error) { 397 return checkBucketNameCommon(bucketName, false) 398 } 399 400 // CheckValidBucketNameStrict - checks if we have a valid input bucket name. 401 // This is a stricter version. 402 // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html 403 func CheckValidBucketNameStrict(bucketName string) (err error) { 404 return checkBucketNameCommon(bucketName, true) 405 } 406 407 // CheckValidObjectNamePrefix - checks if we have a valid input object name prefix. 
//   - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
//
// An error is returned when objectName is longer than 1024 bytes or is not
// valid UTF-8; otherwise the result is nil. The empty string is accepted.
func CheckValidObjectNamePrefix(objectName string) error {
	switch {
	case len(objectName) > 1024:
		return errors.New("Object name cannot be longer than 1024 characters")
	case !utf8.ValidString(objectName):
		return errors.New("Object name with non UTF-8 strings are not supported")
	default:
		return nil
	}
}

// CheckValidObjectName validates a full object name: it must not be empty
// or consist only of whitespace, and it must additionally satisfy
// CheckValidObjectNamePrefix.
//   - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
func CheckValidObjectName(objectName string) error {
	if trimmed := strings.TrimSpace(objectName); trimmed != "" {
		return CheckValidObjectNamePrefix(objectName)
	}
	return errors.New("Object name cannot be empty")
}