idna9.0.0.go (19657B)
1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2016 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 //go:build !go1.10 8 // +build !go1.10 9 10 // Package idna implements IDNA2008 using the compatibility processing 11 // defined by UTS (Unicode Technical Standard) #46, which defines a standard to 12 // deal with the transition from IDNA2003. 13 // 14 // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC 15 // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894. 16 // UTS #46 is defined in https://www.unicode.org/reports/tr46. 17 // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the 18 // differences between these two standards. 19 package idna // import "golang.org/x/net/idna" 20 21 import ( 22 "fmt" 23 "strings" 24 "unicode/utf8" 25 26 "golang.org/x/text/secure/bidirule" 27 "golang.org/x/text/unicode/norm" 28 ) 29 30 // NOTE: Unlike common practice in Go APIs, the functions will return a 31 // sanitized domain name in case of errors. Browsers sometimes use a partially 32 // evaluated string as lookup. 33 // TODO: the current error handling is, in my opinion, the least opinionated. 34 // Other strategies are also viable, though: 35 // Option 1) Return an empty string in case of error, but allow the user to 36 // specify explicitly which errors to ignore. 37 // Option 2) Return the partially evaluated string if it is itself a valid 38 // string, otherwise return the empty string in case of error. 39 // Option 3) Option 1 and 2. 40 // Option 4) Always return an empty string for now and implement Option 1 as 41 // needed, and document that the return string may not be empty in case of 42 // error in the future. 43 // I think Option 1 is best, but it is quite opinionated. 44 45 // ToASCII is a wrapper for Punycode.ToASCII. 
46 func ToASCII(s string) (string, error) { 47 return Punycode.process(s, true) 48 } 49 50 // ToUnicode is a wrapper for Punycode.ToUnicode. 51 func ToUnicode(s string) (string, error) { 52 return Punycode.process(s, false) 53 } 54 55 // An Option configures a Profile at creation time. 56 type Option func(*options) 57 58 // Transitional sets a Profile to use the Transitional mapping as defined in UTS 59 // #46. This will cause, for example, "ß" to be mapped to "ss". Using the 60 // transitional mapping provides a compromise between IDNA2003 and IDNA2008 61 // compatibility. It is used by some browsers when resolving domain names. This 62 // option is only meaningful if combined with MapForLookup. 63 func Transitional(transitional bool) Option { 64 return func(o *options) { o.transitional = transitional } 65 } 66 67 // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts 68 // are longer than allowed by the RFC. 69 // 70 // This option corresponds to the VerifyDnsLength flag in UTS #46. 71 func VerifyDNSLength(verify bool) Option { 72 return func(o *options) { o.verifyDNSLength = verify } 73 } 74 75 // RemoveLeadingDots removes leading label separators. Leading runes that map to 76 // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. 77 func RemoveLeadingDots(remove bool) Option { 78 return func(o *options) { o.removeLeadingDots = remove } 79 } 80 81 // ValidateLabels sets whether to check the mandatory label validation criteria 82 // as defined in Section 5.4 of RFC 5891. This includes testing for correct use 83 // of hyphens ('-'), normalization, validity of runes, and the context rules. 84 // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags 85 // in UTS #46. 86 func ValidateLabels(enable bool) Option { 87 return func(o *options) { 88 // Don't override existing mappings, but set one that at least checks 89 // normalization if it is not set. 
90 if o.mapping == nil && enable { 91 o.mapping = normalize 92 } 93 o.trie = trie 94 o.checkJoiners = enable 95 o.checkHyphens = enable 96 if enable { 97 o.fromPuny = validateFromPunycode 98 } else { 99 o.fromPuny = nil 100 } 101 } 102 } 103 104 // CheckHyphens sets whether to check for correct use of hyphens ('-') in 105 // labels. Most web browsers do not have this option set, since labels such as 106 // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. 107 // 108 // This option corresponds to the CheckHyphens flag in UTS #46. 109 func CheckHyphens(enable bool) Option { 110 return func(o *options) { o.checkHyphens = enable } 111 } 112 113 // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix 114 // A of RFC 5892, concerning the use of joiner runes. 115 // 116 // This option corresponds to the CheckJoiners flag in UTS #46. 117 func CheckJoiners(enable bool) Option { 118 return func(o *options) { 119 o.trie = trie 120 o.checkJoiners = enable 121 } 122 } 123 124 // StrictDomainName limits the set of permissable ASCII characters to those 125 // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the 126 // hyphen). This is set by default for MapForLookup and ValidateForRegistration, 127 // but is only useful if ValidateLabels is set. 128 // 129 // This option is useful, for instance, for browsers that allow characters 130 // outside this range, for example a '_' (U+005F LOW LINE). See 131 // http://www.rfc-editor.org/std/std3.txt for more details. 132 // 133 // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. 134 func StrictDomainName(use bool) Option { 135 return func(o *options) { o.useSTD3Rules = use } 136 } 137 138 // NOTE: the following options pull in tables. The tables should not be linked 139 // in as long as the options are not used. 140 141 // BidiRule enables the Bidi rule as defined in RFC 5893. Any application 142 // that relies on proper validation of labels should include this rule. 
143 // 144 // This option corresponds to the CheckBidi flag in UTS #46. 145 func BidiRule() Option { 146 return func(o *options) { o.bidirule = bidirule.ValidString } 147 } 148 149 // ValidateForRegistration sets validation options to verify that a given IDN is 150 // properly formatted for registration as defined by Section 4 of RFC 5891. 151 func ValidateForRegistration() Option { 152 return func(o *options) { 153 o.mapping = validateRegistration 154 StrictDomainName(true)(o) 155 ValidateLabels(true)(o) 156 VerifyDNSLength(true)(o) 157 BidiRule()(o) 158 } 159 } 160 161 // MapForLookup sets validation and mapping options such that a given IDN is 162 // transformed for domain name lookup according to the requirements set out in 163 // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, 164 // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option 165 // to add this check. 166 // 167 // The mappings include normalization and mapping case, width and other 168 // compatibility mappings. 169 func MapForLookup() Option { 170 return func(o *options) { 171 o.mapping = validateAndMap 172 StrictDomainName(true)(o) 173 ValidateLabels(true)(o) 174 RemoveLeadingDots(true)(o) 175 } 176 } 177 178 type options struct { 179 transitional bool 180 useSTD3Rules bool 181 checkHyphens bool 182 checkJoiners bool 183 verifyDNSLength bool 184 removeLeadingDots bool 185 186 trie *idnaTrie 187 188 // fromPuny calls validation rules when converting A-labels to U-labels. 189 fromPuny func(p *Profile, s string) error 190 191 // mapping implements a validation and mapping step as defined in RFC 5895 192 // or UTS 46, tailored to, for example, domain registration or lookup. 193 mapping func(p *Profile, s string) (string, error) 194 195 // bidirule, if specified, checks whether s conforms to the Bidi Rule 196 // defined in RFC 5893. 197 bidirule func(s string) bool 198 } 199 200 // A Profile defines the configuration of a IDNA mapper. 
201 type Profile struct { 202 options 203 } 204 205 func apply(o *options, opts []Option) { 206 for _, f := range opts { 207 f(o) 208 } 209 } 210 211 // New creates a new Profile. 212 // 213 // With no options, the returned Profile is the most permissive and equals the 214 // Punycode Profile. Options can be passed to further restrict the Profile. The 215 // MapForLookup and ValidateForRegistration options set a collection of options, 216 // for lookup and registration purposes respectively, which can be tailored by 217 // adding more fine-grained options, where later options override earlier 218 // options. 219 func New(o ...Option) *Profile { 220 p := &Profile{} 221 apply(&p.options, o) 222 return p 223 } 224 225 // ToASCII converts a domain or domain label to its ASCII form. For example, 226 // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and 227 // ToASCII("golang") is "golang". If an error is encountered it will return 228 // an error and a (partially) processed result. 229 func (p *Profile) ToASCII(s string) (string, error) { 230 return p.process(s, true) 231 } 232 233 // ToUnicode converts a domain or domain label to its Unicode form. For example, 234 // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and 235 // ToUnicode("golang") is "golang". If an error is encountered it will return 236 // an error and a (partially) processed result. 237 func (p *Profile) ToUnicode(s string) (string, error) { 238 pp := *p 239 pp.transitional = false 240 return pp.process(s, false) 241 } 242 243 // String reports a string with a description of the profile for debugging 244 // purposes. The string format may change with different versions. 
245 func (p *Profile) String() string { 246 s := "" 247 if p.transitional { 248 s = "Transitional" 249 } else { 250 s = "NonTransitional" 251 } 252 if p.useSTD3Rules { 253 s += ":UseSTD3Rules" 254 } 255 if p.checkHyphens { 256 s += ":CheckHyphens" 257 } 258 if p.checkJoiners { 259 s += ":CheckJoiners" 260 } 261 if p.verifyDNSLength { 262 s += ":VerifyDNSLength" 263 } 264 return s 265 } 266 267 var ( 268 // Punycode is a Profile that does raw punycode processing with a minimum 269 // of validation. 270 Punycode *Profile = punycode 271 272 // Lookup is the recommended profile for looking up domain names, according 273 // to Section 5 of RFC 5891. The exact configuration of this profile may 274 // change over time. 275 Lookup *Profile = lookup 276 277 // Display is the recommended profile for displaying domain names. 278 // The configuration of this profile may change over time. 279 Display *Profile = display 280 281 // Registration is the recommended profile for checking whether a given 282 // IDN is valid for registration, according to Section 4 of RFC 5891. 
283 Registration *Profile = registration 284 285 punycode = &Profile{} 286 lookup = &Profile{options{ 287 transitional: true, 288 removeLeadingDots: true, 289 useSTD3Rules: true, 290 checkHyphens: true, 291 checkJoiners: true, 292 trie: trie, 293 fromPuny: validateFromPunycode, 294 mapping: validateAndMap, 295 bidirule: bidirule.ValidString, 296 }} 297 display = &Profile{options{ 298 useSTD3Rules: true, 299 removeLeadingDots: true, 300 checkHyphens: true, 301 checkJoiners: true, 302 trie: trie, 303 fromPuny: validateFromPunycode, 304 mapping: validateAndMap, 305 bidirule: bidirule.ValidString, 306 }} 307 registration = &Profile{options{ 308 useSTD3Rules: true, 309 verifyDNSLength: true, 310 checkHyphens: true, 311 checkJoiners: true, 312 trie: trie, 313 fromPuny: validateFromPunycode, 314 mapping: validateRegistration, 315 bidirule: bidirule.ValidString, 316 }} 317 318 // TODO: profiles 319 // Register: recommended for approving domain names: don't do any mappings 320 // but rather reject on invalid input. Bundle or block deviation characters. 321 ) 322 323 type labelError struct{ label, code_ string } 324 325 func (e labelError) code() string { return e.code_ } 326 func (e labelError) Error() string { 327 return fmt.Sprintf("idna: invalid label %q", e.label) 328 } 329 330 type runeError rune 331 332 func (e runeError) code() string { return "P1" } 333 func (e runeError) Error() string { 334 return fmt.Sprintf("idna: disallowed rune %U", e) 335 } 336 337 // process implements the algorithm described in section 4 of UTS #46, 338 // see https://www.unicode.org/reports/tr46. 339 func (p *Profile) process(s string, toASCII bool) (string, error) { 340 var err error 341 if p.mapping != nil { 342 s, err = p.mapping(p, s) 343 } 344 // Remove leading empty labels. 
345 if p.removeLeadingDots { 346 for ; len(s) > 0 && s[0] == '.'; s = s[1:] { 347 } 348 } 349 // It seems like we should only create this error on ToASCII, but the 350 // UTS 46 conformance tests suggests we should always check this. 351 if err == nil && p.verifyDNSLength && s == "" { 352 err = &labelError{s, "A4"} 353 } 354 labels := labelIter{orig: s} 355 for ; !labels.done(); labels.next() { 356 label := labels.label() 357 if label == "" { 358 // Empty labels are not okay. The label iterator skips the last 359 // label if it is empty. 360 if err == nil && p.verifyDNSLength { 361 err = &labelError{s, "A4"} 362 } 363 continue 364 } 365 if strings.HasPrefix(label, acePrefix) { 366 u, err2 := decode(label[len(acePrefix):]) 367 if err2 != nil { 368 if err == nil { 369 err = err2 370 } 371 // Spec says keep the old label. 372 continue 373 } 374 labels.set(u) 375 if err == nil && p.fromPuny != nil { 376 err = p.fromPuny(p, u) 377 } 378 if err == nil { 379 // This should be called on NonTransitional, according to the 380 // spec, but that currently does not have any effect. Use the 381 // original profile to preserve options. 382 err = p.validateLabel(u) 383 } 384 } else if err == nil { 385 err = p.validateLabel(label) 386 } 387 } 388 if toASCII { 389 for labels.reset(); !labels.done(); labels.next() { 390 label := labels.label() 391 if !ascii(label) { 392 a, err2 := encode(acePrefix, label) 393 if err == nil { 394 err = err2 395 } 396 label = a 397 labels.set(a) 398 } 399 n := len(label) 400 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { 401 err = &labelError{label, "A4"} 402 } 403 } 404 } 405 s = labels.result() 406 if toASCII && p.verifyDNSLength && err == nil { 407 // Compute the length of the domain name minus the root label and its dot. 408 n := len(s) 409 if n > 0 && s[n-1] == '.' 
{ 410 n-- 411 } 412 if len(s) < 1 || n > 253 { 413 err = &labelError{s, "A4"} 414 } 415 } 416 return s, err 417 } 418 419 func normalize(p *Profile, s string) (string, error) { 420 return norm.NFC.String(s), nil 421 } 422 423 func validateRegistration(p *Profile, s string) (string, error) { 424 if !norm.NFC.IsNormalString(s) { 425 return s, &labelError{s, "V1"} 426 } 427 for i := 0; i < len(s); { 428 v, sz := trie.lookupString(s[i:]) 429 // Copy bytes not copied so far. 430 switch p.simplify(info(v).category()) { 431 // TODO: handle the NV8 defined in the Unicode idna data set to allow 432 // for strict conformance to IDNA2008. 433 case valid, deviation: 434 case disallowed, mapped, unknown, ignored: 435 r, _ := utf8.DecodeRuneInString(s[i:]) 436 return s, runeError(r) 437 } 438 i += sz 439 } 440 return s, nil 441 } 442 443 func validateAndMap(p *Profile, s string) (string, error) { 444 var ( 445 err error 446 b []byte 447 k int 448 ) 449 for i := 0; i < len(s); { 450 v, sz := trie.lookupString(s[i:]) 451 start := i 452 i += sz 453 // Copy bytes not copied so far. 454 switch p.simplify(info(v).category()) { 455 case valid: 456 continue 457 case disallowed: 458 if err == nil { 459 r, _ := utf8.DecodeRuneInString(s[start:]) 460 err = runeError(r) 461 } 462 continue 463 case mapped, deviation: 464 b = append(b, s[k:start]...) 465 b = info(v).appendMapping(b, s[start:i]) 466 case ignored: 467 b = append(b, s[k:start]...) 468 // drop the rune 469 case unknown: 470 b = append(b, s[k:start]...) 471 b = append(b, "\ufffd"...) 472 } 473 k = i 474 } 475 if k == 0 { 476 // No changes so far. 477 s = norm.NFC.String(s) 478 } else { 479 b = append(b, s[k:]...) 480 if norm.NFC.QuickSpan(b) != len(b) { 481 b = norm.NFC.Bytes(b) 482 } 483 // TODO: the punycode converters require strings as input. 484 s = string(b) 485 } 486 return s, err 487 } 488 489 // A labelIter allows iterating over domain name labels. 
type labelIter struct {
	orig     string   // the string being iterated over
	slice    []string // orig split on '.', created by the first call to set
	curStart int      // start offset of the current label in orig
	curEnd   int      // end offset of the current label in orig, set by label
	i        int      // index of the current label
}

// reset moves the iterator back to the first label. Labels replaced through
// set are kept.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iterator has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the labels joined with '.', or the original string if no
// label was ever replaced.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. While no label has been replaced it also
// records where the label ends in orig, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	if dot := strings.IndexByte(l.orig[l.curStart:], '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	end := len(l.orig)
	if l.slice == nil {
		// Step over the separator that follows the current label.
		l.curStart = l.curEnd + 1
		if l.curStart == end-1 && l.orig[l.curStart] == '.' {
			l.curStart = end
		}
		return
	}
	// Once labels live in slice, curStart only serves to signal the end of
	// the iteration to done.
	last := len(l.slice) - 1
	if l.i > last || (l.i == last && l.slice[last] == "") {
		l.curStart = end
	}
}

// set replaces the current label with s. The first call splits orig into its
// labels.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
550 const acePrefix = "xn--" 551 552 func (p *Profile) simplify(cat category) category { 553 switch cat { 554 case disallowedSTD3Mapped: 555 if p.useSTD3Rules { 556 cat = disallowed 557 } else { 558 cat = mapped 559 } 560 case disallowedSTD3Valid: 561 if p.useSTD3Rules { 562 cat = disallowed 563 } else { 564 cat = valid 565 } 566 case deviation: 567 if !p.transitional { 568 cat = valid 569 } 570 case validNV8, validXV8: 571 // TODO: handle V2008 572 cat = valid 573 } 574 return cat 575 } 576 577 func validateFromPunycode(p *Profile, s string) error { 578 if !norm.NFC.IsNormalString(s) { 579 return &labelError{s, "V1"} 580 } 581 for i := 0; i < len(s); { 582 v, sz := trie.lookupString(s[i:]) 583 if c := p.simplify(info(v).category()); c != valid && c != deviation { 584 return &labelError{s, "V6"} 585 } 586 i += sz 587 } 588 return nil 589 } 590 591 const ( 592 zwnj = "\u200c" 593 zwj = "\u200d" 594 ) 595 596 type joinState int8 597 598 const ( 599 stateStart joinState = iota 600 stateVirama 601 stateBefore 602 stateBeforeVirama 603 stateAfter 604 stateFAIL 605 ) 606 607 var joinStates = [][numJoinTypes]joinState{ 608 stateStart: { 609 joiningL: stateBefore, 610 joiningD: stateBefore, 611 joinZWNJ: stateFAIL, 612 joinZWJ: stateFAIL, 613 joinVirama: stateVirama, 614 }, 615 stateVirama: { 616 joiningL: stateBefore, 617 joiningD: stateBefore, 618 }, 619 stateBefore: { 620 joiningL: stateBefore, 621 joiningD: stateBefore, 622 joiningT: stateBefore, 623 joinZWNJ: stateAfter, 624 joinZWJ: stateFAIL, 625 joinVirama: stateBeforeVirama, 626 }, 627 stateBeforeVirama: { 628 joiningL: stateBefore, 629 joiningD: stateBefore, 630 joiningT: stateBefore, 631 }, 632 stateAfter: { 633 joiningL: stateFAIL, 634 joiningD: stateBefore, 635 joiningT: stateAfter, 636 joiningR: stateStart, 637 joinZWNJ: stateFAIL, 638 joinZWJ: stateFAIL, 639 joinVirama: stateAfter, // no-op as we can't accept joiners here 640 }, 641 stateFAIL: { 642 0: stateFAIL, 643 joiningL: stateFAIL, 644 joiningD: 
stateFAIL, 645 joiningT: stateFAIL, 646 joiningR: stateFAIL, 647 joinZWNJ: stateFAIL, 648 joinZWJ: stateFAIL, 649 joinVirama: stateFAIL, 650 }, 651 } 652 653 // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are 654 // already implicitly satisfied by the overall implementation. 655 func (p *Profile) validateLabel(s string) error { 656 if s == "" { 657 if p.verifyDNSLength { 658 return &labelError{s, "A4"} 659 } 660 return nil 661 } 662 if p.bidirule != nil && !p.bidirule(s) { 663 return &labelError{s, "B"} 664 } 665 if p.checkHyphens { 666 if len(s) > 4 && s[2] == '-' && s[3] == '-' { 667 return &labelError{s, "V2"} 668 } 669 if s[0] == '-' || s[len(s)-1] == '-' { 670 return &labelError{s, "V3"} 671 } 672 } 673 if !p.checkJoiners { 674 return nil 675 } 676 trie := p.trie // p.checkJoiners is only set if trie is set. 677 // TODO: merge the use of this in the trie. 678 v, sz := trie.lookupString(s) 679 x := info(v) 680 if x.isModifier() { 681 return &labelError{s, "V5"} 682 } 683 // Quickly return in the absence of zero-width (non) joiners. 684 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { 685 return nil 686 } 687 st := stateStart 688 for i := 0; ; { 689 jt := x.joinType() 690 if s[i:i+sz] == zwj { 691 jt = joinZWJ 692 } else if s[i:i+sz] == zwnj { 693 jt = joinZWNJ 694 } 695 st = joinStates[st][jt] 696 if x.isViramaModifier() { 697 st = joinStates[st][joinVirama] 698 } 699 if i += sz; i == len(s) { 700 break 701 } 702 v, sz = trie.lookupString(s[i:]) 703 x = info(v) 704 } 705 if st == stateFAIL || st == stateAfter { 706 return &labelError{s, "C"} 707 } 708 return nil 709 } 710 711 func ascii(s string) bool { 712 for i := 0; i < len(s); i++ { 713 if s[i] >= utf8.RuneSelf { 714 return false 715 } 716 } 717 return true 718 }