map.go - gtsocial-umbx - Unnamed repository; edit this file 'description' to name the repository.

map.go (23278B)
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package cases
      6 
      7 // This file contains the definitions of case mappings for all supported
      8 // languages. The rules for the language-specific tailorings were taken and
      9 // modified from the CLDR transform definitions in common/transforms.
     10 
     11 import (
     12 	"strings"
     13 	"unicode"
     14 	"unicode/utf8"
     15 
     16 	"golang.org/x/text/internal"
     17 	"golang.org/x/text/language"
     18 	"golang.org/x/text/transform"
     19 	"golang.org/x/text/unicode/norm"
     20 )
     21 
// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool

// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
//
// Note: the Span methods in this file keep scanning (and checkpoint) for as
// long as the spanFunc returns true and stop at the first false result, so a
// true result is treated as "this rune can remain part of the span".
type spanFunc func(*context) bool

// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30

// supported lists the language tags for which we have tailorings. The tags
// are parsed in this order in init; the per-language tables (upperFunc,
// lowerFunc, titleInfos) are annotated with the same order, so entries
// presumably must stay aligned with this list.
const supported = "und af az el lt nl tr"
     39 
     40 func init() {
     41 	tags := []language.Tag{}
     42 	for _, s := range strings.Split(supported, " ") {
     43 		tags = append(tags, language.MustParse(s))
     44 	}
     45 	matcher = internal.NewInheritanceMatcher(tags)
     46 	Supported = language.NewCoverage(tags)
     47 }
     48 
var (
	// matcher resolves a language tag to an index that the make* functions
	// use to select an entry from the tables below. It is assigned in init.
	matcher *internal.InheritanceMatcher

	// Supported is the coverage built in init from the tags listed in
	// supported.
	Supported language.Coverage

	// We keep the following lists separate, instead of having a single per-
	// language struct, to give the compiler a chance to remove unused code.

	// Some uppercase mappers are stateless, so we can precompute the
	// Transformers and save a bit on runtime allocations.
	//
	// A nil upper entry makes makeUpper fall back to undUpper.
	upperFunc = []struct {
		upper mapFunc
		span  spanFunc
	}{
		{nil, nil},                  // und
		{nil, nil},                  // af
		{aztrUpper(upper), isUpper}, // az
		{elUpper, noSpan},           // el
		{ltUpper(upper), noSpan},    // lt
		{nil, nil},                  // nl
		{aztrUpper(upper), isUpper}, // tr
	}

	// Shared transformers for the root locale, returned by makeUpper and
	// makeLower when no language-specific mapping applies.
	undUpper            transform.SpanningTransformer = &undUpperCaser{}
	undLower            transform.SpanningTransformer = &undLowerCaser{}
	undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}

	// lowerFunc holds the language-specific lowercase mapping per language.
	// A nil entry makes makeLower fall back to undLower or
	// undLowerIgnoreSigma.
	lowerFunc = []mapFunc{
		nil,       // und
		nil,       // af
		aztrLower, // az
		nil,       // el
		ltLower,   // lt
		nil,       // nl
		aztrLower, // tr
	}

	// titleInfos holds, per language, the functions makeTitle copies into a
	// titleCaser. rewrite may be nil; the title caser checks for nil before
	// calling it.
	titleInfos = []struct {
		title     mapFunc
		lower     mapFunc
		titleSpan spanFunc
		rewrite   func(*context)
	}{
		{title, lower, isTitle, nil},                // und
		{title, lower, isTitle, afnlRewrite},        // af
		{aztrUpper(title), aztrLower, isTitle, nil}, // az
		{title, lower, isTitle, nil},                // el
		{ltUpper(title), ltLower, noSpan, nil},      // lt
		{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl
		{aztrUpper(title), aztrLower, isTitle, nil}, // tr
	}
)
    101 
    102 func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
    103 	_, i, _ := matcher.Match(t)
    104 	f := upperFunc[i].upper
    105 	if f == nil {
    106 		return undUpper
    107 	}
    108 	return &simpleCaser{f: f, span: upperFunc[i].span}
    109 }
    110 
    111 func makeLower(t language.Tag, o options) transform.SpanningTransformer {
    112 	_, i, _ := matcher.Match(t)
    113 	f := lowerFunc[i]
    114 	if f == nil {
    115 		if o.ignoreFinalSigma {
    116 			return undLowerIgnoreSigma
    117 		}
    118 		return undLower
    119 	}
    120 	if o.ignoreFinalSigma {
    121 		return &simpleCaser{f: f, span: isLower}
    122 	}
    123 	return &lowerCaser{
    124 		first:   f,
    125 		midWord: finalSigma(f),
    126 	}
    127 }
    128 
    129 func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
    130 	_, i, _ := matcher.Match(t)
    131 	x := &titleInfos[i]
    132 	lower := x.lower
    133 	if o.noLower {
    134 		lower = (*context).copy
    135 	} else if !o.ignoreFinalSigma {
    136 		lower = finalSigma(lower)
    137 	}
    138 	return &titleCaser{
    139 		title:     x.title,
    140 		lower:     lower,
    141 		titleSpan: x.titleSpan,
    142 		rewrite:   x.rewrite,
    143 	}
    144 }
    145 
// noSpan is the spanFunc used for languages without a span implementation.
// It records transform.ErrEndOfSpan on the context and returns false, which
// ends the calling Span loop at the current rune.
func noSpan(c *context) bool {
	c.err = transform.ErrEndOfSpan
	return false
}
    150 
    151 // TODO: consider a similar special case for the fast majority lower case. This
    152 // is a bit more involved so will require some more precise benchmarking to
    153 // justify it.
    154 
    155 type undUpperCaser struct{ transform.NopResetter }
    156 
    157 // undUpperCaser implements the Transformer interface for doing an upper case
    158 // mapping for the root locale (und). It eliminates the need for an allocation
    159 // as it prevents escaping by not using function pointers.
    160 func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    161 	c := context{dst: dst, src: src, atEOF: atEOF}
    162 	for c.next() {
    163 		upper(&c)
    164 		c.checkpoint()
    165 	}
    166 	return c.ret()
    167 }
    168 
    169 func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
    170 	c := context{src: src, atEOF: atEOF}
    171 	for c.next() && isUpper(&c) {
    172 		c.checkpoint()
    173 	}
    174 	return c.retSpan()
    175 }
    176 
    177 // undLowerIgnoreSigmaCaser implements the Transformer interface for doing
    178 // a lower case mapping for the root locale (und) ignoring final sigma
    179 // handling. This casing algorithm is used in some performance-critical packages
    180 // like secure/precis and x/net/http/idna, which warrants its special-casing.
    181 type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
    182 
    183 func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    184 	c := context{dst: dst, src: src, atEOF: atEOF}
    185 	for c.next() && lower(&c) {
    186 		c.checkpoint()
    187 	}
    188 	return c.ret()
    189 
    190 }
    191 
    192 // Span implements a generic lower-casing. This is possible as isLower works
    193 // for all lowercasing variants. All lowercase variants only vary in how they
    194 // transform a non-lowercase letter. They will never change an already lowercase
    195 // letter. In addition, there is no state.
    196 func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
    197 	c := context{src: src, atEOF: atEOF}
    198 	for c.next() && isLower(&c) {
    199 		c.checkpoint()
    200 	}
    201 	return c.retSpan()
    202 }
    203 
    204 type simpleCaser struct {
    205 	context
    206 	f    mapFunc
    207 	span spanFunc
    208 }
    209 
    210 // simpleCaser implements the Transformer interface for doing a case operation
    211 // on a rune-by-rune basis.
    212 func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    213 	c := context{dst: dst, src: src, atEOF: atEOF}
    214 	for c.next() && t.f(&c) {
    215 		c.checkpoint()
    216 	}
    217 	return c.ret()
    218 }
    219 
    220 func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
    221 	c := context{src: src, atEOF: atEOF}
    222 	for c.next() && t.span(&c) {
    223 		c.checkpoint()
    224 	}
    225 	return c.retSpan()
    226 }
    227 
// undLowerCaser implements the Transformer interface for doing a lower case
// mapping for the root locale (und), including final sigma handling: its
// Transform routes a mid-word Σ through finalSigmaBody. makeLower returns it
// when no language-specific mapping exists and final sigma handling is not
// disabled. It calls lower and finalSigmaBody directly rather than through
// function values.
type undLowerCaser struct{ transform.NopResetter }
    233 
    234 func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    235 	c := context{dst: dst, src: src, atEOF: atEOF}
    236 
    237 	for isInterWord := true; c.next(); {
    238 		if isInterWord {
    239 			if c.info.isCased() {
    240 				if !lower(&c) {
    241 					break
    242 				}
    243 				isInterWord = false
    244 			} else if !c.copy() {
    245 				break
    246 			}
    247 		} else {
    248 			if c.info.isNotCasedAndNotCaseIgnorable() {
    249 				if !c.copy() {
    250 					break
    251 				}
    252 				isInterWord = true
    253 			} else if !c.hasPrefix("Σ") {
    254 				if !lower(&c) {
    255 					break
    256 				}
    257 			} else if !finalSigmaBody(&c) {
    258 				break
    259 			}
    260 		}
    261 		c.checkpoint()
    262 	}
    263 	return c.ret()
    264 }
    265 
    266 func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
    267 	c := context{src: src, atEOF: atEOF}
    268 	for c.next() && isLower(&c) {
    269 		c.checkpoint()
    270 	}
    271 	return c.retSpan()
    272 }
    273 
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
	// Embedded for its methods other than Transform; lowerCaser itself only
	// declares Transform in this file, so Span is presumably the promoted
	// isLower-based one.
	undLowerIgnoreSigmaCaser

	// context is re-initialized at the start of every Transform call.
	context

	// first maps the first cased rune of a word; midWord maps the remaining
	// runes of that word (makeLower wraps it with finalSigma).
	first, midWord mapFunc
}
    284 
    285 func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    286 	t.context = context{dst: dst, src: src, atEOF: atEOF}
    287 	c := &t.context
    288 
    289 	for isInterWord := true; c.next(); {
    290 		if isInterWord {
    291 			if c.info.isCased() {
    292 				if !t.first(c) {
    293 					break
    294 				}
    295 				isInterWord = false
    296 			} else if !c.copy() {
    297 				break
    298 			}
    299 		} else {
    300 			if c.info.isNotCasedAndNotCaseIgnorable() {
    301 				if !c.copy() {
    302 					break
    303 				}
    304 				isInterWord = true
    305 			} else if !t.midWord(c) {
    306 				break
    307 			}
    308 		}
    309 		c.checkpoint()
    310 	}
    311 	return c.ret()
    312 }
    313 
// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
	// context is rebuilt on every Transform and Span call; only its isMidWord
	// flag is carried over from the previous call.
	context

	// rune mappings used by the actual casing algorithms.
	title     mapFunc  // applied to the first cased rune of a word
	lower     mapFunc  // applied to cased runes while already inside a word
	titleSpan spanFunc // Span counterpart of title

	// rewrite, if non-nil, is called for every rune before it is mapped and
	// may adjust the context (afnlRewrite sets isMidWord).
	rewrite func(*context)
}
    327 
// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
//
// The isMidWord flag of the previous call is carried into the new context, so
// a word that is split over several calls is still treated as one word.
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.ret()
	}

	for {
		// Capture the properties of the current rune before the mapping
		// functions below possibly advance the context.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				// First cased rune of a word: title-case it.
				if !t.title(c) {
					break
				}
				c.isMidWord = true
			} else if !t.lower(c) {
				break
			}
		} else if !c.copy() {
			break
		} else if p.isBreak() {
			c.isMidWord = false
		}

		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful write.
		// No checkpoint is taken while inside a word on a mid rune: whether
		// the word continues depends on the next rune (see below).
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two mid runes in a row end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.ret()
}
    382 
// Span mirrors Transform without writing any output: it walks src with the
// same word-tracking logic, using titleSpan for the first cased rune of a word
// and isLower for later cased runes, and stops at the first rune for which
// that check returns false. As in Transform, isMidWord is carried over from
// the previous call.
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.retSpan()
	}

	for {
		// Capture the properties of the current rune before the span
		// functions below possibly advance the context.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				if !t.titleSpan(c) {
					break
				}
				c.isMidWord = true
			} else if !isLower(c) {
				break
			}
		} else if p.isBreak() {
			c.isMidWord = false
		}
		// As we save the state of the transformer, it is safe to call
		// checkpoint after any rune that was accepted. No checkpoint is taken
		// while inside a word on a mid rune: whether the word continues
		// depends on the next rune (see below).
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two mid runes in a row end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.retSpan()
}
    427 
    428 // finalSigma adds Greek final Sigma handing to another casing function. It
    429 // determines whether a lowercased sigma should be σ or ς, by looking ahead for
    430 // case-ignorables and a cased letters.
    431 func finalSigma(f mapFunc) mapFunc {
    432 	return func(c *context) bool {
    433 		if !c.hasPrefix("Σ") {
    434 			return f(c)
    435 		}
    436 		return finalSigmaBody(c)
    437 	}
    438 }
    439 
// finalSigmaBody lowercases a capital sigma at the current position. It first
// writes the final form ς and then looks ahead over at most maxIgnorable
// case-ignorable runes, copying them to the output. If the first
// non-ignorable rune is cased, the already written ς is patched in place to
// σ. It returns false if the source is exhausted before a decision could be
// made and true otherwise.
func finalSigmaBody(c *context) bool {
	// Current rune must be Σ.

	// ::NFD();
	// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	// Σ } [:case-ignorable:]* [:cased:] → σ;
	// [:cased:] [:case-ignorable:]* { Σ → ς;
	// ::Any-Lower;
	// ::NFC();

	// Remember where the sigma is written so that it can be patched later.
	p := c.pDst
	c.writeString("ς")

	// TODO: we should do this here, but right now this will never have an
	// effect as this is called when the prefix is Sigma, whereas Dutch and
	// Afrikaans only test for an apostrophe.
	//
	// if t.rewrite != nil {
	// 	t.rewrite(c)
	// }

	// We need to do one more iteration after maxIgnorable, as a cased
	// letter is not an ignorable and may modify the result.
	wasMid := false
	for i := 0; i < maxIgnorable+1; i++ {
		if !c.next() {
			return false
		}
		if !c.info.isCaseIgnorable() {
			// All Midword runes are also case ignorable, so we are
			// guaranteed to have a letter or word break here. As we are
			// unreading the run, there is no need to unset c.isMidWord;
			// the title caser will handle this.
			if c.info.isCased() {
				// p+1 is guaranteed to be in bounds: if writing ς was
				// successful, p+1 will contain the second byte of ς. If not,
				// this function will have returned after c.next returned false.
				c.dst[p+1]++ // ς → σ
			}
			// Leave the non-ignorable rune for the caller to process.
			c.unreadRune()
			return true
		}
		// A case ignorable may also introduce a word break, so we may need
		// to continue searching even after detecting a break.
		isMid := c.info.isMid()
		if (wasMid && isMid) || c.info.isBreak() {
			c.isMidWord = false
		}
		wasMid = isMid
		c.copy()
	}
	// Lookahead limit reached: keep the final form ς.
	return true
}
    493 
    494 // finalSigmaSpan would be the same as isLower.
    495 
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
//
// It upper-cases the current rune and, if that rune is Greek, replaces a
// decomposable result by its base rune and then consumes following modifiers,
// dropping the ones listed below and copying the others. It returns false if
// the initial upper-casing fails, or if the source runs out while still
// looking for modifiers before maxIgnorable of them were seen.
func elUpper(c *context) bool {
	// From CLDR:
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;

	// Decode the source rune before upper possibly advances the context.
	r, _ := utf8.DecodeRune(c.src[c.pSrc:])
	oldPDst := c.pDst
	if !upper(c) {
		return false
	}
	if !unicode.Is(unicode.Greek, r) {
		return true
	}
	// i counts the modifiers handled so far, including those that were part
	// of the decomposition.
	i := 0
	// Take the properties of the uppercased rune that is already written to the
	// destination. This saves us the trouble of having to uppercase the
	// decomposed rune again.
	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
		// Restore the destination position and process the decomposed rune.
		r, sz := utf8.DecodeRune(b)
		if r <= 0xFF { // See A.6.1
			return true
		}
		c.pDst = oldPDst
		// Insert the first rune and ignore the modifiers. See A.6.2.
		c.writeBytes(b[:sz])
		i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
		// Above and Iota Subscript
		case 0x0300, // U+0300 COMBINING GRAVE ACCENT
			0x0301, // U+0301 COMBINING ACUTE ACCENT
			0x0304, // U+0304 COMBINING MACRON
			0x0306, // U+0306 COMBINING BREVE
			0x0308, // U+0308 COMBINING DIAERESIS
			0x0313, // U+0313 COMBINING COMMA ABOVE
			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
			0x0342, // U+0342 COMBINING GREEK PERISPOMENI
			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
			// No-op. Gobble the modifier.

		default:
			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
			case cccZero:
				// Not a modifier: leave it for the caller and finish.
				c.unreadRune()
				return true

			// We don't need to test for IotaSubscript as the only rune that
			// qualifies (U+0345) was already excluded in the switch statement
			// above. See A.4.

			case cccAbove:
				// Another Above modifier is copied and ends the scan.
				return c.copy()
			default:
				// Some other modifier. We're still allowed to gobble Greek
				// modifiers after this.
				c.copy()
			}
		}
	}
	// The loop ended either because the lookahead limit was reached (true)
	// or because c.next returned false (false).
	return i == maxIgnorable
}
    564 
    565 // TODO: implement elUpperSpan (low-priority: complex and infrequent).
    566 
// ltLower implements Lithuanian lower casing. When a capital I or J (possibly
// as the base of a precomposed rune) is followed by a modifier of combining
// class Above, a U+0307 COMBINING DOT ABOVE is inserted so the lowercased
// letter keeps an explicit dot. All other runes are passed to lower. It
// returns false on a failed write, or if the source runs out while still
// looking for modifiers before maxIgnorable of them were seen.
func ltLower(c *context) bool {
	// From CLDR:
	// # Introduce an explicit dot above when lowercasing capital I's and J's
	// # whenever there are more accents above.
	// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	// ::NFD();
	// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
	// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
	// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
	// I \u0300 (Ì) → i \u0307 \u0300;
	// I \u0301 (Í) → i \u0307 \u0301;
	// I \u0303 (Ĩ) → i \u0307 \u0303;
	// ::Any-Lower();
	// ::NFC();

	// i counts the modifiers seen so far after the base letter.
	i := 0
	if r := c.src[c.pSrc]; r < utf8.RuneSelf {
		// ASCII: lowercase it; only I and J need the lookahead below.
		lower(c)
		if r != 'I' && r != 'J' {
			return true
		}
	} else {
		p := norm.NFD.Properties(c.src[c.pSrc:])
		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
			// UTF-8 optimization: the decomposition will only have an above
			// modifier if the last rune of the decomposition is in [U+300-U+311].
			// In all other cases, a decomposition starting with I is always
			// an I followed by modifiers that are not cased themselves. See A.2.
			if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
				if !c.writeBytes(d[:1]) {
					return false
				}
				// Lowercase the ASCII base letter that was just written.
				c.dst[c.pDst-1] += 'a' - 'A' // lower

				// Assumption: modifier never changes on lowercase. See A.1.
				// Assumption: all modifiers added have CCC = Above. See A.2.3.
				return c.writeString("\u0307") && c.writeBytes(d[1:])
			}
			// In all other cases the additional modifiers will have a CCC
			// that is less than 230 (Above). We will insert the U+0307, if
			// needed, after these modifiers so that a string in FCD form
			// will remain so. See A.2.2.
			lower(c)
			i = 1
		} else {
			return lower(c)
		}
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccZero:
			// No Above modifier follows: leave this rune for the caller.
			c.unreadRune()
			return true
		case cccAbove:
			// Insert the explicit dot before the Above modifier.
			return c.writeString("\u0307") && c.copy() // See A.1.
		default:
			c.copy() // See A.1.
		}
	}
	// The loop ended either because the lookahead limit was reached (true)
	// or because c.next returned false (false).
	return i == maxIgnorable
}
    635 
    636 // ltLowerSpan would be the same as isLower.
    637 
// ltUpper wraps the mapping f (upper or title in this file) with the
// Lithuanian rule that removes a U+0307 COMBINING DOT ABOVE following a
// soft-dotted rune. The returned mapFunc returns false on a failed write, or
// if the source runs out while still looking for modifiers before
// maxIgnorable of them were seen.
func ltUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		// Unicode:
		// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
		//
		// From CLDR:
		// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
		// # intervening non-230 marks.
		// ::NFD();
		// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
		// ::Any-Upper();
		// ::NFC();

		// TODO: See A.5. A soft-dotted rune never has an exception. This would
		// allow us to overload the exception bit and encode this property in
		// info. Need to measure performance impact of this.
		// Decode the source rune and remember the output position before f
		// possibly advances the context.
		r, _ := utf8.DecodeRune(c.src[c.pSrc:])
		oldPDst := c.pDst
		if !f(c) {
			return false
		}
		if !unicode.Is(unicode.Soft_Dotted, r) {
			return true
		}

		// We don't need to do an NFD normalization, as a soft-dotted rune never
		// contains U+0307. See A.3.

		i := 0
		for ; i < maxIgnorable && c.next(); i++ {
			switch c.info.cccType() {
			case cccZero:
				// No more modifiers: leave this rune for the caller.
				c.unreadRune()
				return true
			case cccAbove:
				if c.hasPrefix("\u0307") {
					// We don't do a full NFC, but rather combine runes for
					// some of the common cases. (Returning NFC or
					// preserving normal form is neither a requirement nor
					// a possibility anyway).
					// Skip the U+0307 by advancing without copying it.
					if !c.next() {
						return false
					}
					// Only a bare 'I' written by f and followed by a rune
					// whose first UTF-8 byte is 0xCC is recombined.
					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
						s := ""
						switch c.src[c.pSrc+1] {
						case 0x80: // U+0300 COMBINING GRAVE ACCENT
							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
						case 0x81: // U+0301 COMBINING ACUTE ACCENT
							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
						case 0x83: // U+0303 COMBINING TILDE
							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
						case 0x88: // U+0308 COMBINING DIAERESIS
							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
						default:
						}
						if s != "" {
							// Overwrite the 'I' with the precomposed rune.
							c.pDst = oldPDst
							return c.writeString(s)
						}
					}
				}
				return c.copy()
			default:
				c.copy()
			}
		}
		// The loop ended either because the lookahead limit was reached
		// (true) or because c.next returned false (false).
		return i == maxIgnorable
	}
}
    708 
    709 // TODO: implement ltUpperSpan (low priority: complex and infrequent).
    710 
    711 func aztrUpper(f mapFunc) mapFunc {
    712 	return func(c *context) bool {
    713 		// i→İ;
    714 		if c.src[c.pSrc] == 'i' {
    715 			return c.writeString("İ")
    716 		}
    717 		return f(c)
    718 	}
    719 }
    720 
// aztrLower implements Turkish and Azeri lower casing: İ maps to i, an I
// followed (after optional other modifiers) by U+0307 maps to i with the
// U+0307 dropped, and any other I maps to dotless ı. All other runes are
// passed to lower. The result is false on a failed write, or if the source
// runs out while still looking for a U+0307 before maxIgnorable modifiers
// were seen.
func aztrLower(c *context) (done bool) {
	// From CLDR:
	// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	// İ→i;
	// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	// # This matches the behavior of the canonically equivalent I-dot_above
	// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
	// I→ı ;
	// ::Any-Lower();
	if c.hasPrefix("\u0130") { // İ
		return c.writeString("i")
	}
	if c.src[c.pSrc] != 'I' {
		return lower(c)
	}

	// We ignore the lower-case I for now, but insert it later when we know
	// which form we need.
	// start is the source offset just past the I; the modifiers skipped by
	// the loop below are written from there once the starter is known.
	start := c.pSrc + c.sz

	i := 0
Loop:
	// We check for up to n ignorables before \u0307. As \u0307 is an
	// ignorable as well, n is maxIgnorable-1.
	// NOTE(review): the loop below is bounded by maxIgnorable, not
	// maxIgnorable-1 — confirm which bound is intended.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccAbove:
			if c.hasPrefix("\u0307") {
				// Dotted form: write i followed by the skipped modifiers,
				// excluding the current U+0307.
				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
			}
			done = true
			break Loop
		case cccZero:
			c.unreadRune()
			done = true
			break Loop
		default:
			// We'll write this rune after we know which starter to use.
		}
	}
	if i == maxIgnorable {
		done = true
	}
	// Dotless form: write ı followed by everything consumed after the I. done
	// is still false here only if c.next ended the loop early.
	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
    770 
    771 // aztrLowerSpan would be the same as isLower.
    772 
    773 func nlTitle(c *context) bool {
    774 	// From CLDR:
    775 	// # Special titlecasing for Dutch initial "ij".
    776 	// ::Any-Title();
    777 	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
    778 	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
    779 	if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
    780 		return title(c)
    781 	}
    782 
    783 	if !c.writeString("I") || !c.next() {
    784 		return false
    785 	}
    786 	if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
    787 		return c.writeString("J")
    788 	}
    789 	c.unreadRune()
    790 	return true
    791 }
    792 
    793 func nlTitleSpan(c *context) bool {
    794 	// From CLDR:
    795 	// # Special titlecasing for Dutch initial "ij".
    796 	// ::Any-Title();
    797 	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
    798 	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
    799 	if c.src[c.pSrc] != 'I' {
    800 		return isTitle(c)
    801 	}
    802 	if !c.next() || c.src[c.pSrc] == 'j' {
    803 		return false
    804 	}
    805 	if c.src[c.pSrc] != 'J' {
    806 		c.unreadRune()
    807 	}
    808 	return true
    809 }
    810 
    811 // Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
    812 func afnlRewrite(c *context) {
    813 	if c.hasPrefix("'") || c.hasPrefix("’") {
    814 		c.isMidWord = true
    815 	}
    816 }
	gtsocial-umbx Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE