cases.go (4991B)
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run gen.go gen_trieval.go 6 7 // Package cases provides general and language-specific case mappers. 8 package cases // import "golang.org/x/text/cases" 9 10 import ( 11 "golang.org/x/text/language" 12 "golang.org/x/text/transform" 13 ) 14 15 // References: 16 // - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18. 17 // - https://www.unicode.org/reports/tr29/ 18 // - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt 19 // - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt 20 // - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt 21 // - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt 22 // - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt 23 // - http://userguide.icu-project.org/transforms/casemappings 24 25 // TODO: 26 // - Case folding 27 // - Wide and Narrow? 28 // - Segmenter option for title casing. 29 // - ASCII fast paths 30 // - Encode Soft-Dotted property within trie somehow. 31 32 // A Caser transforms given input to a certain case. It implements 33 // transform.Transformer. 34 // 35 // A Caser may be stateful and should therefore not be shared between 36 // goroutines. 37 type Caser struct { 38 t transform.SpanningTransformer 39 } 40 41 // Bytes returns a new byte slice with the result of converting b to the case 42 // form implemented by c. 43 func (c Caser) Bytes(b []byte) []byte { 44 b, _, _ = transform.Bytes(c.t, b) 45 return b 46 } 47 48 // String returns a string with the result of transforming s to the case form 49 // implemented by c. 50 func (c Caser) String(s string) string { 51 s, _, _ = transform.String(c.t, s) 52 return s 53 } 54 55 // Reset resets the Caser to be reused for new input after a previous call to 56 // Transform. 57 func (c Caser) Reset() { c.t.Reset() } 58 59 // Transform implements the transform.Transformer interface and transforms the 60 // given input to the case form implemented by c. 61 func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 62 return c.t.Transform(dst, src, atEOF) 63 } 64 65 // Span implements the transform.SpanningTransformer interface. 66 func (c Caser) Span(src []byte, atEOF bool) (n int, err error) { 67 return c.t.Span(src, atEOF) 68 } 69 70 // Upper returns a Caser for language-specific uppercasing. 71 func Upper(t language.Tag, opts ...Option) Caser { 72 return Caser{makeUpper(t, getOpts(opts...))} 73 } 74 75 // Lower returns a Caser for language-specific lowercasing. 76 func Lower(t language.Tag, opts ...Option) Caser { 77 return Caser{makeLower(t, getOpts(opts...))} 78 } 79 80 // Title returns a Caser for language-specific title casing. It uses an 81 // approximation of the default Unicode Word Break algorithm. 82 func Title(t language.Tag, opts ...Option) Caser { 83 return Caser{makeTitle(t, getOpts(opts...))} 84 } 85 86 // Fold returns a Caser that implements Unicode case folding. The returned Caser 87 // is stateless and safe to use concurrently by multiple goroutines. 88 // 89 // Case folding does not normalize the input and may not preserve a normal form. 90 // Use the collate or search package for more convenient and linguistically 91 // sound comparisons. Use golang.org/x/text/secure/precis for string comparisons 92 // where security aspects are a concern. 93 func Fold(opts ...Option) Caser { 94 return Caser{makeFold(getOpts(opts...))} 95 } 96 97 // An Option is used to modify the behavior of a Caser. 98 type Option func(o options) options 99 100 // TODO: consider these options to take a boolean as well, like FinalSigma. 101 // The advantage of using this approach is that other providers of a lower-case 102 // algorithm could set different defaults by prefixing a user-provided slice 103 // of options with their own. This is handy, for instance, for the precis 104 // package which would override the default to not handle the Greek final sigma. 105 106 var ( 107 // NoLower disables the lowercasing of non-leading letters for a title 108 // caser. 109 NoLower Option = noLower 110 111 // Compact omits mappings in case folding for characters that would grow the 112 // input. (Unimplemented.) 113 Compact Option = compact 114 ) 115 116 // TODO: option to preserve a normal form, if applicable? 117 118 type options struct { 119 noLower bool 120 simple bool 121 122 // TODO: segmenter, max ignorable, alternative versions, etc. 123 124 ignoreFinalSigma bool 125 } 126 127 func getOpts(o ...Option) (res options) { 128 for _, f := range o { 129 res = f(res) 130 } 131 return 132 } 133 134 func noLower(o options) options { 135 o.noLower = true 136 return o 137 } 138 139 func compact(o options) options { 140 o.simple = true 141 return o 142 } 143 144 // HandleFinalSigma specifies whether the special handling of Greek final sigma 145 // should be enabled. Unicode prescribes handling the Greek final sigma for all 146 // locales, but standards like IDNA and PRECIS override this default. 147 func HandleFinalSigma(enable bool) Option { 148 if enable { 149 return handleFinalSigma 150 } 151 return ignoreFinalSigma 152 } 153 154 func ignoreFinalSigma(o options) options { 155 o.ignoreFinalSigma = true 156 return o 157 } 158 159 func handleFinalSigma(o options) options { 160 o.ignoreFinalSigma = false 161 return o 162 }