bidi.go (10565B)
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_trieval.go gen_ranges.go

// Package bidi contains functionality for bidirectional text support.
//
// See https://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"

// TODO
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.

import (
	"bytes"
)

// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.

// A Direction indicates the overall flow of text.
//
// The zero value of Direction is LeftToRight.
type Direction int

const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains no left-to-right and right-to-left
	// characters and that no default direction has been set.
	Neutral
)

// options collects the settings that Option values write to.
type options struct {
	// defaultDirection is the direction stored by the DefaultDirection option.
	// Its zero value is LeftToRight.
	defaultDirection Direction
}

// An Option is an option for Bidi processing.
//
// An Option is a function that modifies the options value it is given.
type Option func(*options)

// ICU allows the user to define embedding levels.
This may be used, for example, 59 // to use hierarchical structure of markup languages to define embeddings. 60 // The following option may be a way to expose this functionality in this API. 61 // // LevelFunc sets a function that associates nesting levels with the given text. 62 // // The levels function will be called with monotonically increasing values for p. 63 // func LevelFunc(levels func(p int) int) Option { 64 // panic("unimplemented") 65 // } 66 67 // DefaultDirection sets the default direction for a Paragraph. The direction is 68 // overridden if the text contains directional characters. 69 func DefaultDirection(d Direction) Option { 70 return func(opts *options) { 71 opts.defaultDirection = d 72 } 73 } 74 75 // A Paragraph holds a single Paragraph for Bidi processing. 76 type Paragraph struct { 77 p []byte 78 o Ordering 79 opts []Option 80 types []Class 81 pairTypes []bracketType 82 pairValues []rune 83 runes []rune 84 options options 85 } 86 87 // Initialize the p.pairTypes, p.pairValues and p.types from the input previously 88 // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph 89 // separator (bidi class B). 90 // 91 // The function p.Order() needs these values to be set, so this preparation could be postponed. 92 // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph 93 // separator, the whole input needs to be processed anyway and should not be done twice. 
94 // 95 // The function has the same return values as SetBytes() / SetString() 96 func (p *Paragraph) prepareInput() (n int, err error) { 97 p.runes = bytes.Runes(p.p) 98 bytecount := 0 99 // clear slices from previous SetString or SetBytes 100 p.pairTypes = nil 101 p.pairValues = nil 102 p.types = nil 103 104 for _, r := range p.runes { 105 props, i := LookupRune(r) 106 bytecount += i 107 cls := props.Class() 108 if cls == B { 109 return bytecount, nil 110 } 111 p.types = append(p.types, cls) 112 if props.IsOpeningBracket() { 113 p.pairTypes = append(p.pairTypes, bpOpen) 114 p.pairValues = append(p.pairValues, r) 115 } else if props.IsBracket() { 116 // this must be a closing bracket, 117 // since IsOpeningBracket is not true 118 p.pairTypes = append(p.pairTypes, bpClose) 119 p.pairValues = append(p.pairValues, r) 120 } else { 121 p.pairTypes = append(p.pairTypes, bpNone) 122 p.pairValues = append(p.pairValues, 0) 123 } 124 } 125 return bytecount, nil 126 } 127 128 // SetBytes configures p for the given paragraph text. It replaces text 129 // previously set by SetBytes or SetString. If b contains a paragraph separator 130 // it will only process the first paragraph and report the number of bytes 131 // consumed from b including this separator. Error may be non-nil if options are 132 // given. 133 func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { 134 p.p = b 135 p.opts = opts 136 return p.prepareInput() 137 } 138 139 // SetString configures s for the given paragraph text. It replaces text 140 // previously set by SetBytes or SetString. If s contains a paragraph separator 141 // it will only process the first paragraph and report the number of bytes 142 // consumed from s including this separator. Error may be non-nil if options are 143 // given. 
144 func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { 145 p.p = []byte(s) 146 p.opts = opts 147 return p.prepareInput() 148 } 149 150 // IsLeftToRight reports whether the principle direction of rendering for this 151 // paragraphs is left-to-right. If this returns false, the principle direction 152 // of rendering is right-to-left. 153 func (p *Paragraph) IsLeftToRight() bool { 154 return p.Direction() == LeftToRight 155 } 156 157 // Direction returns the direction of the text of this paragraph. 158 // 159 // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. 160 func (p *Paragraph) Direction() Direction { 161 return p.o.Direction() 162 } 163 164 // TODO: what happens if the position is > len(input)? This should return an error. 165 166 // RunAt reports the Run at the given position of the input text. 167 // 168 // This method can be used for computing line breaks on paragraphs. 169 func (p *Paragraph) RunAt(pos int) Run { 170 c := 0 171 runNumber := 0 172 for i, r := range p.o.runes { 173 c += len(r) 174 if pos < c { 175 runNumber = i 176 } 177 } 178 return p.o.Run(runNumber) 179 } 180 181 func calculateOrdering(levels []level, runes []rune) Ordering { 182 var curDir Direction 183 184 prevDir := Neutral 185 prevI := 0 186 187 o := Ordering{} 188 // lvl = 0,2,4,...: left to right 189 // lvl = 1,3,5,...: right to left 190 for i, lvl := range levels { 191 if lvl%2 == 0 { 192 curDir = LeftToRight 193 } else { 194 curDir = RightToLeft 195 } 196 if curDir != prevDir { 197 if i > 0 { 198 o.runes = append(o.runes, runes[prevI:i]) 199 o.directions = append(o.directions, prevDir) 200 o.startpos = append(o.startpos, prevI) 201 } 202 prevI = i 203 prevDir = curDir 204 } 205 } 206 o.runes = append(o.runes, runes[prevI:]) 207 o.directions = append(o.directions, prevDir) 208 o.startpos = append(o.startpos, prevI) 209 return o 210 } 211 212 // Order computes the visual ordering of all the runs in a Paragraph. 
213 func (p *Paragraph) Order() (Ordering, error) { 214 if len(p.types) == 0 { 215 return Ordering{}, nil 216 } 217 218 for _, fn := range p.opts { 219 fn(&p.options) 220 } 221 lvl := level(-1) 222 if p.options.defaultDirection == RightToLeft { 223 lvl = 1 224 } 225 para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl) 226 if err != nil { 227 return Ordering{}, err 228 } 229 230 levels := para.getLevels([]int{len(p.types)}) 231 232 p.o = calculateOrdering(levels, p.runes) 233 return p.o, nil 234 } 235 236 // Line computes the visual ordering of runs for a single line starting and 237 // ending at the given positions in the original text. 238 func (p *Paragraph) Line(start, end int) (Ordering, error) { 239 lineTypes := p.types[start:end] 240 para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) 241 if err != nil { 242 return Ordering{}, err 243 } 244 levels := para.getLevels([]int{len(lineTypes)}) 245 o := calculateOrdering(levels, p.runes[start:end]) 246 return o, nil 247 } 248 249 // An Ordering holds the computed visual order of runs of a Paragraph. Calling 250 // SetBytes or SetString on the originating Paragraph invalidates an Ordering. 251 // The methods of an Ordering should only be called by one goroutine at a time. 252 type Ordering struct { 253 runes [][]rune 254 directions []Direction 255 startpos []int 256 } 257 258 // Direction reports the directionality of the runs. 259 // 260 // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. 261 func (o *Ordering) Direction() Direction { 262 return o.directions[0] 263 } 264 265 // NumRuns returns the number of runs. 266 func (o *Ordering) NumRuns() int { 267 return len(o.runes) 268 } 269 270 // Run returns the ith run within the ordering. 271 func (o *Ordering) Run(i int) Run { 272 r := Run{ 273 runes: o.runes[i], 274 direction: o.directions[i], 275 startpos: o.startpos[i], 276 } 277 return r 278 } 279 280 // TODO: perhaps with options. 
281 // // Reorder creates a reader that reads the runes in visual order per character. 282 // // Modifiers remain after the runes they modify. 283 // func (l *Runs) Reorder() io.Reader { 284 // panic("unimplemented") 285 // } 286 287 // A Run is a continuous sequence of characters of a single direction. 288 type Run struct { 289 runes []rune 290 direction Direction 291 startpos int 292 } 293 294 // String returns the text of the run in its original order. 295 func (r *Run) String() string { 296 return string(r.runes) 297 } 298 299 // Bytes returns the text of the run in its original order. 300 func (r *Run) Bytes() []byte { 301 return []byte(r.String()) 302 } 303 304 // TODO: methods for 305 // - Display order 306 // - headers and footers 307 // - bracket replacement. 308 309 // Direction reports the direction of the run. 310 func (r *Run) Direction() Direction { 311 return r.direction 312 } 313 314 // Pos returns the position of the Run within the text passed to SetBytes or SetString of the 315 // originating Paragraph value. 316 func (r *Run) Pos() (start, end int) { 317 return r.startpos, r.startpos + len(r.runes) - 1 318 } 319 320 // AppendReverse reverses the order of characters of in, appends them to out, 321 // and returns the result. Modifiers will still follow the runes they modify. 322 // Brackets are replaced with their counterparts. 323 func AppendReverse(out, in []byte) []byte { 324 ret := make([]byte, len(in)+len(out)) 325 copy(ret, out) 326 inRunes := bytes.Runes(in) 327 328 for i, r := range inRunes { 329 prop, _ := LookupRune(r) 330 if prop.IsBracket() { 331 inRunes[i] = prop.reverseBracket(r) 332 } 333 } 334 335 for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 { 336 inRunes[i], inRunes[j] = inRunes[j], inRunes[i] 337 } 338 copy(ret[len(out):], string(inRunes)) 339 340 return ret 341 } 342 343 // ReverseString reverses the order of characters in s and returns a new string. 344 // Modifiers will still follow the runes they modify. 
Brackets are replaced with 345 // their counterparts. 346 func ReverseString(s string) string { 347 input := []rune(s) 348 li := len(input) 349 ret := make([]rune, li) 350 for i, r := range input { 351 prop, _ := LookupRune(r) 352 if prop.IsBracket() { 353 ret[li-i-1] = prop.reverseBracket(r) 354 } else { 355 ret[li-i-1] = r 356 } 357 } 358 return string(ret) 359 }