gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

bidi.go (10565B)


      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 //go:generate go run gen.go gen_trieval.go gen_ranges.go
      6 
      7 // Package bidi contains functionality for bidirectional text support.
      8 //
      9 // See https://www.unicode.org/reports/tr9.
     10 //
     11 // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
     12 // and without notice.
     13 package bidi // import "golang.org/x/text/unicode/bidi"
     14 
     15 // TODO
     16 // - Transformer for reordering?
     17 // - Transformer (validator, really) for Bidi Rule.
     18 
     19 import (
     20 	"bytes"
     21 )
     22 
     23 // This API tries to avoid dealing with embedding levels for now. Under the hood
     24 // these will be computed, but the question is to which extent the user should
     25 // know they exist. We should at some point allow the user to specify an
     26 // embedding hierarchy, though.
     27 
// A Direction indicates the overall flow of text.
//
// The values defined for it in this file are LeftToRight, RightToLeft, Mixed
// and Neutral.
type Direction int
     30 
const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	//
	// It is the first iota value (0) and therefore the zero value of Direction.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains neither left-to-right nor
	// right-to-left characters and that no default direction has been set.
	Neutral
)
     50 
// options holds the settings that Option functions write into. Order applies
// the Options stored on a Paragraph to its options value.
type options struct {
	// defaultDirection is the direction set by the DefaultDirection option.
	// Its zero value is LeftToRight.
	defaultDirection Direction
}
     54 
// An Option is an option for Bidi processing. It is a function that records
// its setting in the options value it is given.
type Option func(*options)
     57 
     58 // ICU allows the user to define embedding levels. This may be used, for example,
     59 // to use hierarchical structure of markup languages to define embeddings.
     60 // The following option may be a way to expose this functionality in this API.
     61 // // LevelFunc sets a function that associates nesting levels with the given text.
     62 // // The levels function will be called with monotonically increasing values for p.
     63 // func LevelFunc(levels func(p int) int) Option {
     64 // 	panic("unimplemented")
     65 // }
     66 
     67 // DefaultDirection sets the default direction for a Paragraph. The direction is
     68 // overridden if the text contains directional characters.
     69 func DefaultDirection(d Direction) Option {
     70 	return func(opts *options) {
     71 		opts.defaultDirection = d
     72 	}
     73 }
     74 
// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
	// p is the raw input text stored by SetBytes or SetString.
	p []byte
	// o is the Ordering most recently computed by Order.
	o Ordering
	// opts are the Options passed to the last SetBytes or SetString call;
	// Order applies them to the options field.
	opts []Option
	// types holds the bidi class of each input rune, filled in by
	// prepareInput up to (not including) the first rune of class B.
	types []Class
	// pairTypes is parallel to types and records for each rune whether it is
	// an opening bracket (bpOpen), a closing bracket (bpClose) or neither
	// (bpNone).
	pairTypes []bracketType
	// pairValues is parallel to types; it holds the rune itself for brackets
	// and 0 for every other rune.
	pairValues []rune
	// runes is the input text decoded into runes by prepareInput.
	runes []rune
	// options holds the settings produced by applying opts in Order.
	options options
}
     86 
     87 // Initialize the p.pairTypes, p.pairValues and p.types from the input previously
     88 // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
     89 // separator (bidi class B).
     90 //
     91 // The function p.Order() needs these values to be set, so this preparation could be postponed.
     92 // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
     93 // separator, the whole input needs to be processed anyway and should not be done twice.
     94 //
     95 // The function has the same return values as SetBytes() / SetString()
     96 func (p *Paragraph) prepareInput() (n int, err error) {
     97 	p.runes = bytes.Runes(p.p)
     98 	bytecount := 0
     99 	// clear slices from previous SetString or SetBytes
    100 	p.pairTypes = nil
    101 	p.pairValues = nil
    102 	p.types = nil
    103 
    104 	for _, r := range p.runes {
    105 		props, i := LookupRune(r)
    106 		bytecount += i
    107 		cls := props.Class()
    108 		if cls == B {
    109 			return bytecount, nil
    110 		}
    111 		p.types = append(p.types, cls)
    112 		if props.IsOpeningBracket() {
    113 			p.pairTypes = append(p.pairTypes, bpOpen)
    114 			p.pairValues = append(p.pairValues, r)
    115 		} else if props.IsBracket() {
    116 			// this must be a closing bracket,
    117 			// since IsOpeningBracket is not true
    118 			p.pairTypes = append(p.pairTypes, bpClose)
    119 			p.pairValues = append(p.pairValues, r)
    120 		} else {
    121 			p.pairTypes = append(p.pairTypes, bpNone)
    122 			p.pairValues = append(p.pairValues, 0)
    123 		}
    124 	}
    125 	return bytecount, nil
    126 }
    127 
    128 // SetBytes configures p for the given paragraph text. It replaces text
    129 // previously set by SetBytes or SetString. If b contains a paragraph separator
    130 // it will only process the first paragraph and report the number of bytes
    131 // consumed from b including this separator. Error may be non-nil if options are
    132 // given.
    133 func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
    134 	p.p = b
    135 	p.opts = opts
    136 	return p.prepareInput()
    137 }
    138 
    139 // SetString configures s for the given paragraph text. It replaces text
    140 // previously set by SetBytes or SetString. If s contains a paragraph separator
    141 // it will only process the first paragraph and report the number of bytes
    142 // consumed from s including this separator. Error may be non-nil if options are
    143 // given.
    144 func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
    145 	p.p = []byte(s)
    146 	p.opts = opts
    147 	return p.prepareInput()
    148 }
    149 
    150 // IsLeftToRight reports whether the principle direction of rendering for this
    151 // paragraphs is left-to-right. If this returns false, the principle direction
    152 // of rendering is right-to-left.
    153 func (p *Paragraph) IsLeftToRight() bool {
    154 	return p.Direction() == LeftToRight
    155 }
    156 
    157 // Direction returns the direction of the text of this paragraph.
    158 //
    159 // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
    160 func (p *Paragraph) Direction() Direction {
    161 	return p.o.Direction()
    162 }
    163 
    164 // TODO: what happens if the position is > len(input)? This should return an error.
    165 
    166 // RunAt reports the Run at the given position of the input text.
    167 //
    168 // This method can be used for computing line breaks on paragraphs.
    169 func (p *Paragraph) RunAt(pos int) Run {
    170 	c := 0
    171 	runNumber := 0
    172 	for i, r := range p.o.runes {
    173 		c += len(r)
    174 		if pos < c {
    175 			runNumber = i
    176 		}
    177 	}
    178 	return p.o.Run(runNumber)
    179 }
    180 
    181 func calculateOrdering(levels []level, runes []rune) Ordering {
    182 	var curDir Direction
    183 
    184 	prevDir := Neutral
    185 	prevI := 0
    186 
    187 	o := Ordering{}
    188 	// lvl = 0,2,4,...: left to right
    189 	// lvl = 1,3,5,...: right to left
    190 	for i, lvl := range levels {
    191 		if lvl%2 == 0 {
    192 			curDir = LeftToRight
    193 		} else {
    194 			curDir = RightToLeft
    195 		}
    196 		if curDir != prevDir {
    197 			if i > 0 {
    198 				o.runes = append(o.runes, runes[prevI:i])
    199 				o.directions = append(o.directions, prevDir)
    200 				o.startpos = append(o.startpos, prevI)
    201 			}
    202 			prevI = i
    203 			prevDir = curDir
    204 		}
    205 	}
    206 	o.runes = append(o.runes, runes[prevI:])
    207 	o.directions = append(o.directions, prevDir)
    208 	o.startpos = append(o.startpos, prevI)
    209 	return o
    210 }
    211 
    212 // Order computes the visual ordering of all the runs in a Paragraph.
    213 func (p *Paragraph) Order() (Ordering, error) {
    214 	if len(p.types) == 0 {
    215 		return Ordering{}, nil
    216 	}
    217 
    218 	for _, fn := range p.opts {
    219 		fn(&p.options)
    220 	}
    221 	lvl := level(-1)
    222 	if p.options.defaultDirection == RightToLeft {
    223 		lvl = 1
    224 	}
    225 	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
    226 	if err != nil {
    227 		return Ordering{}, err
    228 	}
    229 
    230 	levels := para.getLevels([]int{len(p.types)})
    231 
    232 	p.o = calculateOrdering(levels, p.runes)
    233 	return p.o, nil
    234 }
    235 
    236 // Line computes the visual ordering of runs for a single line starting and
    237 // ending at the given positions in the original text.
    238 func (p *Paragraph) Line(start, end int) (Ordering, error) {
    239 	lineTypes := p.types[start:end]
    240 	para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
    241 	if err != nil {
    242 		return Ordering{}, err
    243 	}
    244 	levels := para.getLevels([]int{len(lineTypes)})
    245 	o := calculateOrdering(levels, p.runes[start:end])
    246 	return o, nil
    247 }
    248 
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
//
// The three slices are parallel: index i in each of them describes run i.
type Ordering struct {
	// runes holds the text of each run.
	runes [][]rune
	// directions holds the direction of each run.
	directions []Direction
	// startpos holds, for each run, the index of its first rune within the
	// rune slice the ordering was computed from.
	startpos []int
}
    257 
    258 // Direction reports the directionality of the runs.
    259 //
    260 // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
    261 func (o *Ordering) Direction() Direction {
    262 	return o.directions[0]
    263 }
    264 
    265 // NumRuns returns the number of runs.
    266 func (o *Ordering) NumRuns() int {
    267 	return len(o.runes)
    268 }
    269 
    270 // Run returns the ith run within the ordering.
    271 func (o *Ordering) Run(i int) Run {
    272 	r := Run{
    273 		runes:     o.runes[i],
    274 		direction: o.directions[i],
    275 		startpos:  o.startpos[i],
    276 	}
    277 	return r
    278 }
    279 
    280 // TODO: perhaps with options.
    281 // // Reorder creates a reader that reads the runes in visual order per character.
    282 // // Modifiers remain after the runes they modify.
    283 // func (l *Runs) Reorder() io.Reader {
    284 // 	panic("unimplemented")
    285 // }
    286 
// A Run is a continuous sequence of characters of a single direction.
type Run struct {
	// runes is the text of the run in its original order.
	runes []rune
	// direction is the direction of the run.
	direction Direction
	// startpos is the index of the first rune of the run, as recorded in the
	// Ordering the run was taken from.
	startpos int
}
    293 
    294 // String returns the text of the run in its original order.
    295 func (r *Run) String() string {
    296 	return string(r.runes)
    297 }
    298 
    299 // Bytes returns the text of the run in its original order.
    300 func (r *Run) Bytes() []byte {
    301 	return []byte(r.String())
    302 }
    303 
    304 // TODO: methods for
    305 // - Display order
    306 // - headers and footers
    307 // - bracket replacement.
    308 
    309 // Direction reports the direction of the run.
    310 func (r *Run) Direction() Direction {
    311 	return r.direction
    312 }
    313 
    314 // Pos returns the position of the Run within the text passed to SetBytes or SetString of the
    315 // originating Paragraph value.
    316 func (r *Run) Pos() (start, end int) {
    317 	return r.startpos, r.startpos + len(r.runes) - 1
    318 }
    319 
    320 // AppendReverse reverses the order of characters of in, appends them to out,
    321 // and returns the result. Modifiers will still follow the runes they modify.
    322 // Brackets are replaced with their counterparts.
    323 func AppendReverse(out, in []byte) []byte {
    324 	ret := make([]byte, len(in)+len(out))
    325 	copy(ret, out)
    326 	inRunes := bytes.Runes(in)
    327 
    328 	for i, r := range inRunes {
    329 		prop, _ := LookupRune(r)
    330 		if prop.IsBracket() {
    331 			inRunes[i] = prop.reverseBracket(r)
    332 		}
    333 	}
    334 
    335 	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
    336 		inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
    337 	}
    338 	copy(ret[len(out):], string(inRunes))
    339 
    340 	return ret
    341 }
    342 
    343 // ReverseString reverses the order of characters in s and returns a new string.
    344 // Modifiers will still follow the runes they modify. Brackets are replaced with
    345 // their counterparts.
    346 func ReverseString(s string) string {
    347 	input := []rune(s)
    348 	li := len(input)
    349 	ret := make([]rune, li)
    350 	for i, r := range input {
    351 		prop, _ := LookupRune(r)
    352 		if prop.IsBracket() {
    353 			ret[li-i-1] = prop.reverseBracket(r)
    354 		} else {
    355 			ret[li-i-1] = r
    356 		}
    357 	}
    358 	return string(ret)
    359 }