gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

context.go (3730B)


      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package precis
      6 
      7 import "errors"
      8 
      9 // This file contains tables and code related to context rules.
     10 
     11 type catBitmap uint16
     12 
     13 const (
     14 	// These bits, once set depending on the current value, are never unset.
     15 	bJapanese catBitmap = 1 << iota
     16 	bArabicIndicDigit
     17 	bExtendedArabicIndicDigit
     18 
     19 	// These bits are set on each iteration depending on the current value.
     20 	bJoinStart
     21 	bJoinMid
     22 	bJoinEnd
     23 	bVirama
     24 	bLatinSmallL
     25 	bGreek
     26 	bHebrew
     27 
     28 	// These bits indicated which of the permanent bits need to be set at the
     29 	// end of the checks.
     30 	bMustHaveJapn
     31 
     32 	permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
     33 )
     34 
     35 const finalShift = 10
     36 
     37 var errContext = errors.New("precis: contextual rule violated")
     38 
     39 func init() {
     40 	// Programmatically set these required bits as, manually setting them seems
     41 	// too error prone.
     42 	for i, ct := range categoryTransitions {
     43 		categoryTransitions[i].keep |= permanent
     44 		categoryTransitions[i].accept |= ct.term
     45 	}
     46 }
     47 
     48 var categoryTransitions = []struct {
     49 	keep catBitmap // mask selecting which bits to keep from the previous state
     50 	set  catBitmap // mask for which bits to set for this transition
     51 
     52 	// These bitmaps are used for rules that require lookahead.
     53 	// term&accept == term must be true, which is enforced programmatically.
     54 	term   catBitmap // bits accepted as termination condition
     55 	accept catBitmap // bits that pass, but not sufficient as termination
     56 
     57 	// The rule function cannot take a *context as an argument, as it would
     58 	// cause the context to escape, adding significant overhead.
     59 	rule func(beforeBits catBitmap) (doLookahead bool, err error)
     60 }{
     61 	joiningL:          {set: bJoinStart},
     62 	joiningD:          {set: bJoinStart | bJoinEnd},
     63 	joiningT:          {keep: bJoinStart, set: bJoinMid},
     64 	joiningR:          {set: bJoinEnd},
     65 	viramaModifier:    {set: bVirama},
     66 	viramaJoinT:       {set: bVirama | bJoinMid},
     67 	latinSmallL:       {set: bLatinSmallL},
     68 	greek:             {set: bGreek},
     69 	greekJoinT:        {set: bGreek | bJoinMid},
     70 	hebrew:            {set: bHebrew},
     71 	hebrewJoinT:       {set: bHebrew | bJoinMid},
     72 	japanese:          {set: bJapanese},
     73 	katakanaMiddleDot: {set: bMustHaveJapn},
     74 
     75 	zeroWidthNonJoiner: {
     76 		term:   bJoinEnd,
     77 		accept: bJoinMid,
     78 		rule: func(before catBitmap) (doLookAhead bool, err error) {
     79 			if before&bVirama != 0 {
     80 				return false, nil
     81 			}
     82 			if before&bJoinStart == 0 {
     83 				return false, errContext
     84 			}
     85 			return true, nil
     86 		},
     87 	},
     88 	zeroWidthJoiner: {
     89 		rule: func(before catBitmap) (doLookAhead bool, err error) {
     90 			if before&bVirama == 0 {
     91 				err = errContext
     92 			}
     93 			return false, err
     94 		},
     95 	},
     96 	middleDot: {
     97 		term: bLatinSmallL,
     98 		rule: func(before catBitmap) (doLookAhead bool, err error) {
     99 			if before&bLatinSmallL == 0 {
    100 				return false, errContext
    101 			}
    102 			return true, nil
    103 		},
    104 	},
    105 	greekLowerNumeralSign: {
    106 		set:  bGreek,
    107 		term: bGreek,
    108 		rule: func(before catBitmap) (doLookAhead bool, err error) {
    109 			return true, nil
    110 		},
    111 	},
    112 	hebrewPreceding: {
    113 		set: bHebrew,
    114 		rule: func(before catBitmap) (doLookAhead bool, err error) {
    115 			if before&bHebrew == 0 {
    116 				err = errContext
    117 			}
    118 			return false, err
    119 		},
    120 	},
    121 	arabicIndicDigit: {
    122 		set: bArabicIndicDigit,
    123 		rule: func(before catBitmap) (doLookAhead bool, err error) {
    124 			if before&bExtendedArabicIndicDigit != 0 {
    125 				err = errContext
    126 			}
    127 			return false, err
    128 		},
    129 	},
    130 	extendedArabicIndicDigit: {
    131 		set: bExtendedArabicIndicDigit,
    132 		rule: func(before catBitmap) (doLookAhead bool, err error) {
    133 			if before&bArabicIndicDigit != 0 {
    134 				err = errContext
    135 			}
    136 			return false, err
    137 		},
    138 	},
    139 }