context.go (3730B)
1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package precis 6 7 import "errors" 8 9 // This file contains tables and code related to context rules. 10 11 type catBitmap uint16 12 13 const ( 14 // These bits, once set depending on the current value, are never unset. 15 bJapanese catBitmap = 1 << iota 16 bArabicIndicDigit 17 bExtendedArabicIndicDigit 18 19 // These bits are set on each iteration depending on the current value. 20 bJoinStart 21 bJoinMid 22 bJoinEnd 23 bVirama 24 bLatinSmallL 25 bGreek 26 bHebrew 27 28 // These bits indicated which of the permanent bits need to be set at the 29 // end of the checks. 30 bMustHaveJapn 31 32 permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn 33 ) 34 35 const finalShift = 10 36 37 var errContext = errors.New("precis: contextual rule violated") 38 39 func init() { 40 // Programmatically set these required bits as, manually setting them seems 41 // too error prone. 42 for i, ct := range categoryTransitions { 43 categoryTransitions[i].keep |= permanent 44 categoryTransitions[i].accept |= ct.term 45 } 46 } 47 48 var categoryTransitions = []struct { 49 keep catBitmap // mask selecting which bits to keep from the previous state 50 set catBitmap // mask for which bits to set for this transition 51 52 // These bitmaps are used for rules that require lookahead. 53 // term&accept == term must be true, which is enforced programmatically. 54 term catBitmap // bits accepted as termination condition 55 accept catBitmap // bits that pass, but not sufficient as termination 56 57 // The rule function cannot take a *context as an argument, as it would 58 // cause the context to escape, adding significant overhead. 59 rule func(beforeBits catBitmap) (doLookahead bool, err error) 60 }{ 61 joiningL: {set: bJoinStart}, 62 joiningD: {set: bJoinStart | bJoinEnd}, 63 joiningT: {keep: bJoinStart, set: bJoinMid}, 64 joiningR: {set: bJoinEnd}, 65 viramaModifier: {set: bVirama}, 66 viramaJoinT: {set: bVirama | bJoinMid}, 67 latinSmallL: {set: bLatinSmallL}, 68 greek: {set: bGreek}, 69 greekJoinT: {set: bGreek | bJoinMid}, 70 hebrew: {set: bHebrew}, 71 hebrewJoinT: {set: bHebrew | bJoinMid}, 72 japanese: {set: bJapanese}, 73 katakanaMiddleDot: {set: bMustHaveJapn}, 74 75 zeroWidthNonJoiner: { 76 term: bJoinEnd, 77 accept: bJoinMid, 78 rule: func(before catBitmap) (doLookAhead bool, err error) { 79 if before&bVirama != 0 { 80 return false, nil 81 } 82 if before&bJoinStart == 0 { 83 return false, errContext 84 } 85 return true, nil 86 }, 87 }, 88 zeroWidthJoiner: { 89 rule: func(before catBitmap) (doLookAhead bool, err error) { 90 if before&bVirama == 0 { 91 err = errContext 92 } 93 return false, err 94 }, 95 }, 96 middleDot: { 97 term: bLatinSmallL, 98 rule: func(before catBitmap) (doLookAhead bool, err error) { 99 if before&bLatinSmallL == 0 { 100 return false, errContext 101 } 102 return true, nil 103 }, 104 }, 105 greekLowerNumeralSign: { 106 set: bGreek, 107 term: bGreek, 108 rule: func(before catBitmap) (doLookAhead bool, err error) { 109 return true, nil 110 }, 111 }, 112 hebrewPreceding: { 113 set: bHebrew, 114 rule: func(before catBitmap) (doLookAhead bool, err error) { 115 if before&bHebrew == 0 { 116 err = errContext 117 } 118 return false, err 119 }, 120 }, 121 arabicIndicDigit: { 122 set: bArabicIndicDigit, 123 rule: func(before catBitmap) (doLookAhead bool, err error) { 124 if before&bExtendedArabicIndicDigit != 0 { 125 err = errContext 126 } 127 return false, err 128 }, 129 }, 130 extendedArabicIndicDigit: { 131 set: bExtendedArabicIndicDigit, 132 rule: func(before catBitmap) (doLookAhead bool, err error) { 133 if before&bArabicIndicDigit != 0 { 134 err = errContext 135 } 136 return false, err 137 }, 138 }, 139 }