typographer.go (10236B)
1 package extension 2 3 import ( 4 "unicode" 5 6 "github.com/yuin/goldmark" 7 gast "github.com/yuin/goldmark/ast" 8 "github.com/yuin/goldmark/parser" 9 "github.com/yuin/goldmark/text" 10 "github.com/yuin/goldmark/util" 11 ) 12 13 var uncloseCounterKey = parser.NewContextKey() 14 15 type unclosedCounter struct { 16 Single int 17 Double int 18 } 19 20 func (u *unclosedCounter) Reset() { 21 u.Single = 0 22 u.Double = 0 23 } 24 25 func getUnclosedCounter(pc parser.Context) *unclosedCounter { 26 v := pc.Get(uncloseCounterKey) 27 if v == nil { 28 v = &unclosedCounter{} 29 pc.Set(uncloseCounterKey, v) 30 } 31 return v.(*unclosedCounter) 32 } 33 34 // TypographicPunctuation is a key of the punctuations that can be replaced with 35 // typographic entities. 36 type TypographicPunctuation int 37 38 const ( 39 // LeftSingleQuote is ' 40 LeftSingleQuote TypographicPunctuation = iota + 1 41 // RightSingleQuote is ' 42 RightSingleQuote 43 // LeftDoubleQuote is " 44 LeftDoubleQuote 45 // RightDoubleQuote is " 46 RightDoubleQuote 47 // EnDash is -- 48 EnDash 49 // EmDash is --- 50 EmDash 51 // Ellipsis is ... 52 Ellipsis 53 // LeftAngleQuote is << 54 LeftAngleQuote 55 // RightAngleQuote is >> 56 RightAngleQuote 57 // Apostrophe is ' 58 Apostrophe 59 60 typographicPunctuationMax 61 ) 62 63 // An TypographerConfig struct is a data structure that holds configuration of the 64 // Typographer extension. 65 type TypographerConfig struct { 66 Substitutions [][]byte 67 } 68 69 func newDefaultSubstitutions() [][]byte { 70 replacements := make([][]byte, typographicPunctuationMax) 71 replacements[LeftSingleQuote] = []byte("‘") 72 replacements[RightSingleQuote] = []byte("’") 73 replacements[LeftDoubleQuote] = []byte("“") 74 replacements[RightDoubleQuote] = []byte("”") 75 replacements[EnDash] = []byte("–") 76 replacements[EmDash] = []byte("—") 77 replacements[Ellipsis] = []byte("…") 78 replacements[LeftAngleQuote] = []byte("«") 79 replacements[RightAngleQuote] = []byte("»") 80 replacements[Apostrophe] = []byte("’") 81 82 return replacements 83 } 84 85 // SetOption implements SetOptioner. 86 func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) { 87 switch name { 88 case optTypographicSubstitutions: 89 b.Substitutions = value.([][]byte) 90 } 91 } 92 93 // A TypographerOption interface sets options for the TypographerParser. 94 type TypographerOption interface { 95 parser.Option 96 SetTypographerOption(*TypographerConfig) 97 } 98 99 const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions" 100 101 // TypographicSubstitutions is a list of the substitutions for the Typographer extension. 102 type TypographicSubstitutions map[TypographicPunctuation][]byte 103 104 type withTypographicSubstitutions struct { 105 value [][]byte 106 } 107 108 func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) { 109 c.Options[optTypographicSubstitutions] = o.value 110 } 111 112 func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) { 113 p.Substitutions = o.value 114 } 115 116 // WithTypographicSubstitutions is a functional otpion that specify replacement text 117 // for punctuations. 118 func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption { 119 replacements := newDefaultSubstitutions() 120 for k, v := range values { 121 replacements[k] = v 122 } 123 124 return &withTypographicSubstitutions{replacements} 125 } 126 127 type typographerDelimiterProcessor struct { 128 } 129 130 func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool { 131 return b == '\'' || b == '"' 132 } 133 134 func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool { 135 return opener.Char == closer.Char 136 } 137 138 func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node { 139 return nil 140 } 141 142 var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{} 143 144 type typographerParser struct { 145 TypographerConfig 146 } 147 148 // NewTypographerParser return a new InlineParser that parses 149 // typographer expressions. 150 func NewTypographerParser(opts ...TypographerOption) parser.InlineParser { 151 p := &typographerParser{ 152 TypographerConfig: TypographerConfig{ 153 Substitutions: newDefaultSubstitutions(), 154 }, 155 } 156 for _, o := range opts { 157 o.SetTypographerOption(&p.TypographerConfig) 158 } 159 return p 160 } 161 162 func (s *typographerParser) Trigger() []byte { 163 return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['} 164 } 165 166 func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node { 167 line, _ := block.PeekLine() 168 c := line[0] 169 if len(line) > 2 { 170 if c == '-' { 171 if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // --- 172 node := gast.NewString(s.Substitutions[EmDash]) 173 node.SetCode(true) 174 block.Advance(3) 175 return node 176 } 177 } else if c == '.' { 178 if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ... 179 node := gast.NewString(s.Substitutions[Ellipsis]) 180 node.SetCode(true) 181 block.Advance(3) 182 return node 183 } 184 return nil 185 } 186 } 187 if len(line) > 1 { 188 if c == '<' { 189 if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // << 190 node := gast.NewString(s.Substitutions[LeftAngleQuote]) 191 node.SetCode(true) 192 block.Advance(2) 193 return node 194 } 195 return nil 196 } else if c == '>' { 197 if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >> 198 node := gast.NewString(s.Substitutions[RightAngleQuote]) 199 node.SetCode(true) 200 block.Advance(2) 201 return node 202 } 203 return nil 204 } else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // -- 205 node := gast.NewString(s.Substitutions[EnDash]) 206 node.SetCode(true) 207 block.Advance(2) 208 return node 209 } 210 } 211 if c == '\'' || c == '"' { 212 before := block.PrecendingCharacter() 213 d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor) 214 if d == nil { 215 return nil 216 } 217 counter := getUnclosedCounter(pc) 218 if c == '\'' { 219 if s.Substitutions[Apostrophe] != nil { 220 // Handle decade abbrevations such as '90s 221 if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' { 222 after := rune(' ') 223 if len(line) > 4 { 224 after = util.ToRune(line, 4) 225 } 226 if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) { 227 node := gast.NewString(s.Substitutions[Apostrophe]) 228 node.SetCode(true) 229 block.Advance(1) 230 return node 231 } 232 } 233 // special cases: 'twas, 'em, 'net 234 if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) && (line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') { 235 node := gast.NewString(s.Substitutions[Apostrophe]) 236 node.SetCode(true) 237 block.Advance(1) 238 return node 239 } 240 // Convert normal apostrophes. This is probably more flexible than necessary but 241 // converts any apostrophe in between two alphanumerics. 242 if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (unicode.IsLetter(util.ToRune(line, 1))) { 243 node := gast.NewString(s.Substitutions[Apostrophe]) 244 node.SetCode(true) 245 block.Advance(1) 246 return node 247 } 248 } 249 if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose { 250 nt := LeftSingleQuote 251 // special cases: Alice's, I'm, Don't, You'd 252 if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') && (len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) { 253 nt = RightSingleQuote 254 } 255 // special cases: I've, I'll, You're 256 if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') || (line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) && (len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) { 257 nt = RightSingleQuote 258 } 259 if nt == LeftSingleQuote { 260 counter.Single++ 261 } 262 263 node := gast.NewString(s.Substitutions[nt]) 264 node.SetCode(true) 265 block.Advance(1) 266 return node 267 } 268 if s.Substitutions[RightSingleQuote] != nil { 269 // plural possesives and abbreviations: Smiths', doin' 270 if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) && (len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) { 271 node := gast.NewString(s.Substitutions[RightSingleQuote]) 272 node.SetCode(true) 273 block.Advance(1) 274 return node 275 } 276 } 277 if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 { 278 isClose := d.CanClose && !d.CanOpen 279 maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2]))) 280 if isClose || maybeClose { 281 node := gast.NewString(s.Substitutions[RightSingleQuote]) 282 node.SetCode(true) 283 block.Advance(1) 284 counter.Single-- 285 return node 286 } 287 } 288 } 289 if c == '"' { 290 if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose { 291 node := gast.NewString(s.Substitutions[LeftDoubleQuote]) 292 node.SetCode(true) 293 block.Advance(1) 294 counter.Double++ 295 return node 296 } 297 if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 { 298 isClose := d.CanClose && !d.CanOpen 299 maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2]))) 300 if isClose || maybeClose { 301 // special case: "Monitor 21"" 302 if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) { 303 return nil 304 } 305 node := gast.NewString(s.Substitutions[RightDoubleQuote]) 306 node.SetCode(true) 307 block.Advance(1) 308 counter.Double-- 309 return node 310 } 311 } 312 } 313 } 314 return nil 315 } 316 317 func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) { 318 getUnclosedCounter(pc).Reset() 319 } 320 321 type typographer struct { 322 options []TypographerOption 323 } 324 325 // Typographer is an extension that replaces punctuations with typographic entities. 326 var Typographer = &typographer{} 327 328 // NewTypographer returns a new Extender that replaces punctuations with typographic entities. 329 func NewTypographer(opts ...TypographerOption) goldmark.Extender { 330 return &typographer{ 331 options: opts, 332 } 333 } 334 335 func (e *typographer) Extend(m goldmark.Markdown) { 336 m.Parser().AddOptions(parser.WithInlineParsers( 337 util.Prioritized(NewTypographerParser(e.options...), 9999), 338 )) 339 }