gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

typographer.go (10236B)


      1 package extension
      2 
      3 import (
      4 	"unicode"
      5 
      6 	"github.com/yuin/goldmark"
      7 	gast "github.com/yuin/goldmark/ast"
      8 	"github.com/yuin/goldmark/parser"
      9 	"github.com/yuin/goldmark/text"
     10 	"github.com/yuin/goldmark/util"
     11 )
     12 
// uncloseCounterKey is the parser.Context key under which getUnclosedCounter
// stores and looks up the *unclosedCounter for the current parse.
var uncloseCounterKey = parser.NewContextKey()
     14 
     15 type unclosedCounter struct {
     16 	Single int
     17 	Double int
     18 }
     19 
     20 func (u *unclosedCounter) Reset() {
     21 	u.Single = 0
     22 	u.Double = 0
     23 }
     24 
     25 func getUnclosedCounter(pc parser.Context) *unclosedCounter {
     26 	v := pc.Get(uncloseCounterKey)
     27 	if v == nil {
     28 		v = &unclosedCounter{}
     29 		pc.Set(uncloseCounterKey, v)
     30 	}
     31 	return v.(*unclosedCounter)
     32 }
     33 
     34 // TypographicPunctuation is a key of the punctuations that can be replaced with
     35 // typographic entities.
     36 type TypographicPunctuation int
     37 
     38 const (
     39 	// LeftSingleQuote is '
     40 	LeftSingleQuote TypographicPunctuation = iota + 1
     41 	// RightSingleQuote is '
     42 	RightSingleQuote
     43 	// LeftDoubleQuote is "
     44 	LeftDoubleQuote
     45 	// RightDoubleQuote is "
     46 	RightDoubleQuote
     47 	// EnDash is --
     48 	EnDash
     49 	// EmDash is ---
     50 	EmDash
     51 	// Ellipsis is ...
     52 	Ellipsis
     53 	// LeftAngleQuote is <<
     54 	LeftAngleQuote
     55 	// RightAngleQuote is >>
     56 	RightAngleQuote
     57 	// Apostrophe is '
     58 	Apostrophe
     59 
     60 	typographicPunctuationMax
     61 )
     62 
     63 // An TypographerConfig struct is a data structure that holds configuration of the
     64 // Typographer extension.
     65 type TypographerConfig struct {
     66 	Substitutions [][]byte
     67 }
     68 
     69 func newDefaultSubstitutions() [][]byte {
     70 	replacements := make([][]byte, typographicPunctuationMax)
     71 	replacements[LeftSingleQuote] = []byte("&lsquo;")
     72 	replacements[RightSingleQuote] = []byte("&rsquo;")
     73 	replacements[LeftDoubleQuote] = []byte("&ldquo;")
     74 	replacements[RightDoubleQuote] = []byte("&rdquo;")
     75 	replacements[EnDash] = []byte("&ndash;")
     76 	replacements[EmDash] = []byte("&mdash;")
     77 	replacements[Ellipsis] = []byte("&hellip;")
     78 	replacements[LeftAngleQuote] = []byte("&laquo;")
     79 	replacements[RightAngleQuote] = []byte("&raquo;")
     80 	replacements[Apostrophe] = []byte("&rsquo;")
     81 
     82 	return replacements
     83 }
     84 
     85 // SetOption implements SetOptioner.
     86 func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
     87 	switch name {
     88 	case optTypographicSubstitutions:
     89 		b.Substitutions = value.([][]byte)
     90 	}
     91 }
     92 
// A TypographerOption interface sets options for the TypographerParser.
// It embeds parser.Option and can additionally apply itself directly to a
// TypographerConfig through SetTypographerOption.
type TypographerOption interface {
	parser.Option
	SetTypographerOption(*TypographerConfig)
}
     98 
// optTypographicSubstitutions is the parser option name under which the
// substitution table is stored in parser.Config.Options and recognised by
// TypographerConfig.SetOption.
const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions"

// TypographicSubstitutions is a list of the substitutions for the Typographer extension.
type TypographicSubstitutions map[TypographicPunctuation][]byte
    103 
    104 type withTypographicSubstitutions struct {
    105 	value [][]byte
    106 }
    107 
    108 func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
    109 	c.Options[optTypographicSubstitutions] = o.value
    110 }
    111 
    112 func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
    113 	p.Substitutions = o.value
    114 }
    115 
    116 // WithTypographicSubstitutions is a functional otpion that specify replacement text
    117 // for punctuations.
    118 func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
    119 	replacements := newDefaultSubstitutions()
    120 	for k, v := range values {
    121 		replacements[k] = v
    122 	}
    123 
    124 	return &withTypographicSubstitutions{replacements}
    125 }
    126 
    127 type typographerDelimiterProcessor struct {
    128 }
    129 
    130 func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
    131 	return b == '\'' || b == '"'
    132 }
    133 
    134 func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
    135 	return opener.Char == closer.Char
    136 }
    137 
    138 func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
    139 	return nil
    140 }
    141 
// defaultTypographerDelimiterProcessor is the shared processor instance that
// Parse passes to parser.ScanDelimiter when it encounters a quote character.
var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
    143 
    144 type typographerParser struct {
    145 	TypographerConfig
    146 }
    147 
    148 // NewTypographerParser return a new InlineParser that parses
    149 // typographer expressions.
    150 func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
    151 	p := &typographerParser{
    152 		TypographerConfig: TypographerConfig{
    153 			Substitutions: newDefaultSubstitutions(),
    154 		},
    155 	}
    156 	for _, o := range opts {
    157 		o.SetTypographerOption(&p.TypographerConfig)
    158 	}
    159 	return p
    160 }
    161 
    162 func (s *typographerParser) Trigger() []byte {
    163 	return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['}
    164 }
    165 
    166 func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
    167 	line, _ := block.PeekLine()
    168 	c := line[0]
    169 	if len(line) > 2 {
    170 		if c == '-' {
    171 			if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // ---
    172 				node := gast.NewString(s.Substitutions[EmDash])
    173 				node.SetCode(true)
    174 				block.Advance(3)
    175 				return node
    176 			}
    177 		} else if c == '.' {
    178 			if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ...
    179 				node := gast.NewString(s.Substitutions[Ellipsis])
    180 				node.SetCode(true)
    181 				block.Advance(3)
    182 				return node
    183 			}
    184 			return nil
    185 		}
    186 	}
    187 	if len(line) > 1 {
    188 		if c == '<' {
    189 			if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // <<
    190 				node := gast.NewString(s.Substitutions[LeftAngleQuote])
    191 				node.SetCode(true)
    192 				block.Advance(2)
    193 				return node
    194 			}
    195 			return nil
    196 		} else if c == '>' {
    197 			if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >>
    198 				node := gast.NewString(s.Substitutions[RightAngleQuote])
    199 				node.SetCode(true)
    200 				block.Advance(2)
    201 				return node
    202 			}
    203 			return nil
    204 		} else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // --
    205 			node := gast.NewString(s.Substitutions[EnDash])
    206 			node.SetCode(true)
    207 			block.Advance(2)
    208 			return node
    209 		}
    210 	}
    211 	if c == '\'' || c == '"' {
    212 		before := block.PrecendingCharacter()
    213 		d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
    214 		if d == nil {
    215 			return nil
    216 		}
    217 		counter := getUnclosedCounter(pc)
    218 		if c == '\'' {
    219 			if s.Substitutions[Apostrophe] != nil {
    220 				// Handle decade abbrevations such as '90s
    221 				if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
    222 					after := rune(' ')
    223 					if len(line) > 4 {
    224 						after = util.ToRune(line, 4)
    225 					}
    226 					if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) {
    227 						node := gast.NewString(s.Substitutions[Apostrophe])
    228 						node.SetCode(true)
    229 						block.Advance(1)
    230 						return node
    231 					}
    232 				}
    233 				// special cases: 'twas, 'em, 'net
    234 				if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) && (line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
    235 					node := gast.NewString(s.Substitutions[Apostrophe])
    236 					node.SetCode(true)
    237 					block.Advance(1)
    238 					return node
    239 				}
    240 				// Convert normal apostrophes. This is probably more flexible than necessary but
    241 				// converts any apostrophe in between two alphanumerics.
    242 				if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (unicode.IsLetter(util.ToRune(line, 1))) {
    243 					node := gast.NewString(s.Substitutions[Apostrophe])
    244 					node.SetCode(true)
    245 					block.Advance(1)
    246 					return node
    247 				}
    248 			}
    249 			if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
    250 				nt := LeftSingleQuote
    251 				// special cases: Alice's, I'm, Don't, You'd
    252 				if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') && (len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
    253 					nt = RightSingleQuote
    254 				}
    255 				// special cases: I've, I'll, You're
    256 				if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') || (line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) && (len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
    257 					nt = RightSingleQuote
    258 				}
    259 				if nt == LeftSingleQuote {
    260 					counter.Single++
    261 				}
    262 
    263 				node := gast.NewString(s.Substitutions[nt])
    264 				node.SetCode(true)
    265 				block.Advance(1)
    266 				return node
    267 			}
    268 			if s.Substitutions[RightSingleQuote] != nil {
    269 				// plural possesives and abbreviations: Smiths', doin'
    270 				if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) && (len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
    271 					node := gast.NewString(s.Substitutions[RightSingleQuote])
    272 					node.SetCode(true)
    273 					block.Advance(1)
    274 					return node
    275 				}
    276 			}
    277 			if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
    278 				isClose := d.CanClose && !d.CanOpen
    279 				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
    280 				if isClose || maybeClose {
    281 					node := gast.NewString(s.Substitutions[RightSingleQuote])
    282 					node.SetCode(true)
    283 					block.Advance(1)
    284 					counter.Single--
    285 					return node
    286 				}
    287 			}
    288 		}
    289 		if c == '"' {
    290 			if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose {
    291 				node := gast.NewString(s.Substitutions[LeftDoubleQuote])
    292 				node.SetCode(true)
    293 				block.Advance(1)
    294 				counter.Double++
    295 				return node
    296 			}
    297 			if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
    298 				isClose := d.CanClose && !d.CanOpen
    299 				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
    300 				if isClose || maybeClose {
    301 					// special case: "Monitor 21""
    302 					if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {
    303 						return nil
    304 					}
    305 					node := gast.NewString(s.Substitutions[RightDoubleQuote])
    306 					node.SetCode(true)
    307 					block.Advance(1)
    308 					counter.Double--
    309 					return node
    310 				}
    311 			}
    312 		}
    313 	}
    314 	return nil
    315 }
    316 
    317 func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
    318 	getUnclosedCounter(pc).Reset()
    319 }
    320 
// typographer is the goldmark extension type; options holds the
// TypographerOptions that Extend forwards to NewTypographerParser.
type typographer struct {
	options []TypographerOption
}

// Typographer is an extension that replaces punctuations with typographic entities.
// It carries no options.
var Typographer = &typographer{}
    327 
    328 // NewTypographer returns a new Extender that replaces punctuations with typographic entities.
    329 func NewTypographer(opts ...TypographerOption) goldmark.Extender {
    330 	return &typographer{
    331 		options: opts,
    332 	}
    333 }
    334 
    335 func (e *typographer) Extend(m goldmark.Markdown) {
    336 	m.Parser().AddOptions(parser.WithInlineParsers(
    337 		util.Prioritized(NewTypographerParser(e.options...), 9999),
    338 	))
    339 }