gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

linkify.go (7690B)


      1 package extension
      2 
      3 import (
      4 	"bytes"
      5 	"regexp"
      6 
      7 	"github.com/yuin/goldmark"
      8 	"github.com/yuin/goldmark/ast"
      9 	"github.com/yuin/goldmark/parser"
     10 	"github.com/yuin/goldmark/text"
     11 	"github.com/yuin/goldmark/util"
     12 )
     13 
     14 var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
     15 
     16 var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
     17 
     18 // An LinkifyConfig struct is a data structure that holds configuration of the
     19 // Linkify extension.
     20 type LinkifyConfig struct {
     21 	AllowedProtocols [][]byte
     22 	URLRegexp        *regexp.Regexp
     23 	WWWRegexp        *regexp.Regexp
     24 	EmailRegexp      *regexp.Regexp
     25 }
     26 
     27 const (
     28 	optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
     29 	optLinkifyURLRegexp        parser.OptionName = "LinkifyURLRegexp"
     30 	optLinkifyWWWRegexp        parser.OptionName = "LinkifyWWWRegexp"
     31 	optLinkifyEmailRegexp      parser.OptionName = "LinkifyEmailRegexp"
     32 )
     33 
     34 // SetOption implements SetOptioner.
     35 func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) {
     36 	switch name {
     37 	case optLinkifyAllowedProtocols:
     38 		c.AllowedProtocols = value.([][]byte)
     39 	case optLinkifyURLRegexp:
     40 		c.URLRegexp = value.(*regexp.Regexp)
     41 	case optLinkifyWWWRegexp:
     42 		c.WWWRegexp = value.(*regexp.Regexp)
     43 	case optLinkifyEmailRegexp:
     44 		c.EmailRegexp = value.(*regexp.Regexp)
     45 	}
     46 }
     47 
     48 // A LinkifyOption interface sets options for the LinkifyOption.
     49 type LinkifyOption interface {
     50 	parser.Option
     51 	SetLinkifyOption(*LinkifyConfig)
     52 }
     53 
     54 type withLinkifyAllowedProtocols struct {
     55 	value [][]byte
     56 }
     57 
     58 func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
     59 	c.Options[optLinkifyAllowedProtocols] = o.value
     60 }
     61 
     62 func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
     63 	p.AllowedProtocols = o.value
     64 }
     65 
     66 // WithLinkifyAllowedProtocols is a functional option that specify allowed
     67 // protocols in autolinks. Each protocol must end with ':' like
     68 // 'http:' .
     69 func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
     70 	return &withLinkifyAllowedProtocols{
     71 		value: value,
     72 	}
     73 }
     74 
     75 type withLinkifyURLRegexp struct {
     76 	value *regexp.Regexp
     77 }
     78 
     79 func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
     80 	c.Options[optLinkifyURLRegexp] = o.value
     81 }
     82 
     83 func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
     84 	p.URLRegexp = o.value
     85 }
     86 
     87 // WithLinkifyURLRegexp is a functional option that specify
     88 // a pattern of the URL including a protocol.
     89 func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
     90 	return &withLinkifyURLRegexp{
     91 		value: value,
     92 	}
     93 }
     94 
     95 // WithLinkifyWWWRegexp is a functional option that specify
     96 // a pattern of the URL without a protocol.
     97 // This pattern must start with 'www.' .
     98 type withLinkifyWWWRegexp struct {
     99 	value *regexp.Regexp
    100 }
    101 
    102 func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
    103 	c.Options[optLinkifyWWWRegexp] = o.value
    104 }
    105 
    106 func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
    107 	p.WWWRegexp = o.value
    108 }
    109 
    110 func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
    111 	return &withLinkifyWWWRegexp{
    112 		value: value,
    113 	}
    114 }
    115 
    116 // WithLinkifyWWWRegexp is a functional otpion that specify
    117 // a pattern of the email address.
    118 type withLinkifyEmailRegexp struct {
    119 	value *regexp.Regexp
    120 }
    121 
    122 func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
    123 	c.Options[optLinkifyEmailRegexp] = o.value
    124 }
    125 
    126 func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
    127 	p.EmailRegexp = o.value
    128 }
    129 
    130 func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
    131 	return &withLinkifyEmailRegexp{
    132 		value: value,
    133 	}
    134 }
    135 
    136 type linkifyParser struct {
    137 	LinkifyConfig
    138 }
    139 
    140 // NewLinkifyParser return a new InlineParser can parse
    141 // text that seems like a URL.
    142 func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
    143 	p := &linkifyParser{
    144 		LinkifyConfig: LinkifyConfig{
    145 			AllowedProtocols: nil,
    146 			URLRegexp:        urlRegexp,
    147 			WWWRegexp:        wwwURLRegxp,
    148 		},
    149 	}
    150 	for _, o := range opts {
    151 		o.SetLinkifyOption(&p.LinkifyConfig)
    152 	}
    153 	return p
    154 }
    155 
    156 func (s *linkifyParser) Trigger() []byte {
    157 	// ' ' indicates any white spaces and a line head
    158 	return []byte{' ', '*', '_', '~', '('}
    159 }
    160 
    161 var (
    162 	protoHTTP  = []byte("http:")
    163 	protoHTTPS = []byte("https:")
    164 	protoFTP   = []byte("ftp:")
    165 	domainWWW  = []byte("www.")
    166 )
    167 
    168 func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
    169 	if pc.IsInLinkLabel() {
    170 		return nil
    171 	}
    172 	line, segment := block.PeekLine()
    173 	consumes := 0
    174 	start := segment.Start
    175 	c := line[0]
    176 	// advance if current position is not a line head.
    177 	if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' {
    178 		consumes++
    179 		start++
    180 		line = line[1:]
    181 	}
    182 
    183 	var m []int
    184 	var protocol []byte
    185 	var typ ast.AutoLinkType = ast.AutoLinkURL
    186 	if s.LinkifyConfig.AllowedProtocols == nil {
    187 		if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
    188 			m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
    189 		}
    190 	} else {
    191 		for _, prefix := range s.LinkifyConfig.AllowedProtocols {
    192 			if bytes.HasPrefix(line, prefix) {
    193 				m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
    194 				break
    195 			}
    196 		}
    197 	}
    198 	if m == nil && bytes.HasPrefix(line, domainWWW) {
    199 		m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
    200 		protocol = []byte("http")
    201 	}
    202 	if m != nil && m[0] != 0 {
    203 		m = nil
    204 	}
    205 	if m != nil && m[0] == 0 {
    206 		lastChar := line[m[1]-1]
    207 		if lastChar == '.' {
    208 			m[1]--
    209 		} else if lastChar == ')' {
    210 			closing := 0
    211 			for i := m[1] - 1; i >= m[0]; i-- {
    212 				if line[i] == ')' {
    213 					closing++
    214 				} else if line[i] == '(' {
    215 					closing--
    216 				}
    217 			}
    218 			if closing > 0 {
    219 				m[1] -= closing
    220 			}
    221 		} else if lastChar == ';' {
    222 			i := m[1] - 2
    223 			for ; i >= m[0]; i-- {
    224 				if util.IsAlphaNumeric(line[i]) {
    225 					continue
    226 				}
    227 				break
    228 			}
    229 			if i != m[1]-2 {
    230 				if line[i] == '&' {
    231 					m[1] -= m[1] - i
    232 				}
    233 			}
    234 		}
    235 	}
    236 	if m == nil {
    237 		if len(line) > 0 && util.IsPunct(line[0]) {
    238 			return nil
    239 		}
    240 		typ = ast.AutoLinkEmail
    241 		stop := -1
    242 		if s.LinkifyConfig.EmailRegexp == nil {
    243 			stop = util.FindEmailIndex(line)
    244 		} else {
    245 			m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
    246 			if m != nil && m[0] == 0 {
    247 				stop = m[1]
    248 			}
    249 		}
    250 		if stop < 0 {
    251 			return nil
    252 		}
    253 		at := bytes.IndexByte(line, '@')
    254 		m = []int{0, stop, at, stop - 1}
    255 		if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
    256 			return nil
    257 		}
    258 		lastChar := line[m[1]-1]
    259 		if lastChar == '.' {
    260 			m[1]--
    261 		}
    262 		if m[1] < len(line) {
    263 			nextChar := line[m[1]]
    264 			if nextChar == '-' || nextChar == '_' {
    265 				return nil
    266 			}
    267 		}
    268 	}
    269 	if m == nil {
    270 		return nil
    271 	}
    272 	if consumes != 0 {
    273 		s := segment.WithStop(segment.Start + 1)
    274 		ast.MergeOrAppendTextSegment(parent, s)
    275 	}
    276 	i := m[1] - 1
    277 	for ; i > 0; i-- {
    278 		c := line[i]
    279 		switch c {
    280 		case '?', '!', '.', ',', ':', '*', '_', '~':
    281 		default:
    282 			goto endfor
    283 		}
    284 	}
    285 endfor:
    286 	i++
    287 	consumes += i
    288 	block.Advance(consumes)
    289 	n := ast.NewTextSegment(text.NewSegment(start, start+i))
    290 	link := ast.NewAutoLink(typ, n)
    291 	link.Protocol = protocol
    292 	return link
    293 }
    294 
    295 func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
    296 	// nothing to do
    297 }
    298 
    299 type linkify struct {
    300 	options []LinkifyOption
    301 }
    302 
    303 // Linkify is an extension that allow you to parse text that seems like a URL.
    304 var Linkify = &linkify{}
    305 
    306 func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
    307 	return &linkify{
    308 		options: opts,
    309 	}
    310 }
    311 
    312 func (e *linkify) Extend(m goldmark.Markdown) {
    313 	m.Parser().AddOptions(
    314 		parser.WithInlineParsers(
    315 			util.Prioritized(NewLinkifyParser(e.options...), 999),
    316 		),
    317 	)
    318 }