linkify.go (7690B)
1 package extension 2 3 import ( 4 "bytes" 5 "regexp" 6 7 "github.com/yuin/goldmark" 8 "github.com/yuin/goldmark/ast" 9 "github.com/yuin/goldmark/parser" 10 "github.com/yuin/goldmark/text" 11 "github.com/yuin/goldmark/util" 12 ) 13 14 var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) 15 16 var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) 17 18 // An LinkifyConfig struct is a data structure that holds configuration of the 19 // Linkify extension. 20 type LinkifyConfig struct { 21 AllowedProtocols [][]byte 22 URLRegexp *regexp.Regexp 23 WWWRegexp *regexp.Regexp 24 EmailRegexp *regexp.Regexp 25 } 26 27 const ( 28 optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols" 29 optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp" 30 optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp" 31 optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp" 32 ) 33 34 // SetOption implements SetOptioner. 35 func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) { 36 switch name { 37 case optLinkifyAllowedProtocols: 38 c.AllowedProtocols = value.([][]byte) 39 case optLinkifyURLRegexp: 40 c.URLRegexp = value.(*regexp.Regexp) 41 case optLinkifyWWWRegexp: 42 c.WWWRegexp = value.(*regexp.Regexp) 43 case optLinkifyEmailRegexp: 44 c.EmailRegexp = value.(*regexp.Regexp) 45 } 46 } 47 48 // A LinkifyOption interface sets options for the LinkifyOption. 49 type LinkifyOption interface { 50 parser.Option 51 SetLinkifyOption(*LinkifyConfig) 52 } 53 54 type withLinkifyAllowedProtocols struct { 55 value [][]byte 56 } 57 58 func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) { 59 c.Options[optLinkifyAllowedProtocols] = o.value 60 } 61 62 func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) { 63 p.AllowedProtocols = o.value 64 } 65 66 // WithLinkifyAllowedProtocols is a functional option that specify allowed 67 // protocols in autolinks. Each protocol must end with ':' like 68 // 'http:' . 69 func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption { 70 return &withLinkifyAllowedProtocols{ 71 value: value, 72 } 73 } 74 75 type withLinkifyURLRegexp struct { 76 value *regexp.Regexp 77 } 78 79 func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) { 80 c.Options[optLinkifyURLRegexp] = o.value 81 } 82 83 func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) { 84 p.URLRegexp = o.value 85 } 86 87 // WithLinkifyURLRegexp is a functional option that specify 88 // a pattern of the URL including a protocol. 89 func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption { 90 return &withLinkifyURLRegexp{ 91 value: value, 92 } 93 } 94 95 // WithLinkifyWWWRegexp is a functional option that specify 96 // a pattern of the URL without a protocol. 97 // This pattern must start with 'www.' . 98 type withLinkifyWWWRegexp struct { 99 value *regexp.Regexp 100 } 101 102 func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) { 103 c.Options[optLinkifyWWWRegexp] = o.value 104 } 105 106 func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) { 107 p.WWWRegexp = o.value 108 } 109 110 func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption { 111 return &withLinkifyWWWRegexp{ 112 value: value, 113 } 114 } 115 116 // WithLinkifyWWWRegexp is a functional otpion that specify 117 // a pattern of the email address. 118 type withLinkifyEmailRegexp struct { 119 value *regexp.Regexp 120 } 121 122 func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) { 123 c.Options[optLinkifyEmailRegexp] = o.value 124 } 125 126 func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) { 127 p.EmailRegexp = o.value 128 } 129 130 func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption { 131 return &withLinkifyEmailRegexp{ 132 value: value, 133 } 134 } 135 136 type linkifyParser struct { 137 LinkifyConfig 138 } 139 140 // NewLinkifyParser return a new InlineParser can parse 141 // text that seems like a URL. 142 func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser { 143 p := &linkifyParser{ 144 LinkifyConfig: LinkifyConfig{ 145 AllowedProtocols: nil, 146 URLRegexp: urlRegexp, 147 WWWRegexp: wwwURLRegxp, 148 }, 149 } 150 for _, o := range opts { 151 o.SetLinkifyOption(&p.LinkifyConfig) 152 } 153 return p 154 } 155 156 func (s *linkifyParser) Trigger() []byte { 157 // ' ' indicates any white spaces and a line head 158 return []byte{' ', '*', '_', '~', '('} 159 } 160 161 var ( 162 protoHTTP = []byte("http:") 163 protoHTTPS = []byte("https:") 164 protoFTP = []byte("ftp:") 165 domainWWW = []byte("www.") 166 ) 167 168 func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { 169 if pc.IsInLinkLabel() { 170 return nil 171 } 172 line, segment := block.PeekLine() 173 consumes := 0 174 start := segment.Start 175 c := line[0] 176 // advance if current position is not a line head. 177 if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' { 178 consumes++ 179 start++ 180 line = line[1:] 181 } 182 183 var m []int 184 var protocol []byte 185 var typ ast.AutoLinkType = ast.AutoLinkURL 186 if s.LinkifyConfig.AllowedProtocols == nil { 187 if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { 188 m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) 189 } 190 } else { 191 for _, prefix := range s.LinkifyConfig.AllowedProtocols { 192 if bytes.HasPrefix(line, prefix) { 193 m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) 194 break 195 } 196 } 197 } 198 if m == nil && bytes.HasPrefix(line, domainWWW) { 199 m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line) 200 protocol = []byte("http") 201 } 202 if m != nil && m[0] != 0 { 203 m = nil 204 } 205 if m != nil && m[0] == 0 { 206 lastChar := line[m[1]-1] 207 if lastChar == '.' { 208 m[1]-- 209 } else if lastChar == ')' { 210 closing := 0 211 for i := m[1] - 1; i >= m[0]; i-- { 212 if line[i] == ')' { 213 closing++ 214 } else if line[i] == '(' { 215 closing-- 216 } 217 } 218 if closing > 0 { 219 m[1] -= closing 220 } 221 } else if lastChar == ';' { 222 i := m[1] - 2 223 for ; i >= m[0]; i-- { 224 if util.IsAlphaNumeric(line[i]) { 225 continue 226 } 227 break 228 } 229 if i != m[1]-2 { 230 if line[i] == '&' { 231 m[1] -= m[1] - i 232 } 233 } 234 } 235 } 236 if m == nil { 237 if len(line) > 0 && util.IsPunct(line[0]) { 238 return nil 239 } 240 typ = ast.AutoLinkEmail 241 stop := -1 242 if s.LinkifyConfig.EmailRegexp == nil { 243 stop = util.FindEmailIndex(line) 244 } else { 245 m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line) 246 if m != nil && m[0] == 0 { 247 stop = m[1] 248 } 249 } 250 if stop < 0 { 251 return nil 252 } 253 at := bytes.IndexByte(line, '@') 254 m = []int{0, stop, at, stop - 1} 255 if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 { 256 return nil 257 } 258 lastChar := line[m[1]-1] 259 if lastChar == '.' { 260 m[1]-- 261 } 262 if m[1] < len(line) { 263 nextChar := line[m[1]] 264 if nextChar == '-' || nextChar == '_' { 265 return nil 266 } 267 } 268 } 269 if m == nil { 270 return nil 271 } 272 if consumes != 0 { 273 s := segment.WithStop(segment.Start + 1) 274 ast.MergeOrAppendTextSegment(parent, s) 275 } 276 i := m[1] - 1 277 for ; i > 0; i-- { 278 c := line[i] 279 switch c { 280 case '?', '!', '.', ',', ':', '*', '_', '~': 281 default: 282 goto endfor 283 } 284 } 285 endfor: 286 i++ 287 consumes += i 288 block.Advance(consumes) 289 n := ast.NewTextSegment(text.NewSegment(start, start+i)) 290 link := ast.NewAutoLink(typ, n) 291 link.Protocol = protocol 292 return link 293 } 294 295 func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { 296 // nothing to do 297 } 298 299 type linkify struct { 300 options []LinkifyOption 301 } 302 303 // Linkify is an extension that allow you to parse text that seems like a URL. 304 var Linkify = &linkify{} 305 306 func NewLinkify(opts ...LinkifyOption) goldmark.Extender { 307 return &linkify{ 308 options: opts, 309 } 310 } 311 312 func (e *linkify) Extend(m goldmark.Markdown) { 313 m.Parser().AddOptions( 314 parser.WithInlineParsers( 315 util.Prioritized(NewLinkifyParser(e.options...), 999), 316 ), 317 ) 318 }