goldmark_extension.go (8873B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package text 19 20 import ( 21 "context" 22 "fmt" 23 "strings" 24 25 "github.com/superseriousbusiness/gotosocial/internal/db" 26 "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" 27 "github.com/superseriousbusiness/gotosocial/internal/log" 28 "github.com/superseriousbusiness/gotosocial/internal/regexes" 29 "github.com/superseriousbusiness/gotosocial/internal/util" 30 "github.com/yuin/goldmark" 31 "github.com/yuin/goldmark/ast" 32 "github.com/yuin/goldmark/parser" 33 "github.com/yuin/goldmark/renderer" 34 "github.com/yuin/goldmark/text" 35 mdutil "github.com/yuin/goldmark/util" 36 ) 37 38 // A goldmark extension that parses potential mentions and hashtags separately from regular 39 // text, so that they stay as one contiguous text fragment in the AST, and then renders 40 // them separately too, to avoid scanning normal text for mentions and tags. 41 42 // mention and hashtag fulfil the goldmark ast.Node interface. 43 type mention struct { 44 ast.BaseInline 45 Segment text.Segment 46 } 47 48 type hashtag struct { 49 ast.BaseInline 50 Segment text.Segment 51 } 52 53 type emoji struct { 54 ast.BaseInline 55 Segment text.Segment 56 } 57 58 var ( 59 kindMention = ast.NewNodeKind("Mention") 60 kindHashtag = ast.NewNodeKind("Hashtag") 61 kindEmoji = ast.NewNodeKind("Emoji") 62 ) 63 64 func (n *mention) Kind() ast.NodeKind { 65 return kindMention 66 } 67 68 func (n *hashtag) Kind() ast.NodeKind { 69 return kindHashtag 70 } 71 72 func (n *emoji) Kind() ast.NodeKind { 73 return kindEmoji 74 } 75 76 // Dump can be used for debugging. 77 func (n *mention) Dump(source []byte, level int) { 78 fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) 79 } 80 81 func (n *hashtag) Dump(source []byte, level int) { 82 fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) 83 } 84 85 func (n *emoji) Dump(source []byte, level int) { 86 fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) 87 } 88 89 // newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment. 90 // The contained segment is used in rendering. 91 func newMention(s text.Segment) *mention { 92 return &mention{ 93 BaseInline: ast.BaseInline{}, 94 Segment: s, 95 } 96 } 97 98 func newHashtag(s text.Segment) *hashtag { 99 return &hashtag{ 100 BaseInline: ast.BaseInline{}, 101 Segment: s, 102 } 103 } 104 105 func newEmoji(s text.Segment) *emoji { 106 return &emoji{ 107 BaseInline: ast.BaseInline{}, 108 Segment: s, 109 } 110 } 111 112 // mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface. 113 type mentionParser struct{} 114 115 type hashtagParser struct{} 116 117 type emojiParser struct{} 118 119 func (p *mentionParser) Trigger() []byte { 120 return []byte{'@'} 121 } 122 123 func (p *hashtagParser) Trigger() []byte { 124 return []byte{'#'} 125 } 126 127 func (p *emojiParser) Trigger() []byte { 128 return []byte{':'} 129 } 130 131 func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { 132 before := block.PrecendingCharacter() 133 line, segment := block.PeekLine() 134 135 if !util.IsMentionOrHashtagBoundary(before) { 136 return nil 137 } 138 139 // unideal for performance but makes use of existing regex 140 loc := regexes.MentionFinder.FindIndex(line) 141 switch { 142 case loc == nil: 143 fallthrough 144 case loc[0] != 0: // fail if not found at start 145 return nil 146 default: 147 block.Advance(loc[1]) 148 return newMention(segment.WithStop(segment.Start + loc[1])) 149 } 150 } 151 152 func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { 153 before := block.PrecendingCharacter() 154 line, segment := block.PeekLine() 155 s := string(line) 156 157 if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 { 158 return nil 159 } 160 161 for i, r := range s { 162 switch { 163 case r == '#' && i == 0: 164 // ignore initial # 165 continue 166 case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r): 167 // Fake hashtag, don't trust it 168 return nil 169 case util.IsMentionOrHashtagBoundary(r): 170 if i <= 1 { 171 // empty 172 return nil 173 } 174 // End of hashtag 175 block.Advance(i) 176 return newHashtag(segment.WithStop(segment.Start + i)) 177 } 178 } 179 // If we don't find invalid characters before the end of the line then it's all hashtag, babey 180 block.Advance(segment.Len()) 181 return newHashtag(segment) 182 } 183 184 func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { 185 line, segment := block.PeekLine() 186 187 // unideal for performance but makes use of existing regex 188 loc := regexes.EmojiFinder.FindIndex(line) 189 switch { 190 case loc == nil: 191 fallthrough 192 case loc[0] != 0: // fail if not found at start 193 return nil 194 default: 195 block.Advance(loc[1]) 196 return newEmoji(segment.WithStop(segment.Start + loc[1])) 197 } 198 } 199 200 // customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces. 201 // It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the 202 // fields are used to report tags and mentions to the caller for use as metadata. 203 type customRenderer struct { 204 f *formatter 205 ctx context.Context 206 parseMention gtsmodel.ParseMentionFunc 207 accountID string 208 statusID string 209 emojiOnly bool 210 result *FormatResult 211 } 212 213 func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { 214 reg.Register(kindMention, r.renderMention) 215 reg.Register(kindHashtag, r.renderHashtag) 216 reg.Register(kindEmoji, r.renderEmoji) 217 } 218 219 func (r *customRenderer) Extend(m goldmark.Markdown) { 220 // 1000 is set as the lowest priority, but it's arbitrary 221 m.Parser().AddOptions(parser.WithInlineParsers( 222 mdutil.Prioritized(&emojiParser{}, 1000), 223 )) 224 if !r.emojiOnly { 225 m.Parser().AddOptions(parser.WithInlineParsers( 226 mdutil.Prioritized(&mentionParser{}, 1000), 227 mdutil.Prioritized(&hashtagParser{}, 1000), 228 )) 229 } 230 m.Renderer().AddOptions(renderer.WithNodeRenderers( 231 mdutil.Prioritized(r, 1000), 232 )) 233 } 234 235 // renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML. 236 func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { 237 if !entering { 238 return ast.WalkSkipChildren, nil 239 } 240 241 n, ok := node.(*mention) // this function is only registered for kindMention 242 if !ok { 243 log.Panic(r.ctx, "type assertion failed") 244 } 245 text := string(n.Segment.Value(source)) 246 247 html := r.replaceMention(text) 248 249 // we don't have much recourse if this fails 250 if _, err := w.WriteString(html); err != nil { 251 log.Errorf(r.ctx, "error writing HTML: %s", err) 252 } 253 return ast.WalkSkipChildren, nil 254 } 255 256 func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { 257 if !entering { 258 return ast.WalkSkipChildren, nil 259 } 260 261 n, ok := node.(*hashtag) // this function is only registered for kindHashtag 262 if !ok { 263 log.Panic(r.ctx, "type assertion failed") 264 } 265 text := string(n.Segment.Value(source)) 266 267 html := r.replaceHashtag(text) 268 269 _, err := w.WriteString(html) 270 // we don't have much recourse if this fails 271 if err != nil { 272 log.Errorf(r.ctx, "error writing HTML: %s", err) 273 } 274 return ast.WalkSkipChildren, nil 275 } 276 277 // renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata. 278 func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { 279 if !entering { 280 return ast.WalkSkipChildren, nil 281 } 282 283 n, ok := node.(*emoji) // this function is only registered for kindEmoji 284 if !ok { 285 log.Panic(r.ctx, "type assertion failed") 286 } 287 text := string(n.Segment.Value(source)) 288 shortcode := text[1 : len(text)-1] 289 290 emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "") 291 if err != nil { 292 if err != db.ErrNoEntries { 293 log.Errorf(nil, "error getting local emoji with shortcode %s: %s", shortcode, err) 294 } 295 } else if *emoji.VisibleInPicker && !*emoji.Disabled { 296 listed := false 297 for _, e := range r.result.Emojis { 298 if e.Shortcode == emoji.Shortcode { 299 listed = true 300 break 301 } 302 } 303 if !listed { 304 r.result.Emojis = append(r.result.Emojis, emoji) 305 } 306 } 307 308 // we don't have much recourse if this fails 309 if _, err := w.WriteString(text); err != nil { 310 log.Errorf(r.ctx, "error writing HTML: %s", err) 311 } 312 return ast.WalkSkipChildren, nil 313 }