gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

goldmark_extension.go (8873B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package text
     19 
     20 import (
     21 	"context"
     22 	"fmt"
     23 	"strings"
     24 
     25 	"github.com/superseriousbusiness/gotosocial/internal/db"
     26 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
     27 	"github.com/superseriousbusiness/gotosocial/internal/log"
     28 	"github.com/superseriousbusiness/gotosocial/internal/regexes"
     29 	"github.com/superseriousbusiness/gotosocial/internal/util"
     30 	"github.com/yuin/goldmark"
     31 	"github.com/yuin/goldmark/ast"
     32 	"github.com/yuin/goldmark/parser"
     33 	"github.com/yuin/goldmark/renderer"
     34 	"github.com/yuin/goldmark/text"
     35 	mdutil "github.com/yuin/goldmark/util"
     36 )
     37 
// A goldmark extension that parses potential mentions, hashtags and emoji shortcodes
// separately from regular text, so that each stays as one contiguous text fragment in
// the AST, and then renders them separately too, to avoid scanning normal text for them.
     41 
     42 // mention and hashtag fulfil the goldmark ast.Node interface.
     43 type mention struct {
     44 	ast.BaseInline
     45 	Segment text.Segment
     46 }
     47 
     48 type hashtag struct {
     49 	ast.BaseInline
     50 	Segment text.Segment
     51 }
     52 
     53 type emoji struct {
     54 	ast.BaseInline
     55 	Segment text.Segment
     56 }
     57 
// Node kinds for the custom inline nodes, each created with ast.NewNodeKind
// under the name given here. The Kind methods return these values and
// RegisterFuncs uses them as the keys for the render functions.
var (
	kindMention = ast.NewNodeKind("Mention")
	kindHashtag = ast.NewNodeKind("Hashtag")
	kindEmoji   = ast.NewNodeKind("Emoji")
)
     63 
     64 func (n *mention) Kind() ast.NodeKind {
     65 	return kindMention
     66 }
     67 
     68 func (n *hashtag) Kind() ast.NodeKind {
     69 	return kindHashtag
     70 }
     71 
     72 func (n *emoji) Kind() ast.NodeKind {
     73 	return kindEmoji
     74 }
     75 
     76 // Dump can be used for debugging.
     77 func (n *mention) Dump(source []byte, level int) {
     78 	fmt.Printf("%sMention: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))
     79 }
     80 
     81 func (n *hashtag) Dump(source []byte, level int) {
     82 	fmt.Printf("%sHashtag: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))
     83 }
     84 
     85 func (n *emoji) Dump(source []byte, level int) {
     86 	fmt.Printf("%sEmoji: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))
     87 }
     88 
     89 // newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
     90 // The contained segment is used in rendering.
     91 func newMention(s text.Segment) *mention {
     92 	return &mention{
     93 		BaseInline: ast.BaseInline{},
     94 		Segment:    s,
     95 	}
     96 }
     97 
     98 func newHashtag(s text.Segment) *hashtag {
     99 	return &hashtag{
    100 		BaseInline: ast.BaseInline{},
    101 		Segment:    s,
    102 	}
    103 }
    104 
    105 func newEmoji(s text.Segment) *emoji {
    106 	return &emoji{
    107 		BaseInline: ast.BaseInline{},
    108 		Segment:    s,
    109 	}
    110 }
    111 
    112 // mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
    113 type mentionParser struct{}
    114 
    115 type hashtagParser struct{}
    116 
    117 type emojiParser struct{}
    118 
    119 func (p *mentionParser) Trigger() []byte {
    120 	return []byte{'@'}
    121 }
    122 
    123 func (p *hashtagParser) Trigger() []byte {
    124 	return []byte{'#'}
    125 }
    126 
    127 func (p *emojiParser) Trigger() []byte {
    128 	return []byte{':'}
    129 }
    130 
    131 func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
    132 	before := block.PrecendingCharacter()
    133 	line, segment := block.PeekLine()
    134 
    135 	if !util.IsMentionOrHashtagBoundary(before) {
    136 		return nil
    137 	}
    138 
    139 	// unideal for performance but makes use of existing regex
    140 	loc := regexes.MentionFinder.FindIndex(line)
    141 	switch {
    142 	case loc == nil:
    143 		fallthrough
    144 	case loc[0] != 0: // fail if not found at start
    145 		return nil
    146 	default:
    147 		block.Advance(loc[1])
    148 		return newMention(segment.WithStop(segment.Start + loc[1]))
    149 	}
    150 }
    151 
    152 func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
    153 	before := block.PrecendingCharacter()
    154 	line, segment := block.PeekLine()
    155 	s := string(line)
    156 
    157 	if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
    158 		return nil
    159 	}
    160 
    161 	for i, r := range s {
    162 		switch {
    163 		case r == '#' && i == 0:
    164 			// ignore initial #
    165 			continue
    166 		case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
    167 			// Fake hashtag, don't trust it
    168 			return nil
    169 		case util.IsMentionOrHashtagBoundary(r):
    170 			if i <= 1 {
    171 				// empty
    172 				return nil
    173 			}
    174 			// End of hashtag
    175 			block.Advance(i)
    176 			return newHashtag(segment.WithStop(segment.Start + i))
    177 		}
    178 	}
    179 	// If we don't find invalid characters before the end of the line then it's all hashtag, babey
    180 	block.Advance(segment.Len())
    181 	return newHashtag(segment)
    182 }
    183 
    184 func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
    185 	line, segment := block.PeekLine()
    186 
    187 	// unideal for performance but makes use of existing regex
    188 	loc := regexes.EmojiFinder.FindIndex(line)
    189 	switch {
    190 	case loc == nil:
    191 		fallthrough
    192 	case loc[0] != 0: // fail if not found at start
    193 		return nil
    194 	default:
    195 		block.Advance(loc[1])
    196 		return newEmoji(segment.WithStop(segment.Start + loc[1]))
    197 	}
    198 }
    199 
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
// fields are used to report tags and mentions to the caller for use as metadata.
type customRenderer struct {
	// f is the owning formatter; renderEmoji reaches the database through f.db.
	f            *formatter
	// ctx is passed to logging calls and to the emoji database lookup.
	ctx          context.Context
	// parseMention is presumably used when replacing mentions — not used in
	// the code visible here; confirm against replaceMention.
	parseMention gtsmodel.ParseMentionFunc
	// accountID and statusID are not read in the code visible here;
	// presumably they identify the author and status being formatted.
	accountID    string
	statusID     string
	// emojiOnly, when true, makes Extend register only the emoji parser
	// and skip the mention and hashtag parsers.
	emojiOnly    bool
	// result collects metadata found while rendering; renderEmoji appends
	// to result.Emojis.
	result       *FormatResult
}
    212 
    213 func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
    214 	reg.Register(kindMention, r.renderMention)
    215 	reg.Register(kindHashtag, r.renderHashtag)
    216 	reg.Register(kindEmoji, r.renderEmoji)
    217 }
    218 
    219 func (r *customRenderer) Extend(m goldmark.Markdown) {
    220 	// 1000 is set as the lowest priority, but it's arbitrary
    221 	m.Parser().AddOptions(parser.WithInlineParsers(
    222 		mdutil.Prioritized(&emojiParser{}, 1000),
    223 	))
    224 	if !r.emojiOnly {
    225 		m.Parser().AddOptions(parser.WithInlineParsers(
    226 			mdutil.Prioritized(&mentionParser{}, 1000),
    227 			mdutil.Prioritized(&hashtagParser{}, 1000),
    228 		))
    229 	}
    230 	m.Renderer().AddOptions(renderer.WithNodeRenderers(
    231 		mdutil.Prioritized(r, 1000),
    232 	))
    233 }
    234 
    235 // renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
    236 func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
    237 	if !entering {
    238 		return ast.WalkSkipChildren, nil
    239 	}
    240 
    241 	n, ok := node.(*mention) // this function is only registered for kindMention
    242 	if !ok {
    243 		log.Panic(r.ctx, "type assertion failed")
    244 	}
    245 	text := string(n.Segment.Value(source))
    246 
    247 	html := r.replaceMention(text)
    248 
    249 	// we don't have much recourse if this fails
    250 	if _, err := w.WriteString(html); err != nil {
    251 		log.Errorf(r.ctx, "error writing HTML: %s", err)
    252 	}
    253 	return ast.WalkSkipChildren, nil
    254 }
    255 
    256 func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
    257 	if !entering {
    258 		return ast.WalkSkipChildren, nil
    259 	}
    260 
    261 	n, ok := node.(*hashtag) // this function is only registered for kindHashtag
    262 	if !ok {
    263 		log.Panic(r.ctx, "type assertion failed")
    264 	}
    265 	text := string(n.Segment.Value(source))
    266 
    267 	html := r.replaceHashtag(text)
    268 
    269 	_, err := w.WriteString(html)
    270 	// we don't have much recourse if this fails
    271 	if err != nil {
    272 		log.Errorf(r.ctx, "error writing HTML: %s", err)
    273 	}
    274 	return ast.WalkSkipChildren, nil
    275 }
    276 
    277 // renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
    278 func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
    279 	if !entering {
    280 		return ast.WalkSkipChildren, nil
    281 	}
    282 
    283 	n, ok := node.(*emoji) // this function is only registered for kindEmoji
    284 	if !ok {
    285 		log.Panic(r.ctx, "type assertion failed")
    286 	}
    287 	text := string(n.Segment.Value(source))
    288 	shortcode := text[1 : len(text)-1]
    289 
    290 	emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
    291 	if err != nil {
    292 		if err != db.ErrNoEntries {
    293 			log.Errorf(nil, "error getting local emoji with shortcode %s: %s", shortcode, err)
    294 		}
    295 	} else if *emoji.VisibleInPicker && !*emoji.Disabled {
    296 		listed := false
    297 		for _, e := range r.result.Emojis {
    298 			if e.Shortcode == emoji.Shortcode {
    299 				listed = true
    300 				break
    301 			}
    302 		}
    303 		if !listed {
    304 			r.result.Emojis = append(r.result.Emojis, emoji)
    305 		}
    306 	}
    307 
    308 	// we don't have much recourse if this fails
    309 	if _, err := w.WriteString(text); err != nil {
    310 		log.Errorf(r.ctx, "error writing HTML: %s", err)
    311 	}
    312 	return ast.WalkSkipChildren, nil
    313 }