gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 49beb17a8fbdbf3517c103a477a5459a3bba404d
parent 271da016b91d8d575e13be03b440f970cd333ebe
Author: Autumn! <86073772+autumnull@users.noreply.github.com>
Date:   Fri,  3 Feb 2023 10:58:58 +0000

[chore] Text formatting overhaul (#1406)

* Implement goldmark debug print for hashtags and mentions

* Minify HTML in FromPlain

* Convert plaintext status parser to goldmark

* Move mention/tag/emoji finding logic into formatter

* Combine mention and hashtag boundary characters

* Normalize unicode when rendering hashtags
Diffstat:
M internal/api/client/statuses/statuscreate_test.go | 6 +++---
M internal/db/bundb/bundb.go | 63 ++++++++++++++++++++++++++++++---------------------------------
M internal/db/db.go | 8 ++++----
M internal/processing/account/update.go | 81 ++++++++++++++++++-------------------------------------------------------------
M internal/processing/account/update_test.go | 4 ++--
M internal/processing/status/create.go | 12 ------------
M internal/processing/status/status.go | 3 ---
M internal/processing/status/util.go | 112 +++++++++++++++++++++++--------------------------------------------------------
M internal/processing/status/util_test.go | 207 ++++++++-----------------------------------------------------------------------
D internal/text/common.go | 112 -------------------------------------------------------------------------------
D internal/text/common_test.go | 106 -------------------------------------------------------------------------------
A internal/text/emojionly.go | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M internal/text/formatter.go | 24 +++++++++++++++---------
M internal/text/formatter_test.go | 22 +++++++++++++++++++++-
A internal/text/goldmark_extension.go | 312 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/text/goldmark_plaintext.go | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D internal/text/link.go | 86 -------------------------------------------------------------------------------
D internal/text/link_test.go | 157 -------------------------------------------------------------------------------
M internal/text/markdown.go | 54 +++++++++++++-----------------------------------------
M internal/text/markdown_test.go | 117 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
D internal/text/markdownextension.go | 215 -------------------------------------------------------------------------------
A internal/text/minify.go | 45 +++++++++++++++++++++++++++++++++++++++++++++
M internal/text/plain.go | 68 ++++++++++++++++++++++++++++++++++++++++++--------------------------
M internal/text/plain_test.go | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
A internal/text/replace.go | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M internal/util/statustools.go | 110 +++++--------------------------------------------------------------------------
D internal/util/statustools_test.go | 173 -------------------------------------------------------------------------------
27 files changed, 1005 insertions(+), 1493 deletions(-)

diff --git a/internal/api/client/statuses/statuscreate_test.go b/internal/api/client/statuses/statuscreate_test.go @@ -219,7 +219,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() { err = json.Unmarshal(b, statusReply) suite.NoError(err) - suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let&#39;s see........<br/><br/><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\">docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br/><br/><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br/><br/>(tobi remember to pull the docker image challenge)</p>", statusReply.Content) + suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let's see........<br><br><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br><br><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br><br>(tobi remember to pull the docker image challenge)</p>", statusReply.Content) } func (suite 
*StatusCreateTestSuite) TestPostNewStatusWithEmoji() { @@ -252,7 +252,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() { suite.NoError(err) suite.Equal("", statusReply.SpoilerText) - suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow: <br/> here&#39;s an emoji that isn&#39;t in the db: :test_emoji:</p>", statusReply.Content) + suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:<br>here's an emoji that isn't in the db: :test_emoji:</p>", statusReply.Content) suite.Len(statusReply.Emojis, 1) apiEmoji := statusReply.Emojis[0] @@ -371,7 +371,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { suite.NoError(err) suite.Equal("", statusResponse.SpoilerText) - suite.Equal("<p>here&#39;s an image attachment</p>", statusResponse.Content) + suite.Equal("<p>here's an image attachment</p>", statusResponse.Content) suite.False(statusResponse.Sensitive) suite.Equal(apimodel.VisibilityPublic, statusResponse.Visibility) diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go @@ -473,43 +473,40 @@ func sqlitePragmas(ctx context.Context, conn *DBConn) error { CONVERSION FUNCTIONS */ -func (dbService *DBService) TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error) { +func (dbService *DBService) TagStringToTag(ctx context.Context, t string, originAccountID string) (*gtsmodel.Tag, error) { protocol := config.GetProtocol() host := config.GetHost() + now := time.Now() - newTags := []*gtsmodel.Tag{} - for _, t := range tags { - tag := &gtsmodel.Tag{} - // we can use selectorinsert here to create the new tag if it doesn't exist already - // inserted will be true if this is a new tag we just created - if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) 
= LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil { - if err == sql.ErrNoRows { - // tag doesn't exist yet so populate it - newID, err := id.NewRandomULID() - if err != nil { - return nil, err - } - tag.ID = newID - tag.URL = fmt.Sprintf("%s://%s/tags/%s", protocol, host, t) - tag.Name = t - tag.FirstSeenFromAccountID = originAccountID - tag.CreatedAt = time.Now() - tag.UpdatedAt = time.Now() - useable := true - tag.Useable = &useable - listable := true - tag.Listable = &listable - } else { - return nil, fmt.Errorf("error getting tag with name %s: %s", t, err) - } - } + tag := &gtsmodel.Tag{} + // we can use selectorinsert here to create the new tag if it doesn't exist already + // inserted will be true if this is a new tag we just created + if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil && err != sql.ErrNoRows { + return nil, fmt.Errorf("error getting tag with name %s: %s", t, err) + } - // bail already if the tag isn't useable - if !*tag.Useable { - continue + if tag.ID == "" { + // tag doesn't exist yet so populate it + newID, err := id.NewRandomULID() + if err != nil { + return nil, err } - tag.LastStatusAt = time.Now() - newTags = append(newTags, tag) + tag.ID = newID + tag.URL = protocol + "://" + host + "/tags/" + t + tag.Name = t + tag.FirstSeenFromAccountID = originAccountID + tag.CreatedAt = now + tag.UpdatedAt = now + useable := true + tag.Useable = &useable + listable := true + tag.Listable = &listable + } + + // bail already if the tag isn't useable + if !*tag.Useable { + return nil, fmt.Errorf("tag %s is not useable", t) } - return newTags, nil + tag.LastStatusAt = now + return tag, nil } diff --git a/internal/db/db.go b/internal/db/db.go @@ -52,12 +52,12 @@ type DB interface { USEFUL CONVERSION FUNCTIONS */ - // TagStringsToTags takes a slice of deduplicated, lowercase tags in the form "somehashtag", which have been + // TagStringToTag takes a lowercase tag in the form 
"somehashtag", which has been // used in a status. It takes the id of the account that wrote the status, and the id of the status itself, and then - // returns a slice of *apimodel.Tag corresponding to the given tags. If the tag already exists in database, that tag + // returns an *apimodel.Tag corresponding to the given tags. If the tag already exists in database, that tag // will be returned. Otherwise a pointer to a new tag struct will be created and returned. // - // Note: this func doesn't/shouldn't do any manipulation of the tags in the DB, it's just for checking + // Note: this func doesn't/shouldn't do any manipulation of tags in the DB, it's just for checking // if they exist in the db already, and conveniently returning them, or creating new tag structs. - TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error) + TagStringToTag(ctx context.Context, tag string, originAccountID string) (*gtsmodel.Tag, error) } diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go @@ -27,14 +27,12 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/ap" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" "github.com/superseriousbusiness/gotosocial/internal/config" - "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/messages" "github.com/superseriousbusiness/gotosocial/internal/text" - "github.com/superseriousbusiness/gotosocial/internal/util" "github.com/superseriousbusiness/gotosocial/internal/validate" ) @@ -47,14 +45,20 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form account.Bot = form.Bot } - var updateEmojis bool + 
account.Emojis = []*gtsmodel.Emoji{} + account.EmojiIDs = []string{} if form.DisplayName != nil { if err := validate.DisplayName(*form.DisplayName); err != nil { return nil, gtserror.NewErrorBadRequest(err) } account.DisplayName = text.SanitizePlaintext(*form.DisplayName) - updateEmojis = true + + formatResult := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, account.ID, "", account.DisplayName) + for _, emoji := range formatResult.Emojis { + account.Emojis = append(account.Emojis, emoji) + account.EmojiIDs = append(account.EmojiIDs, emoji.ID) + } } if form.Note != nil { @@ -66,36 +70,19 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form account.NoteRaw = *form.Note // Process note to generate a valid HTML representation - note, err := p.processNote(ctx, *form.Note, account) - if err != nil { - return nil, gtserror.NewErrorBadRequest(err) + var f text.FormatFunc + if account.StatusFormat == "markdown" { + f = p.formatter.FromMarkdown + } else { + f = p.formatter.FromPlain } + formatted := f(ctx, p.parseMention, account.ID, "", *form.Note) // Set updated HTML-ified note - account.Note = note - updateEmojis = true - } - - if updateEmojis { - // account emojis -- treat the sanitized display name and raw - // note like one long text for the purposes of deriving emojis - accountEmojiShortcodes := util.DeriveEmojisFromText(account.DisplayName + "\n\n" + account.NoteRaw) - account.Emojis = make([]*gtsmodel.Emoji, 0, len(accountEmojiShortcodes)) - account.EmojiIDs = make([]string, 0, len(accountEmojiShortcodes)) - - for _, shortcode := range accountEmojiShortcodes { - emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "") - if err != nil { - if err != db.ErrNoEntries { - log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) - } - continue - } - - if *emoji.VisibleInPicker && !*emoji.Disabled { - account.Emojis = append(account.Emojis, emoji) - account.EmojiIDs = append(account.EmojiIDs, emoji.ID) - } + 
account.Note = formatted.HTML + for _, emoji := range formatted.Emojis { + account.Emojis = append(account.Emojis, emoji) + account.EmojiIDs = append(account.EmojiIDs, emoji.ID) } } @@ -240,35 +227,3 @@ func (p *processor) UpdateHeader(ctx context.Context, header *multipart.FileHead return processingMedia.LoadAttachment(ctx) } - -func (p *processor) processNote(ctx context.Context, note string, account *gtsmodel.Account) (string, error) { - if note == "" { - return "", nil - } - - tagStrings := util.DeriveHashtagsFromText(note) - tags, err := p.db.TagStringsToTags(ctx, tagStrings, account.ID) - if err != nil { - return "", err - } - - mentionStrings := util.DeriveMentionNamesFromText(note) - mentions := []*gtsmodel.Mention{} - for _, mentionString := range mentionStrings { - mention, err := p.parseMention(ctx, mentionString, account.ID, "") - if err != nil { - continue - } - mentions = append(mentions, mention) - } - - // TODO: support emojis in account notes - // emojiStrings := util.DeriveEmojisFromText(note) - // emojis, err := p.db.EmojiStringsToEmojis(ctx, emojiStrings) - - if account.StatusFormat == "markdown" { - return p.formatter.FromMarkdown(ctx, note, mentions, tags, nil), nil - } - - return p.formatter.FromPlain(ctx, note, mentions, tags), nil -} diff --git a/internal/processing/account/update_test.go b/internal/processing/account/update_test.go @@ -76,8 +76,8 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateWithMention() { var ( locked = true displayName = "new display name" - note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!\n" - noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br/><br/>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" 
target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>" + note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!" + noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br><br>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>" ) form := &apimodel.UpdateCredentialsRequest{ diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go @@ -76,18 +76,6 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli return nil, gtserror.NewErrorInternalError(err) } - if err := p.ProcessMentions(ctx, form, account.ID, newStatus); err != nil { - return nil, gtserror.NewErrorInternalError(err) - } - - if err := p.ProcessTags(ctx, form, account.ID, newStatus); err != nil { - return nil, gtserror.NewErrorInternalError(err) - } - - if err := p.ProcessEmojis(ctx, form, account.ID, newStatus); err != nil { - return nil, gtserror.NewErrorInternalError(err) - } - if err := p.ProcessContent(ctx, form, account.ID, newStatus); err != nil { return nil, gtserror.NewErrorInternalError(err) } diff --git a/internal/processing/status/status.go b/internal/processing/status/status.go @@ -67,9 +67,6 @@ type Processor interface { ProcessReplyToID(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode ProcessMediaIDs(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode ProcessLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountDefaultLanguage string, status *gtsmodel.Status) error - ProcessMentions(ctx 
context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error - ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error - ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error } diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go @@ -28,8 +28,7 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/util" + "github.com/superseriousbusiness/gotosocial/internal/text" ) func (p *processor) ProcessVisibility(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountDefaultVis gtsmodel.Visibility, status *gtsmodel.Status) error { @@ -212,80 +211,6 @@ func (p *processor) ProcessLanguage(ctx context.Context, form *apimodel.Advanced return nil } -func (p *processor) ProcessMentions(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { - mentionedAccountNames := util.DeriveMentionNamesFromText(form.Status) - mentions := []*gtsmodel.Mention{} - mentionIDs := []string{} - - for _, mentionedAccountName := range mentionedAccountNames { - gtsMention, err := p.parseMention(ctx, mentionedAccountName, accountID, status.ID) - if err != nil { - log.Errorf("ProcessMentions: error parsing mention %s from status: %s", mentionedAccountName, err) - continue - } - - if err := p.db.Put(ctx, gtsMention); err != nil { - log.Errorf("ProcessMentions: error putting mention in db: %s", err) - } - - mentions = append(mentions, gtsMention) - 
mentionIDs = append(mentionIDs, gtsMention.ID) - } - - // add full populated gts menchies to the status for passing them around conveniently - status.Mentions = mentions - // add just the ids of the mentioned accounts to the status for putting in the db - status.MentionIDs = mentionIDs - - return nil -} - -func (p *processor) ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { - tags := []string{} - gtsTags, err := p.db.TagStringsToTags(ctx, util.DeriveHashtagsFromText(form.Status), accountID) - if err != nil { - return fmt.Errorf("error generating hashtags from status: %s", err) - } - for _, tag := range gtsTags { - if err := p.db.Put(ctx, tag); err != nil { - if !errors.Is(err, db.ErrAlreadyExists) { - return fmt.Errorf("error putting tags in db: %s", err) - } - } - tags = append(tags, tag.ID) - } - // add full populated gts tags to the status for passing them around conveniently - status.Tags = gtsTags - // add just the ids of the used tags to the status for putting in the db - status.TagIDs = tags - return nil -} - -func (p *processor) ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { - // for each emoji shortcode in the text, check if it's an enabled - // emoji on this instance, and if so, add it to the status - emojiShortcodes := util.DeriveEmojisFromText(form.SpoilerText + "\n\n" + form.Status) - status.Emojis = make([]*gtsmodel.Emoji, 0, len(emojiShortcodes)) - status.EmojiIDs = make([]string, 0, len(emojiShortcodes)) - - for _, shortcode := range emojiShortcodes { - emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "") - if err != nil { - if err != db.ErrNoEntries { - log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) - } - continue - } - - if *emoji.VisibleInPicker && !*emoji.Disabled { - status.Emojis = append(status.Emojis, emoji) - status.EmojiIDs = 
append(status.EmojiIDs, emoji.ID) - } - } - - return nil -} - func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { // if there's nothing in the status at all we can just return early if form.Status == "" { @@ -311,16 +236,43 @@ func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedS } // parse content out of the status depending on what format has been submitted - var formatted string + var f text.FormatFunc switch form.Format { case apimodel.StatusFormatPlain: - formatted = p.formatter.FromPlain(ctx, form.Status, status.Mentions, status.Tags) + f = p.formatter.FromPlain case apimodel.StatusFormatMarkdown: - formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags, status.Emojis) + f = p.formatter.FromMarkdown default: return fmt.Errorf("format %s not recognised as a valid status format", form.Format) } + formatted := f(ctx, p.parseMention, accountID, status.ID, form.Status) + + // add full populated gts {mentions, tags, emojis} to the status for passing them around conveniently + // add just their ids to the status for putting in the db + status.Mentions = formatted.Mentions + status.MentionIDs = make([]string, 0, len(formatted.Mentions)) + for _, gtsmention := range formatted.Mentions { + status.MentionIDs = append(status.MentionIDs, gtsmention.ID) + } + + status.Tags = formatted.Tags + status.TagIDs = make([]string, 0, len(formatted.Tags)) + for _, gtstag := range formatted.Tags { + status.TagIDs = append(status.TagIDs, gtstag.ID) + } + + status.Emojis = formatted.Emojis + status.EmojiIDs = make([]string, 0, len(formatted.Emojis)) + for _, gtsemoji := range formatted.Emojis { + status.EmojiIDs = append(status.EmojiIDs, gtsemoji.ID) + } + + spoilerformatted := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, accountID, status.ID, form.SpoilerText) + for _, gtsemoji := range spoilerformatted.Emojis { + status.Emojis 
= append(status.Emojis, gtsemoji) + status.EmojiIDs = append(status.EmojiIDs, gtsemoji.ID) + } - status.Content = formatted + status.Content = formatted.HTML return nil } diff --git a/internal/processing/status/util_test.go b/internal/processing/status/util_test.go @@ -29,22 +29,23 @@ import ( ) const ( - statusText1 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" - statusText1ExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>" - statusText1ExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>Text</p>" - statusText2 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG" - status2TextExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>" - status2TextExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" 
target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>#hashTAG</p>" + statusText1 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + statusText1Expected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>" + statusText2 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG" + status2TextExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>" ) type UtilTestSuite struct { StatusStandardTestSuite } -func (suite *UtilTestSuite) TestProcessMentions1() { +func (suite *UtilTestSuite) TestProcessContent1() { + /* + TEST PREPARATION + */ + // we need to partially process the status first since processContent expects a status with some stuff already set on it creatingAccount := suite.testAccounts["local_account_1"] mentionedAccount := suite.testAccounts["remote_account_1"] - form := &apimodel.AdvancedStatusCreateForm{ StatusCreateRequest: apimodel.StatusCreateRequest{ Status: statusText1, @@ -70,8 +71,13 @@ func (suite *UtilTestSuite) TestProcessMentions1() { ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", } - err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, 
status) + /* + ACTUAL TEST + */ + + err := suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) suite.NoError(err) + suite.Equal(statusText1Expected, status.Content) suite.Len(status.Mentions, 1) newMention := status.Mentions[0] @@ -88,63 +94,16 @@ func (suite *UtilTestSuite) TestProcessMentions1() { suite.Equal(newMention.ID, status.MentionIDs[0]) } -func (suite *UtilTestSuite) TestProcessContentFull1() { - /* - TEST PREPARATION - */ - // we need to partially process the status first since processContent expects a status with some stuff already set on it - creatingAccount := suite.testAccounts["local_account_1"] - form := &apimodel.AdvancedStatusCreateForm{ - StatusCreateRequest: apimodel.StatusCreateRequest{ - Status: statusText1, - MediaIDs: []string{}, - Poll: nil, - InReplyToID: "", - Sensitive: false, - SpoilerText: "", - Visibility: apimodel.VisibilityPublic, - ScheduledAt: "", - Language: "en", - Format: apimodel.StatusFormatPlain, - }, - AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ - Federated: nil, - Boostable: nil, - Replyable: nil, - Likeable: nil, - }, - } - - status := &gtsmodel.Status{ - ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", - } - - err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Empty(status.Content) // shouldn't be set yet - - err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Empty(status.Content) // shouldn't be set yet - - /* - ACTUAL TEST - */ - - err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Equal(statusText1ExpectedFull, status.Content) -} - -func (suite *UtilTestSuite) TestProcessContentPartial1() { +func (suite *UtilTestSuite) TestProcessContent2() { /* TEST PREPARATION */ // we need to partially process the status first since processContent expects a status with some stuff already set 
on it creatingAccount := suite.testAccounts["local_account_1"] + mentionedAccount := suite.testAccounts["remote_account_1"] form := &apimodel.AdvancedStatusCreateForm{ StatusCreateRequest: apimodel.StatusCreateRequest{ - Status: statusText1, + Status: statusText2, MediaIDs: []string{}, Poll: nil, InReplyToID: "", @@ -167,50 +126,14 @@ func (suite *UtilTestSuite) TestProcessContentPartial1() { ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", } - err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Empty(status.Content) // shouldn't be set yet - /* ACTUAL TEST */ - err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) + err := suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) suite.NoError(err) - suite.Equal(statusText1ExpectedPartial, status.Content) -} -func (suite *UtilTestSuite) TestProcessMentions2() { - creatingAccount := suite.testAccounts["local_account_1"] - mentionedAccount := suite.testAccounts["remote_account_1"] - - form := &apimodel.AdvancedStatusCreateForm{ - StatusCreateRequest: apimodel.StatusCreateRequest{ - Status: statusText2, - MediaIDs: []string{}, - Poll: nil, - InReplyToID: "", - Sensitive: false, - SpoilerText: "", - Visibility: apimodel.VisibilityPublic, - ScheduledAt: "", - Language: "en", - Format: apimodel.StatusFormatPlain, - }, - AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ - Federated: nil, - Boostable: nil, - Replyable: nil, - Likeable: nil, - }, - } - - status := &gtsmodel.Status{ - ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", - } - - err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) + suite.Equal(status2TextExpected, status.Content) suite.Len(status.Mentions, 1) newMention := status.Mentions[0] @@ -227,96 +150,6 @@ func (suite *UtilTestSuite) TestProcessMentions2() { suite.Equal(newMention.ID, status.MentionIDs[0]) } -func (suite 
*UtilTestSuite) TestProcessContentFull2() { - /* - TEST PREPARATION - */ - // we need to partially process the status first since processContent expects a status with some stuff already set on it - creatingAccount := suite.testAccounts["local_account_1"] - form := &apimodel.AdvancedStatusCreateForm{ - StatusCreateRequest: apimodel.StatusCreateRequest{ - Status: statusText2, - MediaIDs: []string{}, - Poll: nil, - InReplyToID: "", - Sensitive: false, - SpoilerText: "", - Visibility: apimodel.VisibilityPublic, - ScheduledAt: "", - Language: "en", - Format: apimodel.StatusFormatPlain, - }, - AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ - Federated: nil, - Boostable: nil, - Replyable: nil, - Likeable: nil, - }, - } - - status := &gtsmodel.Status{ - ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", - } - - err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Empty(status.Content) // shouldn't be set yet - - err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Empty(status.Content) // shouldn't be set yet - - /* - ACTUAL TEST - */ - - err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - - suite.Equal(status2TextExpectedFull, status.Content) -} - -func (suite *UtilTestSuite) TestProcessContentPartial2() { - /* - TEST PREPARATION - */ - // we need to partially process the status first since processContent expects a status with some stuff already set on it - creatingAccount := suite.testAccounts["local_account_1"] - form := &apimodel.AdvancedStatusCreateForm{ - StatusCreateRequest: apimodel.StatusCreateRequest{ - Status: statusText2, - MediaIDs: []string{}, - Poll: nil, - InReplyToID: "", - Sensitive: false, - SpoilerText: "", - Visibility: apimodel.VisibilityPublic, - ScheduledAt: "", - Language: "en", - Format: apimodel.StatusFormatPlain, - }, - AdvancedVisibilityFlagsForm: 
apimodel.AdvancedVisibilityFlagsForm{ - Federated: nil, - Boostable: nil, - Replyable: nil, - Likeable: nil, - }, - } - - status := &gtsmodel.Status{ - ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", - } - - err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - suite.Empty(status.Content) - - err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) - suite.NoError(err) - - suite.Equal(status2TextExpectedPartial, status.Content) -} - func TestUtilTestSuite(t *testing.T) { suite.Run(t, new(UtilTestSuite)) } diff --git a/internal/text/common.go b/internal/text/common.go @@ -1,112 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -package text - -import ( - "bytes" - "context" - "strings" - "unicode" - - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/regexes" - "github.com/superseriousbusiness/gotosocial/internal/util" -) - -func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { - spans := util.FindHashtagSpansInText(in) - - if len(spans) == 0 { - return in - } - - var b strings.Builder - i := 0 - -spans: - for _, t := range spans { - b.WriteString(in[i:t.First]) - i = t.Second - tagAsEntered := in[t.First+1 : t.Second] - - for _, tag := range tags { - if strings.EqualFold(tagAsEntered, tag.Name) { - // replace the #tag with the formatted tag content - // `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> - b.WriteString(`<a href="`) - b.WriteString(tag.URL) - b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) - b.WriteString(tagAsEntered) - b.WriteString(`</span></a>`) - continue spans - } - } - - b.WriteString(in[t.First:t.Second]) - } - - // Get the last bits. 
- i = spans[len(spans)-1].Second - b.WriteString(in[i:]) - - return b.String() -} - -func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string { - return regexes.ReplaceAllStringFunc(regexes.MentionFinder, in, func(match string, buf *bytes.Buffer) string { - // we have a match, trim any spaces - matchTrimmed := strings.TrimSpace(match) - - // check through mentions to find what we're matching - for _, menchie := range mentions { - if strings.EqualFold(matchTrimmed, menchie.NameString) { - // make sure we have an account attached to this mention - if menchie.TargetAccount == nil { - a, err := f.db.GetAccountByID(ctx, menchie.TargetAccountID) - if err != nil { - log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err) - return match - } - menchie.TargetAccount = a - } - - // The mention's target is our target - targetAccount := menchie.TargetAccount - - // Add any dropped space from match - if unicode.IsSpace(rune(match[0])) { - buf.WriteByte(match[0]) - } - - // replace the mention with the formatted mention content - // <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span> - buf.WriteString(`<span class="h-card"><a href="`) - buf.WriteString(targetAccount.URL) - buf.WriteString(`" class="u-url mention">@<span>`) - buf.WriteString(targetAccount.Username) - buf.WriteString(`</span></a></span>`) - return buf.String() - } - } - - // the match wasn't in the list of mentions for whatever reason, so just return the match as we found it so nothing changes - return match - }) -} diff --git a/internal/text/common_test.go b/internal/text/common_test.go @@ -1,106 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, 
either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -package text_test - -import ( - "context" - "testing" - "time" - - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -) - -const ( - replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" - replaceMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n#Hashtag\n\nText" - replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" - replaceHashtagsAfterMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" - replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" - replaceMentionsWithLinkStringExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" - replaceMentionsWithLinkSelfString = "Mentioning myself: 
@the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" - replaceMemtionsWithLinkSelfExpected = "Mentioning myself: <span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\">@<span>the_mighty_zork</span></a></span>\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" -) - -type CommonTestSuite struct { - TextStandardTestSuite -} - -func (suite *CommonTestSuite) TestReplaceMentions() { - foundMentions := []*gtsmodel.Mention{ - suite.testMentions["zork_mention_foss_satan"], - } - - f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsString, foundMentions) - suite.Equal(replaceMentionsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceHashtags() { - foundTags := []*gtsmodel.Tag{ - suite.testTags["Hashtag"], - } - - f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsString, foundTags) - - suite.Equal(replaceHashtagsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceHashtagsAfterReplaceMentions() { - foundTags := []*gtsmodel.Tag{ - suite.testTags["Hashtag"], - } - - f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsExpected, foundTags) - - suite.Equal(replaceHashtagsAfterMentionsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceMentionsWithLink() { - foundMentions := []*gtsmodel.Mention{ - suite.testMentions["zork_mention_foss_satan"], - } - - f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkString, foundMentions) - suite.Equal(replaceMentionsWithLinkStringExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceMentionsWithLinkSelf() { - mentioningAccount := suite.testAccounts["local_account_1"] - - foundMentions := []*gtsmodel.Mention{ - { - ID: "01FGXKN5F815DVFVD53PN9NYM6", - CreatedAt: time.Now(), - UpdatedAt: time.Now(), - StatusID: "01FGXKP0S5THQXFC1D9R141DDR", - 
OriginAccountID: mentioningAccount.ID, - TargetAccountID: mentioningAccount.ID, - NameString: "@the_mighty_zork", - TargetAccountURI: mentioningAccount.URI, - TargetAccountURL: mentioningAccount.URL, - }, - } - - f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkSelfString, foundMentions) - suite.Equal(replaceMemtionsWithLinkSelfExpected, f) -} - -func TestCommonTestSuite(t *testing.T) { - suite.Run(t, new(CommonTestSuite)) -} diff --git a/internal/text/emojionly.go b/internal/text/emojionly.go @@ -0,0 +1,71 @@ +/* + GoToSocial + Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package text + +import ( + "bytes" + "context" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" +) + +func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { + result := &FormatResult{ + Mentions: []*gtsmodel.Mention{}, + Tags: []*gtsmodel.Tag{}, + Emojis: []*gtsmodel.Emoji{}, + } + // parse markdown text into html, using custom renderer to add hashtag/mention links + md := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithHardWraps(), + ), + goldmark.WithParser( + parser.NewParser( + parser.WithBlockParsers( + util.Prioritized(newPlaintextParser(), 500), + ), + ), + ), + goldmark.WithExtensions( + &customRenderer{f, ctx, pmf, authorID, statusID, true, result}, + ), + ) + + var htmlContentBytes bytes.Buffer + err := md.Convert([]byte(plain), &htmlContentBytes) + if err != nil { + log.Errorf("error formatting plaintext to HTML: %s", err) + } + result.HTML = htmlContentBytes.String() + + // clean anything dangerous out of the HTML + result.HTML = SanitizeHTML(result.HTML) + + // shrink ray + result.HTML = minifyHTML(result.HTML) + + return result +} diff --git a/internal/text/formatter.go b/internal/text/formatter.go @@ -26,20 +26,19 @@ import ( ) // Formatter wraps some logic and functions for parsing statuses and other text input into nice html. +// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and +// emoji that were found in the text. type Formatter interface { // FromPlain parses an HTML text from a plaintext. 
- FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string + FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult // FromMarkdown parses an HTML text from a markdown-formatted text. - FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string - - // ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs. - ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string - // ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs. - ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string - // ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs. - ReplaceLinks(ctx context.Context, in string) string + FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult + // FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc. 
+ FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult } +type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult + type formatter struct { db db.DB } @@ -50,3 +49,10 @@ func NewFormatter(db db.DB) Formatter { db: db, } } + +type FormatResult struct { + HTML string + Mentions []*gtsmodel.Mention + Tags []*gtsmodel.Tag + Emojis []*gtsmodel.Emoji +} diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go @@ -19,9 +19,13 @@ package text_test import ( + "context" "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/concurrency" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/messages" + "github.com/superseriousbusiness/gotosocial/internal/processing" "github.com/superseriousbusiness/gotosocial/internal/text" "github.com/superseriousbusiness/gotosocial/testrig" ) @@ -29,7 +33,8 @@ import ( type TextStandardTestSuite struct { // standard suite interfaces suite.Suite - db db.DB + db db.DB + parseMention gtsmodel.ParseMentionFunc // standard suite models testTokens map[string]*gtsmodel.Token @@ -41,6 +46,7 @@ type TextStandardTestSuite struct { testStatuses map[string]*gtsmodel.Status testTags map[string]*gtsmodel.Tag testMentions map[string]*gtsmodel.Mention + testEmojis map[string]*gtsmodel.Emoji // module being tested formatter text.Formatter @@ -56,6 +62,7 @@ func (suite *TextStandardTestSuite) SetupSuite() { suite.testStatuses = testrig.NewTestStatuses() suite.testTags = testrig.NewTestTags() suite.testMentions = testrig.NewTestMentions() + suite.testEmojis = testrig.NewTestEmojis() } func (suite *TextStandardTestSuite) SetupTest() { @@ -63,6 +70,11 @@ func (suite *TextStandardTestSuite) SetupTest() { testrig.InitTestConfig() 
suite.db = testrig.NewTestDB() + + fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1) + federator := testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, fedWorker), nil, nil, fedWorker) + suite.parseMention = processing.GetParseMentionFunc(suite.db, federator) + suite.formatter = text.NewFormatter(suite.db) testrig.StandardDBSetup(suite.db, nil) @@ -71,3 +83,11 @@ func (suite *TextStandardTestSuite) SetupTest() { func (suite *TextStandardTestSuite) TearDownTest() { testrig.StandardDBTeardown(suite.db) } + +func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult { + return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) +} + +func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult { + return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) +} diff --git a/internal/text/goldmark_extension.go b/internal/text/goldmark_extension.go @@ -0,0 +1,312 @@ +/* + GoToSocial + Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package text + +import ( + "context" + "fmt" + "strings" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/util" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/text" + mdutil "github.com/yuin/goldmark/util" +) + +// A goldmark extension that parses potential mentions and hashtags separately from regular +// text, so that they stay as one contiguous text fragment in the AST, and then renders +// them separately too, to avoid scanning normal text for mentions and tags. + +// mention and hashtag fulfil the goldmark ast.Node interface. +type mention struct { + ast.BaseInline + Segment text.Segment +} + +type hashtag struct { + ast.BaseInline + Segment text.Segment +} + +type emoji struct { + ast.BaseInline + Segment text.Segment +} + +var kindMention = ast.NewNodeKind("Mention") +var kindHashtag = ast.NewNodeKind("Hashtag") +var kindEmoji = ast.NewNodeKind("Emoji") + +func (n *mention) Kind() ast.NodeKind { + return kindMention +} + +func (n *hashtag) Kind() ast.NodeKind { + return kindHashtag +} + +func (n *emoji) Kind() ast.NodeKind { + return kindEmoji +} + +// Dump can be used for debugging. 
+func (n *mention) Dump(source []byte, level int) { + fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) +} + +func (n *hashtag) Dump(source []byte, level int) { + fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) +} + +func (n *emoji) Dump(source []byte, level int) { + fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) +} + +// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment. +// The contained segment is used in rendering. +func newMention(s text.Segment) *mention { + return &mention{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +func newHashtag(s text.Segment) *hashtag { + return &hashtag{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +func newEmoji(s text.Segment) *emoji { + return &emoji{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface. 
+type mentionParser struct { +} + +type hashtagParser struct { +} + +type emojiParser struct { +} + +func (p *mentionParser) Trigger() []byte { + return []byte{'@'} +} + +func (p *hashtagParser) Trigger() []byte { + return []byte{'#'} +} + +func (p *emojiParser) Trigger() []byte { + return []byte{':'} +} + +func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + + if !util.IsMentionOrHashtagBoundary(before) { + return nil + } + + // unideal for performance but makes use of existing regex + loc := regexes.MentionFinder.FindIndex(line) + switch { + case loc == nil: + fallthrough + case loc[0] != 0: // fail if not found at start + return nil + default: + block.Advance(loc[1]) + return newMention(segment.WithStop(segment.Start + loc[1])) + } +} + +func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + s := string(line) + + if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 { + return nil + } + + for i, r := range s { + switch { + case r == '#' && i == 0: + // ignore initial # + continue + case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r): + // Fake hashtag, don't trust it + return nil + case util.IsMentionOrHashtagBoundary(r): + if i <= 1 { + // empty + return nil + } + // End of hashtag + block.Advance(i) + return newHashtag(segment.WithStop(segment.Start + i)) + } + } + // If we don't find invalid characters before the end of the line then it's all hashtag, babey + block.Advance(segment.Len()) + return newHashtag(segment) +} + +func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + line, segment := block.PeekLine() + + // unideal for performance but makes use of existing regex + loc := regexes.EmojiFinder.FindIndex(line) + switch { + case loc == nil: + 
fallthrough + case loc[0] != 0: // fail if not found at start + return nil + default: + block.Advance(loc[1]) + return newEmoji(segment.WithStop(segment.Start + loc[1])) + } +} + +// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces. +// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the +// fields are used to report tags and mentions to the caller for use as metadata. +type customRenderer struct { + f *formatter + ctx context.Context + parseMention gtsmodel.ParseMentionFunc + accountID string + statusID string + emojiOnly bool + result *FormatResult +} + +func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(kindMention, r.renderMention) + reg.Register(kindHashtag, r.renderHashtag) + reg.Register(kindEmoji, r.renderEmoji) +} + +func (r *customRenderer) Extend(m goldmark.Markdown) { + // 1000 is set as the lowest priority, but it's arbitrary + m.Parser().AddOptions(parser.WithInlineParsers( + mdutil.Prioritized(&emojiParser{}, 1000), + )) + if !r.emojiOnly { + m.Parser().AddOptions(parser.WithInlineParsers( + mdutil.Prioritized(&mentionParser{}, 1000), + mdutil.Prioritized(&hashtagParser{}, 1000), + )) + } + m.Renderer().AddOptions(renderer.WithNodeRenderers( + mdutil.Prioritized(r, 1000), + )) +} + +// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML. 
+func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkSkipChildren, nil + } + + n, ok := node.(*mention) // this function is only registered for kindMention + if !ok { + log.Errorf("type assertion failed") + } + text := string(n.Segment.Value(source)) + + html := r.replaceMention(text) + + // we don't have much recourse if this fails + if _, err := w.WriteString(html); err != nil { + log.Errorf("error writing HTML: %s", err) + } + return ast.WalkSkipChildren, nil +} + +func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkSkipChildren, nil + } + + n, ok := node.(*hashtag) // this function is only registered for kindHashtag + if !ok { + log.Errorf("type assertion failed") + } + text := string(n.Segment.Value(source)) + + html := r.replaceHashtag(text) + + _, err := w.WriteString(html) + // we don't have much recourse if this fails + if err != nil { + log.Errorf("error writing HTML: %s", err) + } + return ast.WalkSkipChildren, nil +} + +// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata. 
+func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkSkipChildren, nil + } + + n, ok := node.(*emoji) // this function is only registered for kindEmoji + if !ok { + log.Errorf("type assertion failed") + } + text := string(n.Segment.Value(source)) + shortcode := text[1 : len(text)-1] + + emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "") + if err != nil { + if err != db.ErrNoEntries { + log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) + } + } else if *emoji.VisibleInPicker && !*emoji.Disabled { + listed := false + for _, e := range r.result.Emojis { + if e.Shortcode == emoji.Shortcode { + listed = true + break + } + } + if !listed { + r.result.Emojis = append(r.result.Emojis, emoji) + } + } + + // we don't have much recourse if this fails + if _, err := w.WriteString(text); err != nil { + log.Errorf("error writing HTML: %s", err) + } + return ast.WalkSkipChildren, nil +} diff --git a/internal/text/goldmark_plaintext.go b/internal/text/goldmark_plaintext.go @@ -0,0 +1,64 @@ +/* + GoToSocial + Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package text + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" +) + +// plaintextParser implements goldmark.parser.BlockParser +type plaintextParser struct { +} + +var defaultPlaintextParser = &plaintextParser{} + +func newPlaintextParser() parser.BlockParser { + return defaultPlaintextParser +} + +func (b *plaintextParser) Trigger() []byte { + return nil +} + +func (b *plaintextParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) { + _, segment := reader.PeekLine() + node := ast.NewParagraph() + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return node, parser.NoChildren +} + +func (b *plaintextParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State { + _, segment := reader.PeekLine() + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return parser.Continue | parser.NoChildren +} + +func (b *plaintextParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {} + +func (b *plaintextParser) CanInterruptParagraph() bool { + return false +} + +func (b *plaintextParser) CanAcceptIndentedLine() bool { + return true +} diff --git a/internal/text/link.go b/internal/text/link.go @@ -1,86 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. 
If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( - "bytes" - "context" - "net/url" - "strings" - - "github.com/superseriousbusiness/gotosocial/internal/regexes" -) - -// FindLinks parses the given string looking for recognizable URLs (including scheme). -// It returns a list of those URLs, without changing the string, or an error if something goes wrong. -// If no URLs are found within the given string, an empty slice and nil will be returned. -func FindLinks(in string) []*url.URL { - var urls []*url.URL - - // bail already if we don't find anything - found := regexes.LinkScheme.FindAllString(in, -1) - if len(found) == 0 { - return nil - } - - urlmap := map[string]struct{}{} - - // for each string we find, we want to parse it into a URL if we can - // if we fail to parse it, just ignore this match and continue - for _, f := range found { - u, err := url.Parse(f) - if err != nil { - continue - } - - // Calculate string - ustr := u.String() - - if _, ok := urlmap[ustr]; !ok { - // Has not been encountered yet - urls = append(urls, u) - urlmap[ustr] = struct{}{} - } - } - - return urls -} - -// ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents. -// Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted -// href will end up double-formatted, if the text you pass here contains one or more hrefs already. -// To avoid this, you should sanitize any HTML out of text before you pass it into this function. 
-func (f *formatter) ReplaceLinks(ctx context.Context, in string) string { - return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string { - thisURL, err := url.Parse(urlString) - if err != nil { - return urlString // we can't parse it as a URL so don't replace it - } - // <a href="thisURL.String()" rel="noopener">urlString</a> - urlString = thisURL.String() - buf.WriteString(`<a href="`) - buf.WriteString(thisURL.String()) - buf.WriteString(`" rel="noopener">`) - urlString = strings.TrimPrefix(urlString, thisURL.Scheme) - urlString = strings.TrimPrefix(urlString, "://") - buf.WriteString(urlString) - buf.WriteString(`</a>`) - return buf.String() - }) -} diff --git a/internal/text/link_test.go b/internal/text/link_test.go @@ -1,157 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -package text_test - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/text" -) - -const text1 = ` -This is a text with some links in it. 
Here's link number one: https://example.org/link/to/something#fragment - -Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh - -https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it - -really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme - -https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK -` - -const text2 = ` -this is one link: https://example.org - -this is the same link again: https://example.org - -these should be deduplicated -` - -const text3 = ` -here's a mailto link: mailto:whatever@test.org -` - -const text4 = ` -two similar links: - -https://example.org - -https://example.org/test -` - -const text5 = ` -what happens when we already have a link within an href? - -<a href="https://example.org">https://example.org</a> -` - -type LinkTestSuite struct { - TextStandardTestSuite -} - -func (suite *LinkTestSuite) TestParseSimple() { - f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - suite.Equal(simpleExpected, f) -} - -func (suite *LinkTestSuite) TestParseURLsFromText1() { - urls := text.FindLinks(text1) - - suite.Equal("https://example.org/link/to/something#fragment", urls[0].String()) - suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) - suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) - suite.Equal("https://example.orghttps://google.com", urls[3].String()) -} - -func (suite *LinkTestSuite) TestParseURLsFromText2() { - urls := text.FindLinks(text2) - - // assert length 1 because the found links will be deduplicated - assert.Len(suite.T(), urls, 1) -} - -func (suite *LinkTestSuite) TestParseURLsFromText3() { - urls := text.FindLinks(text3) - - // assert length 0 because `mailto:` isn't accepted - assert.Len(suite.T(), urls, 0) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText1() { - replaced := 
suite.formatter.ReplaceLinks(context.Background(), text1) - suite.Equal(` -This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a> - -Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a> - -<a href="https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it" rel="noopener">another.link.example.org/with/a/pretty/long/path/at/the/end/of/it</a> - -really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme - -<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps://google.com</a> <-- this shouldn't work either, but it does?! OK -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText2() { - replaced := suite.formatter.ReplaceLinks(context.Background(), text2) - suite.Equal(` -this is one link: <a href="https://example.org" rel="noopener">example.org</a> - -this is the same link again: <a href="https://example.org" rel="noopener">example.org</a> - -these should be deduplicated -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText3() { - // we know mailto links won't be replaced with hrefs -- we only accept https and http - replaced := suite.formatter.ReplaceLinks(context.Background(), text3) - suite.Equal(` -here's a mailto link: mailto:whatever@test.org -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText4() { - replaced := suite.formatter.ReplaceLinks(context.Background(), text4) - suite.Equal(` -two similar links: - -<a href="https://example.org" rel="noopener">example.org</a> - -<a href="https://example.org/test" rel="noopener">example.org/test</a> -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText5() { - // we know this one doesn't work properly, which is why html should always be sanitized before being passed into 
the ReplaceLinks function - replaced := suite.formatter.ReplaceLinks(context.Background(), text5) - suite.Equal(` -what happens when we already have a link within an href? - -<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a> -`, replaced) -} - -func TestLinkTestSuite(t *testing.T) { - suite.Run(t, new(LinkTestSuite)) -} diff --git a/internal/text/markdown.go b/internal/text/markdown.go @@ -21,32 +21,19 @@ package text import ( "bytes" "context" - "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/tdewolff/minify/v2" - minifyHtml "github.com/tdewolff/minify/v2/html" "github.com/yuin/goldmark" "github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/renderer/html" ) -var ( - m *minify.M -) - -func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string { - - // Temporarily replace all found emoji shortcodes in the markdown text with - // their ID so that they're not parsed as anything by the markdown parser - - // this fixes cases where emojis with some underscores in them are parsed as - // words with emphasis, eg `:_some_emoji:` becomes `:<em>some</em>emoji:` - // - // Since the IDs of the emojis are just uppercase letters + numbers they should - // be safe to pass through the markdown parser without unexpected effects. 
- for _, e := range emojis { - markdownText = strings.ReplaceAll(markdownText, ":"+e.Shortcode+":", ":"+e.ID+":") +func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult { + result := &FormatResult{ + Mentions: []*gtsmodel.Mention{}, + Tags: []*gtsmodel.Tag{}, + Emojis: []*gtsmodel.Emoji{}, } // parse markdown text into html, using custom renderer to add hashtag/mention links @@ -57,7 +44,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti html.WithUnsafe(), // allows raw HTML ), goldmark.WithExtensions( - &customRenderer{f, ctx, mentions, tags}, + &customRenderer{f, ctx, pmf, authorID, statusID, false, result}, extension.Linkify, // turns URLs into links extension.Strikethrough, ), @@ -66,30 +53,15 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti var htmlContentBytes bytes.Buffer err := md.Convert([]byte(markdownText), &htmlContentBytes) if err != nil { - log.Errorf("error rendering markdown to HTML: %s", err) - } - htmlContent := htmlContentBytes.String() - - // Replace emoji IDs in the parsed html content with their shortcodes again - for _, e := range emojis { - htmlContent = strings.ReplaceAll(htmlContent, ":"+e.ID+":", ":"+e.Shortcode+":") + log.Errorf("error formatting markdown to HTML: %s", err) } + result.HTML = htmlContentBytes.String() - // clean anything dangerous out of the html - htmlContent = SanitizeHTML(htmlContent) + // clean anything dangerous out of the HTML + result.HTML = SanitizeHTML(result.HTML) - if m == nil { - m = minify.New() - m.Add("text/html", &minifyHtml.Minifier{ - KeepEndTags: true, - KeepQuotes: true, - }) - } - - minified, err := m.String("text/html", htmlContent) - if err != nil { - log.Errorf("error minifying markdown text: %s", err) - } + // shrink ray + result.HTML = minifyHTML(result.HTML) - return minified + return result } diff --git 
a/internal/text/markdown_test.go b/internal/text/markdown_test.go @@ -19,11 +19,9 @@ package text_test import ( - "context" "testing" "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) var withCodeBlock = `# Title @@ -77,6 +75,16 @@ const ( mdWithStrikethroughExpected = "<p>I have <del>mdae</del> made an error</p>" mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial" mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>" + mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" + mdObjectInCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you&#39;ve been writing lately! 
:rainbow:\n</code></pre><p>hope that helps</p>" + mdItalicHashtag = "_#hashtag_" + mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" + mdItalicHashtags = "_#hashtag #hashtag #hashtag_" + mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" + // BEWARE: sneaky unicode business going on. + // the first ö is one rune, the second ö is an o with a combining diacritic. + mdUnnormalizedHashtag = "#hellöthere #hellöthere" + mdUnnormalizedHashtagExpected = "<p><a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a> <a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a></p>" ) type MarkdownTestSuite struct { @@ -84,101 +92,112 @@ type MarkdownTestSuite struct { } func (suite *MarkdownTestSuite) TestParseSimple() { - s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil, nil) - suite.Equal(simpleMarkdownExpected, s) + formatted := suite.FromMarkdown(simpleMarkdown) + suite.Equal(simpleMarkdownExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { - s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil, nil) - suite.Equal(withCodeBlockExpected, s) + formatted := 
suite.FromMarkdown(withCodeBlock) + suite.Equal(withCodeBlockExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithInlineCode() { - s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil, nil) - suite.Equal(withInlineCodeExpected, s) + formatted := suite.FromMarkdown(withInlineCode) + suite.Equal(withInlineCodeExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithInlineCode2() { - s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil, nil) - suite.Equal(withInlineCode2Expected, s) + formatted := suite.FromMarkdown(withInlineCode2) + suite.Equal(withInlineCode2Expected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithHashtag() { - foundTags := []*gtsmodel.Tag{ - suite.testTags["Hashtag"], - } - - s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags, nil) - suite.Equal(withHashtagExpected, s) + formatted := suite.FromMarkdown(withHashtag) + suite.Equal(withHashtagExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithHTML() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil, nil) - suite.Equal(mdWithHTMLExpected, s) + formatted := suite.FromMarkdown(mdWithHTML) + suite.Equal(mdWithHTMLExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil, nil) - suite.Equal(mdWithCheekyHTMLExpected, s) + formatted := suite.FromMarkdown(mdWithCheekyHTML) + suite.Equal(mdWithCheekyHTMLExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithHashtagInitial() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithHashtagInitial, nil, []*gtsmodel.Tag{ - suite.testTags["Hashtag"], - suite.testTags["welcome"], - }, nil) - suite.Equal(mdWithHashtagInitialExpected, s) + formatted := suite.FromMarkdown(mdWithHashtagInitial) + suite.Equal(mdWithHashtagInitialExpected, 
formatted.HTML) } func (suite *MarkdownTestSuite) TestParseCodeBlockWithNewlines() { - s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil, nil) - suite.Equal(mdCodeBlockWithNewlinesExpected, s) + formatted := suite.FromMarkdown(mdCodeBlockWithNewlines) + suite.Equal(mdCodeBlockWithNewlinesExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithFootnote() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil, nil) - suite.Equal(mdWithFootnoteExpected, s) + formatted := suite.FromMarkdown(mdWithFootnote) + suite.Equal(mdWithFootnoteExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseWithBlockquote() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil, nil) - suite.Equal(mdWithBlockQuoteExpected, s) + formatted := suite.FromMarkdown(mdWithBlockQuote) + suite.Equal(mdWithBlockQuoteExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseHashtagWithCodeBlock() { - s := suite.formatter.FromMarkdown(context.Background(), mdHashtagAndCodeBlock, nil, []*gtsmodel.Tag{ - suite.testTags["Hashtag"], - }, nil) - suite.Equal(mdHashtagAndCodeBlockExpected, s) + formatted := suite.FromMarkdown(mdHashtagAndCodeBlock) + suite.Equal(mdHashtagAndCodeBlockExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() { - s := suite.formatter.FromMarkdown(context.Background(), mdMentionAndCodeBlock, []*gtsmodel.Mention{ - suite.testMentions["local_user_2_mention_zork"], - }, nil, nil) - suite.Equal(mdMentionAndCodeBlockExpected, s) + formatted := suite.FromMarkdown(mdMentionAndCodeBlock) + suite.Equal(mdMentionAndCodeBlockExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseSmartypants() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithSmartypants, []*gtsmodel.Mention{ - suite.testMentions["local_user_2_mention_zork"], - }, nil, nil) - suite.Equal(mdWithSmartypantsExpected, s) + 
formatted := suite.FromMarkdown(mdWithSmartypants) + suite.Equal(mdWithSmartypantsExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseAsciiHeart() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithAsciiHeart, nil, nil, nil) - suite.Equal(mdWithAsciiHeartExpected, s) + formatted := suite.FromMarkdown(mdWithAsciiHeart) + suite.Equal(mdWithAsciiHeartExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseStrikethrough() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithStrikethrough, nil, nil, nil) - suite.Equal(mdWithStrikethroughExpected, s) + formatted := suite.FromMarkdown(mdWithStrikethrough) + suite.Equal(mdWithStrikethroughExpected, formatted.HTML) } func (suite *MarkdownTestSuite) TestParseLink() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithLink, nil, nil, nil) - suite.Equal(mdWithLinkExpected, s) + formatted := suite.FromMarkdown(mdWithLink) + suite.Equal(mdWithLinkExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseObjectInCodeBlock() { + formatted := suite.FromMarkdown(mdObjectInCodeBlock) + suite.Equal(mdObjectInCodeBlockExpected, formatted.HTML) + suite.Len(formatted.Mentions, 1) + suite.Equal("@foss_satan@fossbros-anonymous.io", formatted.Mentions[0].NameString) + suite.Empty(formatted.Tags) + suite.Empty(formatted.Emojis) +} + +func (suite *MarkdownTestSuite) TestParseItalicHashtag() { + formatted := suite.FromMarkdown(mdItalicHashtag) + suite.Equal(mdItalicHashtagExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseItalicHashtags() { + formatted := suite.FromMarkdown(mdItalicHashtags) + suite.Equal(mdItalicHashtagsExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() { + formatted := suite.FromMarkdown(mdUnnormalizedHashtag) + suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML) } func TestMarkdownTestSuite(t *testing.T) { diff --git a/internal/text/markdownextension.go 
b/internal/text/markdownextension.go @@ -1,215 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( - "context" - "unicode" - - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/regexes" - "github.com/superseriousbusiness/gotosocial/internal/util" - "github.com/yuin/goldmark" - "github.com/yuin/goldmark/ast" - "github.com/yuin/goldmark/parser" - "github.com/yuin/goldmark/renderer" - "github.com/yuin/goldmark/text" - mdutil "github.com/yuin/goldmark/util" -) - -// A goldmark extension that parses potential mentions and hashtags separately from regular -// text, so that they stay as one contiguous text fragment in the AST, and then renders -// them separately too, to avoid scanning normal text for mentions and tags. - -// mention and hashtag fulfil the goldmark ast.Node interface. 
-type mention struct { - ast.BaseInline - Segment text.Segment -} - -type hashtag struct { - ast.BaseInline - Segment text.Segment -} - -var kindMention = ast.NewNodeKind("Mention") -var kindHashtag = ast.NewNodeKind("Hashtag") - -func (n *mention) Kind() ast.NodeKind { - return kindMention -} - -func (n *hashtag) Kind() ast.NodeKind { - return kindHashtag -} - -// Dump is used by goldmark for debugging. It is implemented only minimally because -// it is not used in our code. -func (n *mention) Dump(source []byte, level int) { - ast.DumpHelper(n, source, level, nil, nil) -} - -func (n *hashtag) Dump(source []byte, level int) { - ast.DumpHelper(n, source, level, nil, nil) -} - -// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment. -// The contained segment is used in rendering. -func newMention(s text.Segment) *mention { - return &mention{ - BaseInline: ast.BaseInline{}, - Segment: s, - } -} - -func newHashtag(s text.Segment) *hashtag { - return &hashtag{ - BaseInline: ast.BaseInline{}, - Segment: s, - } -} - -// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface. 
-type mentionParser struct { -} - -type hashtagParser struct { -} - -func (p *mentionParser) Trigger() []byte { - return []byte{'@'} -} - -func (p *hashtagParser) Trigger() []byte { - return []byte{'#'} -} - -func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { - before := block.PrecendingCharacter() - line, segment := block.PeekLine() - - if !unicode.IsSpace(before) { - return nil - } - - // unideal for performance but makes use of existing regex - loc := regexes.MentionFinder.FindIndex(line) - switch { - case loc == nil: - fallthrough - case loc[0] != 0: // fail if not found at start - return nil - default: - block.Advance(loc[1]) - return newMention(segment.WithStop(segment.Start + loc[1])) - } -} - -func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { - before := block.PrecendingCharacter() - line, segment := block.PeekLine() - s := string(line) - - if !util.IsHashtagBoundary(before) { - return nil - } - - for i, r := range s { - switch { - case r == '#' && i == 0: - continue - case !util.IsPermittedInHashtag(r) && !util.IsHashtagBoundary(r): - // Fake hashtag, don't trust it - return nil - case util.IsHashtagBoundary(r): - // End of hashtag - block.Advance(i) - return newHashtag(segment.WithStop(segment.Start + i)) - } - } - // If we don't find invalid characters before the end of the line then it's good - block.Advance(len(s)) - return newHashtag(segment) -} - -// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces. -// It is created in FromMarkdown to be used a goldmark extension, and the fields are used -// when rendering mentions and tags. 
-type customRenderer struct { - f *formatter - ctx context.Context - mentions []*gtsmodel.Mention - tags []*gtsmodel.Tag -} - -func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { - reg.Register(kindMention, r.renderMention) - reg.Register(kindHashtag, r.renderHashtag) -} - -func (r *customRenderer) Extend(m goldmark.Markdown) { - m.Parser().AddOptions(parser.WithInlineParsers( - // 500 is pretty arbitrary here, it was copied from example goldmark extension code. - // https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111 - mdutil.Prioritized(&mentionParser{}, 500), - mdutil.Prioritized(&hashtagParser{}, 500), - )) - m.Renderer().AddOptions(renderer.WithNodeRenderers( - mdutil.Prioritized(r, 500), - )) -} - -// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML. -func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { - if !entering { - return ast.WalkContinue, nil - } - - n, ok := node.(*mention) // this function is only registered for kindMention - if !ok { - log.Errorf("type assertion failed") - } - text := string(n.Segment.Value(source)) - - html := r.f.ReplaceMentions(r.ctx, text, r.mentions) - - // we don't have much recourse if this fails - if _, err := w.WriteString(html); err != nil { - log.Errorf("error outputting markdown text: %s", err) - } - return ast.WalkContinue, nil -} - -func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { - if !entering { - return ast.WalkContinue, nil - } - - n, ok := node.(*hashtag) // this function is only registered for kindHashtag - if !ok { - log.Errorf("type assertion failed") - } - text := string(n.Segment.Value(source)) - - html := r.f.ReplaceTags(r.ctx, text, r.tags) - - // we don't have much recourse if this fails - if _, err := 
w.WriteString(html); err != nil { - log.Errorf("error outputting markdown text: %s", err) - } - return ast.WalkContinue, nil -} diff --git a/internal/text/minify.go b/internal/text/minify.go @@ -0,0 +1,45 @@ +/* + GoToSocial + Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/html" +) + +var ( + m *minify.M +) + +func minifyHTML(content string) string { + if m == nil { + m = minify.New() + m.Add("text/html", &html.Minifier{ + KeepEndTags: true, + KeepQuotes: true, + }) + } + + minified, err := m.String("text/html", content) + if err != nil { + log.Errorf("error minifying HTML: %s", err) + } + return minified +} diff --git a/internal/text/plain.go b/internal/text/plain.go @@ -19,40 +19,56 @@ package text import ( + "bytes" "context" - "html" - "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" ) -// breakReplacer replaces new-lines with HTML breaks. 
-var breakReplacer = strings.NewReplacer( - "\r\n", "<br/>", - "\n", "<br/>", -) - -func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { - // trim any crap - content := strings.TrimSpace(plain) - - // clean 'er up - content = html.EscapeString(content) - - // format links nicely - content = f.ReplaceLinks(ctx, content) +func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { + result := &FormatResult{ + Mentions: []*gtsmodel.Mention{}, + Tags: []*gtsmodel.Tag{}, + Emojis: []*gtsmodel.Emoji{}, + } - // format tags nicely - content = f.ReplaceTags(ctx, content, tags) + // parse markdown text into html, using custom renderer to add hashtag/mention links + md := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithHardWraps(), + ), + goldmark.WithParser( + parser.NewParser( + parser.WithBlockParsers( + util.Prioritized(newPlaintextParser(), 500), + ), + ), + ), + goldmark.WithExtensions( + &customRenderer{f, ctx, pmf, authorID, statusID, false, result}, + extension.Linkify, // turns URLs into links + ), + ) - // format mentions nicely - content = f.ReplaceMentions(ctx, content, mentions) + var htmlContentBytes bytes.Buffer + err := md.Convert([]byte(plain), &htmlContentBytes) + if err != nil { + log.Errorf("error formatting plaintext to HTML: %s", err) + } + result.HTML = htmlContentBytes.String() - // replace newlines with breaks - content = breakReplacer.Replace(content) + // clean anything dangerous out of the HTML + result.HTML = SanitizeHTML(result.HTML) - // wrap the whole thing in a pee - content = `<p>` + content + `</p>` + // shrink ray + result.HTML = minifyHTML(result.HTML) - return SanitizeHTML(content) + return result } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go @@ -19,22 +19,21 @@ package text_test import ( - "context" "testing" + 
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) const ( - simple = "this is a plain and simple status" - simpleExpected = "<p>this is a plain and simple status</p>" - withTag = "here's a simple status that uses hashtag #welcome!" - withTagExpected = "<p>here&#39;s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" - withHTML = "<div>blah this should just be html escaped blah</div>" - withHTMLExpected = "<p>&lt;div&gt;blah this should just be html escaped blah&lt;/div&gt;</p>" - moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" - moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>" + simple = "this is a plain and simple status" + simpleExpected = "<p>this is a plain and simple status</p>" + withTag = "here's a simple status that uses hashtag #welcome!" 
+ withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" + withHTML = "<div>blah this should just be html escaped blah</div>" + withHTMLExpected = "<p>&lt;div>blah this should just be html escaped blah&lt;/div></p>" + moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:" + moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>" ) type PlainTestSuite struct { @@ -42,35 +41,105 @@ type PlainTestSuite struct { } func (suite *PlainTestSuite) TestParseSimple() { - f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - suite.Equal(simpleExpected, f) + formatted := suite.FromPlain(simple) + suite.Equal(simpleExpected, formatted.HTML) } func (suite *PlainTestSuite) TestParseWithTag() { - foundTags := []*gtsmodel.Tag{ - suite.testTags["welcome"], - } - - f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags) - suite.Equal(withTagExpected, f) + formatted := suite.FromPlain(withTag) + suite.Equal(withTagExpected, formatted.HTML) } func (suite *PlainTestSuite) TestParseWithHTML() { - f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil) - suite.Equal(withHTMLExpected, f) + formatted := suite.FromPlain(withHTML) + suite.Equal(withHTMLExpected, formatted.HTML) } func (suite *PlainTestSuite) TestParseMoreComplex() { - foundTags := []*gtsmodel.Tag{ - suite.testTags["Hashtag"], - } + formatted := suite.FromPlain(moreComplex) + 
suite.Equal(moreComplexExpected, formatted.HTML) +} + +func (suite *PlainTestSuite) TestLinkNoMention() { + statusText := `here's a link to a post by zork + +https://example.com/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1 + +that link shouldn't come out formatted as a mention!` + + menchies := suite.FromPlain(statusText).Mentions + suite.Empty(menchies) +} + +func (suite *PlainTestSuite) TestDeriveMentionsEmpty() { + statusText := `` + menchies := suite.FromPlain(statusText).Mentions + assert.Len(suite.T(), menchies, 0) +} + +func (suite *PlainTestSuite) TestDeriveHashtagsOK() { + statusText := `weeeeeeee #testing123 #also testing + +# testing this one shouldn't work + + #thisshouldwork #dupe #dupe!! #dupe + + here's a link with a fragment: https://example.org/whatever#ahhh + here's another link with a fragment: https://example.org/whatever/#ahhh - foundMentions := []*gtsmodel.Mention{ - suite.testMentions["zork_mention_foss_satan"], - } +(#ThisShouldAlsoWork) #this_should_be_split + +#111111 thisalsoshouldn'twork#### ## + +#alimentación, #saúde, #lävistää, #ö, #네 +#ThisOneIsThirtyOneCharactersLon... 
...ng +#ThisOneIsThirteyCharactersLong +` + + tags := suite.FromPlain(statusText).Tags + assert.Len(suite.T(), tags, 13) + assert.Equal(suite.T(), "testing123", tags[0].Name) + assert.Equal(suite.T(), "also", tags[1].Name) + assert.Equal(suite.T(), "thisshouldwork", tags[2].Name) + assert.Equal(suite.T(), "dupe", tags[3].Name) + assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name) + assert.Equal(suite.T(), "this", tags[5].Name) + assert.Equal(suite.T(), "111111", tags[6].Name) + assert.Equal(suite.T(), "alimentación", tags[7].Name) + assert.Equal(suite.T(), "saúde", tags[8].Name) + assert.Equal(suite.T(), "lävistää", tags[9].Name) + assert.Equal(suite.T(), "ö", tags[10].Name) + assert.Equal(suite.T(), "네", tags[11].Name) + assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name) + + statusText = `#올빼미 hej` + tags = suite.FromPlain(statusText).Tags + assert.Equal(suite.T(), "올빼미", tags[0].Name) +} + +func (suite *PlainTestSuite) TestDeriveMultiple() { + statusText := `Another test @foss_satan@fossbros-anonymous.io + + #Hashtag + + Text` + + f := suite.FromPlain(statusText) + + assert.Len(suite.T(), f.Mentions, 1) + assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString) + + assert.Len(suite.T(), f.Tags, 1) + assert.Equal(suite.T(), "Hashtag", f.Tags[0].Name) + + assert.Len(suite.T(), f.Emojis, 0) +} - f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags) - suite.Equal(moreComplexFull, f) +func (suite *PlainTestSuite) TestZalgoHashtag() { + statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?` + f := suite.FromPlain(statusText) + assert.Len(suite.T(), f.Tags, 1) + assert.Equal(suite.T(), "praying", f.Tags[0].Name) } func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/replace.go b/internal/text/replace.go @@ -0,0 +1,141 @@ +/* + GoToSocial + Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can 
redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( + "errors" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/util" + "golang.org/x/text/unicode/norm" + "strings" +) + +const ( + maximumHashtagLength = 30 +) + +// given a mention or a hashtag string, the methods in this file will attempt to parse it, +// add it to the database, and render it as HTML. If any of these steps fails, the method +// will just return the original string and log an error. 
+ +// replaceMention takes a string in the form @username@domain.com or @localusername +func (r *customRenderer) replaceMention(text string) string { + menchie, err := r.parseMention(r.ctx, text, r.accountID, r.statusID) + if err != nil { + log.Errorf("error parsing mention %s from status: %s", text, err) + return text + } + + if r.statusID != "" { + if err := r.f.db.Put(r.ctx, menchie); err != nil { + log.Errorf("error putting mention in db: %s", err) + return text + } + } + + // only append if it's not been listed yet + listed := false + for _, m := range r.result.Mentions { + if menchie.ID == m.ID { + listed = true + break + } + } + if !listed { + r.result.Mentions = append(r.result.Mentions, menchie) + } + + // make sure we have an account attached to this mention + if menchie.TargetAccount == nil { + a, err := r.f.db.GetAccountByID(r.ctx, menchie.TargetAccountID) + if err != nil { + log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err) + return text + } + menchie.TargetAccount = a + } + + // The mention's target is our target + targetAccount := menchie.TargetAccount + + var b strings.Builder + + // replace the mention with the formatted mention content + // <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span> + b.WriteString(`<span class="h-card"><a href="`) + b.WriteString(targetAccount.URL) + b.WriteString(`" class="u-url mention">@<span>`) + b.WriteString(targetAccount.Username) + b.WriteString(`</span></a></span>`) + return b.String() +} + +// replaceMention takes a string in the form #HashedTag, and will normalize it before +// adding it to the db and turning it into HTML. +func (r *customRenderer) replaceHashtag(text string) string { + // this normalization is specifically to avoid cases where visually-identical + // hashtags are stored with different unicode representations (e.g. with combining + // diacritics). 
It allows a tasteful number of combining diacritics to be used, + // as long as they can be combined with parent characters to form regular letter + // symbols. + normalized := norm.NFC.String(text[1:]) + + for i, r := range normalized { + if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { + return text + } + } + + tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID) + if err != nil { + log.Errorf("error generating hashtags from status: %s", err) + return text + } + + // only append if it's not been listed yet + listed := false + for _, t := range r.result.Tags { + if tag.ID == t.ID { + listed = true + break + } + } + if !listed { + err = r.f.db.Put(r.ctx, tag) + if err != nil { + if !errors.Is(err, db.ErrAlreadyExists) { + log.Errorf("error putting tags in db: %s", err) + return text + } + } + r.result.Tags = append(r.result.Tags, tag) + } + + var b strings.Builder + // replace the #tag with the formatted tag content + // `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> + b.WriteString(`<a href="`) + b.WriteString(tag.URL) + b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) + b.WriteString(normalized) + b.WriteString(`</span></a>`) + + return b.String() +} diff --git a/internal/util/statustools.go b/internal/util/statustools.go @@ -20,115 +20,19 @@ package util import ( "unicode" - "unicode/utf8" - - "github.com/superseriousbusiness/gotosocial/internal/regexes" -) - -const ( - maximumHashtagLength = 30 ) -// DeriveMentionNamesFromText takes a plaintext (ie., not html-formatted) text, -// and applies a regex to it to return a deduplicated list of account names -// mentioned in that text, in the format "@user@example.org" or "@username" for -// local users. 
-func DeriveMentionNamesFromText(text string) []string { - mentionedAccounts := []string{} - for _, m := range regexes.MentionFinder.FindAllStringSubmatch(text, -1) { - mentionedAccounts = append(mentionedAccounts, m[1]) - } - return UniqueStrings(mentionedAccounts) -} - -type Pair[A, B any] struct { - First A - Second B -} - -// Byte index in original string -// `First` includes `#`. -type Span = Pair[int, int] - -// Takes a plaintext (ie., not HTML-formatted) text, -// and returns a slice of unique hashtags. -func DeriveHashtagsFromText(text string) []string { - tagsMap := make(map[string]bool) - tags := []string{} - - for _, v := range FindHashtagSpansInText(text) { - t := text[v.First+1 : v.Second] - if _, value := tagsMap[t]; !value { - tagsMap[t] = true - tags = append(tags, t) - } - } - - return tags -} - -// Takes a plaintext (ie., not HTML-formatted) text, -// and returns a list of pairs of indices into the original string, where -// hashtags are located. -func FindHashtagSpansInText(text string) []Span { - tags := []Span{} - start := 0 - // Keep one rune of lookbehind. - prev := ' ' - inTag := false - - for i, r := range text { - if r == '#' && IsHashtagBoundary(prev) { - // Start of hashtag. - inTag = true - start = i - } else if inTag && !IsPermittedInHashtag(r) && !IsHashtagBoundary(r) { - // Inside the hashtag, but it was a phoney, gottem. - inTag = false - } else if inTag && IsHashtagBoundary(r) { - // End of hashtag. - inTag = false - appendTag(&tags, text, start, i) - } else if irl := i + utf8.RuneLen(r); inTag && irl == len(text) { - // End of text. - appendTag(&tags, text, start, irl) - } - - prev = r - } - - return tags -} - -func appendTag(tags *[]Span, text string, start int, end int) { - l := end - start - 1 - // This check could be moved out into the parsing loop if necessary! 
// IsPlausiblyInHashtag reports whether r may appear in a hashtag while it is
// still being parsed, i.e. prior to normalization.
//
// Marks are accepted here in addition to letters and numbers, since they may
// be combined into letters during normalization; they are not accepted after.
func IsPlausiblyInHashtag(r rune) bool {
	if unicode.IsMark(r) {
		return true
	}
	return IsPermittedInHashtag(r)
}

// IsPermittedInHashtag reports whether r is a Unicode letter or number.
func IsPermittedInHashtag(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsNumber(r):
		return true
	default:
		return false
	}
}
// IsMentionOrHashtagBoundary decides where to break before or after a
// #hashtag or @mention: it reports whether r is Unicode whitespace or
// Unicode punctuation.
func IsMentionOrHashtagBoundary(r rune) bool {
	if unicode.IsPunct(r) {
		return true
	}
	return unicode.IsSpace(r)
}
- - here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt - - @account1@whatever.com @account2@whatever.com - - ` - - menchies := util.DeriveMentionNamesFromText(statusText) - assert.Len(suite.T(), menchies, 6) - assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) - assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) - assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2]) - assert.Equal(suite.T(), "@thisisalocaluser", menchies[3]) - assert.Equal(suite.T(), "@account1@whatever.com", menchies[4]) - assert.Equal(suite.T(), "@account2@whatever.com", menchies[5]) -} - -func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { - statusText := `` - menchies := util.DeriveMentionNamesFromText(statusText) - assert.Len(suite.T(), menchies, 0) -} - -func (suite *StatusTestSuite) TestDeriveHashtagsOK() { - statusText := `weeeeeeee #testing123 #also testing - -# testing this one shouldn't work - - #thisshouldwork #dupe #dupe!! #dupe - - here's a link with a fragment: https://example.org/whatever#ahhh - here's another link with a fragment: https://example.org/whatever/#ahhh - -(#ThisShouldAlsoWork) #not_this_though - -#111111 thisalsoshouldn'twork#### ## - -#alimentación, #saúde, #lävistää, #ö, #네 -#ThisOneIsThirtyOneCharactersLon... 
...ng -#ThisOneIsThirteyCharactersLong -` - - tags := util.DeriveHashtagsFromText(statusText) - assert.Len(suite.T(), tags, 12) - assert.Equal(suite.T(), "testing123", tags[0]) - assert.Equal(suite.T(), "also", tags[1]) - assert.Equal(suite.T(), "thisshouldwork", tags[2]) - assert.Equal(suite.T(), "dupe", tags[3]) - assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4]) - assert.Equal(suite.T(), "111111", tags[5]) - assert.Equal(suite.T(), "alimentación", tags[6]) - assert.Equal(suite.T(), "saúde", tags[7]) - assert.Equal(suite.T(), "lävistää", tags[8]) - assert.Equal(suite.T(), "ö", tags[9]) - assert.Equal(suite.T(), "네", tags[10]) - assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[11]) - - statusText = `#올빼미 hej` - tags = util.DeriveHashtagsFromText(statusText) - assert.Equal(suite.T(), "올빼미", tags[0]) -} - -func (suite *StatusTestSuite) TestHashtagSpansOK() { - statusText := `#0 #3 #8aa` - - spans := util.FindHashtagSpansInText(statusText) - assert.Equal(suite.T(), 0, spans[0].First) - assert.Equal(suite.T(), 2, spans[0].Second) - assert.Equal(suite.T(), 3, spans[1].First) - assert.Equal(suite.T(), 5, spans[1].Second) - assert.Equal(suite.T(), 8, spans[2].First) - assert.Equal(suite.T(), 12, spans[2].Second) -} - -func (suite *StatusTestSuite) TestDeriveEmojiOK() { - statusText := `:test: :another: - -Here's some normal text with an :emoji: at the end - -:spaces shouldnt work: - -:emoji1::emoji2: - -:anotheremoji:emoji2: -:anotheremoji::anotheremoji::anotheremoji::anotheremoji: -:underscores_ok_too: -` - - tags := util.DeriveEmojisFromText(statusText) - assert.Len(suite.T(), tags, 7) - assert.Equal(suite.T(), "test", tags[0]) - assert.Equal(suite.T(), "another", tags[1]) - assert.Equal(suite.T(), "emoji", tags[2]) - assert.Equal(suite.T(), "emoji1", tags[3]) - assert.Equal(suite.T(), "emoji2", tags[4]) - assert.Equal(suite.T(), "anotheremoji", tags[5]) - assert.Equal(suite.T(), "underscores_ok_too", tags[6]) -} - -func (suite *StatusTestSuite) 
TestDeriveMultiple() { - statusText := `Another test @foss_satan@fossbros-anonymous.io - - #HashTag - - Text` - - ms := util.DeriveMentionNamesFromText(statusText) - hs := util.DeriveHashtagsFromText(statusText) - es := util.DeriveEmojisFromText(statusText) - - assert.Len(suite.T(), ms, 1) - assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", ms[0]) - - assert.Len(suite.T(), hs, 1) - assert.Contains(suite.T(), hs, "HashTag") - - assert.Len(suite.T(), es, 0) -} - -func TestStatusTestSuite(t *testing.T) { - suite.Run(t, new(StatusTestSuite)) -}