commit 664713ddd4f7236fde0759cf7a0e04a434417876
parent 9e7d022a06779a03e3eaaadad6cc33423f46892b
Author: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Sun, 3 Jul 2022 11:03:03 +0200
[bugfix] Make hashtag regex work with non-ASCII characters (#682)
Diffstat:
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go
@@ -71,8 +71,8 @@ var (
// MentionFinder extracts mentions from a piece of text.
MentionFinder = regexp.MustCompile(mentionFinder)
- // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1
- hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength)
+ // hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1
+ hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength)
// HashtagFinder finds possible hashtags in a string.
// It returns just the string part of the hashtag, not the # symbol.
HashtagFinder = regexp.MustCompile(hashtagFinder)
diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go
@@ -83,15 +83,20 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
#ThisShouldAlsoWork #not_this_though
-#111111 thisalsoshouldn'twork#### ##`
+#111111 thisalsoshouldn'twork#### ##
+
+#alimentación, #saúde
+`
tags := util.DeriveHashtagsFromText(statusText)
- assert.Len(suite.T(), tags, 5)
+ assert.Len(suite.T(), tags, 7)
assert.Equal(suite.T(), "testing123", tags[0])
assert.Equal(suite.T(), "also", tags[1])
assert.Equal(suite.T(), "thisshouldwork", tags[2])
assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[3])
assert.Equal(suite.T(), "111111", tags[4])
+ assert.Equal(suite.T(), "alimentación", tags[5])
+ assert.Equal(suite.T(), "saúde", tags[6])
}
func (suite *StatusTestSuite) TestDeriveEmojiOK() {