gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

regexes.go (8541B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package regexes
     19 
     20 import (
     21 	"bytes"
     22 	"regexp"
     23 	"sync"
     24 
     25 	"mvdan.cc/xurls/v2"
     26 )
     27 
     28 const (
     29 	users     = "users"
     30 	actors    = "actors"
     31 	statuses  = "statuses"
     32 	inbox     = "inbox"
     33 	outbox    = "outbox"
     34 	followers = "followers"
     35 	following = "following"
     36 	liked     = "liked"
     37 	publicKey = "main-key"
     38 	follow    = "follow"
     39 	blocks    = "blocks"
     40 	reports   = "reports"
     41 
     42 	schemes                  = `(http|https)://`                                         // Allowed URI protocols for parsing links in text.
     43 	alphaNumeric             = `\p{L}\p{M}*|\p{N}`                                       // A single number or script character in any language, including chars with accents.
     44 	usernameGrp              = `(?:` + alphaNumeric + `|\.|\-|\_)`                       // Non-capturing group that matches against a single valid username character.
     45 	domainGrp                = `(?:` + alphaNumeric + `|\.|\-|\:)`                       // Non-capturing group that matches against a single valid domain character.
     46 	mentionName              = `^@(` + usernameGrp + `+)(?:@(` + domainGrp + `+))?$`     // Extract parts of one mention, maybe including domain.
     47 	mentionFinder            = `(?:^|\s)(@` + usernameGrp + `+(?:@` + domainGrp + `+)?)` // Extract all mentions from a text, each mention may include domain.
     48 	emojiShortcode           = `\w{2,30}`                                                // Pattern for emoji shortcodes. maximumEmojiShortcodeLength = 30
     49 	emojiFinder              = `(?:\b)?:(` + emojiShortcode + `):(?:\b)?`                // Extract all emoji shortcodes from a text.
     50 	usernameStrict           = `^[a-z0-9_]{1,64}$`                                       // Pattern for usernames on THIS instance. maximumUsernameLength = 64
     51 	usernameRelaxed          = `[a-z0-9_\.]{1,}`                                         // Relaxed version of username that can match instance accounts too.
     52 	misskeyReportNotesFinder = `(?m)(?:^Note: ((?:http|https):\/\/.*)$)`                 // Extract reported Note URIs from the text of a Misskey report/flag.
     53 	ulid                     = `[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}`                  // Pattern for ULID.
     54 	ulidValidate             = `^` + ulid + `$`                                          // Validate one ULID.
     55 
     56 	/*
     57 		Path parts / capture.
     58 	*/
     59 
     60 	userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)`
     61 	userPath       = userPathPrefix + `$`
     62 	publicKeyPath  = userPathPrefix + `/` + publicKey + `$`
     63 	inboxPath      = userPathPrefix + `/` + inbox + `$`
     64 	outboxPath     = userPathPrefix + `/` + outbox + `$`
     65 	followersPath  = userPathPrefix + `/` + followers + `$`
     66 	followingPath  = userPathPrefix + `/` + following + `$`
     67 	likedPath      = userPathPrefix + `/` + liked + `$`
     68 	followPath     = userPathPrefix + `/` + follow + `/(` + ulid + `)$`
     69 	likePath       = userPathPrefix + `/` + liked + `/(` + ulid + `)$`
     70 	statusesPath   = userPathPrefix + `/` + statuses + `/(` + ulid + `)$`
     71 	blockPath      = userPathPrefix + `/` + blocks + `/(` + ulid + `)$`
     72 	reportPath     = `^/?` + reports + `/(` + ulid + `)$`
     73 	filePath       = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z]+)$`
     74 )
     75 
     76 var (
     77 	// LinkScheme captures http/https schemes in URLs.
     78 	LinkScheme = func() *regexp.Regexp {
     79 		rgx, err := xurls.StrictMatchingScheme(schemes)
     80 		if err != nil {
     81 			panic(err)
     82 		}
     83 		return rgx
     84 	}()
     85 
     86 	// MentionName captures the username and domain part from
     87 	// a mention string such as @whatever_user@example.org,
     88 	// returning whatever_user and example.org (without the @ symbols).
     89 	// Will also work for characters with umlauts and other accents.
     90 	// See: https://regex101.com/r/9tjNUy/1 for explanation and examples.
     91 	MentionName = regexp.MustCompile(mentionName)
     92 
     93 	// MentionFinder extracts whole mentions from a piece of text.
     94 	MentionFinder = regexp.MustCompile(mentionFinder)
     95 
     96 	// EmojiShortcode validates an emoji name.
     97 	EmojiShortcode = regexp.MustCompile(emojiShortcode)
     98 
     99 	// EmojiFinder extracts emoji strings from a piece of text.
    100 	// See: https://regex101.com/r/478XGM/1
    101 	EmojiFinder = regexp.MustCompile(emojiFinder)
    102 
    103 	// Username can be used to validate usernames of new signups on this instance.
    104 	Username = regexp.MustCompile(usernameStrict)
    105 
    106 	// MisskeyReportNotes captures a list of Note URIs from report content created by Misskey.
    107 	// See: https://regex101.com/r/EnTOBV/1
    108 	MisskeyReportNotes = regexp.MustCompile(misskeyReportNotesFinder)
    109 
    110 	// UserPath validates and captures the username part from eg /users/example_username.
    111 	UserPath = regexp.MustCompile(userPath)
    112 
    113 	// PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key
    114 	PublicKeyPath = regexp.MustCompile(publicKeyPath)
    115 
    116 	// InboxPath parses a path that validates and captures the username part from eg /users/example_username/inbox
    117 	InboxPath = regexp.MustCompile(inboxPath)
    118 
    119 	// OutboxPath parses a path that validates and captures the username part from eg /users/example_username/outbox
    120 	OutboxPath = regexp.MustCompile(outboxPath)
    121 
    122 	// FollowersPath parses a path that validates and captures the username part from eg /users/example_username/followers
    123 	FollowersPath = regexp.MustCompile(followersPath)
    124 
    125 	// FollowingPath parses a path that validates and captures the username part from eg /users/example_username/following
    126 	FollowingPath = regexp.MustCompile(followingPath)
    127 
    128 	// LikedPath parses a path that validates and captures the username part from eg /users/example_username/liked
    129 	LikedPath = regexp.MustCompile(likedPath)
    130 
    131 	// ULID parses and validate a ULID.
    132 	ULID = regexp.MustCompile(ulidValidate)
    133 
    134 	// FollowPath parses a path that validates and captures the username part and the ulid part
    135 	// from eg /users/example_username/follow/01F7XT5JZW1WMVSW1KADS8PVDH
    136 	FollowPath = regexp.MustCompile(followPath)
    137 
    138 	// LikePath parses a path that validates and captures the username part and the ulid part
    139 	// from eg /users/example_username/liked/01F7XT5JZW1WMVSW1KADS8PVDH
    140 	LikePath = regexp.MustCompile(likePath)
    141 
    142 	// StatusesPath parses a path that validates and captures the username part and the ulid part
    143 	// from eg /users/example_username/statuses/01F7XT5JZW1WMVSW1KADS8PVDH
    144 	// The regex can be played with here: https://regex101.com/r/G9zuxQ/1
    145 	StatusesPath = regexp.MustCompile(statusesPath)
    146 
    147 	// BlockPath parses a path that validates and captures the username part and the ulid part
    148 	// from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH
    149 	BlockPath = regexp.MustCompile(blockPath)
    150 
    151 	// ReportPath parses a path that validates and captures the ulid part
    152 	// from eg /reports/01GP3AWY4CRDVRNZKW0TEAMB5R
    153 	ReportPath = regexp.MustCompile(reportPath)
    154 
    155 	// FilePath parses a file storage path of the form [ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]
    156 	// eg 01F8MH1H7YV1Z7D2C8K2730QBF/attachment/small/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpeg
    157 	// It captures the account id, media type, media size, file name, and file extension, eg
    158 	// `01F8MH1H7YV1Z7D2C8K2730QBF`, `attachment`, `small`, `01F8MH8RMYQ6MSNY3JM2XT1CQ5`, `jpeg`.
    159 	FilePath = regexp.MustCompile(filePath)
    160 )
    161 
    162 // bufpool is a memory pool of byte buffers for use in our regex utility functions.
    163 var bufpool = sync.Pool{
    164 	New: func() any {
    165 		buf := bytes.NewBuffer(make([]byte, 0, 512))
    166 		return buf
    167 	},
    168 }
    169 
    170 // ReplaceAllStringFunc will call through to .ReplaceAllStringFunc in the provided regex, but provide you a clean byte buffer for optimized string writes.
    171 func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string {
    172 	buf := bufpool.Get().(*bytes.Buffer) //nolint
    173 	defer bufpool.Put(buf)
    174 	return rgx.ReplaceAllStringFunc(src, func(match string) string {
    175 		buf.Reset() // reset use
    176 		return repl(match, buf)
    177 	})
    178 }