regexes.go (8541B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package regexes 19 20 import ( 21 "bytes" 22 "regexp" 23 "sync" 24 25 "mvdan.cc/xurls/v2" 26 ) 27 28 const ( 29 users = "users" 30 actors = "actors" 31 statuses = "statuses" 32 inbox = "inbox" 33 outbox = "outbox" 34 followers = "followers" 35 following = "following" 36 liked = "liked" 37 publicKey = "main-key" 38 follow = "follow" 39 blocks = "blocks" 40 reports = "reports" 41 42 schemes = `(http|https)://` // Allowed URI protocols for parsing links in text. 43 alphaNumeric = `\p{L}\p{M}*|\p{N}` // A single number or script character in any language, including chars with accents. 44 usernameGrp = `(?:` + alphaNumeric + `|\.|\-|\_)` // Non-capturing group that matches against a single valid username character. 45 domainGrp = `(?:` + alphaNumeric + `|\.|\-|\:)` // Non-capturing group that matches against a single valid domain character. 46 mentionName = `^@(` + usernameGrp + `+)(?:@(` + domainGrp + `+))?$` // Extract parts of one mention, maybe including domain. 47 mentionFinder = `(?:^|\s)(@` + usernameGrp + `+(?:@` + domainGrp + `+)?)` // Extract all mentions from a text, each mention may include domain. 48 emojiShortcode = `\w{2,30}` // Pattern for emoji shortcodes. maximumEmojiShortcodeLength = 30 49 emojiFinder = `(?:\b)?:(` + emojiShortcode + `):(?:\b)?` // Extract all emoji shortcodes from a text. 50 usernameStrict = `^[a-z0-9_]{1,64}$` // Pattern for usernames on THIS instance. maximumUsernameLength = 64 51 usernameRelaxed = `[a-z0-9_\.]{1,}` // Relaxed version of username that can match instance accounts too. 52 misskeyReportNotesFinder = `(?m)(?:^Note: ((?:http|https):\/\/.*)$)` // Extract reported Note URIs from the text of a Misskey report/flag. 53 ulid = `[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}` // Pattern for ULID. 54 ulidValidate = `^` + ulid + `$` // Validate one ULID. 55 56 /* 57 Path parts / capture. 58 */ 59 60 userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)` 61 userPath = userPathPrefix + `$` 62 publicKeyPath = userPathPrefix + `/` + publicKey + `$` 63 inboxPath = userPathPrefix + `/` + inbox + `$` 64 outboxPath = userPathPrefix + `/` + outbox + `$` 65 followersPath = userPathPrefix + `/` + followers + `$` 66 followingPath = userPathPrefix + `/` + following + `$` 67 likedPath = userPathPrefix + `/` + liked + `$` 68 followPath = userPathPrefix + `/` + follow + `/(` + ulid + `)$` 69 likePath = userPathPrefix + `/` + liked + `/(` + ulid + `)$` 70 statusesPath = userPathPrefix + `/` + statuses + `/(` + ulid + `)$` 71 blockPath = userPathPrefix + `/` + blocks + `/(` + ulid + `)$` 72 reportPath = `^/?` + reports + `/(` + ulid + `)$` 73 filePath = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z]+)$` 74 ) 75 76 var ( 77 // LinkScheme captures http/https schemes in URLs. 78 LinkScheme = func() *regexp.Regexp { 79 rgx, err := xurls.StrictMatchingScheme(schemes) 80 if err != nil { 81 panic(err) 82 } 83 return rgx 84 }() 85 86 // MentionName captures the username and domain part from 87 // a mention string such as @whatever_user@example.org, 88 // returning whatever_user and example.org (without the @ symbols). 89 // Will also work for characters with umlauts and other accents. 90 // See: https://regex101.com/r/9tjNUy/1 for explanation and examples. 91 MentionName = regexp.MustCompile(mentionName) 92 93 // MentionFinder extracts whole mentions from a piece of text. 94 MentionFinder = regexp.MustCompile(mentionFinder) 95 96 // EmojiShortcode validates an emoji name. 97 EmojiShortcode = regexp.MustCompile(emojiShortcode) 98 99 // EmojiFinder extracts emoji strings from a piece of text. 100 // See: https://regex101.com/r/478XGM/1 101 EmojiFinder = regexp.MustCompile(emojiFinder) 102 103 // Username can be used to validate usernames of new signups on this instance. 104 Username = regexp.MustCompile(usernameStrict) 105 106 // MisskeyReportNotes captures a list of Note URIs from report content created by Misskey. 107 // See: https://regex101.com/r/EnTOBV/1 108 MisskeyReportNotes = regexp.MustCompile(misskeyReportNotesFinder) 109 110 // UserPath validates and captures the username part from eg /users/example_username. 111 UserPath = regexp.MustCompile(userPath) 112 113 // PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key 114 PublicKeyPath = regexp.MustCompile(publicKeyPath) 115 116 // InboxPath parses a path that validates and captures the username part from eg /users/example_username/inbox 117 InboxPath = regexp.MustCompile(inboxPath) 118 119 // OutboxPath parses a path that validates and captures the username part from eg /users/example_username/outbox 120 OutboxPath = regexp.MustCompile(outboxPath) 121 122 // FollowersPath parses a path that validates and captures the username part from eg /users/example_username/followers 123 FollowersPath = regexp.MustCompile(followersPath) 124 125 // FollowingPath parses a path that validates and captures the username part from eg /users/example_username/following 126 FollowingPath = regexp.MustCompile(followingPath) 127 128 // LikedPath parses a path that validates and captures the username part from eg /users/example_username/liked 129 LikedPath = regexp.MustCompile(likedPath) 130 131 // ULID parses and validate a ULID. 132 ULID = regexp.MustCompile(ulidValidate) 133 134 // FollowPath parses a path that validates and captures the username part and the ulid part 135 // from eg /users/example_username/follow/01F7XT5JZW1WMVSW1KADS8PVDH 136 FollowPath = regexp.MustCompile(followPath) 137 138 // LikePath parses a path that validates and captures the username part and the ulid part 139 // from eg /users/example_username/liked/01F7XT5JZW1WMVSW1KADS8PVDH 140 LikePath = regexp.MustCompile(likePath) 141 142 // StatusesPath parses a path that validates and captures the username part and the ulid part 143 // from eg /users/example_username/statuses/01F7XT5JZW1WMVSW1KADS8PVDH 144 // The regex can be played with here: https://regex101.com/r/G9zuxQ/1 145 StatusesPath = regexp.MustCompile(statusesPath) 146 147 // BlockPath parses a path that validates and captures the username part and the ulid part 148 // from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH 149 BlockPath = regexp.MustCompile(blockPath) 150 151 // ReportPath parses a path that validates and captures the ulid part 152 // from eg /reports/01GP3AWY4CRDVRNZKW0TEAMB5R 153 ReportPath = regexp.MustCompile(reportPath) 154 155 // FilePath parses a file storage path of the form [ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME] 156 // eg 01F8MH1H7YV1Z7D2C8K2730QBF/attachment/small/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpeg 157 // It captures the account id, media type, media size, file name, and file extension, eg 158 // `01F8MH1H7YV1Z7D2C8K2730QBF`, `attachment`, `small`, `01F8MH8RMYQ6MSNY3JM2XT1CQ5`, `jpeg`. 159 FilePath = regexp.MustCompile(filePath) 160 ) 161 162 // bufpool is a memory pool of byte buffers for use in our regex utility functions. 163 var bufpool = sync.Pool{ 164 New: func() any { 165 buf := bytes.NewBuffer(make([]byte, 0, 512)) 166 return buf 167 }, 168 } 169 170 // ReplaceAllStringFunc will call through to .ReplaceAllStringFunc in the provided regex, but provide you a clean byte buffer for optimized string writes. 171 func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string { 172 buf := bufpool.Get().(*bytes.Buffer) //nolint 173 defer bufpool.Put(buf) 174 return rgx.ReplaceAllStringFunc(src, func(match string) string { 175 buf.Reset() // reset use 176 return repl(match, buf) 177 }) 178 }