sanitize.go (3025B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package text 19 20 import ( 21 "html" 22 "regexp" 23 "strings" 24 25 "github.com/microcosm-cc/bluemonday" 26 ) 27 28 // '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content. 29 // Note that this policy does not allow iframes, object, embed, styles, script, etc. 30 // An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.' 31 // 32 // Source: https://github.com/microcosm-cc/bluemonday#usage 33 var regular *bluemonday.Policy = bluemonday.UGCPolicy(). 34 RequireNoReferrerOnLinks(true). 35 RequireNoFollowOnLinks(false). // remove the global default which adds rel="nofollow" to all links including local relative 36 RequireNoFollowOnFullyQualifiedLinks(true). // add rel="nofollow" on all external links 37 RequireCrossOriginAnonymous(true). 38 AddTargetBlankToFullyQualifiedLinks(true). 39 AllowAttrs("class", "href", "rel").OnElements("a"). 40 AllowAttrs("class").OnElements("span"). 41 AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code"). 42 SkipElementsContent("code", "pre") 43 44 // '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. 45 // An example usage scenario would be blog post titles where HTML tags are not expected at all 46 // and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.' 47 // 48 // Source: https://github.com/microcosm-cc/bluemonday#usage 49 var strict *bluemonday.Policy = bluemonday.StrictPolicy() 50 51 // removeHTML strictly removes *all* recognized HTML elements from the given string. 52 func removeHTML(in string) string { 53 return strict.Sanitize(in) 54 } 55 56 // SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through. 57 func SanitizeHTML(in string) string { 58 return regular.Sanitize(in) 59 } 60 61 // SanitizePlaintext runs text through basic sanitization. This removes 62 // any html elements that were in the string, and returns clean plaintext. 63 func SanitizePlaintext(in string) string { 64 content := html.UnescapeString(in) 65 content = removeHTML(content) 66 content = html.UnescapeString(content) 67 return strings.TrimSpace(content) 68 }