gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

sanitize.go (3025B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package text
     19 
     20 import (
     21 	"html"
     22 	"regexp"
     23 	"strings"
     24 
     25 	"github.com/microcosm-cc/bluemonday"
     26 )
     27 
     28 // '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content.
     29 // Note that this policy does not allow iframes, object, embed, styles, script, etc.
     30 // An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.'
     31 //
     32 // Source: https://github.com/microcosm-cc/bluemonday#usage
     33 var regular *bluemonday.Policy = bluemonday.UGCPolicy().
     34 	RequireNoReferrerOnLinks(true).
     35 	RequireNoFollowOnLinks(false).              // remove the global default which adds rel="nofollow" to all links including local relative
     36 	RequireNoFollowOnFullyQualifiedLinks(true). // add rel="nofollow" on all external links
     37 	RequireCrossOriginAnonymous(true).
     38 	AddTargetBlankToFullyQualifiedLinks(true).
     39 	AllowAttrs("class", "href", "rel").OnElements("a").
     40 	AllowAttrs("class").OnElements("span").
     41 	AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code").
     42 	SkipElementsContent("code", "pre")
     43 
     44 // '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
     45 // An example usage scenario would be blog post titles where HTML tags are not expected at all
     46 // and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.'
     47 //
     48 // Source: https://github.com/microcosm-cc/bluemonday#usage
     49 var strict *bluemonday.Policy = bluemonday.StrictPolicy()
     50 
     51 // removeHTML strictly removes *all* recognized HTML elements from the given string.
     52 func removeHTML(in string) string {
     53 	return strict.Sanitize(in)
     54 }
     55 
     56 // SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through.
     57 func SanitizeHTML(in string) string {
     58 	return regular.Sanitize(in)
     59 }
     60 
     61 // SanitizePlaintext runs text through basic sanitization. This removes
     62 // any html elements that were in the string, and returns clean plaintext.
     63 func SanitizePlaintext(in string) string {
     64 	content := html.UnescapeString(in)
     65 	content = removeHTML(content)
     66 	content = html.UnescapeString(content)
     67 	return strings.TrimSpace(content)
     68 }