gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

helpers.go (11443B)


      1 // Copyright (c) 2014, David Kitchen <david@buro9.com>
      2 //
      3 // All rights reserved.
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are met:
      7 //
      8 // * Redistributions of source code must retain the above copyright notice, this
      9 //   list of conditions and the following disclaimer.
     10 //
     11 // * Redistributions in binary form must reproduce the above copyright notice,
     12 //   this list of conditions and the following disclaimer in the documentation
     13 //   and/or other materials provided with the distribution.
     14 //
     15 // * Neither the name of the organisation (Microcosm) nor the names of its
     16 //   contributors may be used to endorse or promote products derived from
     17 //   this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
     23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 package bluemonday
     31 
     32 import (
     33 	"encoding/base64"
     34 	"net/url"
     35 	"regexp"
     36 )
     37 
     38 // A selection of regular expressions that can be used as .Matching() rules on
     39 // HTML attributes.
     40 var (
     41 	// CellAlign handles the `align` attribute
     42 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
     43 	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)
     44 
     45 	// CellVerticalAlign handles the `valign` attribute
     46 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
     47 	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)
     48 
     49 	// Direction handles the `dir` attribute
     50 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
     51 	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)
     52 
     53 	// ImageAlign handles the `align` attribute on the `image` tag
     54 	// http://www.w3.org/MarkUp/Test/Img/imgtest.html
     55 	ImageAlign = regexp.MustCompile(
     56 		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
     57 	)
     58 
     59 	// Integer describes whole positive integers (including 0) used in places
     60 	// like td.colspan
     61 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
     62 	Integer = regexp.MustCompile(`^[0-9]+$`)
     63 
     64 	// ISO8601 according to the W3 group is only a subset of the ISO8601
     65 	// standard: http://www.w3.org/TR/NOTE-datetime
     66 	//
     67 	// Used in places like time.datetime
     68 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
     69 	//
     70 	// Matches patterns:
     71 	//  Year:
     72 	//     YYYY (eg 1997)
     73 	//  Year and month:
     74 	//     YYYY-MM (eg 1997-07)
     75 	//  Complete date:
     76 	//     YYYY-MM-DD (eg 1997-07-16)
     77 	//  Complete date plus hours and minutes:
     78 	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
     79 	//  Complete date plus hours, minutes and seconds:
     80 	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
     81 	//  Complete date plus hours, minutes, seconds and a decimal fraction of a
     82 	//  second
     83 	//      YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
     84 	ISO8601 = regexp.MustCompile(
     85 		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
     86 			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
     87 	)
     88 
     89 	// ListType encapsulates the common value as well as the latest spec
     90 	// values for lists
     91 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
     92 	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)
     93 
     94 	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
     95 	// `class` which both contain space delimited lists of data tokens
     96 	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
     97 	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
     98 	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)
     99 
    100 	// Number is a double value used on HTML5 meter and progress elements
    101 	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
    102 	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)
    103 
    104 	// NumberOrPercent is used predominantly as units of measurement in width
    105 	// and height attributes
    106 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
    107 	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)
    108 
    109 	// Paragraph of text in an attribute such as *.'title', img.alt, etc
    110 	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
    111 	// Note that we are not allowing chars that could close tags like '>'
    112 	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)
    113 
    114 	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
    115 	// prefix of data URIs that contain common web image formats.
    116 	//
    117 	// This is not exported as it's not useful by itself, and only has value
    118 	// within the AllowDataURIImages func
    119 	dataURIImagePrefix = regexp.MustCompile(
    120 		`^image/(gif|jpeg|png|svg\+xml|webp);base64,`,
    121 	)
    122 )
    123 
    124 // AllowStandardURLs is a convenience function that will enable rel="nofollow"
    125 // on "a", "area" and "link" (if you have allowed those elements) and will
    126 // ensure that the URL values are parseable and either relative or belong to the
    127 // "mailto", "http", or "https" schemes
    128 func (p *Policy) AllowStandardURLs() {
    129 	// URLs must be parseable by net/url.Parse()
    130 	p.RequireParseableURLs(true)
    131 
    132 	// !url.IsAbs() is permitted
    133 	p.AllowRelativeURLs(true)
    134 
    135 	// Most common URL schemes only
    136 	p.AllowURLSchemes("mailto", "http", "https")
    137 
    138 	// For linking elements we will add rel="nofollow" if it does not already exist
    139 	// This applies to "a" "area" "link"
    140 	p.RequireNoFollowOnLinks(true)
    141 }
    142 
    143 // AllowStandardAttributes will enable "id", "title" and the language specific
    144 // attributes "dir" and "lang" on all elements that are allowed
    145 func (p *Policy) AllowStandardAttributes() {
    146 	// "dir" "lang" are permitted as both language attributes affect charsets
    147 	// and direction of text.
    148 	p.AllowAttrs("dir").Matching(Direction).Globally()
    149 	p.AllowAttrs(
    150 		"lang",
    151 	).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally()
    152 
    153 	// "id" is permitted. This is pretty much as some HTML elements require this
    154 	// to work well ("dfn" is an example of a "id" being value)
    155 	// This does create a risk that JavaScript and CSS within your web page
    156 	// might identify the wrong elements. Ensure that you select things
    157 	// accurately
    158 	p.AllowAttrs("id").Matching(
    159 		regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`),
    160 	).Globally()
    161 
    162 	// "title" is permitted as it improves accessibility.
    163 	p.AllowAttrs("title").Matching(Paragraph).Globally()
    164 }
    165 
    166 // AllowStyling presently enables the class attribute globally.
    167 //
    168 // Note: When bluemonday ships a CSS parser and we can safely sanitise that,
    169 // this will also allow sanitized styling of elements via the style attribute.
    170 func (p *Policy) AllowStyling() {
    171 
    172 	// "class" is permitted globally
    173 	p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally()
    174 }
    175 
    176 // AllowImages enables the img element and some popular attributes. It will also
    177 // ensure that URL values are parseable. This helper does not enable data URI
    178 // images, for that you should also use the AllowDataURIImages() helper.
    179 func (p *Policy) AllowImages() {
    180 
    181 	// "img" is permitted
    182 	p.AllowAttrs("align").Matching(ImageAlign).OnElements("img")
    183 	p.AllowAttrs("alt").Matching(Paragraph).OnElements("img")
    184 	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img")
    185 
    186 	// Standard URLs enabled
    187 	p.AllowStandardURLs()
    188 	p.AllowAttrs("src").OnElements("img")
    189 }
    190 
    191 // AllowDataURIImages permits the use of inline images defined in RFC2397
    192 // http://tools.ietf.org/html/rfc2397
    193 // http://en.wikipedia.org/wiki/Data_URI_scheme
    194 //
    195 // Images must have a mimetype matching:
    196 //
    197 //	image/gif
    198 //	image/jpeg
    199 //	image/png
    200 //	image/webp
    201 //
    202 // NOTE: There is a potential security risk to allowing data URIs and you should
    203 // only permit them on content you already trust.
    204 // http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/
    205 // https://capec.mitre.org/data/definitions/244.html
    206 func (p *Policy) AllowDataURIImages() {
    207 
    208 	// URLs must be parseable by net/url.Parse()
    209 	p.RequireParseableURLs(true)
    210 
    211 	// Supply a function to validate images contained within data URI
    212 	p.AllowURLSchemeWithCustomPolicy(
    213 		"data",
    214 		func(url *url.URL) (allowUrl bool) {
    215 			if url.RawQuery != "" || url.Fragment != "" {
    216 				return false
    217 			}
    218 
    219 			matched := dataURIImagePrefix.FindString(url.Opaque)
    220 			if matched == "" {
    221 				return false
    222 			}
    223 
    224 			_, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):])
    225 			if err != nil {
    226 				return false
    227 			}
    228 
    229 			return true
    230 		},
    231 	)
    232 }
    233 
    234 // AllowLists will enabled ordered and unordered lists, as well as definition
    235 // lists
    236 func (p *Policy) AllowLists() {
    237 	// "ol" "ul" are permitted
    238 	p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")
    239 
    240 	// "li" is permitted
    241 	p.AllowAttrs("type").Matching(ListType).OnElements("li")
    242 	p.AllowAttrs("value").Matching(Integer).OnElements("li")
    243 
    244 	// "dl" "dt" "dd" are permitted
    245 	p.AllowElements("dl", "dt", "dd")
    246 }
    247 
    248 // AllowTables will enable a rich set of elements and attributes to describe
    249 // HTML tables
    250 func (p *Policy) AllowTables() {
    251 
    252 	// "table" is permitted
    253 	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")
    254 	p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")
    255 
    256 	// "caption" is permitted
    257 	p.AllowElements("caption")
    258 
    259 	// "col" "colgroup" are permitted
    260 	p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")
    261 	p.AllowAttrs("height", "width").Matching(
    262 		NumberOrPercent,
    263 	).OnElements("col", "colgroup")
    264 	p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")
    265 	p.AllowAttrs("valign").Matching(
    266 		CellVerticalAlign,
    267 	).OnElements("col", "colgroup")
    268 
    269 	// "thead" "tr" are permitted
    270 	p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")
    271 	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")
    272 
    273 	// "td" "th" are permitted
    274 	p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")
    275 	p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")
    276 	p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")
    277 	p.AllowAttrs("headers").Matching(
    278 		SpaceSeparatedTokens,
    279 	).OnElements("td", "th")
    280 	p.AllowAttrs("height", "width").Matching(
    281 		NumberOrPercent,
    282 	).OnElements("td", "th")
    283 	p.AllowAttrs(
    284 		"scope",
    285 	).Matching(
    286 		regexp.MustCompile(`(?i)(?:row|col)(?:group)?`),
    287 	).OnElements("td", "th")
    288 	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")
    289 	p.AllowAttrs("nowrap").Matching(
    290 		regexp.MustCompile(`(?i)|nowrap`),
    291 	).OnElements("td", "th")
    292 
    293 	// "tbody" "tfoot"
    294 	p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")
    295 	p.AllowAttrs("valign").Matching(
    296 		CellVerticalAlign,
    297 	).OnElements("tbody", "tfoot")
    298 }
    299 
    300 func (p *Policy) AllowIFrames(vals ...SandboxValue) {
    301 	p.AllowAttrs("sandbox").OnElements("iframe")
    302 
    303 	p.RequireSandboxOnIFrame(vals...)
    304 }