helpers.go (11443B)
1 // Copyright (c) 2014, David Kitchen <david@buro9.com> 2 // 3 // All rights reserved. 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are met: 7 // 8 // * Redistributions of source code must retain the above copyright notice, this 9 // list of conditions and the following disclaimer. 10 // 11 // * Redistributions in binary form must reproduce the above copyright notice, 12 // this list of conditions and the following disclaimer in the documentation 13 // and/or other materials provided with the distribution. 14 // 15 // * Neither the name of the organisation (Microcosm) nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 package bluemonday 31 32 import ( 33 "encoding/base64" 34 "net/url" 35 "regexp" 36 ) 37 38 // A selection of regular expressions that can be used as .Matching() rules on 39 // HTML attributes. 40 var ( 41 // CellAlign handles the `align` attribute 42 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align 43 CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`) 44 45 // CellVerticalAlign handles the `valign` attribute 46 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign 47 CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`) 48 49 // Direction handles the `dir` attribute 50 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir 51 Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`) 52 53 // ImageAlign handles the `align` attribute on the `image` tag 54 // http://www.w3.org/MarkUp/Test/Img/imgtest.html 55 ImageAlign = regexp.MustCompile( 56 `(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`, 57 ) 58 59 // Integer describes whole positive integers (including 0) used in places 60 // like td.colspan 61 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan 62 Integer = regexp.MustCompile(`^[0-9]+$`) 63 64 // ISO8601 according to the W3 group is only a subset of the ISO8601 65 // standard: http://www.w3.org/TR/NOTE-datetime 66 // 67 // Used in places like time.datetime 68 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime 69 // 70 // Matches patterns: 71 // Year: 72 // YYYY (eg 1997) 73 // Year and month: 74 // YYYY-MM (eg 1997-07) 75 // Complete date: 76 // YYYY-MM-DD (eg 1997-07-16) 77 // Complete date plus hours and minutes: 78 // YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00) 79 // Complete date plus hours, minutes and seconds: 80 // YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) 81 // Complete date plus hours, minutes, seconds and a decimal fraction of a 82 // second 83 // YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) 84 ISO8601 = regexp.MustCompile( 85 `^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` + 86 `?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`, 87 ) 88 89 // ListType encapsulates the common value as well as the latest spec 90 // values for lists 91 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type 92 ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`) 93 94 // SpaceSeparatedTokens is used in places like `a.rel` and the common attribute 95 // `class` which both contain space delimited lists of data tokens 96 // http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def 97 // Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers 98 SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`) 99 100 // Number is a double value used on HTML5 meter and progress elements 101 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element 102 Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`) 103 104 // NumberOrPercent is used predominantly as units of measurement in width 105 // and height attributes 106 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height 107 NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`) 108 109 // Paragraph of text in an attribute such as *.'title', img.alt, etc 110 // https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title 111 // Note that we are not allowing chars that could close tags like '>' 112 Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`) 113 114 // dataURIImagePrefix is used by AllowDataURIImages to define the acceptable 115 // prefix of data URIs that contain common web image formats. 116 // 117 // This is not exported as it's not useful by itself, and only has value 118 // within the AllowDataURIImages func 119 dataURIImagePrefix = regexp.MustCompile( 120 `^image/(gif|jpeg|png|svg\+xml|webp);base64,`, 121 ) 122 ) 123 124 // AllowStandardURLs is a convenience function that will enable rel="nofollow" 125 // on "a", "area" and "link" (if you have allowed those elements) and will 126 // ensure that the URL values are parseable and either relative or belong to the 127 // "mailto", "http", or "https" schemes 128 func (p *Policy) AllowStandardURLs() { 129 // URLs must be parseable by net/url.Parse() 130 p.RequireParseableURLs(true) 131 132 // !url.IsAbs() is permitted 133 p.AllowRelativeURLs(true) 134 135 // Most common URL schemes only 136 p.AllowURLSchemes("mailto", "http", "https") 137 138 // For linking elements we will add rel="nofollow" if it does not already exist 139 // This applies to "a" "area" "link" 140 p.RequireNoFollowOnLinks(true) 141 } 142 143 // AllowStandardAttributes will enable "id", "title" and the language specific 144 // attributes "dir" and "lang" on all elements that are allowed 145 func (p *Policy) AllowStandardAttributes() { 146 // "dir" "lang" are permitted as both language attributes affect charsets 147 // and direction of text. 148 p.AllowAttrs("dir").Matching(Direction).Globally() 149 p.AllowAttrs( 150 "lang", 151 ).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally() 152 153 // "id" is permitted. This is pretty much as some HTML elements require this 154 // to work well ("dfn" is an example of a "id" being value) 155 // This does create a risk that JavaScript and CSS within your web page 156 // might identify the wrong elements. Ensure that you select things 157 // accurately 158 p.AllowAttrs("id").Matching( 159 regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`), 160 ).Globally() 161 162 // "title" is permitted as it improves accessibility. 163 p.AllowAttrs("title").Matching(Paragraph).Globally() 164 } 165 166 // AllowStyling presently enables the class attribute globally. 167 // 168 // Note: When bluemonday ships a CSS parser and we can safely sanitise that, 169 // this will also allow sanitized styling of elements via the style attribute. 170 func (p *Policy) AllowStyling() { 171 172 // "class" is permitted globally 173 p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally() 174 } 175 176 // AllowImages enables the img element and some popular attributes. It will also 177 // ensure that URL values are parseable. This helper does not enable data URI 178 // images, for that you should also use the AllowDataURIImages() helper. 179 func (p *Policy) AllowImages() { 180 181 // "img" is permitted 182 p.AllowAttrs("align").Matching(ImageAlign).OnElements("img") 183 p.AllowAttrs("alt").Matching(Paragraph).OnElements("img") 184 p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img") 185 186 // Standard URLs enabled 187 p.AllowStandardURLs() 188 p.AllowAttrs("src").OnElements("img") 189 } 190 191 // AllowDataURIImages permits the use of inline images defined in RFC2397 192 // http://tools.ietf.org/html/rfc2397 193 // http://en.wikipedia.org/wiki/Data_URI_scheme 194 // 195 // Images must have a mimetype matching: 196 // 197 // image/gif 198 // image/jpeg 199 // image/png 200 // image/webp 201 // 202 // NOTE: There is a potential security risk to allowing data URIs and you should 203 // only permit them on content you already trust. 204 // http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/ 205 // https://capec.mitre.org/data/definitions/244.html 206 func (p *Policy) AllowDataURIImages() { 207 208 // URLs must be parseable by net/url.Parse() 209 p.RequireParseableURLs(true) 210 211 // Supply a function to validate images contained within data URI 212 p.AllowURLSchemeWithCustomPolicy( 213 "data", 214 func(url *url.URL) (allowUrl bool) { 215 if url.RawQuery != "" || url.Fragment != "" { 216 return false 217 } 218 219 matched := dataURIImagePrefix.FindString(url.Opaque) 220 if matched == "" { 221 return false 222 } 223 224 _, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):]) 225 if err != nil { 226 return false 227 } 228 229 return true 230 }, 231 ) 232 } 233 234 // AllowLists will enabled ordered and unordered lists, as well as definition 235 // lists 236 func (p *Policy) AllowLists() { 237 // "ol" "ul" are permitted 238 p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul") 239 240 // "li" is permitted 241 p.AllowAttrs("type").Matching(ListType).OnElements("li") 242 p.AllowAttrs("value").Matching(Integer).OnElements("li") 243 244 // "dl" "dt" "dd" are permitted 245 p.AllowElements("dl", "dt", "dd") 246 } 247 248 // AllowTables will enable a rich set of elements and attributes to describe 249 // HTML tables 250 func (p *Policy) AllowTables() { 251 252 // "table" is permitted 253 p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table") 254 p.AllowAttrs("summary").Matching(Paragraph).OnElements("table") 255 256 // "caption" is permitted 257 p.AllowElements("caption") 258 259 // "col" "colgroup" are permitted 260 p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup") 261 p.AllowAttrs("height", "width").Matching( 262 NumberOrPercent, 263 ).OnElements("col", "colgroup") 264 p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col") 265 p.AllowAttrs("valign").Matching( 266 CellVerticalAlign, 267 ).OnElements("col", "colgroup") 268 269 // "thead" "tr" are permitted 270 p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr") 271 p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr") 272 273 // "td" "th" are permitted 274 p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th") 275 p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th") 276 p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th") 277 p.AllowAttrs("headers").Matching( 278 SpaceSeparatedTokens, 279 ).OnElements("td", "th") 280 p.AllowAttrs("height", "width").Matching( 281 NumberOrPercent, 282 ).OnElements("td", "th") 283 p.AllowAttrs( 284 "scope", 285 ).Matching( 286 regexp.MustCompile(`(?i)(?:row|col)(?:group)?`), 287 ).OnElements("td", "th") 288 p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th") 289 p.AllowAttrs("nowrap").Matching( 290 regexp.MustCompile(`(?i)|nowrap`), 291 ).OnElements("td", "th") 292 293 // "tbody" "tfoot" 294 p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot") 295 p.AllowAttrs("valign").Matching( 296 CellVerticalAlign, 297 ).OnElements("tbody", "tfoot") 298 } 299 300 func (p *Policy) AllowIFrames(vals ...SandboxValue) { 301 p.AllowAttrs("sandbox").OnElements("iframe") 302 303 p.RequireSandboxOnIFrame(vals...) 304 }