gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

policies.go (7814B)


      1 // Copyright (c) 2014, David Kitchen <david@buro9.com>
      2 //
      3 // All rights reserved.
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are met:
      7 //
      8 // * Redistributions of source code must retain the above copyright notice, this
      9 //   list of conditions and the following disclaimer.
     10 //
     11 // * Redistributions in binary form must reproduce the above copyright notice,
     12 //   this list of conditions and the following disclaimer in the documentation
     13 //   and/or other materials provided with the distribution.
     14 //
     15 // * Neither the name of the organisation (Microcosm) nor the names of its
     16 //   contributors may be used to endorse or promote products derived from
     17 //   this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
     23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 package bluemonday
     31 
     32 import (
     33 	"regexp"
     34 )
     35 
     36 // StrictPolicy returns an empty policy, which will effectively strip all HTML
     37 // elements and their attributes from a document.
     38 func StrictPolicy() *Policy {
     39 	return NewPolicy()
     40 }
     41 
     42 // StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
     43 func StripTagsPolicy() *Policy {
     44 	return StrictPolicy()
     45 }
     46 
     47 // UGCPolicy returns a policy aimed at user generated content that is a result
     48 // of HTML WYSIWYG tools and Markdown conversions.
     49 //
     50 // This is expected to be a fairly rich document where as much markup as
     51 // possible should be retained. Markdown permits raw HTML so we are basically
     52 // providing a policy to sanitise HTML5 documents safely but with the
     53 // least intrusion on the formatting expectations of the user.
     54 func UGCPolicy() *Policy {
     55 
     56 	p := NewPolicy()
     57 
     58 	///////////////////////
     59 	// Global attributes //
     60 	///////////////////////
     61 
     62 	// "class" is not permitted as we are not allowing users to style their own
     63 	// content
     64 
     65 	p.AllowStandardAttributes()
     66 
     67 	//////////////////////////////
     68 	// Global URL format policy //
     69 	//////////////////////////////
     70 
     71 	p.AllowStandardURLs()
     72 
     73 	////////////////////////////////
     74 	// Declarations and structure //
     75 	////////////////////////////////
     76 
     77 	// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
     78 	// expecting user generated content to be a fragment of HTML and not a full
     79 	// document.
     80 
     81 	//////////////////////////
     82 	// Sectioning root tags //
     83 	//////////////////////////
     84 
     85 	// "article" and "aside" are permitted and takes no attributes
     86 	p.AllowElements("article", "aside")
     87 
     88 	// "body" is not permitted as we are expecting user generated content to be a fragment
     89 	// of HTML and not a full document.
     90 
     91 	// "details" is permitted, including the "open" attribute which can either
     92 	// be blank or the value "open".
     93 	p.AllowAttrs(
     94 		"open",
     95 	).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
     96 
     97 	// "fieldset" is not permitted as we are not allowing forms to be created.
     98 
     99 	// "figure" is permitted and takes no attributes
    100 	p.AllowElements("figure")
    101 
    102 	// "nav" is not permitted as it is assumed that the site (and not the user)
    103 	// has defined navigation elements
    104 
    105 	// "section" is permitted and takes no attributes
    106 	p.AllowElements("section")
    107 
    108 	// "summary" is permitted and takes no attributes
    109 	p.AllowElements("summary")
    110 
    111 	//////////////////////////
    112 	// Headings and footers //
    113 	//////////////////////////
    114 
    115 	// "footer" is not permitted as we expect user content to be a fragment and
    116 	// not structural to this extent
    117 
    118 	// "h1" through "h6" are permitted and take no attributes
    119 	p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
    120 
    121 	// "header" is not permitted as we expect user content to be a fragment and
    122 	// not structural to this extent
    123 
    124 	// "hgroup" is permitted and takes no attributes
    125 	p.AllowElements("hgroup")
    126 
    127 	/////////////////////////////////////
    128 	// Content grouping and separating //
    129 	/////////////////////////////////////
    130 
    131 	// "blockquote" is permitted, including the "cite" attribute which must be
    132 	// a standard URL.
    133 	p.AllowAttrs("cite").OnElements("blockquote")
    134 
    135 	// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
    136 	p.AllowElements("br", "div", "hr", "p", "span", "wbr")
    137 
    138 	///////////
    139 	// Links //
    140 	///////////
    141 
    142 	// "a" is permitted
    143 	p.AllowAttrs("href").OnElements("a")
    144 
    145 	// "area" is permitted along with the attributes that map image maps work
    146 	p.AllowAttrs("name").Matching(
    147 		regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
    148 	).OnElements("map")
    149 	p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
    150 	p.AllowAttrs("coords").Matching(
    151 		regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
    152 	).OnElements("area")
    153 	p.AllowAttrs("href").OnElements("area")
    154 	p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
    155 	p.AllowAttrs("shape").Matching(
    156 		regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
    157 	).OnElements("area")
    158 	p.AllowAttrs("usemap").Matching(
    159 		regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
    160 	).OnElements("img")
    161 
    162 	// "link" is not permitted
    163 
    164 	/////////////////////
    165 	// Phrase elements //
    166 	/////////////////////
    167 
    168 	// The following are all inline phrasing elements
    169 	p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
    170 		"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
    171 
    172 	// "q" is permitted and "cite" is a URL and handled by URL policies
    173 	p.AllowAttrs("cite").OnElements("q")
    174 
    175 	// "time" is permitted
    176 	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
    177 
    178 	////////////////////
    179 	// Style elements //
    180 	////////////////////
    181 
    182 	// block and inline elements that impart no semantic meaning but style the
    183 	// document
    184 	p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
    185 
    186 	// "style" is not permitted as we are not yet sanitising CSS and it is an
    187 	// XSS attack vector
    188 
    189 	//////////////////////
    190 	// HTML5 Formatting //
    191 	//////////////////////
    192 
    193 	// "bdi" "bdo" are permitted
    194 	p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
    195 
    196 	// "rp" "rt" "ruby" are permitted
    197 	p.AllowElements("rp", "rt", "ruby")
    198 
    199 	///////////////////////////
    200 	// HTML5 Change tracking //
    201 	///////////////////////////
    202 
    203 	// "del" "ins" are permitted
    204 	p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
    205 	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
    206 
    207 	///////////
    208 	// Lists //
    209 	///////////
    210 
    211 	p.AllowLists()
    212 
    213 	////////////
    214 	// Tables //
    215 	////////////
    216 
    217 	p.AllowTables()
    218 
    219 	///////////
    220 	// Forms //
    221 	///////////
    222 
    223 	// By and large, forms are not permitted. However there are some form
    224 	// elements that can be used to present data, and we do permit those
    225 	//
    226 	// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
    227 	// "textarea" "optgroup" "option" are all not permitted
    228 
    229 	// "meter" is permitted
    230 	p.AllowAttrs(
    231 		"value",
    232 		"min",
    233 		"max",
    234 		"low",
    235 		"high",
    236 		"optimum",
    237 	).Matching(Number).OnElements("meter")
    238 
    239 	// "progress" is permitted
    240 	p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
    241 
    242 	//////////////////////
    243 	// Embedded content //
    244 	//////////////////////
    245 
    246 	// Vast majority not permitted
    247 	// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
    248 	// "video" are all not permitted
    249 
    250 	p.AllowImages()
    251 
    252 	return p
    253 }