render.go (7175B)
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package html

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strings"
)

// writer is the set of write methods the rendering code uses: plain byte
// slices, single bytes and strings. Render uses its io.Writer argument
// directly when it already implements writer, and wraps it in a bufio.Writer
// otherwise.
type writer interface {
	io.Writer
	io.ByteWriter
	WriteString(string) (int, error)
}

// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
45 func Render(w io.Writer, n *Node) error { 46 if x, ok := w.(writer); ok { 47 return render(x, n) 48 } 49 buf := bufio.NewWriter(w) 50 if err := render(buf, n); err != nil { 51 return err 52 } 53 return buf.Flush() 54 } 55 56 // plaintextAbort is returned from render1 when a <plaintext> element 57 // has been rendered. No more end tags should be rendered after that. 58 var plaintextAbort = errors.New("html: internal error (plaintext abort)") 59 60 func render(w writer, n *Node) error { 61 err := render1(w, n) 62 if err == plaintextAbort { 63 err = nil 64 } 65 return err 66 } 67 68 func render1(w writer, n *Node) error { 69 // Render non-element nodes; these are the easy cases. 70 switch n.Type { 71 case ErrorNode: 72 return errors.New("html: cannot render an ErrorNode node") 73 case TextNode: 74 return escape(w, n.Data) 75 case DocumentNode: 76 for c := n.FirstChild; c != nil; c = c.NextSibling { 77 if err := render1(w, c); err != nil { 78 return err 79 } 80 } 81 return nil 82 case ElementNode: 83 // No-op. 
84 case CommentNode: 85 if _, err := w.WriteString("<!--"); err != nil { 86 return err 87 } 88 if err := escapeComment(w, n.Data); err != nil { 89 return err 90 } 91 if _, err := w.WriteString("-->"); err != nil { 92 return err 93 } 94 return nil 95 case DoctypeNode: 96 if _, err := w.WriteString("<!DOCTYPE "); err != nil { 97 return err 98 } 99 if err := escape(w, n.Data); err != nil { 100 return err 101 } 102 if n.Attr != nil { 103 var p, s string 104 for _, a := range n.Attr { 105 switch a.Key { 106 case "public": 107 p = a.Val 108 case "system": 109 s = a.Val 110 } 111 } 112 if p != "" { 113 if _, err := w.WriteString(" PUBLIC "); err != nil { 114 return err 115 } 116 if err := writeQuoted(w, p); err != nil { 117 return err 118 } 119 if s != "" { 120 if err := w.WriteByte(' '); err != nil { 121 return err 122 } 123 if err := writeQuoted(w, s); err != nil { 124 return err 125 } 126 } 127 } else if s != "" { 128 if _, err := w.WriteString(" SYSTEM "); err != nil { 129 return err 130 } 131 if err := writeQuoted(w, s); err != nil { 132 return err 133 } 134 } 135 } 136 return w.WriteByte('>') 137 case RawNode: 138 _, err := w.WriteString(n.Data) 139 return err 140 default: 141 return errors.New("html: unknown node type") 142 } 143 144 // Render the <xxx> opening tag. 
145 if err := w.WriteByte('<'); err != nil { 146 return err 147 } 148 if _, err := w.WriteString(n.Data); err != nil { 149 return err 150 } 151 for _, a := range n.Attr { 152 if err := w.WriteByte(' '); err != nil { 153 return err 154 } 155 if a.Namespace != "" { 156 if _, err := w.WriteString(a.Namespace); err != nil { 157 return err 158 } 159 if err := w.WriteByte(':'); err != nil { 160 return err 161 } 162 } 163 if _, err := w.WriteString(a.Key); err != nil { 164 return err 165 } 166 if _, err := w.WriteString(`="`); err != nil { 167 return err 168 } 169 if err := escape(w, a.Val); err != nil { 170 return err 171 } 172 if err := w.WriteByte('"'); err != nil { 173 return err 174 } 175 } 176 if voidElements[n.Data] { 177 if n.FirstChild != nil { 178 return fmt.Errorf("html: void element <%s> has child nodes", n.Data) 179 } 180 _, err := w.WriteString("/>") 181 return err 182 } 183 if err := w.WriteByte('>'); err != nil { 184 return err 185 } 186 187 // Add initial newline where there is danger of a newline beging ignored. 188 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { 189 switch n.Data { 190 case "pre", "listing", "textarea": 191 if err := w.WriteByte('\n'); err != nil { 192 return err 193 } 194 } 195 } 196 197 // Render any child nodes. 198 switch n.Data { 199 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": 200 for c := n.FirstChild; c != nil; c = c.NextSibling { 201 if c.Type == TextNode { 202 if _, err := w.WriteString(c.Data); err != nil { 203 return err 204 } 205 } else { 206 if err := render1(w, c); err != nil { 207 return err 208 } 209 } 210 } 211 if n.Data == "plaintext" { 212 // Don't render anything else. <plaintext> must be the 213 // last element in the file, with no closing tag. 
214 return plaintextAbort 215 } 216 default: 217 for c := n.FirstChild; c != nil; c = c.NextSibling { 218 if err := render1(w, c); err != nil { 219 return err 220 } 221 } 222 } 223 224 // Render the </xxx> closing tag. 225 if _, err := w.WriteString("</"); err != nil { 226 return err 227 } 228 if _, err := w.WriteString(n.Data); err != nil { 229 return err 230 } 231 return w.WriteByte('>') 232 } 233 234 // writeQuoted writes s to w surrounded by quotes. Normally it will use double 235 // quotes, but if s contains a double quote, it will use single quotes. 236 // It is used for writing the identifiers in a doctype declaration. 237 // In valid HTML, they can't contain both types of quotes. 238 func writeQuoted(w writer, s string) error { 239 var q byte = '"' 240 if strings.Contains(s, `"`) { 241 q = '\'' 242 } 243 if err := w.WriteByte(q); err != nil { 244 return err 245 } 246 if _, err := w.WriteString(s); err != nil { 247 return err 248 } 249 if err := w.WriteByte(q); err != nil { 250 return err 251 } 252 return nil 253 } 254 255 // Section 12.1.2, "Elements", gives this list of void elements. Void elements 256 // are those that can't have any contents. 257 var voidElements = map[string]bool{ 258 "area": true, 259 "base": true, 260 "br": true, 261 "col": true, 262 "embed": true, 263 "hr": true, 264 "img": true, 265 "input": true, 266 "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility. 267 "link": true, 268 "meta": true, 269 "param": true, 270 "source": true, 271 "track": true, 272 "wbr": true, 273 }