gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

parse.go (60069B)


      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package html
      6 
      7 import (
      8 	"errors"
      9 	"fmt"
     10 	"io"
     11 	"strings"
     12 
     13 	a "golang.org/x/net/html/atom"
     14 )
     15 
     16 // A parser implements the HTML5 parsing algorithm:
     17 // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
     18 type parser struct {
     19 	// tokenizer provides the tokens for the parser.
     20 	tokenizer *Tokenizer
     21 	// tok is the most recently read token.
     22 	tok Token
     23 	// Self-closing tags like <hr/> are treated as start tags, except that
     24 	// hasSelfClosingToken is set while they are being processed.
     25 	hasSelfClosingToken bool
     26 	// doc is the document root element.
     27 	doc *Node
     28 	// The stack of open elements (section 12.2.4.2) and active formatting
     29 	// elements (section 12.2.4.3).
     30 	oe, afe nodeStack
     31 	// Element pointers (section 12.2.4.4).
     32 	head, form *Node
     33 	// Other parsing state flags (section 12.2.4.5).
     34 	scripting, framesetOK bool
     35 	// The stack of template insertion modes
     36 	templateStack insertionModeStack
     37 	// im is the current insertion mode.
     38 	im insertionMode
     39 	// originalIM is the insertion mode to go back to after completing a text
     40 	// or inTableText insertion mode.
     41 	originalIM insertionMode
     42 	// fosterParenting is whether new elements should be inserted according to
     43 	// the foster parenting rules (section 12.2.6.1).
     44 	fosterParenting bool
     45 	// quirks is whether the parser is operating in "quirks mode."
     46 	quirks bool
     47 	// fragment is whether the parser is parsing an HTML fragment.
     48 	fragment bool
     49 	// context is the context element when parsing an HTML fragment
     50 	// (section 12.4).
     51 	context *Node
     52 }
     53 
     54 func (p *parser) top() *Node {
     55 	if n := p.oe.top(); n != nil {
     56 		return n
     57 	}
     58 	return p.doc
     59 }
     60 
     61 // Stop tags for use in popUntil. These come from section 12.2.4.2.
     62 var (
     63 	defaultScopeStopTags = map[string][]a.Atom{
     64 		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
     65 		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
     66 		"svg":  {a.Desc, a.ForeignObject, a.Title},
     67 	}
     68 )
     69 
// scope selects which set of stop tags bounds an operation on the stack of
// open elements (see indexOfElementInScope and clearStackToContext).
type scope int

const (
	// defaultScope is bounded only by defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope is bounded by defaultScopeStopTags plus ol and ul.
	listItemScope
	// buttonScope is bounded by defaultScopeStopTags plus button.
	buttonScope
	// tableScope is bounded by html, table and template.
	tableScope
	// tableRowScope is handled by clearStackToContext, where it is bounded by
	// html, tr and template. indexOfElementInScope has no case for it.
	tableRowScope
	// tableBodyScope is handled by clearStackToContext, where it is bounded
	// by html, tbody, tfoot, thead and template. indexOfElementInScope has no
	// case for it.
	tableBodyScope
	// selectScope is bounded by every element other than optgroup and option.
	selectScope
)
     81 
     82 // popUntil pops the stack of open elements at the highest element whose tag
     83 // is in matchTags, provided there is no higher element in the scope's stop
     84 // tags (as defined in section 12.2.4.2). It returns whether or not there was
     85 // such an element. If there was not, popUntil leaves the stack unchanged.
     86 //
     87 // For example, the set of stop tags for table scope is: "html", "table". If
     88 // the stack was:
     89 // ["html", "body", "font", "table", "b", "i", "u"]
     90 // then popUntil(tableScope, "font") would return false, but
     91 // popUntil(tableScope, "i") would return true and the stack would become:
     92 // ["html", "body", "font", "table", "b"]
     93 //
     94 // If an element's tag is in both the stop tags and matchTags, then the stack
     95 // will be popped and the function returns true (provided, of course, there was
     96 // no higher element in the stack that was also in the stop tags). For example,
     97 // popUntil(tableScope, "table") returns true and leaves:
     98 // ["html", "body", "font"]
     99 func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
    100 	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
    101 		p.oe = p.oe[:i]
    102 		return true
    103 	}
    104 	return false
    105 }
    106 
    107 // indexOfElementInScope returns the index in p.oe of the highest element whose
    108 // tag is in matchTags that is in scope. If no matching element is in scope, it
    109 // returns -1.
    110 func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
    111 	for i := len(p.oe) - 1; i >= 0; i-- {
    112 		tagAtom := p.oe[i].DataAtom
    113 		if p.oe[i].Namespace == "" {
    114 			for _, t := range matchTags {
    115 				if t == tagAtom {
    116 					return i
    117 				}
    118 			}
    119 			switch s {
    120 			case defaultScope:
    121 				// No-op.
    122 			case listItemScope:
    123 				if tagAtom == a.Ol || tagAtom == a.Ul {
    124 					return -1
    125 				}
    126 			case buttonScope:
    127 				if tagAtom == a.Button {
    128 					return -1
    129 				}
    130 			case tableScope:
    131 				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
    132 					return -1
    133 				}
    134 			case selectScope:
    135 				if tagAtom != a.Optgroup && tagAtom != a.Option {
    136 					return -1
    137 				}
    138 			default:
    139 				panic("unreachable")
    140 			}
    141 		}
    142 		switch s {
    143 		case defaultScope, listItemScope, buttonScope:
    144 			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
    145 				if t == tagAtom {
    146 					return -1
    147 				}
    148 			}
    149 		}
    150 	}
    151 	return -1
    152 }
    153 
    154 // elementInScope is like popUntil, except that it doesn't modify the stack of
    155 // open elements.
    156 func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
    157 	return p.indexOfElementInScope(s, matchTags...) != -1
    158 }
    159 
    160 // clearStackToContext pops elements off the stack of open elements until a
    161 // scope-defined element is found.
    162 func (p *parser) clearStackToContext(s scope) {
    163 	for i := len(p.oe) - 1; i >= 0; i-- {
    164 		tagAtom := p.oe[i].DataAtom
    165 		switch s {
    166 		case tableScope:
    167 			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
    168 				p.oe = p.oe[:i+1]
    169 				return
    170 			}
    171 		case tableRowScope:
    172 			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
    173 				p.oe = p.oe[:i+1]
    174 				return
    175 			}
    176 		case tableBodyScope:
    177 			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
    178 				p.oe = p.oe[:i+1]
    179 				return
    180 			}
    181 		default:
    182 			panic("unreachable")
    183 		}
    184 	}
    185 }
    186 
    187 // parseGenericRawTextElement implements the generic raw text element parsing
    188 // algorithm defined in 12.2.6.2.
    189 // https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
    190 // TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
    191 // officially, need to make tokenizer consider both states.
    192 func (p *parser) parseGenericRawTextElement() {
    193 	p.addElement()
    194 	p.originalIM = p.im
    195 	p.im = textIM
    196 }
    197 
    198 // generateImpliedEndTags pops nodes off the stack of open elements as long as
    199 // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
    200 // If exceptions are specified, nodes with that name will not be popped off.
    201 func (p *parser) generateImpliedEndTags(exceptions ...string) {
    202 	var i int
    203 loop:
    204 	for i = len(p.oe) - 1; i >= 0; i-- {
    205 		n := p.oe[i]
    206 		if n.Type != ElementNode {
    207 			break
    208 		}
    209 		switch n.DataAtom {
    210 		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
    211 			for _, except := range exceptions {
    212 				if n.Data == except {
    213 					break loop
    214 				}
    215 			}
    216 			continue
    217 		}
    218 		break
    219 	}
    220 
    221 	p.oe = p.oe[:i+1]
    222 }
    223 
    224 // addChild adds a child node n to the top element, and pushes n onto the stack
    225 // of open elements if it is an element node.
    226 func (p *parser) addChild(n *Node) {
    227 	if p.shouldFosterParent() {
    228 		p.fosterParent(n)
    229 	} else {
    230 		p.top().AppendChild(n)
    231 	}
    232 
    233 	if n.Type == ElementNode {
    234 		p.oe = append(p.oe, n)
    235 	}
    236 }
    237 
    238 // shouldFosterParent returns whether the next node to be added should be
    239 // foster parented.
    240 func (p *parser) shouldFosterParent() bool {
    241 	if p.fosterParenting {
    242 		switch p.top().DataAtom {
    243 		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
    244 			return true
    245 		}
    246 	}
    247 	return false
    248 }
    249 
// fosterParent adds a child node according to the foster parenting rules.
// Section 12.2.6.1, "foster parenting".
//
// The node n is placed in one of three ways:
//   - appended to the highest template on the stack of open elements, when
//     there is one and it sits above the highest table (or there is no table);
//   - merged into an adjacent text node, when both n and the node that would
//     precede it are text nodes;
//   - otherwise inserted into the foster parent immediately before the table
//     (or appended, when there is no table).
func (p *parser) fosterParent(n *Node) {
	var table, parent, prev, template *Node
	// Find the highest table on the stack; i ends at its index, or at -1 when
	// there is none.
	var i int
	for i = len(p.oe) - 1; i >= 0; i-- {
		if p.oe[i].DataAtom == a.Table {
			table = p.oe[i]
			break
		}
	}

	// Likewise for the highest template; j ends at its index, or at -1.
	var j int
	for j = len(p.oe) - 1; j >= 0; j-- {
		if p.oe[j].DataAtom == a.Template {
			template = p.oe[j]
			break
		}
	}

	// A template that is higher on the stack than any table takes the node.
	if template != nil && (table == nil || j > i) {
		template.AppendChild(n)
		return
	}

	if table == nil {
		// The foster parent is the html element.
		parent = p.oe[0]
	} else {
		parent = table.Parent
	}
	// The table has no parent: fall back to the element just below it on the
	// stack of open elements.
	if parent == nil {
		parent = p.oe[i-1]
	}

	// prev is the node that will directly precede n after insertion.
	if table != nil {
		prev = table.PrevSibling
	} else {
		prev = parent.LastChild
	}
	// Coalesce adjacent text instead of inserting a second text node.
	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
		prev.Data += n.Data
		return
	}

	parent.InsertBefore(n, table)
}
    297 
    298 // addText adds text to the preceding node if it is a text node, or else it
    299 // calls addChild with a new text node.
    300 func (p *parser) addText(text string) {
    301 	if text == "" {
    302 		return
    303 	}
    304 
    305 	if p.shouldFosterParent() {
    306 		p.fosterParent(&Node{
    307 			Type: TextNode,
    308 			Data: text,
    309 		})
    310 		return
    311 	}
    312 
    313 	t := p.top()
    314 	if n := t.LastChild; n != nil && n.Type == TextNode {
    315 		n.Data += text
    316 		return
    317 	}
    318 	p.addChild(&Node{
    319 		Type: TextNode,
    320 		Data: text,
    321 	})
    322 }
    323 
    324 // addElement adds a child element based on the current token.
    325 func (p *parser) addElement() {
    326 	p.addChild(&Node{
    327 		Type:     ElementNode,
    328 		DataAtom: p.tok.DataAtom,
    329 		Data:     p.tok.Data,
    330 		Attr:     p.tok.Attr,
    331 	})
    332 }
    333 
// addFormattingElement adds an element for the current token and pushes it
// onto the list of active formatting elements, first pruning earlier
// identical entries as described below.
// Section 12.2.4.3.
func (p *parser) addFormattingElement() {
	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
	p.addElement()

	// Implement the Noah's Ark clause, but with three per family instead of two.
	identicalElements := 0
findIdenticalElements:
	// Scan from the most recent entry back to the last scope marker.
	for i := len(p.afe) - 1; i >= 0; i-- {
		n := p.afe[i]
		if n.Type == scopeMarkerNode {
			break
		}
		if n.Type != ElementNode {
			continue
		}
		if n.Namespace != "" {
			continue
		}
		if n.DataAtom != tagAtom {
			continue
		}
		if len(n.Attr) != len(attr) {
			continue
		}
	compareAttributes:
		// Every attribute of n must have an exact counterpart (key, namespace
		// and value) on the new element.
		for _, t0 := range n.Attr {
			for _, t1 := range attr {
				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
					// Found a match for this attribute, continue with the next attribute.
					continue compareAttributes
				}
			}
			// If we get here, there is no attribute that matches t0.
			// Therefore the element is not identical to the new one.
			continue findIdenticalElements
		}

		// n is identical to the new element; from the third identical entry
		// on, remove it from the list.
		identicalElements++
		if identicalElements >= 3 {
			p.afe.remove(n)
		}
	}

	// p.top() is the element that addElement just pushed.
	p.afe = append(p.afe, p.top())
}
    380 
    381 // Section 12.2.4.3.
    382 func (p *parser) clearActiveFormattingElements() {
    383 	for {
    384 		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
    385 			return
    386 		}
    387 	}
    388 }
    389 
// reconstructActiveFormattingElements re-opens the trailing entries of the
// list of active formatting elements that are not on the stack of open
// elements: each such entry is cloned, the clone is added as a child (and so
// pushed onto the stack of open elements), and the list entry is replaced by
// the clone.
// Section 12.2.4.3.
func (p *parser) reconstructActiveFormattingElements() {
	n := p.afe.top()
	if n == nil {
		return
	}
	// Nothing to do when the last entry is a marker or is already open.
	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
		return
	}
	// Rewind: move i back until it points at a marker or at an entry that is
	// on the stack of open elements, or to -1 when the start of the list is
	// passed.
	i := len(p.afe) - 1
	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
		if i == 0 {
			i = -1
			break
		}
		i--
		n = p.afe[i]
	}
	// Advance: re-create every entry after position i, through the end of the
	// list.
	for {
		i++
		clone := p.afe[i].clone()
		p.addChild(clone)
		p.afe[i] = clone
		if i == len(p.afe)-1 {
			break
		}
	}
}
    418 
// acknowledgeSelfClosingTag clears hasSelfClosingToken, the flag that is set
// while a self-closing tag is being processed.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
    423 
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed.
//
// A mode that returns false has typically switched p.im first, so that the
// same token can be handled by another mode.
// NOTE(review): the loop that re-dispatches an unconsumed token is not part
// of this section of the file — confirm against the caller.
type insertionMode func(*parser) bool
    429 
    430 // setOriginalIM sets the insertion mode to return to after completing a text or
    431 // inTableText insertion mode.
    432 // Section 12.2.4.1, "using the rules for".
    433 func (p *parser) setOriginalIM() {
    434 	if p.originalIM != nil {
    435 		panic("html: bad parser state: originalIM was set twice")
    436 	}
    437 	p.originalIM = p.im
    438 }
    439 
// resetInsertionMode picks the insertion mode that matches the current stack
// of open elements. It walks the stack from the top down and sets p.im from
// the first node whose tag determines a mode. For the bottom-most stack entry
// the fragment context element (p.context), when set, is examined in place
// of the entry itself.
// Section 12.2.4.1, "reset the insertion mode".
func (p *parser) resetInsertionMode() {
	for i := len(p.oe) - 1; i >= 0; i-- {
		n := p.oe[i]
		// last marks the bottom of the stack.
		last := i == 0
		if last && p.context != nil {
			n = p.context
		}

		switch n.DataAtom {
		case a.Select:
			if !last {
				// Walk down the stack below the select, looking for an
				// enclosing template or table.
				for ancestor, first := n, p.oe[0]; ancestor != first; {
					ancestor = p.oe[p.oe.index(ancestor)-1]
					switch ancestor.DataAtom {
					case a.Template:
						p.im = inSelectIM
						return
					case a.Table:
						p.im = inSelectInTableIM
						return
					}
				}
			}
			p.im = inSelectIM
		case a.Td, a.Th:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inCellIM
		case a.Tr:
			p.im = inRowIM
		case a.Tbody, a.Thead, a.Tfoot:
			p.im = inTableBodyIM
		case a.Caption:
			p.im = inCaptionIM
		case a.Colgroup:
			p.im = inColumnGroupIM
		case a.Table:
			p.im = inTableIM
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			// A template node with a non-empty Namespace is skipped.
			if n.Namespace != "" {
				continue
			}
			p.im = p.templateStack.top()
		case a.Head:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inHeadIM
		case a.Body:
			p.im = inBodyIM
		case a.Frameset:
			p.im = inFramesetIM
		case a.Html:
			// The mode depends on whether a head element has been seen.
			if p.head == nil {
				p.im = beforeHeadIM
			} else {
				p.im = afterHeadIM
			}
		default:
			// No tag on the stack determined a mode: default to "in body"
			// once the bottom of the stack has been examined.
			if last {
				p.im = inBodyIM
				return
			}
			continue
		}
		return
	}
}
    511 
// whitespace is the cutset (space, tab, carriage return, line feed and form
// feed) that the insertion modes pass to strings.TrimLeft in order to detect
// and strip leading whitespace in text tokens.
const whitespace = " \t\r\n\f"
    513 
    514 // Section 12.2.6.4.1.
    515 func initialIM(p *parser) bool {
    516 	switch p.tok.Type {
    517 	case TextToken:
    518 		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
    519 		if len(p.tok.Data) == 0 {
    520 			// It was all whitespace, so ignore it.
    521 			return true
    522 		}
    523 	case CommentToken:
    524 		p.doc.AppendChild(&Node{
    525 			Type: CommentNode,
    526 			Data: p.tok.Data,
    527 		})
    528 		return true
    529 	case DoctypeToken:
    530 		n, quirks := parseDoctype(p.tok.Data)
    531 		p.doc.AppendChild(n)
    532 		p.quirks = quirks
    533 		p.im = beforeHTMLIM
    534 		return true
    535 	}
    536 	p.quirks = true
    537 	p.im = beforeHTMLIM
    538 	return false
    539 }
    540 
    541 // Section 12.2.6.4.2.
    542 func beforeHTMLIM(p *parser) bool {
    543 	switch p.tok.Type {
    544 	case DoctypeToken:
    545 		// Ignore the token.
    546 		return true
    547 	case TextToken:
    548 		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
    549 		if len(p.tok.Data) == 0 {
    550 			// It was all whitespace, so ignore it.
    551 			return true
    552 		}
    553 	case StartTagToken:
    554 		if p.tok.DataAtom == a.Html {
    555 			p.addElement()
    556 			p.im = beforeHeadIM
    557 			return true
    558 		}
    559 	case EndTagToken:
    560 		switch p.tok.DataAtom {
    561 		case a.Head, a.Body, a.Html, a.Br:
    562 			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
    563 			return false
    564 		default:
    565 			// Ignore the token.
    566 			return true
    567 		}
    568 	case CommentToken:
    569 		p.doc.AppendChild(&Node{
    570 			Type: CommentNode,
    571 			Data: p.tok.Data,
    572 		})
    573 		return true
    574 	}
    575 	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
    576 	return false
    577 }
    578 
    579 // Section 12.2.6.4.3.
    580 func beforeHeadIM(p *parser) bool {
    581 	switch p.tok.Type {
    582 	case TextToken:
    583 		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
    584 		if len(p.tok.Data) == 0 {
    585 			// It was all whitespace, so ignore it.
    586 			return true
    587 		}
    588 	case StartTagToken:
    589 		switch p.tok.DataAtom {
    590 		case a.Head:
    591 			p.addElement()
    592 			p.head = p.top()
    593 			p.im = inHeadIM
    594 			return true
    595 		case a.Html:
    596 			return inBodyIM(p)
    597 		}
    598 	case EndTagToken:
    599 		switch p.tok.DataAtom {
    600 		case a.Head, a.Body, a.Html, a.Br:
    601 			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
    602 			return false
    603 		default:
    604 			// Ignore the token.
    605 			return true
    606 		}
    607 	case CommentToken:
    608 		p.addChild(&Node{
    609 			Type: CommentNode,
    610 			Data: p.tok.Data,
    611 		})
    612 		return true
    613 	case DoctypeToken:
    614 		// Ignore the token.
    615 		return true
    616 	}
    617 
    618 	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
    619 	return false
    620 }
    621 
// inHeadIM is the "in head" insertion mode. It handles the elements that are
// allowed inside <head>; any token it does not handle causes an implied
// </head> end tag to be parsed and is reported as not consumed.
// Section 12.2.6.4.4.
func inHeadIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		s := strings.TrimLeft(p.tok.Data, whitespace)
		if len(s) < len(p.tok.Data) {
			// Add the initial whitespace to the current node.
			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
			if s == "" {
				return true
			}
			// Keep only the non-whitespace remainder for the code below.
			p.tok.Data = s
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			return inBodyIM(p)
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
			// These elements are popped again right after being added.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			return true
		case a.Noscript:
			if p.scripting {
				p.parseGenericRawTextElement()
				return true
			}
			p.addElement()
			p.im = inHeadNoscriptIM
			// Don't let the tokenizer go into raw text mode when scripting is disabled.
			p.tokenizer.NextIsNotRawText()
			return true
		case a.Script, a.Title:
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
			return true
		case a.Noframes, a.Style:
			p.parseGenericRawTextElement()
			return true
		case a.Head:
			// Ignore the token.
			return true
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// We don't handle all of the corner cases when mixing foreign
			// content (i.e. <math> or <svg>) with <template>. Without this
			// early return, we can get into an infinite loop, possibly because
			// of the "TODO... further divergence" a little below.
			//
			// As a workaround, if we are mixing foreign content and templates,
			// just ignore the rest of the HTML. Foreign content is rare and a
			// relatively old HTML feature. Templates are also rare and a
			// relatively new HTML feature. Their combination is very rare.
			for _, e := range p.oe {
				if e.Namespace != "" {
					p.im = ignoreTheRemainingTokens
					return true
				}
			}

			// Open the template: push a scope marker onto the active
			// formatting elements and a template insertion mode onto the
			// template stack.
			p.addElement()
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
			p.im = inTemplateIM
			p.templateStack = append(p.templateStack, inTemplateIM)
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Head:
			p.oe.pop()
			p.im = afterHeadIM
			return true
		case a.Body, a.Html, a.Br:
			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
			return false
		case a.Template:
			// Ignore the end tag when no template is open.
			if !p.oe.contains(a.Template) {
				return true
			}
			// TODO: remove this further divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.generateImpliedEndTags()
			// Pop up to and including the highest template element whose
			// Namespace is empty.
			for i := len(p.oe) - 1; i >= 0; i-- {
				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
					p.oe = p.oe[:i]
					break
				}
			}
			p.clearActiveFormattingElements()
			p.templateStack.pop()
			p.resetInsertionMode()
			return true
		default:
			// Ignore the token.
			return true
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	}

	// Anything else: close the head implicitly and let the token be handled
	// again.
	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
	return false
}
    736 
    737 // Section 12.2.6.4.5.
    738 func inHeadNoscriptIM(p *parser) bool {
    739 	switch p.tok.Type {
    740 	case DoctypeToken:
    741 		// Ignore the token.
    742 		return true
    743 	case StartTagToken:
    744 		switch p.tok.DataAtom {
    745 		case a.Html:
    746 			return inBodyIM(p)
    747 		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
    748 			return inHeadIM(p)
    749 		case a.Head:
    750 			// Ignore the token.
    751 			return true
    752 		case a.Noscript:
    753 			// Don't let the tokenizer go into raw text mode even when a <noscript>
    754 			// tag is in "in head noscript" insertion mode.
    755 			p.tokenizer.NextIsNotRawText()
    756 			// Ignore the token.
    757 			return true
    758 		}
    759 	case EndTagToken:
    760 		switch p.tok.DataAtom {
    761 		case a.Noscript, a.Br:
    762 		default:
    763 			// Ignore the token.
    764 			return true
    765 		}
    766 	case TextToken:
    767 		s := strings.TrimLeft(p.tok.Data, whitespace)
    768 		if len(s) == 0 {
    769 			// It was all whitespace.
    770 			return inHeadIM(p)
    771 		}
    772 	case CommentToken:
    773 		return inHeadIM(p)
    774 	}
    775 	p.oe.pop()
    776 	if p.top().DataAtom != a.Head {
    777 		panic("html: the new current node will be a head element.")
    778 	}
    779 	p.im = inHeadIM
    780 	if p.tok.DataAtom == a.Noscript {
    781 		return true
    782 	}
    783 	return false
    784 }
    785 
    786 // Section 12.2.6.4.6.
    787 func afterHeadIM(p *parser) bool {
    788 	switch p.tok.Type {
    789 	case TextToken:
    790 		s := strings.TrimLeft(p.tok.Data, whitespace)
    791 		if len(s) < len(p.tok.Data) {
    792 			// Add the initial whitespace to the current node.
    793 			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
    794 			if s == "" {
    795 				return true
    796 			}
    797 			p.tok.Data = s
    798 		}
    799 	case StartTagToken:
    800 		switch p.tok.DataAtom {
    801 		case a.Html:
    802 			return inBodyIM(p)
    803 		case a.Body:
    804 			p.addElement()
    805 			p.framesetOK = false
    806 			p.im = inBodyIM
    807 			return true
    808 		case a.Frameset:
    809 			p.addElement()
    810 			p.im = inFramesetIM
    811 			return true
    812 		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
    813 			p.oe = append(p.oe, p.head)
    814 			defer p.oe.remove(p.head)
    815 			return inHeadIM(p)
    816 		case a.Head:
    817 			// Ignore the token.
    818 			return true
    819 		}
    820 	case EndTagToken:
    821 		switch p.tok.DataAtom {
    822 		case a.Body, a.Html, a.Br:
    823 			// Drop down to creating an implied <body> tag.
    824 		case a.Template:
    825 			return inHeadIM(p)
    826 		default:
    827 			// Ignore the token.
    828 			return true
    829 		}
    830 	case CommentToken:
    831 		p.addChild(&Node{
    832 			Type: CommentNode,
    833 			Data: p.tok.Data,
    834 		})
    835 		return true
    836 	case DoctypeToken:
    837 		// Ignore the token.
    838 		return true
    839 	}
    840 
    841 	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
    842 	p.framesetOK = true
    843 	return false
    844 }
    845 
    846 // copyAttributes copies attributes of src not found on dst to dst.
    847 func copyAttributes(dst *Node, src Token) {
    848 	if len(src.Attr) == 0 {
    849 		return
    850 	}
    851 	attr := map[string]string{}
    852 	for _, t := range dst.Attr {
    853 		attr[t.Key] = t.Val
    854 	}
    855 	for _, t := range src.Attr {
    856 		if _, ok := attr[t.Key]; !ok {
    857 			dst.Attr = append(dst.Attr, t)
    858 			attr[t.Key] = t.Val
    859 		}
    860 	}
    861 }
    862 
    863 // Section 12.2.6.4.7.
    864 func inBodyIM(p *parser) bool {
    865 	switch p.tok.Type {
    866 	case TextToken:
    867 		d := p.tok.Data
    868 		switch n := p.oe.top(); n.DataAtom {
    869 		case a.Pre, a.Listing:
    870 			if n.FirstChild == nil {
    871 				// Ignore a newline at the start of a <pre> block.
    872 				if d != "" && d[0] == '\r' {
    873 					d = d[1:]
    874 				}
    875 				if d != "" && d[0] == '\n' {
    876 					d = d[1:]
    877 				}
    878 			}
    879 		}
    880 		d = strings.Replace(d, "\x00", "", -1)
    881 		if d == "" {
    882 			return true
    883 		}
    884 		p.reconstructActiveFormattingElements()
    885 		p.addText(d)
    886 		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
    887 			// There were non-whitespace characters inserted.
    888 			p.framesetOK = false
    889 		}
    890 	case StartTagToken:
    891 		switch p.tok.DataAtom {
    892 		case a.Html:
    893 			if p.oe.contains(a.Template) {
    894 				return true
    895 			}
    896 			copyAttributes(p.oe[0], p.tok)
    897 		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
    898 			return inHeadIM(p)
    899 		case a.Body:
    900 			if p.oe.contains(a.Template) {
    901 				return true
    902 			}
    903 			if len(p.oe) >= 2 {
    904 				body := p.oe[1]
    905 				if body.Type == ElementNode && body.DataAtom == a.Body {
    906 					p.framesetOK = false
    907 					copyAttributes(body, p.tok)
    908 				}
    909 			}
    910 		case a.Frameset:
    911 			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
    912 				// Ignore the token.
    913 				return true
    914 			}
    915 			body := p.oe[1]
    916 			if body.Parent != nil {
    917 				body.Parent.RemoveChild(body)
    918 			}
    919 			p.oe = p.oe[:1]
    920 			p.addElement()
    921 			p.im = inFramesetIM
    922 			return true
    923 		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
    924 			p.popUntil(buttonScope, a.P)
    925 			p.addElement()
    926 		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
    927 			p.popUntil(buttonScope, a.P)
    928 			switch n := p.top(); n.DataAtom {
    929 			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
    930 				p.oe.pop()
    931 			}
    932 			p.addElement()
    933 		case a.Pre, a.Listing:
    934 			p.popUntil(buttonScope, a.P)
    935 			p.addElement()
    936 			// The newline, if any, will be dealt with by the TextToken case.
    937 			p.framesetOK = false
    938 		case a.Form:
    939 			if p.form != nil && !p.oe.contains(a.Template) {
    940 				// Ignore the token
    941 				return true
    942 			}
    943 			p.popUntil(buttonScope, a.P)
    944 			p.addElement()
    945 			if !p.oe.contains(a.Template) {
    946 				p.form = p.top()
    947 			}
    948 		case a.Li:
    949 			p.framesetOK = false
    950 			for i := len(p.oe) - 1; i >= 0; i-- {
    951 				node := p.oe[i]
    952 				switch node.DataAtom {
    953 				case a.Li:
    954 					p.oe = p.oe[:i]
    955 				case a.Address, a.Div, a.P:
    956 					continue
    957 				default:
    958 					if !isSpecialElement(node) {
    959 						continue
    960 					}
    961 				}
    962 				break
    963 			}
    964 			p.popUntil(buttonScope, a.P)
    965 			p.addElement()
    966 		case a.Dd, a.Dt:
    967 			p.framesetOK = false
    968 			for i := len(p.oe) - 1; i >= 0; i-- {
    969 				node := p.oe[i]
    970 				switch node.DataAtom {
    971 				case a.Dd, a.Dt:
    972 					p.oe = p.oe[:i]
    973 				case a.Address, a.Div, a.P:
    974 					continue
    975 				default:
    976 					if !isSpecialElement(node) {
    977 						continue
    978 					}
    979 				}
    980 				break
    981 			}
    982 			p.popUntil(buttonScope, a.P)
    983 			p.addElement()
    984 		case a.Plaintext:
    985 			p.popUntil(buttonScope, a.P)
    986 			p.addElement()
    987 		case a.Button:
    988 			p.popUntil(defaultScope, a.Button)
    989 			p.reconstructActiveFormattingElements()
    990 			p.addElement()
    991 			p.framesetOK = false
    992 		case a.A:
    993 			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
    994 				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
    995 					p.inBodyEndTagFormatting(a.A, "a")
    996 					p.oe.remove(n)
    997 					p.afe.remove(n)
    998 					break
    999 				}
   1000 			}
   1001 			p.reconstructActiveFormattingElements()
   1002 			p.addFormattingElement()
   1003 		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
   1004 			p.reconstructActiveFormattingElements()
   1005 			p.addFormattingElement()
   1006 		case a.Nobr:
   1007 			p.reconstructActiveFormattingElements()
   1008 			if p.elementInScope(defaultScope, a.Nobr) {
   1009 				p.inBodyEndTagFormatting(a.Nobr, "nobr")
   1010 				p.reconstructActiveFormattingElements()
   1011 			}
   1012 			p.addFormattingElement()
   1013 		case a.Applet, a.Marquee, a.Object:
   1014 			p.reconstructActiveFormattingElements()
   1015 			p.addElement()
   1016 			p.afe = append(p.afe, &scopeMarker)
   1017 			p.framesetOK = false
   1018 		case a.Table:
   1019 			if !p.quirks {
   1020 				p.popUntil(buttonScope, a.P)
   1021 			}
   1022 			p.addElement()
   1023 			p.framesetOK = false
   1024 			p.im = inTableIM
   1025 			return true
   1026 		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
   1027 			p.reconstructActiveFormattingElements()
   1028 			p.addElement()
   1029 			p.oe.pop()
   1030 			p.acknowledgeSelfClosingTag()
   1031 			if p.tok.DataAtom == a.Input {
   1032 				for _, t := range p.tok.Attr {
   1033 					if t.Key == "type" {
   1034 						if strings.ToLower(t.Val) == "hidden" {
   1035 							// Skip setting framesetOK = false
   1036 							return true
   1037 						}
   1038 					}
   1039 				}
   1040 			}
   1041 			p.framesetOK = false
   1042 		case a.Param, a.Source, a.Track:
   1043 			p.addElement()
   1044 			p.oe.pop()
   1045 			p.acknowledgeSelfClosingTag()
   1046 		case a.Hr:
   1047 			p.popUntil(buttonScope, a.P)
   1048 			p.addElement()
   1049 			p.oe.pop()
   1050 			p.acknowledgeSelfClosingTag()
   1051 			p.framesetOK = false
   1052 		case a.Image:
   1053 			p.tok.DataAtom = a.Img
   1054 			p.tok.Data = a.Img.String()
   1055 			return false
   1056 		case a.Textarea:
   1057 			p.addElement()
   1058 			p.setOriginalIM()
   1059 			p.framesetOK = false
   1060 			p.im = textIM
   1061 		case a.Xmp:
   1062 			p.popUntil(buttonScope, a.P)
   1063 			p.reconstructActiveFormattingElements()
   1064 			p.framesetOK = false
   1065 			p.parseGenericRawTextElement()
   1066 		case a.Iframe:
   1067 			p.framesetOK = false
   1068 			p.parseGenericRawTextElement()
   1069 		case a.Noembed:
   1070 			p.parseGenericRawTextElement()
   1071 		case a.Noscript:
   1072 			if p.scripting {
   1073 				p.parseGenericRawTextElement()
   1074 				return true
   1075 			}
   1076 			p.reconstructActiveFormattingElements()
   1077 			p.addElement()
   1078 			// Don't let the tokenizer go into raw text mode when scripting is disabled.
   1079 			p.tokenizer.NextIsNotRawText()
   1080 		case a.Select:
   1081 			p.reconstructActiveFormattingElements()
   1082 			p.addElement()
   1083 			p.framesetOK = false
   1084 			p.im = inSelectIM
   1085 			return true
   1086 		case a.Optgroup, a.Option:
   1087 			if p.top().DataAtom == a.Option {
   1088 				p.oe.pop()
   1089 			}
   1090 			p.reconstructActiveFormattingElements()
   1091 			p.addElement()
   1092 		case a.Rb, a.Rtc:
   1093 			if p.elementInScope(defaultScope, a.Ruby) {
   1094 				p.generateImpliedEndTags()
   1095 			}
   1096 			p.addElement()
   1097 		case a.Rp, a.Rt:
   1098 			if p.elementInScope(defaultScope, a.Ruby) {
   1099 				p.generateImpliedEndTags("rtc")
   1100 			}
   1101 			p.addElement()
   1102 		case a.Math, a.Svg:
   1103 			p.reconstructActiveFormattingElements()
   1104 			if p.tok.DataAtom == a.Math {
   1105 				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
   1106 			} else {
   1107 				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
   1108 			}
   1109 			adjustForeignAttributes(p.tok.Attr)
   1110 			p.addElement()
   1111 			p.top().Namespace = p.tok.Data
   1112 			if p.hasSelfClosingToken {
   1113 				p.oe.pop()
   1114 				p.acknowledgeSelfClosingTag()
   1115 			}
   1116 			return true
   1117 		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
   1118 			// Ignore the token.
   1119 		default:
   1120 			p.reconstructActiveFormattingElements()
   1121 			p.addElement()
   1122 		}
   1123 	case EndTagToken:
   1124 		switch p.tok.DataAtom {
   1125 		case a.Body:
   1126 			if p.elementInScope(defaultScope, a.Body) {
   1127 				p.im = afterBodyIM
   1128 			}
   1129 		case a.Html:
   1130 			if p.elementInScope(defaultScope, a.Body) {
   1131 				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
   1132 				return false
   1133 			}
   1134 			return true
   1135 		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
   1136 			p.popUntil(defaultScope, p.tok.DataAtom)
   1137 		case a.Form:
   1138 			if p.oe.contains(a.Template) {
   1139 				i := p.indexOfElementInScope(defaultScope, a.Form)
   1140 				if i == -1 {
   1141 					// Ignore the token.
   1142 					return true
   1143 				}
   1144 				p.generateImpliedEndTags()
   1145 				if p.oe[i].DataAtom != a.Form {
   1146 					// Ignore the token.
   1147 					return true
   1148 				}
   1149 				p.popUntil(defaultScope, a.Form)
   1150 			} else {
   1151 				node := p.form
   1152 				p.form = nil
   1153 				i := p.indexOfElementInScope(defaultScope, a.Form)
   1154 				if node == nil || i == -1 || p.oe[i] != node {
   1155 					// Ignore the token.
   1156 					return true
   1157 				}
   1158 				p.generateImpliedEndTags()
   1159 				p.oe.remove(node)
   1160 			}
   1161 		case a.P:
   1162 			if !p.elementInScope(buttonScope, a.P) {
   1163 				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
   1164 			}
   1165 			p.popUntil(buttonScope, a.P)
   1166 		case a.Li:
   1167 			p.popUntil(listItemScope, a.Li)
   1168 		case a.Dd, a.Dt:
   1169 			p.popUntil(defaultScope, p.tok.DataAtom)
   1170 		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   1171 			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
   1172 		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
   1173 			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
   1174 		case a.Applet, a.Marquee, a.Object:
   1175 			if p.popUntil(defaultScope, p.tok.DataAtom) {
   1176 				p.clearActiveFormattingElements()
   1177 			}
   1178 		case a.Br:
   1179 			p.tok.Type = StartTagToken
   1180 			return false
   1181 		case a.Template:
   1182 			return inHeadIM(p)
   1183 		default:
   1184 			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
   1185 		}
   1186 	case CommentToken:
   1187 		p.addChild(&Node{
   1188 			Type: CommentNode,
   1189 			Data: p.tok.Data,
   1190 		})
   1191 	case ErrorToken:
   1192 		// TODO: remove this divergence from the HTML5 spec.
   1193 		if len(p.templateStack) > 0 {
   1194 			p.im = inTemplateIM
   1195 			return false
   1196 		}
   1197 		for _, e := range p.oe {
   1198 			switch e.DataAtom {
   1199 			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
   1200 				a.Thead, a.Tr, a.Body, a.Html:
   1201 			default:
   1202 				return true
   1203 			}
   1204 		}
   1205 	}
   1206 
   1207 	return true
   1208 }
   1209 
// inBodyEndTagFormatting runs the "adoption agency" algorithm for the
// formatting tag identified by tagAtom and tagName. It is used for the end
// tags of formatting elements and also when an <a> or <nobr> start tag has to
// close an earlier element of the same kind. It works purely by mutating the
// stack of open elements (p.oe), the list of active formatting elements
// (p.afe) and the node tree; it returns nothing.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. Fast path: the current node has the requested tag name and is
	// not in the list of active formatting elements, so it is simply popped.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop, which runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the most recent entry in the
		// list of active formatting elements with the requested tag, searching
		// backwards and stopping at the first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: treat the tag like any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first special element at
		// or after the formatting element's position in the stack of open
		// elements.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// There is no furthest block: pop everything up to and including
			// the formatting element, then drop it from the active formatting
			// list as well.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry immediately below the
		// formatting element; the bookmark is an index into p.afe.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step 14.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone of itself in both lists.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first reparenting, move the bookmark to just
			// after the new node in the active formatting list.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Make lastNode a child of node, detaching it from any
			// previous parent first.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes the slot
		// directly above the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
   1362 
   1363 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
   1364 // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
   1365 // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
   1366 func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
   1367 	for i := len(p.oe) - 1; i >= 0; i-- {
   1368 		// Two element nodes have the same tag if they have the same Data (a
   1369 		// string-typed field). As an optimization, for common HTML tags, each
   1370 		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
   1371 		// field), since integer comparison is faster than string comparison.
   1372 		// Uncommon (custom) tags get a zero DataAtom.
   1373 		//
   1374 		// The if condition here is equivalent to (p.oe[i].Data == tagName).
   1375 		if (p.oe[i].DataAtom == tagAtom) &&
   1376 			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
   1377 			p.oe = p.oe[:i]
   1378 			break
   1379 		}
   1380 		if isSpecialElement(p.oe[i]) {
   1381 			break
   1382 		}
   1383 	}
   1384 }
   1385 
   1386 // Section 12.2.6.4.8.
   1387 func textIM(p *parser) bool {
   1388 	switch p.tok.Type {
   1389 	case ErrorToken:
   1390 		p.oe.pop()
   1391 	case TextToken:
   1392 		d := p.tok.Data
   1393 		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
   1394 			// Ignore a newline at the start of a <textarea> block.
   1395 			if d != "" && d[0] == '\r' {
   1396 				d = d[1:]
   1397 			}
   1398 			if d != "" && d[0] == '\n' {
   1399 				d = d[1:]
   1400 			}
   1401 		}
   1402 		if d == "" {
   1403 			return true
   1404 		}
   1405 		p.addText(d)
   1406 		return true
   1407 	case EndTagToken:
   1408 		p.oe.pop()
   1409 	}
   1410 	p.im = p.originalIM
   1411 	p.originalIM = nil
   1412 	return p.tok.Type == EndTagToken
   1413 }
   1414 
   1415 // Section 12.2.6.4.9.
   1416 func inTableIM(p *parser) bool {
   1417 	switch p.tok.Type {
   1418 	case TextToken:
   1419 		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
   1420 		switch p.oe.top().DataAtom {
   1421 		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   1422 			if strings.Trim(p.tok.Data, whitespace) == "" {
   1423 				p.addText(p.tok.Data)
   1424 				return true
   1425 			}
   1426 		}
   1427 	case StartTagToken:
   1428 		switch p.tok.DataAtom {
   1429 		case a.Caption:
   1430 			p.clearStackToContext(tableScope)
   1431 			p.afe = append(p.afe, &scopeMarker)
   1432 			p.addElement()
   1433 			p.im = inCaptionIM
   1434 			return true
   1435 		case a.Colgroup:
   1436 			p.clearStackToContext(tableScope)
   1437 			p.addElement()
   1438 			p.im = inColumnGroupIM
   1439 			return true
   1440 		case a.Col:
   1441 			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
   1442 			return false
   1443 		case a.Tbody, a.Tfoot, a.Thead:
   1444 			p.clearStackToContext(tableScope)
   1445 			p.addElement()
   1446 			p.im = inTableBodyIM
   1447 			return true
   1448 		case a.Td, a.Th, a.Tr:
   1449 			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
   1450 			return false
   1451 		case a.Table:
   1452 			if p.popUntil(tableScope, a.Table) {
   1453 				p.resetInsertionMode()
   1454 				return false
   1455 			}
   1456 			// Ignore the token.
   1457 			return true
   1458 		case a.Style, a.Script, a.Template:
   1459 			return inHeadIM(p)
   1460 		case a.Input:
   1461 			for _, t := range p.tok.Attr {
   1462 				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
   1463 					p.addElement()
   1464 					p.oe.pop()
   1465 					return true
   1466 				}
   1467 			}
   1468 			// Otherwise drop down to the default action.
   1469 		case a.Form:
   1470 			if p.oe.contains(a.Template) || p.form != nil {
   1471 				// Ignore the token.
   1472 				return true
   1473 			}
   1474 			p.addElement()
   1475 			p.form = p.oe.pop()
   1476 		case a.Select:
   1477 			p.reconstructActiveFormattingElements()
   1478 			switch p.top().DataAtom {
   1479 			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   1480 				p.fosterParenting = true
   1481 			}
   1482 			p.addElement()
   1483 			p.fosterParenting = false
   1484 			p.framesetOK = false
   1485 			p.im = inSelectInTableIM
   1486 			return true
   1487 		}
   1488 	case EndTagToken:
   1489 		switch p.tok.DataAtom {
   1490 		case a.Table:
   1491 			if p.popUntil(tableScope, a.Table) {
   1492 				p.resetInsertionMode()
   1493 				return true
   1494 			}
   1495 			// Ignore the token.
   1496 			return true
   1497 		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
   1498 			// Ignore the token.
   1499 			return true
   1500 		case a.Template:
   1501 			return inHeadIM(p)
   1502 		}
   1503 	case CommentToken:
   1504 		p.addChild(&Node{
   1505 			Type: CommentNode,
   1506 			Data: p.tok.Data,
   1507 		})
   1508 		return true
   1509 	case DoctypeToken:
   1510 		// Ignore the token.
   1511 		return true
   1512 	case ErrorToken:
   1513 		return inBodyIM(p)
   1514 	}
   1515 
   1516 	p.fosterParenting = true
   1517 	defer func() { p.fosterParenting = false }()
   1518 
   1519 	return inBodyIM(p)
   1520 }
   1521 
   1522 // Section 12.2.6.4.11.
   1523 func inCaptionIM(p *parser) bool {
   1524 	switch p.tok.Type {
   1525 	case StartTagToken:
   1526 		switch p.tok.DataAtom {
   1527 		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
   1528 			if !p.popUntil(tableScope, a.Caption) {
   1529 				// Ignore the token.
   1530 				return true
   1531 			}
   1532 			p.clearActiveFormattingElements()
   1533 			p.im = inTableIM
   1534 			return false
   1535 		case a.Select:
   1536 			p.reconstructActiveFormattingElements()
   1537 			p.addElement()
   1538 			p.framesetOK = false
   1539 			p.im = inSelectInTableIM
   1540 			return true
   1541 		}
   1542 	case EndTagToken:
   1543 		switch p.tok.DataAtom {
   1544 		case a.Caption:
   1545 			if p.popUntil(tableScope, a.Caption) {
   1546 				p.clearActiveFormattingElements()
   1547 				p.im = inTableIM
   1548 			}
   1549 			return true
   1550 		case a.Table:
   1551 			if !p.popUntil(tableScope, a.Caption) {
   1552 				// Ignore the token.
   1553 				return true
   1554 			}
   1555 			p.clearActiveFormattingElements()
   1556 			p.im = inTableIM
   1557 			return false
   1558 		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
   1559 			// Ignore the token.
   1560 			return true
   1561 		}
   1562 	}
   1563 	return inBodyIM(p)
   1564 }
   1565 
   1566 // Section 12.2.6.4.12.
   1567 func inColumnGroupIM(p *parser) bool {
   1568 	switch p.tok.Type {
   1569 	case TextToken:
   1570 		s := strings.TrimLeft(p.tok.Data, whitespace)
   1571 		if len(s) < len(p.tok.Data) {
   1572 			// Add the initial whitespace to the current node.
   1573 			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   1574 			if s == "" {
   1575 				return true
   1576 			}
   1577 			p.tok.Data = s
   1578 		}
   1579 	case CommentToken:
   1580 		p.addChild(&Node{
   1581 			Type: CommentNode,
   1582 			Data: p.tok.Data,
   1583 		})
   1584 		return true
   1585 	case DoctypeToken:
   1586 		// Ignore the token.
   1587 		return true
   1588 	case StartTagToken:
   1589 		switch p.tok.DataAtom {
   1590 		case a.Html:
   1591 			return inBodyIM(p)
   1592 		case a.Col:
   1593 			p.addElement()
   1594 			p.oe.pop()
   1595 			p.acknowledgeSelfClosingTag()
   1596 			return true
   1597 		case a.Template:
   1598 			return inHeadIM(p)
   1599 		}
   1600 	case EndTagToken:
   1601 		switch p.tok.DataAtom {
   1602 		case a.Colgroup:
   1603 			if p.oe.top().DataAtom == a.Colgroup {
   1604 				p.oe.pop()
   1605 				p.im = inTableIM
   1606 			}
   1607 			return true
   1608 		case a.Col:
   1609 			// Ignore the token.
   1610 			return true
   1611 		case a.Template:
   1612 			return inHeadIM(p)
   1613 		}
   1614 	case ErrorToken:
   1615 		return inBodyIM(p)
   1616 	}
   1617 	if p.oe.top().DataAtom != a.Colgroup {
   1618 		return true
   1619 	}
   1620 	p.oe.pop()
   1621 	p.im = inTableIM
   1622 	return false
   1623 }
   1624 
   1625 // Section 12.2.6.4.13.
   1626 func inTableBodyIM(p *parser) bool {
   1627 	switch p.tok.Type {
   1628 	case StartTagToken:
   1629 		switch p.tok.DataAtom {
   1630 		case a.Tr:
   1631 			p.clearStackToContext(tableBodyScope)
   1632 			p.addElement()
   1633 			p.im = inRowIM
   1634 			return true
   1635 		case a.Td, a.Th:
   1636 			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
   1637 			return false
   1638 		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
   1639 			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
   1640 				p.im = inTableIM
   1641 				return false
   1642 			}
   1643 			// Ignore the token.
   1644 			return true
   1645 		}
   1646 	case EndTagToken:
   1647 		switch p.tok.DataAtom {
   1648 		case a.Tbody, a.Tfoot, a.Thead:
   1649 			if p.elementInScope(tableScope, p.tok.DataAtom) {
   1650 				p.clearStackToContext(tableBodyScope)
   1651 				p.oe.pop()
   1652 				p.im = inTableIM
   1653 			}
   1654 			return true
   1655 		case a.Table:
   1656 			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
   1657 				p.im = inTableIM
   1658 				return false
   1659 			}
   1660 			// Ignore the token.
   1661 			return true
   1662 		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
   1663 			// Ignore the token.
   1664 			return true
   1665 		}
   1666 	case CommentToken:
   1667 		p.addChild(&Node{
   1668 			Type: CommentNode,
   1669 			Data: p.tok.Data,
   1670 		})
   1671 		return true
   1672 	}
   1673 
   1674 	return inTableIM(p)
   1675 }
   1676 
   1677 // Section 12.2.6.4.14.
   1678 func inRowIM(p *parser) bool {
   1679 	switch p.tok.Type {
   1680 	case StartTagToken:
   1681 		switch p.tok.DataAtom {
   1682 		case a.Td, a.Th:
   1683 			p.clearStackToContext(tableRowScope)
   1684 			p.addElement()
   1685 			p.afe = append(p.afe, &scopeMarker)
   1686 			p.im = inCellIM
   1687 			return true
   1688 		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   1689 			if p.popUntil(tableScope, a.Tr) {
   1690 				p.im = inTableBodyIM
   1691 				return false
   1692 			}
   1693 			// Ignore the token.
   1694 			return true
   1695 		}
   1696 	case EndTagToken:
   1697 		switch p.tok.DataAtom {
   1698 		case a.Tr:
   1699 			if p.popUntil(tableScope, a.Tr) {
   1700 				p.im = inTableBodyIM
   1701 				return true
   1702 			}
   1703 			// Ignore the token.
   1704 			return true
   1705 		case a.Table:
   1706 			if p.popUntil(tableScope, a.Tr) {
   1707 				p.im = inTableBodyIM
   1708 				return false
   1709 			}
   1710 			// Ignore the token.
   1711 			return true
   1712 		case a.Tbody, a.Tfoot, a.Thead:
   1713 			if p.elementInScope(tableScope, p.tok.DataAtom) {
   1714 				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
   1715 				return false
   1716 			}
   1717 			// Ignore the token.
   1718 			return true
   1719 		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
   1720 			// Ignore the token.
   1721 			return true
   1722 		}
   1723 	}
   1724 
   1725 	return inTableIM(p)
   1726 }
   1727 
   1728 // Section 12.2.6.4.15.
   1729 func inCellIM(p *parser) bool {
   1730 	switch p.tok.Type {
   1731 	case StartTagToken:
   1732 		switch p.tok.DataAtom {
   1733 		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
   1734 			if p.popUntil(tableScope, a.Td, a.Th) {
   1735 				// Close the cell and reprocess.
   1736 				p.clearActiveFormattingElements()
   1737 				p.im = inRowIM
   1738 				return false
   1739 			}
   1740 			// Ignore the token.
   1741 			return true
   1742 		case a.Select:
   1743 			p.reconstructActiveFormattingElements()
   1744 			p.addElement()
   1745 			p.framesetOK = false
   1746 			p.im = inSelectInTableIM
   1747 			return true
   1748 		}
   1749 	case EndTagToken:
   1750 		switch p.tok.DataAtom {
   1751 		case a.Td, a.Th:
   1752 			if !p.popUntil(tableScope, p.tok.DataAtom) {
   1753 				// Ignore the token.
   1754 				return true
   1755 			}
   1756 			p.clearActiveFormattingElements()
   1757 			p.im = inRowIM
   1758 			return true
   1759 		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
   1760 			// Ignore the token.
   1761 			return true
   1762 		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   1763 			if !p.elementInScope(tableScope, p.tok.DataAtom) {
   1764 				// Ignore the token.
   1765 				return true
   1766 			}
   1767 			// Close the cell and reprocess.
   1768 			if p.popUntil(tableScope, a.Td, a.Th) {
   1769 				p.clearActiveFormattingElements()
   1770 			}
   1771 			p.im = inRowIM
   1772 			return false
   1773 		}
   1774 	}
   1775 	return inBodyIM(p)
   1776 }
   1777 
   1778 // Section 12.2.6.4.16.
   1779 func inSelectIM(p *parser) bool {
   1780 	switch p.tok.Type {
   1781 	case TextToken:
   1782 		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
   1783 	case StartTagToken:
   1784 		switch p.tok.DataAtom {
   1785 		case a.Html:
   1786 			return inBodyIM(p)
   1787 		case a.Option:
   1788 			if p.top().DataAtom == a.Option {
   1789 				p.oe.pop()
   1790 			}
   1791 			p.addElement()
   1792 		case a.Optgroup:
   1793 			if p.top().DataAtom == a.Option {
   1794 				p.oe.pop()
   1795 			}
   1796 			if p.top().DataAtom == a.Optgroup {
   1797 				p.oe.pop()
   1798 			}
   1799 			p.addElement()
   1800 		case a.Select:
   1801 			if !p.popUntil(selectScope, a.Select) {
   1802 				// Ignore the token.
   1803 				return true
   1804 			}
   1805 			p.resetInsertionMode()
   1806 		case a.Input, a.Keygen, a.Textarea:
   1807 			if p.elementInScope(selectScope, a.Select) {
   1808 				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
   1809 				return false
   1810 			}
   1811 			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
   1812 			p.tokenizer.NextIsNotRawText()
   1813 			// Ignore the token.
   1814 			return true
   1815 		case a.Script, a.Template:
   1816 			return inHeadIM(p)
   1817 		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
   1818 			// Don't let the tokenizer go into raw text mode when there are raw tags
   1819 			// to be ignored. These tags should be ignored from the tokenizer
   1820 			// properly.
   1821 			p.tokenizer.NextIsNotRawText()
   1822 			// Ignore the token.
   1823 			return true
   1824 		}
   1825 	case EndTagToken:
   1826 		switch p.tok.DataAtom {
   1827 		case a.Option:
   1828 			if p.top().DataAtom == a.Option {
   1829 				p.oe.pop()
   1830 			}
   1831 		case a.Optgroup:
   1832 			i := len(p.oe) - 1
   1833 			if p.oe[i].DataAtom == a.Option {
   1834 				i--
   1835 			}
   1836 			if p.oe[i].DataAtom == a.Optgroup {
   1837 				p.oe = p.oe[:i]
   1838 			}
   1839 		case a.Select:
   1840 			if !p.popUntil(selectScope, a.Select) {
   1841 				// Ignore the token.
   1842 				return true
   1843 			}
   1844 			p.resetInsertionMode()
   1845 		case a.Template:
   1846 			return inHeadIM(p)
   1847 		}
   1848 	case CommentToken:
   1849 		p.addChild(&Node{
   1850 			Type: CommentNode,
   1851 			Data: p.tok.Data,
   1852 		})
   1853 	case DoctypeToken:
   1854 		// Ignore the token.
   1855 		return true
   1856 	case ErrorToken:
   1857 		return inBodyIM(p)
   1858 	}
   1859 
   1860 	return true
   1861 }
   1862 
   1863 // Section 12.2.6.4.17.
   1864 func inSelectInTableIM(p *parser) bool {
   1865 	switch p.tok.Type {
   1866 	case StartTagToken, EndTagToken:
   1867 		switch p.tok.DataAtom {
   1868 		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
   1869 			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
   1870 				// Ignore the token.
   1871 				return true
   1872 			}
   1873 			// This is like p.popUntil(selectScope, a.Select), but it also
   1874 			// matches <math select>, not just <select>. Matching the MathML
   1875 			// tag is arguably incorrect (conceptually), but it mimics what
   1876 			// Chromium does.
   1877 			for i := len(p.oe) - 1; i >= 0; i-- {
   1878 				if n := p.oe[i]; n.DataAtom == a.Select {
   1879 					p.oe = p.oe[:i]
   1880 					break
   1881 				}
   1882 			}
   1883 			p.resetInsertionMode()
   1884 			return false
   1885 		}
   1886 	}
   1887 	return inSelectIM(p)
   1888 }
   1889 
   1890 // Section 12.2.6.4.18.
   1891 func inTemplateIM(p *parser) bool {
   1892 	switch p.tok.Type {
   1893 	case TextToken, CommentToken, DoctypeToken:
   1894 		return inBodyIM(p)
   1895 	case StartTagToken:
   1896 		switch p.tok.DataAtom {
   1897 		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   1898 			return inHeadIM(p)
   1899 		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
   1900 			p.templateStack.pop()
   1901 			p.templateStack = append(p.templateStack, inTableIM)
   1902 			p.im = inTableIM
   1903 			return false
   1904 		case a.Col:
   1905 			p.templateStack.pop()
   1906 			p.templateStack = append(p.templateStack, inColumnGroupIM)
   1907 			p.im = inColumnGroupIM
   1908 			return false
   1909 		case a.Tr:
   1910 			p.templateStack.pop()
   1911 			p.templateStack = append(p.templateStack, inTableBodyIM)
   1912 			p.im = inTableBodyIM
   1913 			return false
   1914 		case a.Td, a.Th:
   1915 			p.templateStack.pop()
   1916 			p.templateStack = append(p.templateStack, inRowIM)
   1917 			p.im = inRowIM
   1918 			return false
   1919 		default:
   1920 			p.templateStack.pop()
   1921 			p.templateStack = append(p.templateStack, inBodyIM)
   1922 			p.im = inBodyIM
   1923 			return false
   1924 		}
   1925 	case EndTagToken:
   1926 		switch p.tok.DataAtom {
   1927 		case a.Template:
   1928 			return inHeadIM(p)
   1929 		default:
   1930 			// Ignore the token.
   1931 			return true
   1932 		}
   1933 	case ErrorToken:
   1934 		if !p.oe.contains(a.Template) {
   1935 			// Ignore the token.
   1936 			return true
   1937 		}
   1938 		// TODO: remove this divergence from the HTML5 spec.
   1939 		//
   1940 		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   1941 		p.generateImpliedEndTags()
   1942 		for i := len(p.oe) - 1; i >= 0; i-- {
   1943 			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
   1944 				p.oe = p.oe[:i]
   1945 				break
   1946 			}
   1947 		}
   1948 		p.clearActiveFormattingElements()
   1949 		p.templateStack.pop()
   1950 		p.resetInsertionMode()
   1951 		return false
   1952 	}
   1953 	return false
   1954 }
   1955 
   1956 // Section 12.2.6.4.19.
   1957 func afterBodyIM(p *parser) bool {
   1958 	switch p.tok.Type {
   1959 	case ErrorToken:
   1960 		// Stop parsing.
   1961 		return true
   1962 	case TextToken:
   1963 		s := strings.TrimLeft(p.tok.Data, whitespace)
   1964 		if len(s) == 0 {
   1965 			// It was all whitespace.
   1966 			return inBodyIM(p)
   1967 		}
   1968 	case StartTagToken:
   1969 		if p.tok.DataAtom == a.Html {
   1970 			return inBodyIM(p)
   1971 		}
   1972 	case EndTagToken:
   1973 		if p.tok.DataAtom == a.Html {
   1974 			if !p.fragment {
   1975 				p.im = afterAfterBodyIM
   1976 			}
   1977 			return true
   1978 		}
   1979 	case CommentToken:
   1980 		// The comment is attached to the <html> element.
   1981 		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
   1982 			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
   1983 		}
   1984 		p.oe[0].AppendChild(&Node{
   1985 			Type: CommentNode,
   1986 			Data: p.tok.Data,
   1987 		})
   1988 		return true
   1989 	}
   1990 	p.im = inBodyIM
   1991 	return false
   1992 }
   1993 
   1994 // Section 12.2.6.4.20.
   1995 func inFramesetIM(p *parser) bool {
   1996 	switch p.tok.Type {
   1997 	case CommentToken:
   1998 		p.addChild(&Node{
   1999 			Type: CommentNode,
   2000 			Data: p.tok.Data,
   2001 		})
   2002 	case TextToken:
   2003 		// Ignore all text but whitespace.
   2004 		s := strings.Map(func(c rune) rune {
   2005 			switch c {
   2006 			case ' ', '\t', '\n', '\f', '\r':
   2007 				return c
   2008 			}
   2009 			return -1
   2010 		}, p.tok.Data)
   2011 		if s != "" {
   2012 			p.addText(s)
   2013 		}
   2014 	case StartTagToken:
   2015 		switch p.tok.DataAtom {
   2016 		case a.Html:
   2017 			return inBodyIM(p)
   2018 		case a.Frameset:
   2019 			p.addElement()
   2020 		case a.Frame:
   2021 			p.addElement()
   2022 			p.oe.pop()
   2023 			p.acknowledgeSelfClosingTag()
   2024 		case a.Noframes:
   2025 			return inHeadIM(p)
   2026 		}
   2027 	case EndTagToken:
   2028 		switch p.tok.DataAtom {
   2029 		case a.Frameset:
   2030 			if p.oe.top().DataAtom != a.Html {
   2031 				p.oe.pop()
   2032 				if p.oe.top().DataAtom != a.Frameset {
   2033 					p.im = afterFramesetIM
   2034 					return true
   2035 				}
   2036 			}
   2037 		}
   2038 	default:
   2039 		// Ignore the token.
   2040 	}
   2041 	return true
   2042 }
   2043 
   2044 // Section 12.2.6.4.21.
   2045 func afterFramesetIM(p *parser) bool {
   2046 	switch p.tok.Type {
   2047 	case CommentToken:
   2048 		p.addChild(&Node{
   2049 			Type: CommentNode,
   2050 			Data: p.tok.Data,
   2051 		})
   2052 	case TextToken:
   2053 		// Ignore all text but whitespace.
   2054 		s := strings.Map(func(c rune) rune {
   2055 			switch c {
   2056 			case ' ', '\t', '\n', '\f', '\r':
   2057 				return c
   2058 			}
   2059 			return -1
   2060 		}, p.tok.Data)
   2061 		if s != "" {
   2062 			p.addText(s)
   2063 		}
   2064 	case StartTagToken:
   2065 		switch p.tok.DataAtom {
   2066 		case a.Html:
   2067 			return inBodyIM(p)
   2068 		case a.Noframes:
   2069 			return inHeadIM(p)
   2070 		}
   2071 	case EndTagToken:
   2072 		switch p.tok.DataAtom {
   2073 		case a.Html:
   2074 			p.im = afterAfterFramesetIM
   2075 			return true
   2076 		}
   2077 	default:
   2078 		// Ignore the token.
   2079 	}
   2080 	return true
   2081 }
   2082 
   2083 // Section 12.2.6.4.22.
   2084 func afterAfterBodyIM(p *parser) bool {
   2085 	switch p.tok.Type {
   2086 	case ErrorToken:
   2087 		// Stop parsing.
   2088 		return true
   2089 	case TextToken:
   2090 		s := strings.TrimLeft(p.tok.Data, whitespace)
   2091 		if len(s) == 0 {
   2092 			// It was all whitespace.
   2093 			return inBodyIM(p)
   2094 		}
   2095 	case StartTagToken:
   2096 		if p.tok.DataAtom == a.Html {
   2097 			return inBodyIM(p)
   2098 		}
   2099 	case CommentToken:
   2100 		p.doc.AppendChild(&Node{
   2101 			Type: CommentNode,
   2102 			Data: p.tok.Data,
   2103 		})
   2104 		return true
   2105 	case DoctypeToken:
   2106 		return inBodyIM(p)
   2107 	}
   2108 	p.im = inBodyIM
   2109 	return false
   2110 }
   2111 
   2112 // Section 12.2.6.4.23.
   2113 func afterAfterFramesetIM(p *parser) bool {
   2114 	switch p.tok.Type {
   2115 	case CommentToken:
   2116 		p.doc.AppendChild(&Node{
   2117 			Type: CommentNode,
   2118 			Data: p.tok.Data,
   2119 		})
   2120 	case TextToken:
   2121 		// Ignore all text but whitespace.
   2122 		s := strings.Map(func(c rune) rune {
   2123 			switch c {
   2124 			case ' ', '\t', '\n', '\f', '\r':
   2125 				return c
   2126 			}
   2127 			return -1
   2128 		}, p.tok.Data)
   2129 		if s != "" {
   2130 			p.tok.Data = s
   2131 			return inBodyIM(p)
   2132 		}
   2133 	case StartTagToken:
   2134 		switch p.tok.DataAtom {
   2135 		case a.Html:
   2136 			return inBodyIM(p)
   2137 		case a.Noframes:
   2138 			return inHeadIM(p)
   2139 		}
   2140 	case DoctypeToken:
   2141 		return inBodyIM(p)
   2142 	default:
   2143 		// Ignore the token.
   2144 	}
   2145 	return true
   2146 }
   2147 
   2148 func ignoreTheRemainingTokens(p *parser) bool {
   2149 	return true
   2150 }
   2151 
// whitespaceOrNUL is the whitespace cutset extended with the NUL character.
// parseForeignContent trims text tokens with it to decide whether framesetOK
// may stay set.
const whitespaceOrNUL = whitespace + "\x00"
   2153 
   2154 // Section 12.2.6.5
   2155 func parseForeignContent(p *parser) bool {
   2156 	switch p.tok.Type {
   2157 	case TextToken:
   2158 		if p.framesetOK {
   2159 			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
   2160 		}
   2161 		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
   2162 		p.addText(p.tok.Data)
   2163 	case CommentToken:
   2164 		p.addChild(&Node{
   2165 			Type: CommentNode,
   2166 			Data: p.tok.Data,
   2167 		})
   2168 	case StartTagToken:
   2169 		if !p.fragment {
   2170 			b := breakout[p.tok.Data]
   2171 			if p.tok.DataAtom == a.Font {
   2172 			loop:
   2173 				for _, attr := range p.tok.Attr {
   2174 					switch attr.Key {
   2175 					case "color", "face", "size":
   2176 						b = true
   2177 						break loop
   2178 					}
   2179 				}
   2180 			}
   2181 			if b {
   2182 				for i := len(p.oe) - 1; i >= 0; i-- {
   2183 					n := p.oe[i]
   2184 					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
   2185 						p.oe = p.oe[:i+1]
   2186 						break
   2187 					}
   2188 				}
   2189 				return false
   2190 			}
   2191 		}
   2192 		current := p.adjustedCurrentNode()
   2193 		switch current.Namespace {
   2194 		case "math":
   2195 			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
   2196 		case "svg":
   2197 			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
   2198 			// SVG wants e.g. "foreignObject" with a capital second "O".
   2199 			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
   2200 				p.tok.DataAtom = a.Lookup([]byte(x))
   2201 				p.tok.Data = x
   2202 			}
   2203 			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
   2204 		default:
   2205 			panic("html: bad parser state: unexpected namespace")
   2206 		}
   2207 		adjustForeignAttributes(p.tok.Attr)
   2208 		namespace := current.Namespace
   2209 		p.addElement()
   2210 		p.top().Namespace = namespace
   2211 		if namespace != "" {
   2212 			// Don't let the tokenizer go into raw text mode in foreign content
   2213 			// (e.g. in an SVG <title> tag).
   2214 			p.tokenizer.NextIsNotRawText()
   2215 		}
   2216 		if p.hasSelfClosingToken {
   2217 			p.oe.pop()
   2218 			p.acknowledgeSelfClosingTag()
   2219 		}
   2220 	case EndTagToken:
   2221 		for i := len(p.oe) - 1; i >= 0; i-- {
   2222 			if p.oe[i].Namespace == "" {
   2223 				return p.im(p)
   2224 			}
   2225 			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
   2226 				p.oe = p.oe[:i]
   2227 				break
   2228 			}
   2229 		}
   2230 		return true
   2231 	default:
   2232 		// Ignore the token.
   2233 	}
   2234 	return true
   2235 }
   2236 
   2237 // Section 12.2.4.2.
   2238 func (p *parser) adjustedCurrentNode() *Node {
   2239 	if len(p.oe) == 1 && p.fragment && p.context != nil {
   2240 		return p.context
   2241 	}
   2242 	return p.oe.top()
   2243 }
   2244 
   2245 // Section 12.2.6.
   2246 func (p *parser) inForeignContent() bool {
   2247 	if len(p.oe) == 0 {
   2248 		return false
   2249 	}
   2250 	n := p.adjustedCurrentNode()
   2251 	if n.Namespace == "" {
   2252 		return false
   2253 	}
   2254 	if mathMLTextIntegrationPoint(n) {
   2255 		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
   2256 			return false
   2257 		}
   2258 		if p.tok.Type == TextToken {
   2259 			return false
   2260 		}
   2261 	}
   2262 	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
   2263 		return false
   2264 	}
   2265 	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
   2266 		return false
   2267 	}
   2268 	if p.tok.Type == ErrorToken {
   2269 		return false
   2270 	}
   2271 	return true
   2272 }
   2273 
   2274 // parseImpliedToken parses a token as though it had appeared in the parser's
   2275 // input.
   2276 func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
   2277 	realToken, selfClosing := p.tok, p.hasSelfClosingToken
   2278 	p.tok = Token{
   2279 		Type:     t,
   2280 		DataAtom: dataAtom,
   2281 		Data:     data,
   2282 	}
   2283 	p.hasSelfClosingToken = false
   2284 	p.parseCurrentToken()
   2285 	p.tok, p.hasSelfClosingToken = realToken, selfClosing
   2286 }
   2287 
   2288 // parseCurrentToken runs the current token through the parsing routines
   2289 // until it is consumed.
   2290 func (p *parser) parseCurrentToken() {
   2291 	if p.tok.Type == SelfClosingTagToken {
   2292 		p.hasSelfClosingToken = true
   2293 		p.tok.Type = StartTagToken
   2294 	}
   2295 
   2296 	consumed := false
   2297 	for !consumed {
   2298 		if p.inForeignContent() {
   2299 			consumed = parseForeignContent(p)
   2300 		} else {
   2301 			consumed = p.im(p)
   2302 		}
   2303 	}
   2304 
   2305 	if p.hasSelfClosingToken {
   2306 		// This is a parse error, but ignore it.
   2307 		p.hasSelfClosingToken = false
   2308 	}
   2309 }
   2310 
   2311 func (p *parser) parse() error {
   2312 	// Iterate until EOF. Any other error will cause an early return.
   2313 	var err error
   2314 	for err != io.EOF {
   2315 		// CDATA sections are allowed only in foreign content.
   2316 		n := p.oe.top()
   2317 		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
   2318 		// Read and parse the next token.
   2319 		p.tokenizer.Next()
   2320 		p.tok = p.tokenizer.Token()
   2321 		if p.tok.Type == ErrorToken {
   2322 			err = p.tokenizer.Err()
   2323 			if err != nil && err != io.EOF {
   2324 				return err
   2325 			}
   2326 		}
   2327 		p.parseCurrentToken()
   2328 	}
   2329 	return nil
   2330 }
   2331 
// Parse returns the parse tree for the HTML from the given Reader.
//
// It implements the HTML5 parsing algorithm
// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
// which is very complicated. The resultant tree can contain implicitly created
// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
// differ from the nesting implied by a naive processing of start and end
// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
// with no corresponding node in the resulting tree.
//
// The input is assumed to be UTF-8 encoded.
//
// Parse is equivalent to calling ParseWithOptions without any options. If the
// tokenizer reports an error other than io.EOF, that error is returned
// together with a nil node.
func Parse(r io.Reader) (*Node, error) {
	return ParseWithOptions(r)
}
   2346 
// ParseFragment parses a fragment of HTML and returns the nodes that were
// found. If the fragment is the InnerHTML for an existing element, pass that
// element in context.
//
// It has the same intricacies as Parse.
//
// ParseFragment is equivalent to calling ParseFragmentWithOptions without any
// options.
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
	return ParseFragmentWithOptions(r, context)
}
   2355 
// ParseOption configures a parser.
//
// Options are passed to ParseWithOptions and ParseFragmentWithOptions, which
// apply them in order to the newly created parser before parsing starts.
type ParseOption func(p *parser)
   2358 
   2359 // ParseOptionEnableScripting configures the scripting flag.
   2360 // https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
   2361 //
   2362 // By default, scripting is enabled.
   2363 func ParseOptionEnableScripting(enable bool) ParseOption {
   2364 	return func(p *parser) {
   2365 		p.scripting = enable
   2366 	}
   2367 }
   2368 
   2369 // ParseWithOptions is like Parse, with options.
   2370 func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
   2371 	p := &parser{
   2372 		tokenizer: NewTokenizer(r),
   2373 		doc: &Node{
   2374 			Type: DocumentNode,
   2375 		},
   2376 		scripting:  true,
   2377 		framesetOK: true,
   2378 		im:         initialIM,
   2379 	}
   2380 
   2381 	for _, f := range opts {
   2382 		f(p)
   2383 	}
   2384 
   2385 	if err := p.parse(); err != nil {
   2386 		return nil, err
   2387 	}
   2388 	return p.doc, nil
   2389 }
   2390 
   2391 // ParseFragmentWithOptions is like ParseFragment, with options.
   2392 func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
   2393 	contextTag := ""
   2394 	if context != nil {
   2395 		if context.Type != ElementNode {
   2396 			return nil, errors.New("html: ParseFragment of non-element Node")
   2397 		}
   2398 		// The next check isn't just context.DataAtom.String() == context.Data because
   2399 		// it is valid to pass an element whose tag isn't a known atom. For example,
   2400 		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
   2401 		if context.DataAtom != a.Lookup([]byte(context.Data)) {
   2402 			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
   2403 		}
   2404 		contextTag = context.DataAtom.String()
   2405 	}
   2406 	p := &parser{
   2407 		doc: &Node{
   2408 			Type: DocumentNode,
   2409 		},
   2410 		scripting: true,
   2411 		fragment:  true,
   2412 		context:   context,
   2413 	}
   2414 	if context != nil && context.Namespace != "" {
   2415 		p.tokenizer = NewTokenizer(r)
   2416 	} else {
   2417 		p.tokenizer = NewTokenizerFragment(r, contextTag)
   2418 	}
   2419 
   2420 	for _, f := range opts {
   2421 		f(p)
   2422 	}
   2423 
   2424 	root := &Node{
   2425 		Type:     ElementNode,
   2426 		DataAtom: a.Html,
   2427 		Data:     a.Html.String(),
   2428 	}
   2429 	p.doc.AppendChild(root)
   2430 	p.oe = nodeStack{root}
   2431 	if context != nil && context.DataAtom == a.Template {
   2432 		p.templateStack = append(p.templateStack, inTemplateIM)
   2433 	}
   2434 	p.resetInsertionMode()
   2435 
   2436 	for n := context; n != nil; n = n.Parent {
   2437 		if n.Type == ElementNode && n.DataAtom == a.Form {
   2438 			p.form = n
   2439 			break
   2440 		}
   2441 	}
   2442 
   2443 	if err := p.parse(); err != nil {
   2444 		return nil, err
   2445 	}
   2446 
   2447 	parent := p.doc
   2448 	if context != nil {
   2449 		parent = root
   2450 	}
   2451 
   2452 	var result []*Node
   2453 	for c := parent.FirstChild; c != nil; {
   2454 		next := c.NextSibling
   2455 		parent.RemoveChild(c)
   2456 		result = append(result, c)
   2457 		c = next
   2458 	}
   2459 	return result, nil
   2460 }