@@ -136,7 +136,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
136
136
return - 1
137
137
}
138
138
default :
139
- panic ("unreachable" )
139
+ panic (fmt . Sprintf ( "html: internal error: indexOfElementInScope unknown scope: %d" , s ) )
140
140
}
141
141
}
142
142
switch s {
@@ -179,7 +179,7 @@ func (p *parser) clearStackToContext(s scope) {
179
179
return
180
180
}
181
181
default :
182
- panic ("unreachable" )
182
+ panic (fmt . Sprintf ( "html: internal error: clearStackToContext unknown scope: %d" , s ) )
183
183
}
184
184
}
185
185
}
@@ -231,7 +231,14 @@ func (p *parser) addChild(n *Node) {
231
231
}
232
232
233
233
if n .Type == ElementNode {
234
- p .oe = append (p .oe , n )
234
+ p .insertOpenElement (n )
235
+ }
236
+ }
237
+
238
+ func (p * parser ) insertOpenElement (n * Node ) {
239
+ p .oe = append (p .oe , n )
240
+ if len (p .oe ) > 512 {
241
+ panic ("html: open stack of elements exceeds 512 nodes" )
235
242
}
236
243
}
237
244
@@ -810,7 +817,7 @@ func afterHeadIM(p *parser) bool {
810
817
p .im = inFramesetIM
811
818
return true
812
819
case a .Base , a .Basefont , a .Bgsound , a .Link , a .Meta , a .Noframes , a .Script , a .Style , a .Template , a .Title :
813
- p .oe = append ( p . oe , p .head )
820
+ p .insertOpenElement ( p .head )
814
821
defer p .oe .remove (p .head )
815
822
return inHeadIM (p )
816
823
case a .Head :
@@ -1678,7 +1685,7 @@ func inTableBodyIM(p *parser) bool {
1678
1685
return inTableIM (p )
1679
1686
}
1680
1687
1681
- // Section 12 .2.6.4.14.
1688
+ // Section 13.2.6.4.14.
1682
1689
func inRowIM (p * parser ) bool {
1683
1690
switch p .tok .Type {
1684
1691
case StartTagToken :
@@ -1690,7 +1697,9 @@ func inRowIM(p *parser) bool {
1690
1697
p .im = inCellIM
1691
1698
return true
1692
1699
case a .Caption , a .Col , a .Colgroup , a .Tbody , a .Tfoot , a .Thead , a .Tr :
1693
- if p .popUntil (tableScope , a .Tr ) {
1700
+ if p .elementInScope (tableScope , a .Tr ) {
1701
+ p .clearStackToContext (tableRowScope )
1702
+ p .oe .pop ()
1694
1703
p .im = inTableBodyIM
1695
1704
return false
1696
1705
}
@@ -1700,22 +1709,28 @@ func inRowIM(p *parser) bool {
1700
1709
case EndTagToken :
1701
1710
switch p .tok .DataAtom {
1702
1711
case a .Tr :
1703
- if p .popUntil (tableScope , a .Tr ) {
1712
+ if p .elementInScope (tableScope , a .Tr ) {
1713
+ p .clearStackToContext (tableRowScope )
1714
+ p .oe .pop ()
1704
1715
p .im = inTableBodyIM
1705
1716
return true
1706
1717
}
1707
1718
// Ignore the token.
1708
1719
return true
1709
1720
case a .Table :
1710
- if p .popUntil (tableScope , a .Tr ) {
1721
+ if p .elementInScope (tableScope , a .Tr ) {
1722
+ p .clearStackToContext (tableRowScope )
1723
+ p .oe .pop ()
1711
1724
p .im = inTableBodyIM
1712
1725
return false
1713
1726
}
1714
1727
// Ignore the token.
1715
1728
return true
1716
1729
case a .Tbody , a .Tfoot , a .Thead :
1717
- if p .elementInScope (tableScope , p .tok .DataAtom ) {
1718
- p .parseImpliedToken (EndTagToken , a .Tr , a .Tr .String ())
1730
+ if p .elementInScope (tableScope , p .tok .DataAtom ) && p .elementInScope (tableScope , a .Tr ) {
1731
+ p .clearStackToContext (tableRowScope )
1732
+ p .oe .pop ()
1733
+ p .im = inTableBodyIM
1719
1734
return false
1720
1735
}
1721
1736
// Ignore the token.
@@ -2222,16 +2237,20 @@ func parseForeignContent(p *parser) bool {
2222
2237
p .acknowledgeSelfClosingTag ()
2223
2238
}
2224
2239
case EndTagToken :
2240
+ if strings .EqualFold (p .oe [len (p .oe )- 1 ].Data , p .tok .Data ) {
2241
+ p .oe = p .oe [:len (p .oe )- 1 ]
2242
+ return true
2243
+ }
2225
2244
for i := len (p .oe ) - 1 ; i >= 0 ; i -- {
2226
- if p .oe [i ].Namespace == "" {
2227
- return p .im (p )
2228
- }
2229
2245
if strings .EqualFold (p .oe [i ].Data , p .tok .Data ) {
2230
2246
p .oe = p .oe [:i ]
2247
+ return true
2248
+ }
2249
+ if i > 0 && p .oe [i - 1 ].Namespace == "" {
2231
2250
break
2232
2251
}
2233
2252
}
2234
- return true
2253
+ return p . im ( p )
2235
2254
default :
2236
2255
// Ignore the token.
2237
2256
}
@@ -2312,9 +2331,13 @@ func (p *parser) parseCurrentToken() {
2312
2331
}
2313
2332
}
2314
2333
2315
- func (p * parser ) parse () error {
2334
+ func (p * parser ) parse () (err error ) {
2335
+ defer func () {
2336
+ if panicErr := recover (); panicErr != nil {
2337
+ err = fmt .Errorf ("%s" , panicErr )
2338
+ }
2339
+ }()
2316
2340
// Iterate until EOF. Any other error will cause an early return.
2317
- var err error
2318
2341
for err != io .EOF {
2319
2342
// CDATA sections are allowed only in foreign content.
2320
2343
n := p .oe .top ()
@@ -2343,6 +2366,8 @@ func (p *parser) parse() error {
2343
2366
// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2344
2367
// with no corresponding node in the resulting tree.
2345
2368
//
2369
+ // Parse will reject HTML that is nested deeper than 512 elements.
2370
+ //
2346
2371
// The input is assumed to be UTF-8 encoded.
2347
2372
func Parse (r io.Reader ) (* Node , error ) {
2348
2373
return ParseWithOptions (r )
0 commit comments