Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,8 @@ private static enum State {
COMMENT,
COMMENT_DASH,
COMMENT_DASH_DASH,
COMMENT_DASH_DASH_BANG,
COMMENT_DASH_AFTER_BANG,
DIRECTIVE,
DONE,
BOGUS_COMMENT,
Expand Down Expand Up @@ -640,20 +642,35 @@ && canonicalElementName(start + 2, end)
case BANG:
if ('-' == ch) {
state = State.BANG_DASH;
} else if('>' == ch) { // <!> is a valid html comment
state = State.DONE;
type = HtmlTokenType.COMMENT;
} else {
state = State.DIRECTIVE;
}
break;
case BANG_DASH:
if ('-' == ch) {
state = State.COMMENT;
state = State.COMMENT_DASH_AFTER_BANG;
} else {
state = State.DIRECTIVE;
}
break;
case COMMENT_DASH_AFTER_BANG:
if ('>' == ch) { // <!--> is a valid html comment
state = State.DONE;
type = HtmlTokenType.COMMENT;
} else if ('-' == ch) { // <!---> is a valid html comment
state = State.COMMENT_DASH_AFTER_BANG;
} else {
state = State.COMMENT;
}
break;
case COMMENT:
if ('-' == ch) {
state = State.COMMENT_DASH;
} else {
state = State.COMMENT;
}
break;
case COMMENT_DASH:
Expand All @@ -665,12 +682,24 @@ && canonicalElementName(start + 2, end)
if ('>' == ch) {
state = State.DONE;
type = HtmlTokenType.COMMENT;
} else if ('!' == ch) { // --!> is also valid closing sequence
state = State.COMMENT_DASH_DASH_BANG;
} else if ('-' == ch) {
state = State.COMMENT_DASH_DASH;
} else {
state = State.COMMENT_DASH;
}
break;
case COMMENT_DASH_DASH_BANG:
if ('>' == ch) {
state = State.DONE;
type = HtmlTokenType.COMMENT;
}else if ('-' == ch) {
state = State.COMMENT_DASH;
}else {
state = State.COMMENT;
}
break;
case DIRECTIVE:
if ('>' == ch) {
type = HtmlTokenType.DIRECTIVE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,65 @@ public static final void testShortTags() {
"TAGEND: >");
}

/**
 * A comment declaration containing zero comments ({@code <!>}) is expected to
 * lex as a single COMMENT token, with the {@code img} element that follows it
 * tokenized as an ordinary tag (tag begin, attribute names/values, tag end).
 *
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1866#section-3.2.5">RFC 1866, section 3.2.5</a>
 */
@Test
public static final void testCommentDeclarationWith0CommentsAndXss() throws Exception {
  final String markup = "<!><img src=1 onError=alert(\"nice\")>";
  assertTokens(
      markup,
      // The empty declaration is a token of its own ...
      "COMMENT: <!>",
      // ... so the event-handler attribute stays visible to the lexer's consumers.
      "TAGBEGIN: <img",
      "ATTRNAME: src",
      "ATTRVALUE: 1",
      "ATTRNAME: onError",
      "ATTRVALUE: alert(\"nice\")",
      "TAGEND: >");
}

/**
 * A comment whose text ends with {@code <!} directly before the closing
 * {@code -->} is expected to lex as one COMMENT token; the anchor element
 * after it is tokenized as regular open and close tags.
 *
 * <p>Input taken from https://html.spec.whatwg.org/#comments
 */
@Test
public static final void testTextEndingWithTagOpenAndBang() throws Exception {
  final String markup = "<!--My favorite operators are > and <!--><a></a>";
  assertTokens(
      markup,
      "COMMENT: <!--My favorite operators are > and <!-->",
      "TAGBEGIN: <a",
      "TAGEND: >",
      "TAGBEGIN: </a",
      "TAGEND: >");
}


/**
 * A {@code --!} sequence inside a comment that is not immediately followed by
 * {@code >} must not terminate the comment: the entire input is expected to
 * come back as a single COMMENT token that ends at the final {@code -->}.
 */
@Test
public static final void testDashDashBangComment() throws Exception {
  final String markup = "<!-- --!-->";
  assertTokens(
      markup,
      "COMMENT: <!-- --!-->");
}
/**
 * Exercises comments that are closed right after the opening dashes:
 * {@code <!-->} and {@code <!--->} are each expected to lex as a complete
 * COMMENT token, while the trailing {@code <!->c} (a single dash, and no
 * further {@code >} before end of input) is expected to be reported as
 * SERVERCODE.
 */
@Test
public static final void testAbruptClosingOfEmptyComment() throws Exception {
  final String markup = "<!--><img>a<!--->b<!->c";
  assertTokens(
      markup,
      "COMMENT: <!-->",
      "TAGBEGIN: <img",
      "TAGEND: >",
      "TEXT: a",
      "COMMENT: <!--->",
      "TEXT: b",
      "SERVERCODE: <!->c");
}

/**
 * A comment terminated with {@code --!>} instead of {@code -->} is expected
 * to lex as a complete COMMENT token, leaving the {@code img} element after
 * it to be tokenized as a regular tag.
 */
@Test
public static final void testIncorrectlyClosedComment() throws Exception {
  final String markup = "<!-- Comment --!><img>";
  assertTokens(
      markup,
      "COMMENT: <!-- Comment --!>",
      "TAGBEGIN: <img",
      "TAGEND: >");
}

private static void lex(String input, Appendable out) throws Exception {
HtmlLexer lexer = new HtmlLexer(input);
int maxTypeLength = 0;
Expand Down