Skip to content

Commit c2c74fc

Browse files
authored
Fix missing null checks in uses of consumeIdentOrUrlOrFunction (OWASP#266)
CssTokens code assumed that consumeIdentOrUrlOrFunction always returned a non-null token type and consumed at least one character. This commit audits all uses of that function and makes each call site check for a null result and ensure that the lexer still makes progress.
1 parent 5372c74 commit c2c74fc

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

src/main/java/org/owasp/html/CssTokens.java

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ void lex() {
453453
char ch = css.charAt(pos);
454454
int startOfToken = pos;
455455
int startOfOutputToken = sb.length();
456-
final TokenType type;
456+
TokenType type;
457457
switch (ch) {
458458
case '\t': case '\n': case '\f': case '\r': case ' ': case '\ufeff':
459459
consumeIgnorable();
@@ -514,6 +514,7 @@ void lex() {
514514
type = TokenType.UNICODE_RANGE;
515515
} else {
516516
type = consumeIdentOrUrlOrFunction();
517+
assert type != null;
517518
}
518519
break;
519520
case '0': case '1': case '2': case '3': case '4':
@@ -533,7 +534,14 @@ && isDecimal(css.charAt(pos + 2)))) {
533534
if (consumeIgnorable()) { // -->
534535
type = TokenType.WHITESPACE;
535536
} else {
536-
type = consumeIdentOrUrlOrFunction();
537+
TokenType identType = consumeIdentOrUrlOrFunction();
538+
if (identType == null) {
539+
breakOutput();
540+
consumeDelim(ch);
541+
type = TokenType.DELIM;
542+
} else {
543+
type = identType;
544+
}
537545
}
538546
} else if (isIdentPart(lookahead)) {
539547
// treat ".<IDENT>" as one token.
@@ -589,9 +597,17 @@ && isDecimal(css.charAt(pos + 2)))) {
589597
}
590598
break;
591599
}
592-
case '_':
593-
type = consumeIdentOrUrlOrFunction();
600+
case '_': {
601+
TokenType identType = consumeIdentOrUrlOrFunction();
602+
if (identType != null) {
603+
type = identType;
604+
} else {
605+
++pos; // drop
606+
breakOutput();
607+
type = TokenType.WHITESPACE;
608+
}
594609
break;
610+
}
595611
case '\\': {
596612
// Optimistically parse as an ident.
597613
TokenType identType = consumeIdentOrUrlOrFunction();
@@ -624,7 +640,13 @@ && isDecimal(css.charAt(pos + 2)))) {
624640
type = TokenType.WHITESPACE;
625641
}
626642
}
627-
assert pos > startOfToken
643+
// Make progress even in the face of errors above.
644+
if (type == null && pos == startOfToken) {
645+
type = TokenType.WHITESPACE;
646+
breakOutput();
647+
++pos;
648+
}
649+
assert type != null && pos > startOfToken
628650
: "empty token at " + pos + ", ch0=" + css.charAt(startOfToken)
629651
+ ":U+" + Integer.toHexString(css.charAt(startOfToken));
630652
int endOfOutputToken = sb.length();

src/test/java/org/owasp/html/HtmlSanitizerTest.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,13 @@ public static final void testIssue254SemicolonlessNamedCharactersInUrls() {
447447
assertEquals(want, sanitize(input));
448448
}
449449

450+
@Test
451+
public static final void testStylingCornerCase() {
452+
String input = "<a style=\\006-\\000038";
453+
String want = "";
454+
assertEquals(want, sanitize(input));
455+
}
456+
450457
private static String sanitize(@Nullable String html) {
451458
StringBuilder sb = new StringBuilder();
452459
HtmlStreamRenderer renderer = HtmlStreamRenderer.create(

0 commit comments

Comments
 (0)