Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
73b68bc
+Added HtmlPolicyBuilder methods for excluding elements with specific…
Oct 9, 2015
e75d980
Reverted changes
Oct 9, 2015
ea02e71
added methods for excluding elements with specific empty or missing a…
Oct 9, 2015
2537933
Added TestCase for disallowWithoutAttribute()
Nov 9, 2015
e6dd2ea
s/master/main/ for default branch
mikesamuel Jun 15, 2020
f3f56d4
Release candidate 20200615.1
mikesamuel Jun 15, 2020
fd6b2dd
Bumped dev version
mikesamuel Jun 15, 2020
eb6ef02
Do not lcase element or attribute names that match SVG or MathML name…
mikesamuel Jul 13, 2020
25c3d64
Release candidate 20200713.1
mikesamuel Jul 13, 2020
ffe5cfa
Bumped dev version
mikesamuel Jul 13, 2020
c7db2d4
we use spotbugs now instead of findbugs
mikesamuel Jul 13, 2020
ca40697
s/master/main/ in doc URLs
mikesamuel Jul 13, 2020
af0ca83
Bump junit from 4.12 to 4.13.1 in /parent (#215)
dependabot[bot] Dec 7, 2020
acaf3f2
hsl and hsla (#216)
aakritisi Dec 9, 2020
33d319f
Fix code formatting lint checks (#217)
mikesamuel Dec 14, 2020
020d5d0
Fixed allowAtributes("style").globally() (#218)
aakritisi Dec 21, 2020
ad287c3
Upgrade to a modern guava dependency
mikesamuel May 13, 2021
be33ec6
Render style tag content more strictly.
mikesamuel Oct 18, 2021
374ea2f
Release candidate 20211018.1
mikesamuel Oct 18, 2021
7d76ba9
Bumped dev version
mikesamuel Oct 18, 2021
e2b29e8
Update vulnerabilities.md
mikesamuel Oct 18, 2021
14f84fd
Recognize that `<style>` is not really workable inside `<select>`
mikesamuel Oct 18, 2021
62a0715
Release candidate 20211018.2
mikesamuel Oct 18, 2021
06b299c
Bumped dev version
mikesamuel Oct 18, 2021
5372c74
Decode attribute content differently from text node content (#255)
mikesamuel Jun 8, 2022
c2c74fc
Fix missing null checks in uses of consumeIdentOrUrlOrFunctions (#266)
mikesamuel Jun 8, 2022
e35ef4f
Release candidate 20220608.1
mikesamuel Jun 8, 2022
3756979
Bumped dev version
mikesamuel Jun 8, 2022
0372f4f
Merge branch 'OWASP:master' into master
forum-is Nov 29, 2022
ccb4c18
Merge remote-tracking branch 'upstream/main'
forum-is Jan 5, 2023
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Decode attribute content differently from text node content (#255)
As described in issue #254, `&para` is a complete character reference when decoding text node content, but not when decoding attribute content, which causes problems for URL attribute values like `/test?param1=foo&param2=bar`.

As shown via JS test code in that issue, a small set of next characters prevent a character reference name match from being considered complete.

This commit:
- modifies the decode functions to take an extra parameter `boolean inAttribute`, and modifies the Trie traversal loops to not store a longest match so far based on that parameter and some next-character tests
- modifies the HTML lexer to pass that flag appropriately
- for backwards compat, leaves the old APIs in place but `@deprecated`
- adds unit tests for the decode functions
- adds a unit test for the specific input from the issue

This change should make us more conformant with observed browser behaviour, so it is not expected to cause compatibility problems for existing users.

Fixes #254
  • Loading branch information
mikesamuel authored Jun 8, 2022
commit 5372c747173f312b9342fac2ea6bc7445d3abeee
15 changes: 14 additions & 1 deletion src/main/java/org/owasp/html/Encoding.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,21 @@ public final class Encoding {
*
* @param s text/html
* @return text/plain
* @deprecated use {@link #decodeHtml(String, boolean)} to specify whether
*     {@code s} is in an attribute value
*/
@Deprecated
public static String decodeHtml(String s) {
  // Legacy entry point: decodes as text node content (inAttribute == false).
  // The annotation mirrors the Javadoc @deprecated tag so that compilers and
  // IDEs actually flag remaining callers.
  return decodeHtml(s, false);
}

/**
* Decodes HTML entities to produce a string containing only valid
* Unicode scalar values.
*
* @param s text/html
* @param inAttribute is s in an attribute value?
* @return text/plain
*/
public static String decodeHtml(String s, boolean inAttribute) {
int firstAmp = s.indexOf('&');
int safeLimit = longestPrefixOfGoodCodeunits(s);
if ((firstAmp & safeLimit) < 0) { return s; }
Expand All @@ -55,7 +68,7 @@ public static String decodeHtml(String s) {
int amp = firstAmp;
while (amp >= 0) {
sb.append(s, pos, amp);
int end = HtmlEntities.appendDecodedEntity(s, amp, n, sb);
int end = HtmlEntities.appendDecodedEntity(s, amp, n, inAttribute, sb);
pos = end;
amp = s.indexOf('&', end);
}
Expand Down
50 changes: 46 additions & 4 deletions src/main/java/org/owasp/html/HtmlEntities.java
Original file line number Diff line number Diff line change
Expand Up @@ -2307,9 +2307,26 @@ final class HtmlEntities {
* in {@code html}.
* @param sb string builder to append to.
* @return The offset after the end of the decoded sequence in {@code html}.
* @deprecated use {@link #appendDecodedEntity(String, int, int, boolean, StringBuilder)}
*     to specify whether {@code html} is in an attribute value.
*/
public static int appendDecodedEntity(
String html, int offset, int limit, StringBuilder sb) {
String html, int offset, int limit, StringBuilder sb) {
return appendDecodedEntity(html, offset, limit, false, sb);
}

/**
* Decodes any HTML entity at the given location and appends it to a string
* builder. This handles both named and numeric entities.
*
* @param html HTML text.
* @param offset the position of the sequence to decode in {@code html}.
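* @param limit the last position that could be part of the sequence to decode
*     in {@code html}.
* @param inAttribute true if {@code html} is attribute value content, in
*     which case a named reference that does not end in {@code ;} is not
*     treated as complete when followed by a letter, a digit, or {@code =}.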
* @param sb string builder to append to.
* @return The offset after the end of the decoded sequence in {@code html}.
*/
public static int appendDecodedEntity(
String html, int offset, int limit, boolean inAttribute, StringBuilder sb) {
char ch = html.charAt(offset);
if ('&' != ch) {
sb.append(ch);
Expand Down Expand Up @@ -2422,19 +2439,20 @@ public static int appendDecodedEntity(
char nameChar = html.charAt(i);
t = t.lookup(nameChar);
if (t == null) { break; }
if (t.isTerminal()) {
if (t.isTerminal() && mayComplete(inAttribute, html, i, limit)) {
longestDecode = t;
tail = i + 1;
}
}
// Try again, case insensitively.
if (longestDecode == null) {
t = ENTITY_TRIE;
for (int i = offset + 1; i < limit; ++i) {
char nameChar = html.charAt(i);
if ('Z' >= nameChar && nameChar >= 'A') { nameChar |= 32; }
t = t.lookup(nameChar);
if (t == null) { break; }
if (t.isTerminal()) {
if (t.isTerminal() && mayComplete(inAttribute, html, i, limit)) {
longestDecode = t;
tail = i + 1;
}
Expand All @@ -2456,11 +2474,35 @@ public static int appendDecodedEntity(

private static boolean isHtmlIdContinueChar(char ch) {
int chLower = ch | 32;
return ('0' <= chLower && chLower <= '9')
return ('0' <= ch && ch <= '9')
|| ('a' <= chLower && chLower <= 'z')
|| ('-' == ch);
}

/**
 * Decides whether a terminal trie match whose last character is at index
 * {@code i} of {@code html} may be recorded as a complete named character
 * reference.
 *
 * @param inAttribute whether {@code html} is attribute value content.
 * @param html the text being decoded.
 * @param i index of the last matched name character.
 * @param limit exclusive bound on the indices that may be inspected.
 * @return false only when in an attribute, the match does not end in
 *     {@code ;}, and the following character continues the name.
 */
private static boolean mayComplete(boolean inAttribute, String html, int i, int limit) {
  // Text node content: every terminal match counts.
  if (!inAttribute) {
    return true;
  }
  // An explicit ';' terminator always completes the reference.
  if (html.charAt(i) == ';') {
    return true;
  }
  // Nothing follows within the limit, so nothing can block the match.
  int next = i + 1;
  if (next >= limit) {
    return true;
  }
  // The next character may block treating this as a full match.
  // This avoids problems like "&para" being treated as a decoding in
  // <a href="?foo&param=1">
  return !continuesCharacterReferenceName(html.charAt(next));
}

/**
 * True if {@code ch} is an ASCII digit, an ASCII letter of either case, or
 * an equals sign.
 *
 * @see <a href="https://github.com/OWASP/java-html-sanitizer/issues/254#issuecomment-1080864368"
 *     >comments in issue 254</a>
 */
private static boolean continuesCharacterReferenceName(char ch) {
  if (ch == '=') {
    return true;
  }
  if (ch >= '0' && ch <= '9') {
    return true;
  }
  // Setting bit 5 folds 'A'..'Z' onto 'a'..'z'.
  int folded = ch | 32;
  return folded >= 'a' && folded <= 'z';
}

// /** A possible entity name like "amp" or "gt". */
// public static boolean isEntityName(String name) {
// Trie t = ENTITY_TRIE;
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/org/owasp/html/HtmlSanitizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public static void sanitize(
switch (token.type) {
case TEXT:
receiver.text(
Encoding.decodeHtml(htmlContent.substring(token.start, token.end)));
Encoding.decodeHtml(htmlContent.substring(token.start, token.end), false));
break;
case UNESCAPED:
receiver.text(Encoding.stripBannedCodeunits(
Expand Down Expand Up @@ -177,8 +177,9 @@ public static void sanitize(
htmlContent.substring(tagBodyToken.start, tagBodyToken.end)));
break;
case ATTRVALUE:
attrs.add(Encoding.decodeHtml(stripQuotes(
htmlContent.substring(tagBodyToken.start, tagBodyToken.end))));
String attributeContentRaw =
stripQuotes(htmlContent.substring(tagBodyToken.start, tagBodyToken.end));
attrs.add(Encoding.decodeHtml(attributeContentRaw, true));
attrsReadyForName = true;
break;
case TAGEND:
Expand Down
Loading