@@ -99,33 +99,41 @@ public interface Policy extends HtmlStreamEventReceiver {
9999 * {@link HtmlStreamRenderer} after filtering.
100100 * {@link HtmlPolicyBuilder} provides an easy way to create policies.
101101 */
102- public static void sanitize (@ Nullable String html , final Policy policy ) {
103- if (html == null ) { html = "" ; }
104-
105- TagBalancingHtmlStreamEventReceiver balancer
106- = new TagBalancingHtmlStreamEventReceiver (policy );
107-
108- // According to Opera the maximum table nesting depth seen in the wild is
109- // 795, but 99.99% of documents have a table nesting depth of less than 22.
110- // Since each table has a nesting depth of 4 (incl. TBODY), this leads to a
111- // document depth of 90 (incl. HTML & BODY).
112- // Obviously table nesting depth is not the same as whole document depth,
113- // but it is the best proxy I have available.
114- // See http://devfiles.myopera.com/articles/590/maxtabledepth-url.htm for
115- // the original data.
102+ public static void sanitize (
103+ @ Nullable String html , final Policy policy ) {
104+ sanitize (html , policy , HtmlStreamEventProcessor .Processors .IDENTITY ); // delegates to the three-argument overload; IDENTITY presumably adds no preprocessing -- confirm
105+ }
116106
117- // Webkit defines the maximum HTML parser tree depth as 512.
118- // http://trac.webkit.org/browser/trunk/Source/WebCore/page/Settings.h#L408
119- // static const unsigned defaultMaximumHTMLParserDOMTreeDepth = 512;
107+ /**
108+ * Sanitizes the given HTML by applying the given policy to it.
109+ *
110+ * <p>
111+ * This method is not in the trusted computing base (TCB).
112+ *
113+ * <p>
114+ * This method has no return value since policies are assumed to render things
115+ * they accept and do nothing on things they reject.
116+ * Use {@link HtmlStreamRenderer} to render content to an output buffer.
117+ *
118+ * @param html A snippet of HTML to sanitize. {@code null} is treated as the
119+ * empty string and will not result in a {@code NullPointerException}.
120+ * @param policy The Policy that will receive events based on the tokens in
121+ * HTML. Typically, this policy ends up routing the events to an
122+ * {@link HtmlStreamRenderer} after filtering.
123+ * {@link HtmlPolicyBuilder} provides an easy way to create policies.
124+ * @param preprocessor A processor that may wrap the policy to reinterpret
125+ * parse events.
126+ * Since the policy encapsulates its output buffer, this is not in the
127+ * policy's TCB.
128+ */
129+ public static void sanitize (
130+ @ Nullable String html , final Policy policy ,
131+ HtmlStreamEventProcessor preprocessor ) {
132+ if (html == null ) { html = "" ; }
120133
121- // The first number gives us a lower bound on the nesting depth we allow,
122- // 90, and the second gives us an upper bound: 512.
123- // We do not want to bump right up against that limit.
124- // 256 is substantially larger than the lower bound and well clear of the
125- // upper bound.
126- balancer .setNestingLimit (256 );
134+ HtmlStreamEventReceiver receiver = initializePolicy (policy , preprocessor );
127135
128- balancer .openDocument ();
136+ receiver .openDocument ();
129137
130138 HtmlLexer lexer = new HtmlLexer (html );
131139 // Use a linked list so that policies can use Iterator.remove() in an O(1)
@@ -135,16 +143,16 @@ public static void sanitize(@Nullable String html, final Policy policy) {
135143 HtmlToken token = lexer .next ();
136144 switch (token .type ) {
137145 case TEXT :
138- balancer .text (
146+ receiver .text (
139147 Encoding .decodeHtml (html .substring (token .start , token .end )));
140148 break ;
141149 case UNESCAPED :
142- balancer .text (Encoding .stripBannedCodeunits (
150+ receiver .text (Encoding .stripBannedCodeunits (
143151 html .substring (token .start , token .end )));
144152 break ;
145153 case TAGBEGIN :
146154 if (html .charAt (token .start + 1 ) == '/' ) { // A close tag.
147- balancer .closeTag (HtmlLexer .canonicalName (
155+ receiver .closeTag (HtmlLexer .canonicalName (
148156 html .substring (token .start + 2 , token .end )));
149157 while (lexer .hasNext ()
150158 && lexer .next ().type != HtmlTokenType .TAGEND ) {
@@ -182,7 +190,7 @@ public static void sanitize(@Nullable String html, final Policy policy) {
182190 if (!attrsReadyForName ) {
183191 attrs .add (attrs .getLast ());
184192 }
185- balancer .openTag (
193+ receiver .openTag (
186194 HtmlLexer .canonicalName (
187195 html .substring (token .start + 1 , token .end )),
188196 attrs );
@@ -195,7 +203,7 @@ public static void sanitize(@Nullable String html, final Policy policy) {
195203 }
196204 }
197205
198- balancer .closeDocument ();
206+ receiver .closeDocument ();
199207 }
200208
201209 private static String stripQuotes (String encodedAttributeValue ) {
@@ -216,4 +224,31 @@ private static String stripQuotes(String encodedAttributeValue) {
216224 return encodedAttributeValue ;
217225 }
218226
227+
228+ private static HtmlStreamEventReceiver initializePolicy (
229+ Policy policy , HtmlStreamEventProcessor preprocessor ) { // builds the receiver that sanitize() feeds lexer events to
230+ TagBalancingHtmlStreamEventReceiver balancer
231+ = new TagBalancingHtmlStreamEventReceiver (policy );
232+ // Rationale for the nesting limit of 256 that is set on the balancer below:
233+ // According to Opera the maximum table nesting depth seen in the wild is
234+ // 795, but 99.99% of documents have a table nesting depth of less than 22.
235+ // Since each table has a nesting depth of 4 (incl. TBODY), this leads to a
236+ // document depth of 90 (incl. HTML & BODY).
237+ // Obviously table nesting depth is not the same as whole document depth,
238+ // but it is the best proxy I have available.
239+ // See http://devfiles.myopera.com/articles/590/maxtabledepth-url.htm for
240+ // the original data.
241+
242+ // Webkit defines the maximum HTML parser tree depth as 512.
243+ // http://trac.webkit.org/browser/trunk/Source/WebCore/page/Settings.h#L408
244+ // static const unsigned defaultMaximumHTMLParserDOMTreeDepth = 512;
245+
246+ // The Opera figure gives us a lower bound on the nesting depth we allow,
247+ // 90, and the Webkit figure gives us an upper bound: 512.
248+ // We do not want to bump right up against that upper bound.
249+ // 256 is substantially larger than the lower bound and well clear of the
250+ // upper bound.
251+ balancer .setNestingLimit (256 );
252+ return preprocessor .wrap (balancer ); // callers get whatever wrap() returns, not the balancer itself
253+ }
219254}
0 commit comments