3333import javax .annotation .Nullable ;
3434
3535/** Encoders and decoders for HTML. */
36- final class Encoding {
36+ public final class Encoding {
3737
3838 /**
3939 * Decodes HTML entities to produce a string containing only valid
4040 * Unicode scalar values.
41+ *
42+ * @param s text/html
43+ * @return text/plain
4144 */
4245 public static String decodeHtml (String s ) {
4346 int firstAmp = s .indexOf ('&' );
@@ -151,11 +154,40 @@ private static int longestPrefixOfGoodCodeunits(String s) {
151154 return -1 ;
152155 }
153156
157+ /**
158+ * Appends an encoded form of plainText to output where the encoding is
159+ * sufficient to prevent an HTML parser from interpreting any characters in
160+ * the appended chunk as part of an attribute or tag boundary.
161+ *
162+ * @param plainText text/plain
163+ * @param output a buffer of text/html that has a well-formed HTML prefix that
164+ * ends after the open-quote of an attribute value and does not yet contain
165+ * a corresponding close quote.
166+ * Modified in place.
167+ */
154168 static void encodeHtmlAttribOnto (String plainText , Appendable output )
155169 throws IOException {
// Delegates all of the work to encodeHtmlOnto, passing "{\u200B " as its third argument.
// NOTE(review): encodeHtmlOnto is not visible in this chunk, so the role of that argument
// is unconfirmed; presumably it is the replacement emitted for '{' as part of the
// client-side-template precaution that encodePcdataOnto comments on. Confirm against
// encodeHtmlOnto before relying on this.
156170 encodeHtmlOnto (plainText , output , "{\u200B " );
157171 }
158172
173+ /**
174+ * Appends an encoded form of plainText to output where the encoding is
175+ * sufficient to prevent an HTML parser from transitioning out of the
176+ * <a href="https://html.spec.whatwg.org/multipage/parsing.html#data-state">
177+ * Data state</a>.
178+ *
179+ * This is suitable for encoding a text node inside any element that does not
180+ * require special handling as a context element (see "context element" in
181+ * <a href="https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments">
182+ * step 4</a>.)
183+ *
184+ * @param plainText text/plain
185+ * @param output a buffer of text/html that has a well-formed HTML prefix that
186+ * would leave an HTML parser in the Data state if it were to encounter a space
187+ * character as the next character. In practice this means that the buffer
188+ * does not contain partial tags or comments, and does not have an unclosed
189+ * element with a special content model.
190+ */
159191 static void encodePcdataOnto (String plainText , Appendable output )
160192 throws IOException {
161193 // Avoid problems with client-side template languages like
@@ -166,7 +198,23 @@ static void encodePcdataOnto(String plainText, Appendable output)
166198 encodeHtmlOnto (plainText , output , "{<!-- -->" );
167199 }
168200
169- static void encodeRcdataOnto (String plainText , Appendable output )
201+ /**
202+ * Appends an encoded form of plainText to output where the encoding is
203+ * sufficient to prevent an HTML parser from transitioning out of the
204+ * <a href="https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state">
205+ * RCDATA state</a>.
206+ *
207+ * This is suitable for encoding a text node inside a {@code <textarea>} or
208+ * {@code <title>} element outside foreign content.
209+ *
210+ * @param plainText text/plain
211+ * @param output a buffer of text/html that has a well-formed HTML prefix that
212+ * would leave an HTML parser in the RCDATA state if it were to encounter a space
213+ * character as the next character. In practice this means that the buffer
214+ * does not contain partial tags or comments, and the most recently opened
215+ * element is {@code <textarea>} or {@code <title>} and that element is still open.
216+ */
217+ public static void encodeRcdataOnto (String plainText , Appendable output )
170218 throws IOException {
171219 // Avoid problems with client-side template languages like
172220 // Angular & Polymer which attach special significance to text like
@@ -316,7 +364,7 @@ static void appendNumericEntity(int codepoint, Appendable output)
316364 };
317365
318366 /** Maps ASCII chars that need to be encoded to an equivalent HTML entity. */
319- static final String [] REPLACEMENTS = new String [0x80 ];
367+ private static final String [] REPLACEMENTS = new String [0x80 ];
320368 static {
321369 for (int i = 0 ; i < ' ' ; ++i ) {
322370 // We elide control characters so that we can ensure that our output is
@@ -342,8 +390,8 @@ static void appendNumericEntity(int codepoint, Appendable output)
342390 }
343391
344392 /**
345- * {@code DECODES_TO_SELF[c]} is true iff the codepoint c decodes to itself in
346- * an HTML5 text node or properly quoted attribute value .
393+ * {@code IS_BANNED_ASCII[i]}, where {@code i} is an ASCII control character
394+ * codepoint ({@code i < 0x20}), is true iff that character is not allowed in XML source text.
347395 */
348396 private static boolean [] IS_BANNED_ASCII = new boolean [0x20 ];
349397 static {
0 commit comments