golang
diff --git a/‎src/pkg/Makefile‎
Lines changed: 1 addition & 0 deletions b/‎src/pkg/Makefile‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/pkg/exp/html/Makefile‎
Lines changed: 20 additions & 0 deletions b/‎src/pkg/exp/html/Makefile‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/pkg/html/const.go‎ renamed to ‎src/pkg/exp/html/const.go‎ b/‎src/pkg/html/const.go‎ renamed to ‎src/pkg/exp/html/const.go‎
diff --git a/‎src/pkg/html/doc.go‎ renamed to ‎src/pkg/exp/html/doc.go‎ b/‎src/pkg/html/doc.go‎ renamed to ‎src/pkg/exp/html/doc.go‎
diff --git a/‎src/pkg/html/doctype.go‎ renamed to ‎src/pkg/exp/html/doctype.go‎ b/‎src/pkg/html/doctype.go‎ renamed to ‎src/pkg/exp/html/doctype.go‎
diff --git a/‎src/pkg/exp/html/entity.go‎
Lines changed: 2253 additions & 0 deletions b/‎src/pkg/exp/html/entity.go‎
Lines changed: 2253 additions & 0 deletions
diff --git a/‎src/pkg/exp/html/entity_test.go‎
Lines changed: 29 additions & 0 deletions b/‎src/pkg/exp/html/entity_test.go‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎src/pkg/exp/html/escape.go‎
Lines changed: 253 additions & 0 deletions b/‎src/pkg/exp/html/escape.go‎
Lines changed: 253 additions & 0 deletions
diff --git a/‎src/pkg/html/foreign.go‎ renamed to ‎src/pkg/exp/html/foreign.go‎ b/‎src/pkg/html/foreign.go‎ renamed to ‎src/pkg/exp/html/foreign.go‎
diff --git a/‎src/pkg/html/node.go‎ renamed to ‎src/pkg/exp/html/node.go‎ b/‎src/pkg/html/node.go‎ renamed to ‎src/pkg/exp/html/node.go‎
@@ -82,6 +82,7 @@ DIRS=\
 exp/ebnf\
 exp/ebnflint\
 exp/gotype\
+exp/html\
 exp/norm\
 exp/spdy\
 exp/ssh\
 
@@ -0,0 +1,20 @@
+# Copyright 2010 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=html
+GOFILES=\
+const.go\
+doc.go\
+doctype.go\
+entity.go\
+escape.go\
+foreign.go\
+node.go\
+parse.go\
+render.go\
+token.go\
+
+include ../../../Make.pkg
@@ -0,0 +1,29 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+"testing"
+"unicode/utf8"
+)
+
+func TestEntityLength(t *testing.T) {
+// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
+// The +1 comes from the leading "&". This property implies that the length of
+// unescaped text is <= the length of escaped text.
+for k, v := range entity {
+if 1+len(k) < utf8.RuneLen(v) {
+t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
+}
+if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
+t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
+}
+}
+for k, v := range entity2 {
+if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
+t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
+}
+}
+}
@@ -0,0 +1,253 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+"bytes"
+"strings"
+"unicode/utf8"
+)
+
+// These replacements permit compatibility with old numeric entities that 
+// assumed Windows-1252 encoding.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
+var replacementTable = [...]rune{
+'\u20AC', // First entry is what 0x80 should be replaced with.
+'\u0081',
+'\u201A',
+'\u0192',
+'\u201E',
+'\u2026',
+'\u2020',
+'\u2021',
+'\u02C6',
+'\u2030',
+'\u0160',
+'\u2039',
+'\u0152',
+'\u008D',
+'\u017D',
+'\u008F',
+'\u0090',
+'\u2018',
+'\u2019',
+'\u201C',
+'\u201D',
+'\u2022',
+'\u2013',
+'\u2014',
+'\u02DC',
+'\u2122',
+'\u0161',
+'\u203A',
+'\u0153',
+'\u009D',
+'\u017E',
+'\u0178', // Last entry is 0x9F.
+// 0x00->'\uFFFD' is handled programmatically. 
+// 0x0D->'\u000D' is a no-op.
+}
+
+// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
+// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
+// Precondition: b[src] == '&' && dst <= src.
+// attribute should be true if parsing an attribute value.
+func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
+
+// i starts at 1 because we already know that s[0] == '&'.
+i, s := 1, b[src:]
+
+if len(s) <= 1 {
+b[dst] = b[src]
+return dst + 1, src + 1
+}
+
+if s[i] == '#' {
+if len(s) <= 3 { // We need to have at least "&#.".
+b[dst] = b[src]
+return dst + 1, src + 1
+}
+i++
+c := s[i]
+hex := false
+if c == 'x' || c == 'X' {
+hex = true
+i++
+}
+
+x := '\x00'
+for i < len(s) {
+c = s[i]
+i++
+if hex {
+if '0' <= c && c <= '9' {
+x = 16*x + rune(c) - '0'
+continue
+} else if 'a' <= c && c <= 'f' {
+x = 16*x + rune(c) - 'a' + 10
+continue
+} else if 'A' <= c && c <= 'F' {
+x = 16*x + rune(c) - 'A' + 10
+continue
+}
+} else if '0' <= c && c <= '9' {
+x = 10*x + rune(c) - '0'
+continue
+}
+if c != ';' {
+i--
+}
+break
+}
+
+if i <= 3 { // No characters matched.
+b[dst] = b[src]
+return dst + 1, src + 1
+}
+
+if 0x80 <= x && x <= 0x9F {
+// Replace characters from Windows-1252 with UTF-8 equivalents.
+x = replacementTable[x-0x80]
+} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
+// Replace invalid characters with the replacement character.
+x = '\uFFFD'
+}
+
+return dst + utf8.EncodeRune(b[dst:], x), src + i
+}
+
+// Consume the maximum number of characters possible, with the
+// consumed characters matching one of the named references.
+
+for i < len(s) {
+c := s[i]
+i++
+// Lower-cased characters are more common in entities, so we check for them first.
+if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+continue
+}
+if c != ';' {
+i--
+}
+break
+}
+
+entityName := string(s[1:i])
+if entityName == "" {
+// No-op.
+} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
+// No-op.
+} else if x := entity[entityName]; x != 0 {
+return dst + utf8.EncodeRune(b[dst:], x), src + i
+} else if x := entity2[entityName]; x[0] != 0 {
+dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
+return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
+} else if !attribute {
+maxLen := len(entityName) - 1
+if maxLen > longestEntityWithoutSemicolon {
+maxLen = longestEntityWithoutSemicolon
+}
+for j := maxLen; j > 1; j-- {
+if x := entity[entityName[:j]]; x != 0 {
+return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
+}
+}
+}
+
+dst1, src1 = dst+i, src+i
+copy(b[dst:dst1], b[src:src1])
+return dst1, src1
+}
+
+// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
+func unescape(b []byte) []byte {
+for i, c := range b {
+if c == '&' {
+dst, src := unescapeEntity(b, i, i, false)
+for src < len(b) {
+c := b[src]
+if c == '&' {
+dst, src = unescapeEntity(b, dst, src, false)
+} else {
+b[dst] = c
+dst, src = dst+1, src+1
+}
+}
+return b[0:dst]
+}
+}
+return b
+}
+
+// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
+func lower(b []byte) []byte {
+for i, c := range b {
+if 'A' <= c && c <= 'Z' {
+b[i] = c + 'a' - 'A'
+}
+}
+return b
+}
+
+const escapedChars = `&'<>"`
+
+func escape(w writer, s string) error {
+i := strings.IndexAny(s, escapedChars)
+for i != -1 {
+if _, err := w.WriteString(s[:i]); err != nil {
+return err
+}
+var esc string
+switch s[i] {
+case '&':
+esc = "&amp;"
+case '\'':
+esc = "&apos;"
+case '<':
+esc = "&lt;"
+case '>':
+esc = "&gt;"
+case '"':
+esc = "&quot;"
+default:
+panic("unrecognized escape character")
+}
+s = s[i+1:]
+if _, err := w.WriteString(esc); err != nil {
+return err
+}
+i = strings.IndexAny(s, escapedChars)
+}
+_, err := w.WriteString(s)
+return err
+}
+
+// EscapeString escapes special characters like "<" to become "&lt;". It
+// escapes only five such characters: amp, apos, lt, gt and quot.
+// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
+// always true.
+func EscapeString(s string) string {
+if strings.IndexAny(s, escapedChars) == -1 {
+return s
+}
+buf := bytes.NewBuffer(nil)
+escape(buf, s)
+return buf.String()
+}
+
+// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
+// larger range of entities than EscapeString escapes. For example, "&aacute;"
+// unescapes to "á", as does "&#225;" and "&xE1;".
+// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
+// always true.
+func UnescapeString(s string) string {
+for _, c := range s {
+if c == '&' {
+return string(unescape([]byte(s)))
+}
+}
+return s
+}