Skip to content

Commit d9b6d33

Browse files
author
Neil Fraser
committed
Work around C#’s inability to encode large strings
“64k ought to be enough for anybody.”
1 parent be0e13a commit d9b6d33

File tree

2 files changed

+43
-28
lines changed

2 files changed

+43
-28
lines changed

csharp/DiffMatchPatch.cs

Lines changed: 31 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -166,11 +166,9 @@ public override string ToString() {
166166
break;
167167
}
168168

169-
text.Append(Uri.EscapeDataString(aDiff.text).Replace('+', ' ')).Append("\n");
169+
text.Append(diff_match_patch.encodeURI(aDiff.text)).Append("\n");
170170
}
171-
172-
return diff_match_patch.unescapeForEncodeUriCompatability(
173-
text.ToString());
171+
return text.ToString();
174172
}
175173
}
176174

@@ -1026,7 +1024,7 @@ public void diff_cleanupSemanticLossless(List<Diff> diffs) {
10261024
}
10271025

10281026
/**
1029-
* Given two strings, comAdde a score representing whether the internal
1027+
* Given two strings, compute a score representing whether the internal
10301028
* boundary falls on logical boundaries.
10311029
* Scores range from 6 (best) to 0 (worst).
10321030
* @param one First string.
@@ -1292,7 +1290,7 @@ public void diff_cleanupMerge(List<Diff> diffs) {
12921290
}
12931291

12941292
/**
1295-
* loc is a location in text1, comAdde and return the equivalent location in
1293+
* loc is a location in text1, compute and return the equivalent location in
12961294
* text2.
12971295
* e.g. "The cat" vs "The big cat", 1->1, 5->8
12981296
* @param diffs List of Diff objects.
@@ -1431,7 +1429,7 @@ public string diff_toDelta(List<Diff> diffs) {
14311429
foreach (Diff aDiff in diffs) {
14321430
switch (aDiff.operation) {
14331431
case Operation.INSERT:
1434-
text.Append("+").Append(Uri.EscapeDataString(aDiff.text).Replace('+', ' ')).Append("\t");
1432+
text.Append("+").Append(encodeURI(aDiff.text)).Append("\t");
14351433
break;
14361434
case Operation.DELETE:
14371435
text.Append("-").Append(aDiff.text.Length).Append("\t");
@@ -1445,14 +1443,13 @@ public string diff_toDelta(List<Diff> diffs) {
14451443
if (delta.Length != 0) {
14461444
// Strip off trailing tab character.
14471445
delta = delta.Substring(0, delta.Length - 1);
1448-
delta = unescapeForEncodeUriCompatability(delta);
14491446
}
14501447
return delta;
14511448
}
14521449

14531450
/**
14541451
* Given the original text1, and an encoded string which describes the
1455-
* operations required to transform text1 into text2, comAdde the full diff.
1452+
* operations required to transform text1 into text2, compute the full diff.
14561453
* @param text1 Source string for the diff.
14571454
* @param delta Delta text.
14581455
* @return Array of Diff objects or null if invalid.
@@ -1765,7 +1762,7 @@ protected void patch_addContext(Patch patch, string text) {
17651762
*/
17661763
public List<Patch> patch_make(string text1, string text2) {
17671764
// Check for null inputs not needed since null can't be passed in C#.
1768-
// No diffs provided, comAdde our own.
1765+
// No diffs provided, compute our own.
17691766
List<Diff> diffs = diff_main(text1, text2, true);
17701767
if (diffs.Count > 2) {
17711768
diff_cleanupSemantic(diffs);
@@ -1782,7 +1779,7 @@ public List<Patch> patch_make(string text1, string text2) {
17821779
*/
17831780
public List<Patch> patch_make(List<Diff> diffs) {
17841781
// Check for null inputs not needed since null can't be passed in C#.
1785-
// No origin string provided, comAdde our own.
1782+
// No origin string provided, compute our own.
17861783
string text1 = diff_text1(diffs);
17871784
return patch_make(text1, diffs);
17881785
}
@@ -2278,25 +2275,31 @@ Regex patchHeader
22782275
private static Regex HEXCODE = new Regex("%[0-9A-F][0-9A-F]");
22792276

22802277
/**
2281-
* Unescape selected chars for compatibility with JavaScript's encodeURI.
2282-
* In speed critical applications this could be dropped since the
2283-
* receiving application will certainly decode these fine.
2284-
* Note that this function is case-sensitive. Thus "%3F" would not be
2285-
* unescaped. But this is ok because it is only called with the output of
2286-
* Uri.EscapeDataString which returns lowercase hex.
2287-
*
2288-
* Example: "%3f" -> "?", "%24" -> "$", etc.
2278+
* Encodes a string with URI-style % escaping.
2279+
* Compatible with JavaScript's encodeURI function.
22892280
*
2290-
* @param str The string to escape.
2291-
* @return The escaped string.
2281+
* @param str The string to encode.
2282+
* @return The encoded string.
22922283
*/
2293-
public static string unescapeForEncodeUriCompatability(string str) {
2294-
str = str.Replace("%20", " ").Replace("%21", "!").Replace("%2A", "*")
2295-
.Replace("%27", "'").Replace("%28", "(").Replace("%29", ")")
2296-
.Replace("%3B", ";").Replace("%2F", "/").Replace("%3F", "?")
2297-
.Replace("%3A", ":").Replace("%40", "@").Replace("%26", "&")
2298-
.Replace("%3D", "=").Replace("%2B", "+").Replace("%24", "$")
2299-
.Replace("%2C", ",").Replace("%23", "#");
2284+
public static string encodeURI(string str) {
  // Uri.EscapeDataString throws a System.UriFormatException if its input
  // is too long, so the input is escaped in chunks of at most MAX_LENGTH
  // UTF-16 code units and the escaped pieces are concatenated.
  const int MAX_LENGTH = 65520 - 1;
  StringBuilder sb = new StringBuilder();
  // Walk the string with an index rather than repeatedly re-copying the
  // remaining tail with Substring on every iteration.
  int start = 0;
  while (str.Length - start > MAX_LENGTH) {
    int length = MAX_LENGTH;
    // Never cut between the two halves of a surrogate pair: a lone
    // surrogate cannot be encoded as UTF-8, so escaping the halves in
    // separate chunks would throw or corrupt the character. The loop
    // condition guarantees that index start + length is within bounds.
    if (char.IsHighSurrogate(str[start + length - 1])
        && char.IsLowSurrogate(str[start + length])) {
      length--;
    }
    sb.Append(Uri.EscapeDataString(str.Substring(start, length)));
    start += length;
  }
  // Escape whatever remains (the whole string when it was short enough).
  sb.Append(Uri.EscapeDataString(str.Substring(start)));
  str = sb.ToString();
  // C# is overzealous in the replacements. Walk back on a few so that the
  // characters JavaScript's encodeURI leaves alone stay unescaped here too.
  str = str.Replace("+", " ").Replace("%20", " ").Replace("%21", "!")
      .Replace("%2A", "*").Replace("%27", "'").Replace("%28", "(")
      .Replace("%29", ")").Replace("%3B", ";").Replace("%2F", "/")
      .Replace("%3F", "?").Replace("%3A", ":").Replace("%40", "@")
      .Replace("%26", "&").Replace("%3D", "=").Replace("%2B", "+")
      .Replace("%24", "$").Replace("%2C", ",").Replace("%23", "#");
  // C# uses uppercase hex codes, JavaScript uses lowercase.
  // Every remaining %XX escape matched by HEXCODE is passed through
  // lowerHex (defined elsewhere in this class; presumably it lower-cases
  // the matched escape - confirm against its definition).
  return HEXCODE.Replace(str, new MatchEvaluator(lowerHex));
}
23022305

csharp/tests/DiffMatchPatchTest.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,18 @@ public void diff_deltaTest() {
611611

612612
// Convert delta string into a diff.
613613
assertEquals("diff_fromDelta: Unchanged characters.", diffs, this.diff_fromDelta("", delta));
614+
615+
// 160 kb string.
616+
string a = "abcdefghij";
617+
for (int i = 0; i < 14; i++) {
618+
a += a;
619+
}
620+
diffs = new List<Diff> {new Diff(Operation.INSERT, a)};
621+
delta = this.diff_toDelta(diffs);
622+
assertEquals("diff_toDelta: 160kb string.", "+" + a, delta);
623+
624+
// Convert delta string into a diff.
625+
assertEquals("diff_fromDelta: 160kb string.", diffs, this.diff_fromDelta("", delta));
614626
}
615627

616628
public void diff_xIndexTest() {

0 commit comments

Comments
 (0)