Skip to content

Commit 2f9e586

Browse files
author
Neil Fraser
committed
Fix issue with line-mode speedup (all languages).
Lua not affected. Python 3 unique in having a larger bailout.
1 parent f914d72 commit 2f9e586

File tree

13 files changed

+160
-62
lines changed

13 files changed

+160
-62
lines changed

csharp/DiffMatchPatch.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -631,15 +631,20 @@ private string diff_linesToCharsMunge(string text, List<string> lineArray,
631631
lineEnd = text.Length - 1;
632632
}
633633
line = text.JavaSubstring(lineStart, lineEnd + 1);
634-
lineStart = lineEnd + 1;
635634

636635
if (lineHash.ContainsKey(line)) {
637636
chars.Append(((char)(int)lineHash[line]));
638637
} else {
638+
if (lineArray.Count == 65535) {
639+
// Bail out at 65535 because char 65536 == char 0.
640+
line = text.Substring(lineStart);
641+
lineEnd = text.Length;
642+
}
639643
lineArray.Add(line);
640644
lineHash.Add(line, lineArray.Count - 1);
641645
chars.Append(((char)(lineArray.Count - 1)));
642646
}
647+
lineStart = lineEnd + 1;
643648
}
644649
return chars.ToString();
645650
}

csharp/tests/DiffMatchPatchTest.cs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,10 @@ public void diff_linesToCharsTest() {
124124
tmpVector.Clear();
125125
StringBuilder lineList = new StringBuilder();
126126
StringBuilder charList = new StringBuilder();
127-
for (int x = 1; x < n + 1; x++) {
128-
tmpVector.Add(x + "\n");
129-
lineList.Append(x + "\n");
130-
charList.Append(Convert.ToChar(x));
127+
for (int i = 1; i < n + 1; i++) {
128+
tmpVector.Add(i + "\n");
129+
lineList.Append(i + "\n");
130+
charList.Append(Convert.ToChar(i));
131131
}
132132
assertEquals("Test initialization fail #1.", n, tmpVector.Count);
133133
string lines = lineList.ToString();
@@ -164,10 +164,10 @@ public void diff_charsToLinesTest() {
164164
tmpVector.Clear();
165165
StringBuilder lineList = new StringBuilder();
166166
StringBuilder charList = new StringBuilder();
167-
for (int x = 1; x < n + 1; x++) {
168-
tmpVector.Add(x + "\n");
169-
lineList.Append(x + "\n");
170-
charList.Append(Convert.ToChar (x));
167+
for (int i = 1; i < n + 1; i++) {
168+
tmpVector.Add(i + "\n");
169+
lineList.Append(i + "\n");
170+
charList.Append(Convert.ToChar (i));
171171
}
172172
assertEquals("Test initialization fail #3.", n, tmpVector.Count);
173173
string lines = lineList.ToString();
@@ -178,6 +178,17 @@ public void diff_charsToLinesTest() {
178178
this.diff_charsToLines(diffs, tmpVector);
179179
assertEquals("diff_charsToLines: More than 256.", new List<Diff>
180180
{new Diff(Operation.DELETE, lines)}, diffs);
181+
182+
// More than 65536 to verify any 16-bit limitation.
183+
lineList = new StringBuilder();
184+
for (int i = 0; i < 66000; i++) {
185+
lineList.Append(i + "\n");
186+
}
187+
chars = lineList.ToString();
188+
Object[] result = this.diff_linesToChars(chars, "");
189+
diffs = new List<Diff> {new Diff(Operation.INSERT, (string)result[0])};
190+
this.diff_charsToLines(diffs, (List<string>)result[2]);
191+
assertEquals("diff_charsToLines: More than 65536.", chars, diffs[0].text);
181192
}
182193

183194
public void diff_cleanupMergeTest() {
@@ -692,9 +703,9 @@ public void diff_mainTest() {
692703
string a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n";
693704
string b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n";
694705
// Increase the text lengths by 1024 times to ensure a timeout.
695-
for (int x = 0; x < 10; x++) {
696-
a = a + a;
697-
b = b + b;
706+
for (int i = 0; i < 10; i++) {
707+
a += a;
708+
b += b;
698709
}
699710
DateTime startTime = DateTime.Now;
700711
this.diff_main(a, b);

dart/DMPClass.dart

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,15 +480,23 @@ class DiffMatchPatch {
480480
lineEnd = text.length - 1;
481481
}
482482
line = text.substring(lineStart, lineEnd + 1);
483-
lineStart = lineEnd + 1;
484483

485484
if (lineHash.containsKey(line)) {
486485
chars.writeCharCode(lineHash[line]);
487486
} else {
487+
if (lineArray.length == 65535) {
488+
// Bail out at 65535 because
489+
// final chars1 = new StringBuffer();
490+
// chars1.writeCharCode(65536);
491+
// chars1.toString().codeUnitAt(0) == 55296;
492+
line = text.substring(lineStart);
493+
lineEnd = text.length;
494+
}
488495
lineArray.add(line);
489496
lineHash[line] = lineArray.length - 1;
490497
chars.writeCharCode(lineArray.length - 1);
491498
}
499+
lineStart = lineEnd + 1;
492500
}
493501
return chars.toString();
494502
}

dart/tests/DiffMatchPatchTest.dart

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,9 @@ void testDiffLinesToChars() {
185185
int n = 300;
186186
List<String> lineList = [];
187187
StringBuffer charList = new StringBuffer();
188-
for (int x = 1; x < n + 1; x++) {
189-
lineList.add('$x\n');
190-
charList.writeCharCode(x);
188+
for (int i = 1; i < n + 1; i++) {
189+
lineList.add('$i\n');
190+
charList.writeCharCode(i);
191191
}
192192
Expect.equals(n, lineList.length, 'Test initialization fail #1.');
193193
String lines = lineList.join();
@@ -212,9 +212,9 @@ void testDiffCharsToLines() {
212212
int n = 300;
213213
List<String> lineList = [];
214214
StringBuffer charList = new StringBuffer();
215-
for (int x = 1; x < n + 1; x++) {
216-
lineList.add('$x\n');
217-
charList.writeCharCode(x);
215+
for (int i = 1; i < n + 1; i++) {
216+
lineList.add('$i\n');
217+
charList.writeCharCode(i);
218218
}
219219
Expect.equals(n, lineList.length, 'Test initialization fail #3.');
220220
String lines = lineList.join();
@@ -224,6 +224,17 @@ void testDiffCharsToLines() {
224224
diffs = [new Diff(Operation.delete, chars)];
225225
dmp._diff_charsToLines(diffs, lineList);
226226
Expect.listEquals([new Diff(Operation.delete, lines)], diffs, 'diff_charsToLines: More than 256.');
227+
228+
// More than 65536 to verify any 16-bit limitation.
229+
lineList = [];
230+
for (int i = 0; i < 66000; i++) {
231+
lineList.add('$i\n');
232+
}
233+
chars = lineList.join();
234+
final results = dmp._diff_linesToChars(chars, '');
235+
diffs = [new Diff(Operation.insert, results['chars1'])];
236+
dmp._diff_charsToLines(diffs, results['lineArray']);
237+
Expect.equals(chars, diffs[0].text, 'diff_charsToLines: More than 65536.');
227238
}
228239

229240
void testDiffCleanupMerge() {
@@ -532,7 +543,7 @@ void testDiffMain() {
532543
String a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';
533544
String b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';
534545
// Increase the text lengths by 1024 times to ensure a timeout.
535-
for (int x = 0; x < 10; x++) {
546+
for (int i = 0; i < 10; i++) {
536547
a += a;
537548
b += b;
538549
}

java/src/name/fraser/neil/plaintext/diff_match_patch.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,15 +545,21 @@ private String diff_linesToCharsMunge(String text, List<String> lineArray,
545545
lineEnd = text.length() - 1;
546546
}
547547
line = text.substring(lineStart, lineEnd + 1);
548-
lineStart = lineEnd + 1;
549548

550549
if (lineHash.containsKey(line)) {
551550
chars.append(String.valueOf((char) (int) lineHash.get(line)));
552551
} else {
552+
if (lineArray.size() == 65535) {
553+
// Bail out at 65535 because
554+
// String.valueOf((char) 65536).equals(String.valueOf(((char) 0)))
555+
line = text.substring(lineStart);
556+
lineEnd = text.length();
557+
}
553558
lineArray.add(line);
554559
lineHash.put(line, lineArray.size() - 1);
555560
chars.append(String.valueOf((char) (lineArray.size() - 1)));
556561
}
562+
lineStart = lineEnd + 1;
557563
}
558564
return chars.toString();
559565
}

java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,10 @@ public static void testDiffLinesToChars() {
135135
tmpVector.clear();
136136
StringBuilder lineList = new StringBuilder();
137137
StringBuilder charList = new StringBuilder();
138-
for (int x = 1; x < n + 1; x++) {
139-
tmpVector.add(x + "\n");
140-
lineList.append(x + "\n");
141-
charList.append(String.valueOf((char) x));
138+
for (int i = 1; i < n + 1; i++) {
139+
tmpVector.add(i + "\n");
140+
lineList.append(i + "\n");
141+
charList.append(String.valueOf((char) i));
142142
}
143143
assertEquals("Test initialization fail #1.", n, tmpVector.size());
144144
String lines = lineList.toString();
@@ -168,10 +168,10 @@ public static void testDiffCharsToLines() {
168168
tmpVector.clear();
169169
StringBuilder lineList = new StringBuilder();
170170
StringBuilder charList = new StringBuilder();
171-
for (int x = 1; x < n + 1; x++) {
172-
tmpVector.add(x + "\n");
173-
lineList.append(x + "\n");
174-
charList.append(String.valueOf((char) x));
171+
for (int i = 1; i < n + 1; i++) {
172+
tmpVector.add(i + "\n");
173+
lineList.append(i + "\n");
174+
charList.append(String.valueOf((char) i));
175175
}
176176
assertEquals("Test initialization fail #3.", n, tmpVector.size());
177177
String lines = lineList.toString();
@@ -181,6 +181,17 @@ public static void testDiffCharsToLines() {
181181
diffs = diffList(new Diff(DELETE, chars));
182182
dmp.diff_charsToLines(diffs, tmpVector);
183183
assertEquals("diff_charsToLines: More than 256.", diffList(new Diff(DELETE, lines)), diffs);
184+
185+
// More than 65536 to verify any 16-bit limitation.
186+
lineList = new StringBuilder();
187+
for (int i = 0; i < 66000; i++) {
188+
lineList.append(i + "\n");
189+
}
190+
chars = lineList.toString();
191+
LinesToCharsResult results = dmp.diff_linesToChars(chars, "");
192+
diffs = diffList(new Diff(INSERT, results.chars1));
193+
dmp.diff_charsToLines(diffs, results.lineArray);
194+
assertEquals("diff_charsToLines: More than 65536.", chars, diffs.getFirst().text);
184195
}
185196

186197
public static void testDiffCleanupMerge() {
@@ -500,9 +511,9 @@ public static void testDiffMain() {
500511
String a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n";
501512
String b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n";
502513
// Increase the text lengths by 1024 times to ensure a timeout.
503-
for (int x = 0; x < 10; x++) {
504-
a = a + a;
505-
b = b + b;
514+
for (int i = 0; i < 10; i++) {
515+
a += a;
516+
b += b;
506517
}
507518
long startTime = System.currentTimeMillis();
508519
dmp.diff_main(a, b);

javascript/tests/diff_match_patch_test.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ function testDiffMain() {
570570
var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';
571571
var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';
572572
// Increase the text lengths by 1024 times to ensure a timeout.
573-
for (var x = 0; x < 10; x++) {
573+
for (var i = 0; i < 10; i++) {
574574
a += a;
575575
b += b;
576576
}
@@ -603,7 +603,7 @@ function testDiffMain() {
603603
var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true));
604604
var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false));
605605
assertEquivalent(texts_textmode, texts_linemode);
606-
606+
607607
// Test null inputs.
608608
try {
609609
dmp.diff_main(null, null);

objectivec/DiffMatchPatchCFUtilities.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,18 @@ CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArra
480480
}*/
481481

482482
line = diff_CFStringCreateJavaSubstring(text, lineStart, lineEnd + 1);
483-
lineStart = lineEnd + 1;
484483

485484
if (CFDictionaryContainsKey(lineHash, line)) {
486485
CFDictionaryGetValueIfPresent(lineHash, line, (const void **)&hashNumber);
487486
CFNumberGetValue(hashNumber, kCFNumberCFIndexType, &hash);
488487
const UniChar hashChar = (UniChar)hash;
489488
CFStringAppendCharacters(chars, &hashChar, 1);
490489
} else {
490+
if (CFArrayGetCount(lineArray) == 65535) {
491+
// Bail out at 65535 because char 65536 == char 0.
492+
line = diff_CFStringCreateJavaSubstring(text, lineStart, textLength);
493+
lineEnd = textLength;
494+
}
491495
CFArrayAppendValue(lineArray, line);
492496
hash = CFArrayGetCount(lineArray) - 1;
493497
hashNumber = CFNumberCreate(kCFAllocatorDefault, kCFNumberCFIndexType, &hash);
@@ -497,6 +501,7 @@ CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArra
497501
const UniChar hashChar = (UniChar)hash;
498502
CFStringAppendCharacters(chars, &hashChar, 1);
499503
}
504+
lineStart = lineEnd + 1;
500505

501506
CFRelease(line);
502507
}

objectivec/Tests/DiffMatchPatchTest.m

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,11 @@ - (void)test_diff_linesToCharsTest {
172172
NSMutableString *lines = [NSMutableString string];
173173
NSMutableString *chars = [NSMutableString string];
174174
NSString *currentLine;
175-
for (unichar x = 1; x < n + 1; x++) {
176-
currentLine = [NSString stringWithFormat:@"%d\n", (int)x];
175+
for (unichar i = 1; i < n + 1; i++) {
176+
currentLine = [NSString stringWithFormat:@"%d\n", (int)i];
177177
[tmpVector addObject:currentLine];
178178
[lines appendString:currentLine];
179-
[chars appendString:[NSString stringWithFormat:@"%C", x]];
179+
[chars appendString:[NSString stringWithFormat:@"%C", i]];
180180
}
181181
XCTAssertEqual((NSUInteger)n, tmpVector.count, @"More than 256 #1.");
182182
XCTAssertEqual((NSUInteger)n, chars.length, @"More than 256 #2.");
@@ -212,11 +212,11 @@ - (void)test_diff_charsToLinesTest {
212212
NSMutableString *lines = [NSMutableString string];
213213
NSMutableString *chars = [NSMutableString string];
214214
NSString *currentLine;
215-
for (unichar x = 1; x < n + 1; x++) {
216-
currentLine = [NSString stringWithFormat:@"%d\n", (int)x];
215+
for (unichar i = 1; i < n + 1; i++) {
216+
currentLine = [NSString stringWithFormat:@"%d\n", (int)i];
217217
[tmpVector addObject:currentLine];
218218
[lines appendString:currentLine];
219-
[chars appendString:[NSString stringWithFormat:@"%C", x]];
219+
[chars appendString:[NSString stringWithFormat:@"%C", i]];
220220
}
221221
XCTAssertEqual((NSUInteger)n, tmpVector.count, @"More than 256 #1.");
222222
XCTAssertEqual((NSUInteger)n, chars.length, @"More than 256 #2.");
@@ -225,6 +225,19 @@ - (void)test_diff_charsToLinesTest {
225225
[dmp diff_chars:diffs toLines:tmpVector];
226226
XCTAssertEqualObjects([NSArray arrayWithObject:[Diff diffWithOperation:DIFF_DELETE andText:lines]], diffs, @"More than 256 #3.");
227227

228+
// More than 65536 to verify any 16-bit limitation.
229+
lines = [NSMutableString string];
230+
for (int i = 1; i < 66000; i++) {
231+
currentLine = [NSString stringWithFormat:@"%d\n", i];
232+
[lines appendString:currentLine];
233+
}
234+
NSArray *result;
235+
result = [dmp diff_linesToCharsForFirstString:lines andSecondString:@""];
236+
diffs = [NSArray arrayWithObject:[Diff diffWithOperation:DIFF_INSERT andText:result[0]]];
237+
[dmp diff_chars:diffs toLines:result[2]];
238+
Diff *myDiff = diffs.firstObject;
239+
XCTAssertEqualObjects(lines, myDiff.text, @"More than 65536.");
240+
228241
[dmp release];
229242
}
230243

@@ -859,7 +872,7 @@ - (void)test_diff_mainTest {
859872
NSMutableString *aMutable = [NSMutableString stringWithString:a];
860873
NSMutableString *bMutable = [NSMutableString stringWithString:b];
861874
// Increase the text lengths by 1024 times to ensure a timeout.
862-
for (int x = 0; x < 10; x++) {
875+
for (int i = 0; i < 10; i++) {
863876
[aMutable appendString:aMutable];
864877
[bMutable appendString:bMutable];
865878
}

python2/diff_match_patch.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,14 +422,18 @@ def diff_linesToCharsMunge(text):
422422
if lineEnd == -1:
423423
lineEnd = len(text) - 1
424424
line = text[lineStart:lineEnd + 1]
425-
lineStart = lineEnd + 1
426425

427426
if line in lineHash:
428427
chars.append(unichr(lineHash[line]))
429428
else:
429+
if len(lineArray) == 65535:
430+
# Bail out at 65535 because unichr(65536) throws.
431+
line = text[lineStart:]
432+
lineEnd = len(text)
430433
lineArray.append(line)
431434
lineHash[line] = len(lineArray) - 1
432435
chars.append(unichr(len(lineArray) - 1))
436+
lineStart = lineEnd + 1
433437
return "".join(chars)
434438

435439
chars1 = diff_linesToCharsMunge(text1)

0 commit comments

Comments
 (0)