Skip to content

Commit 41788a5

Browse files
author
Neil Fraser
committed
Improve performance of line-mode speedup
Reserve 2/3rds of the space for text1 and the remaining 1/3rd for text2. Prevent overflow beyond the hard limit.
1 parent 2f9e586 commit 41788a5

File tree

12 files changed

+42
-42
lines changed

12 files changed

+42
-42
lines changed

csharp/DiffMatchPatch.cs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -603,8 +603,9 @@ protected Object[] diff_linesToChars(string text1, string text2) {
603603
// So we'll insert a junk entry to avoid generating a null character.
604604
lineArray.Add(string.Empty);
605605

606-
string chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash);
607-
string chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash);
606+
// Allocate 2/3rds of the space for text1, the rest for text2.
607+
string chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash, 40000);
608+
string chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash, 65535);
608609
return new Object[] { chars1, chars2, lineArray };
609610
}
610611

@@ -614,10 +615,11 @@ protected Object[] diff_linesToChars(string text1, string text2) {
614615
* @param text String to encode.
615616
* @param lineArray List of unique strings.
616617
* @param lineHash Map of strings to indices.
618+
* @param maxLines Maximum length of lineArray.
617619
* @return Encoded string.
618620
*/
619621
private string diff_linesToCharsMunge(string text, List<string> lineArray,
620-
Dictionary<string, int> lineHash) {
622+
Dictionary<string, int> lineHash, int maxLines) {
621623
int lineStart = 0;
622624
int lineEnd = -1;
623625
string line;
@@ -635,7 +637,7 @@ private string diff_linesToCharsMunge(string text, List<string> lineArray,
635637
if (lineHash.ContainsKey(line)) {
636638
chars.Append(((char)(int)lineHash[line]));
637639
} else {
638-
if (lineArray.Count == 65535) {
640+
if (lineArray.Count == maxLines) {
639641
// Bail out at 65535 because char 65536 == char 0.
640642
line = text.Substring(lineStart);
641643
lineEnd = text.Length;

dart/DMPClass.dart

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -452,8 +452,9 @@ class DiffMatchPatch {
452452
// So we'll insert a junk entry to avoid generating a null character.
453453
lineArray.add('');
454454

455-
String chars1 = _diff_linesToCharsMunge(text1, lineArray, lineHash);
456-
String chars2 = _diff_linesToCharsMunge(text2, lineArray, lineHash);
455+
// Allocate 2/3rds of the space for text1, the rest for text2.
456+
String chars1 = _diff_linesToCharsMunge(text1, lineArray, lineHash, 40000);
457+
String chars2 = _diff_linesToCharsMunge(text2, lineArray, lineHash, 65535);
457458
return {'chars1': chars1, 'chars2': chars2, 'lineArray': lineArray};
458459
}
459460

@@ -463,10 +464,11 @@ class DiffMatchPatch {
463464
* [text] is the string to encode.
464465
* [lineArray] is a List of unique strings.
465466
* [lineHash] is a Map of strings to indices.
467+
* [maxLines] is the maximum length for lineArray.
466468
* Returns an encoded string.
467469
*/
468-
String _diff_linesToCharsMunge(
469-
String text, List<String> lineArray, Map<String, int> lineHash) {
470+
String _diff_linesToCharsMunge(String text, List<String> lineArray,
471+
Map<String, int> lineHash, int maxLines) {
470472
int lineStart = 0;
471473
int lineEnd = -1;
472474
String line;
@@ -484,7 +486,7 @@ class DiffMatchPatch {
484486
if (lineHash.containsKey(line)) {
485487
chars.writeCharCode(lineHash[line]);
486488
} else {
487-
if (lineArray.length == 65535) {
489+
if (lineArray.length == maxLines) {
488490
// Bail out at 65535 because
489491
// final chars1 = new StringBuffer();
490492
// chars1.writeCharCode(65536);

java/src/name/fraser/neil/plaintext/diff_match_patch.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -517,8 +517,9 @@ protected LinesToCharsResult diff_linesToChars(String text1, String text2) {
517517
// So we'll insert a junk entry to avoid generating a null character.
518518
lineArray.add("");
519519

520-
String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash);
521-
String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash);
520+
// Allocate 2/3rds of the space for text1, the rest for text2.
521+
String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash, 40000);
522+
String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash, 65535);
522523
return new LinesToCharsResult(chars1, chars2, lineArray);
523524
}
524525

@@ -528,10 +529,11 @@ protected LinesToCharsResult diff_linesToChars(String text1, String text2) {
528529
* @param text String to encode.
529530
* @param lineArray List of unique strings.
530531
* @param lineHash Map of strings to indices.
532+
* @param maxLines Maximum length of lineArray.
531533
* @return Encoded string.
532534
*/
533535
private String diff_linesToCharsMunge(String text, List<String> lineArray,
534-
Map<String, Integer> lineHash) {
536+
Map<String, Integer> lineHash, int maxLines) {
535537
int lineStart = 0;
536538
int lineEnd = -1;
537539
String line;
@@ -549,7 +551,7 @@ private String diff_linesToCharsMunge(String text, List<String> lineArray,
549551
if (lineHash.containsKey(line)) {
550552
chars.append(String.valueOf((char) (int) lineHash.get(line)));
551553
} else {
552-
if (lineArray.size() == 65535) {
554+
if (lineArray.size() == maxLines) {
553555
// Bail out at 65535 because
554556
// String.valueOf((char) 65536).equals(String.valueOf(((char) 0)))
555557
line = text.substring(lineStart);

javascript/diff_match_patch.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

javascript/diff_match_patch_uncompressed.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ diff_match_patch.prototype.diff_linesToChars_ = function(text1, text2) {
475475
(lineHash[line] !== undefined)) {
476476
chars += String.fromCharCode(lineHash[line]);
477477
} else {
478-
if (lineArrayLength == 65535) {
478+
if (lineArrayLength == maxLines) {
479479
// Bail out at 65535 because
480480
// String.fromCharCode(65536) == String.fromCharCode(0)
481481
line = text.substring(lineStart);
@@ -489,8 +489,10 @@ diff_match_patch.prototype.diff_linesToChars_ = function(text1, text2) {
489489
}
490490
return chars;
491491
}
492-
492+
// Allocate 2/3rds of the space for text1, the rest for text2.
493+
var maxLines = 40000;
493494
var chars1 = diff_linesToCharsMunge_(text1);
495+
maxLines = 65535;
494496
var chars2 = diff_linesToCharsMunge_(text2);
495497
return {chars1: chars1, chars2: chars2, lineArray: lineArray};
496498
};

objectivec/DiffMatchPatch.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ typedef enum {
157157
- (NSMutableArray *)diff_computeFromOldString:(NSString *)text1 andNewString:(NSString *)text2 checkLines:(BOOL)checklines deadline:(NSTimeInterval)deadline;
158158
- (NSMutableArray *)diff_lineModeFromOldString:(NSString *)text1 andNewString:(NSString *)text2 deadline:(NSTimeInterval)deadline;
159159
- (NSArray *)diff_linesToCharsForFirstString:(NSString *)text1 andSecondString:(NSString *)text2;
160-
- (NSString *)diff_linesToCharsMungeOfText:(NSString *)text lineArray:(NSMutableArray *)lineArray lineHash:(NSMutableDictionary *)lineHash;
161160
- (void)diff_chars:(NSArray *)diffs toLines:(NSMutableArray *)lineArray;
162161
- (NSMutableArray *)diff_bisectOfOldString:(NSString *)text1 andNewString:(NSString *)text2 deadline:(NSTimeInterval)deadline;
163162
- (NSMutableArray *)diff_bisectSplitOfOldString:(NSString *)text1 andNewString:(NSString *)text2 x:(NSUInteger)x y:(NSUInteger)y deadline:(NSTimeInterval)deadline;

objectivec/DiffMatchPatch.m

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -623,23 +623,6 @@ - (NSMutableArray *)diff_lineModeFromOldString:(NSString *)text1
623623
return diffs;
624624
}
625625

626-
/**
627-
* Split a text into a list of strings. Reduce the texts to a string of
628-
* hashes where each Unicode character represents one line.
629-
* @param text NSString to encode.
630-
* @param lineArray NSMutableArray of unique strings.
631-
* @param lineHash Map of strings to indices.
632-
* @return Encoded string.
633-
*/
634-
- (NSString *)diff_linesToCharsMungeOfText:(NSString *)text
635-
lineArray:(NSMutableArray *)lineArray
636-
lineHash:(NSMutableDictionary *)lineHash;
637-
{
638-
return [((NSString *)diff_linesToCharsMungeCFStringCreate((CFStringRef)text,
639-
(CFMutableArrayRef)lineArray,
640-
(CFMutableDictionaryRef)lineHash)) autorelease];
641-
}
642-
643626
/**
644627
* Find the 'middle snake' of a diff, split the problem in two
645628
* and return the recursively constructed diff.
@@ -859,12 +842,15 @@ - (NSArray *)diff_linesToCharsForFirstString:(NSString *)text1
859842
// So we'll insert a junk entry to avoid generating a nil character.
860843
[lineArray addObject:@""];
861844

845+
// Allocate 2/3rds of the space for text1, the rest for text2.
862846
NSString *chars1 = (NSString *)diff_linesToCharsMungeCFStringCreate((CFStringRef)text1,
863847
(CFMutableArrayRef)lineArray,
864-
(CFMutableDictionaryRef)lineHash);
848+
(CFMutableDictionaryRef)lineHash,
849+
40000);
865850
NSString *chars2 = (NSString *)diff_linesToCharsMungeCFStringCreate((CFStringRef)text2,
866851
(CFMutableArrayRef)lineArray,
867-
(CFMutableDictionaryRef)lineHash);
852+
(CFMutableDictionaryRef)lineHash,
853+
65535);
868854

869855
NSArray *result = [NSArray arrayWithObjects:chars1, chars2, lineArray, nil];
870856

objectivec/DiffMatchPatchCFUtilities.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,9 +450,10 @@ CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CF
450450
* @param text CFString to encode.
451451
* @param lineArray CFMutableArray of unique strings.
452452
* @param lineHash Map of strings to indices.
453+
* @param maxLines Maximum length for lineArray.
453454
* @return Encoded CFStringRef.
454455
*/
455-
CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash) {
456+
CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash, CFIndex maxLines) {
456457
#define lineStart lineStartRange.location
457458
#define lineEnd lineEndRange.location
458459

@@ -487,7 +488,7 @@ CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArra
487488
const UniChar hashChar = (UniChar)hash;
488489
CFStringAppendCharacters(chars, &hashChar, 1);
489490
} else {
490-
if (CFArrayGetCount(lineArray) == 65535) {
491+
if (CFArrayGetCount(lineArray) == maxLines) {
491492
// Bail out at 65535 because char 65536 == char 0.
492493
line = diff_CFStringCreateJavaSubstring(text, lineStart, textLength);
493494
lineEnd = textLength;

objectivec/DiffMatchPatchCFUtilities.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ CFIndex diff_commonOverlap(CFStringRef text1, CFStringRef text2);
3131
CFArrayRef diff_halfMatchCreate(CFStringRef text1, CFStringRef text2, const float diffTimeout);
3232
CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i);
3333

34-
CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash);
34+
CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash, CFIndex maxLines);
3535

3636
CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two);
3737

python2/diff_match_patch.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ def diff_linesToCharsMunge(text):
426426
if line in lineHash:
427427
chars.append(unichr(lineHash[line]))
428428
else:
429-
if len(lineArray) == 65535:
429+
if len(lineArray) == maxLines:
430430
# Bail out at 65535 because unichr(65536) throws.
431431
line = text[lineStart:]
432432
lineEnd = len(text)
@@ -436,7 +436,10 @@ def diff_linesToCharsMunge(text):
436436
lineStart = lineEnd + 1
437437
return "".join(chars)
438438

439+
# Allocate 2/3rds of the space for text1, the rest for text2.
440+
maxLines = 40000
439441
chars1 = diff_linesToCharsMunge(text1)
442+
maxLines = 65535
440443
chars2 = diff_linesToCharsMunge(text2)
441444
return (chars1, chars2, lineArray)
442445

0 commit comments

Comments
 (0)