|
import {assertDiff} from './util';

/**
 * Regression suite for a string-diff bug: surrogate pairs (emoji characters)
 * were handled incorrectly when they sat at different positions in the source
 * and destination strings.
 */
describe('String diff with emoji - specific bug cases', () => {
  /**
   * Registers one test named `title` whose body hands the pair
   * (`src`, `dst`) to `assertDiff`.
   */
  const diffCase = (title, src, dst) => {
    test(title, () => {
      assertDiff(src, dst);
    });
  };

  // Exact input pair that was failing in the fuzzing tests.
  diffCase(
    'emoji at beginning - from error log',
    '💚莡韚😻襘😴}诇',
    '😻ʯ愂H😤副🗶íŋ😒😹Ù',
  );

  // Leading emoji: replaced, removed, added, and swapped.
  diffCase('simple emoji replacement at start', '😻hello', '😤hello');
  diffCase('emoji deletion at start', '😻hello', 'hello');
  diffCase('emoji insertion at start', 'hello', '😻hello');
  diffCase('multiple emojis at start', '😻😤hello', '😤😻hello');

  diffCase('emoji in middle position', 'hello😻world', 'hello😤world');

  // NOTE(review): if these literals were meant to be a single joined
  // "family" sequence (with zero-width joiners), confirm the joiners are
  // actually present in the file - they are invisible in most viewers.
  diffCase('complex emoji sequences', '👨👩👧👦test', 'test👨👩👧👦');

  diffCase('emoji with other unicode', '😻你好', 'hello😻');

  test('surrogate pair handling', () => {
    // Spell the emoji out as its two UTF-16 code units:
    // 😻 === \uD83D (high surrogate) + \uDE3B (low surrogate).
    const catEmoji = '\uD83D\uDE3B';
    assertDiff(`${catEmoji}test`, `test${catEmoji}`);
  });

  diffCase('mixed emoji and CJK', '😻中文😤', '中文😻😤');

  // The leading emoji is identical on both sides and should be recognized
  // as a common part.
  diffCase('emoji at same position in both strings', '😻test', '😻best');

  // Many emojis share the same high surrogate (d83d):
  //   😻 = d83d de3b
  //   😤 = d83d de24
  //   😴 = d83d de34
  //   💚 = d83d dc9a
  diffCase(
    'multiple different emojis starting with same high surrogate',
    '💚😻😤',
    '😴😻😤',
  );

  // The same emoji relocated within otherwise identical text.
  diffCase('emoji moved from middle to beginning', 'abc😻def', '😻abcdef');
  diffCase('emoji moved from beginning to middle', '😻abcdef', 'abc😻def');
  diffCase(
    'two identical emojis at different positions',
    '😻abc😻def',
    'abc😻def😻',
  );

  test('long strings with emoji in common', () => {
    // Despite the title, the two emoji differ; what the strings share is the
    // 100-character run on either side of the emoji.
    const head = 'a'.repeat(100);
    const tail = 'b'.repeat(100);
    assertDiff(`${head}😻${tail}`, `${head}😤${tail}`);
  });

  // Both emoji start with the same high surrogate but are different
  // characters, so they should NOT be treated as common.
  diffCase(
    'regression: emoji not in common but sharing high surrogate',
    '💚test',
    '😻test',
  );
});
0 commit comments