Skip to content

Commit 72a21d0

Browse files
committed
complete lz77 compress algorithm
1 parent 959a37e commit 72a21d0

File tree

4 files changed

+58
-28
lines changed

4 files changed

+58
-28
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ tasks.withType(JavaCompile) { options.encoding = "UTF-8" }
2626

2727
group = 'com.github.myibu'
2828
archivesBaseName = "algorithm-java"
29-
version = "1.0.0b"
29+
version = "1.0.0c"
3030

3131
repositories {
3232
mavenCentral()

readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ Reference to: [MersenneTwister.pdf](./docs/HoffmanAndGolombCoding.pdf)
4949
<dependency>
5050
<groupId>com.github.myibu</groupId>
5151
<artifactId>algorithm-java</artifactId>
52-
<version>1.0.0b</version>
52+
<version>1.0.0c</version>
5353
</dependency>
5454
```
5555

src/main/java/com/github/myibu/algorithm/compress/LZ77Compressor.java

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import com.github.myibu.algorithm.data.Bits;
44
import com.github.myibu.algorithm.endode.GolombEncoder;
55

6+
import java.math.BigDecimal;
7+
import java.math.RoundingMode;
68
import java.util.*;
79
import java.util.stream.Collectors;
810

@@ -74,41 +76,40 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
7476
}
7577
int llStart = sEnd - 1, rrStart = 0, llEnd = 0, rrEnd = (lp = lEnd);
7678
int minMatched = 1, minIndex = 0;
77-
while (llStart >= 0) {
78-
int matched = 0, left = llStart, right = rrStart;
79-
while (left >= 0 && right < rrEnd && sBuf[left--] == lWindow[right++]) {
79+
for (int i = llStart; i >= 0; i--) {
80+
int matched = 0, left = i, right = rrStart;
81+
while (left >= llEnd && right < rrEnd && sBuf[left--] == lWindow[right++]) {
8082
matched++;
8183
}
8284
if (matched >= minMatched) {
83-
minIndex = llStart;
85+
minIndex = i;
8486
minMatched = matched;
8587
}
86-
llStart--;
8788
}
89+
int lWindowLen = lWindow.length;
8890
// only one byte in window, set tuple to (0, 0, lWindow[0])
89-
if (lWindow.length == 1) {
91+
if (lWindowLen == 1) {
9092
minIndex = 0;
9193
}
9294
// matched
9395
if (minIndex > 0) {
94-
// 匹配到5个怎么编码 (English: how should a match of 5 symbols be encoded?)
95-
tuples.add(Arrays.asList( minIndex + 1, minMatched, (int)lWindow[minMatched]));
96-
sp += (minMatched + 1);
97-
ip += (minMatched + 1);
96+
tuples.add(Arrays.asList( minIndex + 1, minMatched, (minMatched == lWindowLen) ? null : (int)lWindow[minMatched]));
97+
sp += ((minMatched == lWindowLen) ? minMatched : (minMatched + 1));
98+
ip += ((minMatched == lWindowLen) ? minMatched : (minMatched + 1));
9899
} else {
99100
sp++;
100101
ip++;
101102
tuples.add(Arrays.asList(0, 0, (int)lWindow[0]));
102103
}
103104
if (isDebug) {
104-
System.out.println("Txt=" + new String() + new String(in_data) + ", SearchBuffer="
105+
System.out.println(", SearchBuffer="
105106
+ new StringBuilder(new String(sBuf)).reverse().toString() + ", LookaheadWindow=" + new String(lWindow)
106107
+ " | " + tuples.get(tuples.size()-1)/* + " | " + (char)(tuples.get(tuples.size()-1).get(2).intValue())*/);
107108
}
108109
}
109110
int compressedLen = doEncode(tuples, out_data);
110111
if (isDebug) {
111-
System.out.println("after encode: compressed rate=" + (compressedLen * 1.0 / in_len));
112+
System.out.println("after encode: compressed rate=" + new BigDecimal(compressedLen * 100.0 / in_len).setScale(2, RoundingMode.HALF_UP) + "%");
112113
}
113114
return compressedLen;
114115
}
@@ -122,8 +123,11 @@ private int doEncode(List<List<Integer>> tuples, byte[] out_data) {
122123
bits.append(bits1);
123124
Bits bits2 = encoder.encode(tuple.get(1), l);
124125
bits.append(bits2);
125-
Bits bits3 = Bits.ofByte((byte)tuple.get(2).intValue());
126-
bits.append(bits3);
126+
Bits bits3 = new Bits();
127+
if (tuple.get(2) != null) {
128+
bits3 = Bits.ofByte((byte) tuple.get(2).intValue());
129+
bits.append(bits3);
130+
}
127131
if (isDebug) {
128132
System.out.println(tuple + " encoded result: " + "("+ bits1 + ", "+ bits2 + ", "+ bits3 + ")");
129133
}
@@ -181,12 +185,16 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
181185
}
182186
}
183187
}
184-
if (length == -1 || ip+8 > bits.length()) {
188+
if (length == -1 ) {
185189
break;
186190
}
187-
int symbol = (int)bits.subBits(ip, ip+8).toByte();
188-
tuples.add(Arrays.asList(offset, length, symbol));
189-
ip += 8;
191+
if (length != l && ip + 8 <= bits.length()) {
192+
int symbol = (int) bits.subBits(ip, ip + 8).toByte();
193+
tuples.add(Arrays.asList(offset, length, symbol));
194+
ip += 8;
195+
} else {
196+
tuples.add(Arrays.asList(offset, length, null));
197+
}
190198
}
191199
if (isDebug) {
192200
System.out.println("decode tuples=" + tuples);
@@ -197,17 +205,35 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
197205
private int doDecode(List<List<Integer>> tuples, byte[] out_data) {
198206
Bits seq = new Bits();
199207
for (List<Integer> tuple: tuples) {
200-
int offset = tuple.get(0), length = tuple.get(1), symbol = tuple.get(2);
201-
Bits sb = Bits.ofByte((byte) symbol);
202-
if (offset == 0) {
203-
seq.append(sb);
208+
int offset = tuple.get(0), length = tuple.get(1);
209+
if (tuple.get(2) != null) {
210+
int symbol = tuple.get(2);
211+
Bits sb = Bits.ofByte((byte) symbol);
212+
if (offset == 0) {
213+
seq.append(sb);
214+
if (isDebug) {
215+
System.out.println(tuple + ", seq=" + new String(seq.toByteArray()));
216+
}
217+
} else {
218+
int start = seq.byteLength() < s ? seq.byteLength() - offset: s - offset;
219+
int used = seq.byteLength() < s ? 0 : seq.byteLength() - s;
220+
seq.append(seq.subBits((used + start) * 8, (used + start + length) * 8)).append(sb);
221+
if (isDebug) {
222+
System.out.println(tuple + ", seq=" + new String(seq.toByteArray()));
223+
}
224+
}
204225
} else {
205226
int start = seq.byteLength() < s ? seq.byteLength() - offset: s - offset;
206227
int used = seq.byteLength() < s ? 0 : seq.byteLength() - s;
207-
seq.append(seq.subBits((used + start) * 8, (used + start + length) * 8)).append(sb);
208-
// System.out.println("start=" + start + ", used=" + used + ", length=" + length + ", seq=" + seq);
228+
seq.append(seq.subBits((used + start) * 8, (used + start + length) * 8));
229+
if (isDebug) {
230+
System.out.println(tuple + ", seq=" + new String(seq.toByteArray()));
231+
}
209232
}
210233
}
234+
if (isDebug) {
235+
System.out.println("after decode, bits=" + seq);
236+
}
211237
int len = seq.byteLength();
212238
for (int i = 0; i < len; i++) {
213239
out_data[i] = seq.getByte(i).toByte();
@@ -222,4 +248,9 @@ private int doDecode(List<List<Integer>> tuples, byte[] out_data) {
222248
public void setDebug(boolean isDebug) {
223249
this.isDebug = isDebug;
224250
}
251+
252+
public void setSL(int s, int l) {
253+
this.s = s;
254+
this.l = l;
255+
}
225256
}

src/test/java/com/github/myibu/algorithm/AlgorithmTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,15 +168,14 @@ public void testLZFCompressor() {
168168

169169
@Test
170170
public void testLZ77Compressor() {
171-
//todo
172171
String txt = "abracadabradabracadabradabracadabradabracadabradabracadabradabracadabradabracadabradabracadabradabracadabrad";
173172
byte[] in_data = txt.getBytes(StandardCharsets.UTF_8);
174173
byte[] out_data = new byte[in_data.length*2];
175174
Compressor compressor = new LZ77Compressor();
176175
compressor.setDebug(true);
177176
int compressed = compressor.compress(in_data, in_data.length, out_data);
178177
byte[] compressed_data = Arrays.copyOf(out_data, compressed);
179-
byte[] decompressed_data = new byte[compressed * 2];
178+
byte[] decompressed_data = new byte[txt.length()];
180179
int decompressed = compressor.decompress(compressed_data, compressed, decompressed_data);
181180
Assert.assertEquals(txt,
182181
new String(Arrays.copyOf(decompressed_data, decompressed), StandardCharsets.UTF_8));

0 commit comments

Comments
 (0)