Skip to content

Commit dd31402

Browse files
committed
complete lz77 compress algorithm
1 parent 8c2a627 commit dd31402

File tree

6 files changed

+64
-19
lines changed

6 files changed

+64
-19
lines changed

src/main/java/com/github/myibu/algorithm/compress/Compressor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* @author myibu
66
* Created on 2021/10/11
77
*/
8-
public interface Compressor {
8+
public interface Compressor extends Debugable {
99
/**
1010
* compress bytes
1111
* @param in_data input
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package com.github.myibu.algorithm.compress;
2+
3+
/**
4+
* contract for components whose debug output can be switched on or off
5+
* @author myibu
6+
* Created on 2021/10/15
7+
*/
8+
public interface Debugable {
9+
/**
10+
* enable or disable debug output; implementations should keep it disabled by default
11+
* @param isDebug is debug
12+
*/
13+
void setDebug(boolean isDebug);
14+
}

src/main/java/com/github/myibu/algorithm/compress/LZ77Compressor.java

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,6 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
6969
for (int i = lStart; i < lEnd; i++) {
7070
lWindow[i] = in_data[ip + i];
7171
}
72-
//System.out.println("txt=" + new String() + new String(in_data) + ", sBuf="
73-
// + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow));
74-
7572
int llStart = sEnd - 1, rrStart = 0, llEnd = 0, rrEnd = (lp = lEnd);
7673
int minMatched = 1, minIndex = 0;
7774
while (llStart >= 0) {
@@ -95,9 +92,17 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
9592
ip++;
9693
tuples.add(Arrays.asList(0, 0, (int)lWindow[0]));
9794
}
95+
if (isDebug) {
96+
System.out.println("Txt=" + new String() + new String(in_data) + ", SearchBuffer="
97+
+ new StringBuilder(new String(sBuf)).reverse().toString() + ", LookaheadWindow=" + new String(lWindow)
98+
+ " | " + tuples.get(tuples.size()-1)/* + " | " + (char)(tuples.get(tuples.size()-1).get(2).intValue())*/);
99+
}
100+
}
101+
int compressedLen = doEncode(tuples, out_data);
102+
if (isDebug) {
103+
System.out.println("after encode: compressed rate=" + (compressedLen * 1.0 / in_len));
98104
}
99-
// System.out.println(tuples);
100-
return doEncode(tuples, out_data);
105+
return compressedLen;
101106
}
102107

103108
private int doEncode(List<List<Integer>> tuples, byte[] out_data) {
@@ -111,12 +116,16 @@ private int doEncode(List<List<Integer>> tuples, byte[] out_data) {
111116
bits.append(bits2);
112117
Bits bits3 = Bits.ofByte((byte)tuple.get(2).intValue());
113118
bits.append(bits3);
114-
// System.out.println("("+ bits1 + ", "+ bits2 + ", "+ bits3 + ")");
119+
if (isDebug) {
120+
System.out.println(tuple + " encoded result: " + "("+ bits1 + ", "+ bits2 + ", "+ bits3 + ")");
121+
}
115122
finalRes.append(bits);
116123
}
117124
byte[] fr = finalRes.toByteArray();
118125
System.arraycopy(fr, 0, out_data, 0, fr.length);
119-
// System.out.println("bits: " + finalRes);
126+
if (isDebug) {
127+
System.out.println("after encode: bits=" + finalRes);
128+
}
120129
return fr.length;
121130
}
122131

@@ -145,9 +154,12 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
145154
}
146155
List<Bits> sortedEncodeSeq = allEncodeSeq.stream().sorted(Comparator.comparingInt(Bits::length)).collect(Collectors.toList());
147156
Bits bits = Bits.ofByte(in_data);
157+
if (isDebug) {
158+
System.out.println("before decode: bits=" + bits);
159+
}
148160
int ip = 0;
149161
List<List<Integer>> tuples = new ArrayList<>();
150-
while (ip < bits.length()) {
162+
while (ip < bits.length() && ip + e1 <= bits.length()) {
151163
Bits b1 = bits.subBits(ip, ip + e1);
152164
ip = ip + e1;
153165
int offset = encoder.encodeToBinary(b1);
@@ -168,7 +180,9 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
168180
tuples.add(Arrays.asList(offset, length, symbol));
169181
ip += 8;
170182
}
171-
// System.out.println(tuples);
183+
if (isDebug) {
184+
System.out.println("decode tuples=" + tuples);
185+
}
172186
return doDecode(tuples, out_data);
173187
}
174188

@@ -181,7 +195,9 @@ private int doDecode(List<List<Integer>> tuples, byte[] out_data) {
181195
seq.append(sb);
182196
} else {
183197
int start = seq.byteLength() < s ? seq.byteLength() - offset: s - offset;
184-
seq.append(seq.subBits(start * 8, (start + length) * 8)).append(sb);
198+
int used = seq.byteLength() < s ? 0 : seq.byteLength() - s;
199+
// System.out.println("start=" + start + ", used=" + used + ", length=" + length);
200+
seq.append(seq.subBits((used + start) * 8, (used + start + length) * 8)).append(sb);
185201
}
186202
}
187203
int len = seq.byteLength();
@@ -190,4 +206,12 @@ private int doDecode(List<List<Integer>> tuples, byte[] out_data) {
190206
}
191207
return len;
192208
}
209+
210+
211+
private boolean isDebug = false;
212+
213+
@Override
214+
public void setDebug(boolean isDebug) {
215+
this.isDebug = isDebug;
216+
}
193217
}

src/main/java/com/github/myibu/algorithm/compress/LZFCompressor.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,11 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
178178
}
179179
return op;
180180
}
181+
182+
private boolean isDebug;
183+
184+
@Override
185+
public void setDebug(boolean isDebug) {
186+
this.isDebug = isDebug;
187+
}
181188
}

src/main/java/com/github/myibu/algorithm/data/Bits.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ public static Bits ofByte(byte val, int len) {
245245
bits.table[j] = (((val>>i) & 0x01) ==1) ? Bit.ONE : Bit.ZERO;
246246
}
247247
for (; j >= 0; j--) {
248-
bits.table[j] = val > 0 ? Bit.ZERO : Bit.ONE;
248+
bits.table[j] = val >= 0 ? Bit.ZERO : Bit.ONE;
249249
}
250250
bits.used += len;
251251
return bits;
@@ -272,7 +272,7 @@ public static Bits ofShort(short val, int len) {
272272
bits.table[j] = (((val>>i) & 0x01) ==1) ? Bit.ONE : Bit.ZERO;
273273
}
274274
for (; j >= 0; j--) {
275-
bits.table[j] = val > 0 ? Bit.ZERO : Bit.ONE;
275+
bits.table[j] = val >= 0 ? Bit.ZERO : Bit.ONE;
276276
}
277277
bits.used += len;
278278
return bits;
@@ -299,7 +299,7 @@ public static Bits ofInt(int val, int len) {
299299
bits.table[j] = (((val>>i) & 0x01) ==1) ? Bit.ONE : Bit.ZERO;
300300
}
301301
for (; j >= 0; j--) {
302-
bits.table[j] = val > 0 ? Bit.ZERO : Bit.ONE;
302+
bits.table[j] = val >= 0 ? Bit.ZERO : Bit.ONE;
303303
}
304304
bits.used += len;
305305
return bits;
@@ -378,7 +378,7 @@ public static Bits ofLong(long val, int len) {
378378
bits.table[j] = (((val>>i) & 0x01) ==1) ? Bit.ONE : Bit.ZERO;
379379
}
380380
for (; j >= 0; j--) {
381-
bits.table[j] = val > 0 ? Bit.ZERO : Bit.ONE;
381+
bits.table[j] = val >= 0 ? Bit.ZERO : Bit.ONE;
382382
}
383383
bits.used += len;
384384
return bits;

src/test/java/com/github/myibu/algorithm/AlgorithmTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,16 +168,16 @@ public void testLZFCompressor() {
168168

169169
@Test
170170
public void testLZ77Compressor() {
171-
// todo
172-
byte[] in_data = "com.github.myibu.algorithm.AlgorithmTest.testLZ77Compressor".getBytes(StandardCharsets.UTF_8);
171+
String txt = "在法律或合同明确规定的情况下,如当事人一方不履行或不适当履行合同义务时,另一方有权解除合同。故合同可由当事人一方行使解除权而消灭。《中华人民共和国经济合同法》规定,如由于合同所依据的国家计划被修改或取消,由于行政命令企业必须关闭、停产或转产,由于不可抗力以及由于一方违约致使合同不能履行或履行已无必要时,允许当事人一方及时通知他方变更或解除合同。";
172+
byte[] in_data = txt.getBytes(StandardCharsets.UTF_8);
173173
byte[] out_data = new byte[in_data.length*2];
174174
Compressor compressor = new LZ77Compressor();
175+
compressor.setDebug(true);
175176
int compressed = compressor.compress(in_data, in_data.length, out_data);
176177
byte[] compressed_data = Arrays.copyOf(out_data, compressed);
177-
// System.out.println(new String(compressed_data));
178178
byte[] decompressed_data = new byte[compressed * 2];
179179
int decompressed = compressor.decompress(compressed_data, compressed, decompressed_data);
180-
Assert.assertEquals("com.github.myibu.algorithm.AlgorithmTest.testLZ77Compressor",
180+
Assert.assertEquals(txt,
181181
new String(Arrays.copyOf(decompressed_data, decompressed), StandardCharsets.UTF_8));
182182
}
183183

0 commit comments

Comments
 (0)