
Commit 6772960

Adding Python notebook with examples.
committed
1 parent d8bb582 commit 6772960

File tree

1 file changed: +190 -0 lines changed


python_bindings/examples.ipynb

Lines changed: 190 additions & 0 deletions
@@ -0,0 +1,190 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pyfastpfor import *\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Compression ratio: 0.34668\n"
     ]
    }
   ],
   "source": [
    "arrSize = 128 * 32\n",
    "maxVal = 2048\n",
    "# 1. Example without data differencing\n",
    "\n",
    "# All arrays the library uses must be contiguous-memory C-style numpy arrays\n",
    "inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')\n",
    "inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')\n",
    "\n",
    "# To be on the safe side, let's reserve plenty of additional memory:\n",
    "# sometimes the size of the compressed data is not smaller than the size\n",
    "# of the original one\n",
    "inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')\n",
    "\n",
    "# Obtain a codec by name\n",
    "codec = getCodec('simdbinarypacking')\n",
    "\n",
    "# Compress data\n",
    "compSize = codec.encodeArray(inp, arrSize, inpComp, len(inpComp))\n",
    "\n",
    "print('Compression ratio: %g' % (float(compSize)/arrSize))\n",
    "\n",
    "# Decompress data\n",
    "assert(arrSize == codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize))\n",
    "assert(np.all(inpCompDecomp == inp))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Compression ratio: 0.691406\n"
     ]
    }
   ],
   "source": [
    "arrSize = 128 * 32\n",
    "maxVal = 1024 * 1024 * 1024 * 2\n",
    "\n",
    "# 2. Example with slower data differencing\n",
    "\n",
    "# All arrays the library uses must be contiguous-memory C-style numpy arrays\n",
    "inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')\n",
    "inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')\n",
    "\n",
    "inp.sort()\n",
    "inpCopy = np.array(inp, copy = True, dtype = np.uint32, order = 'C')\n",
    "\n",
    "# To be on the safe side, let's reserve plenty of additional memory:\n",
    "# sometimes the size of the compressed data is not smaller than the size\n",
    "# of the original one\n",
    "inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')\n",
    "\n",
    "# Carry out data differencing to convert a sorted sequence of large numbers\n",
    "# into a sequence of small numbers (differences between adjacent numbers)\n",
    "delta1(inpCopy, arrSize)\n",
    "\n",
    "# Obtain a codec by name\n",
    "codec = getCodec('simdbinarypacking')\n",
    "\n",
    "# Compress data\n",
    "compSize = codec.encodeArray(inpCopy, arrSize, inpComp, len(inpComp))\n",
    "\n",
    "print('Compression ratio: %g' % (float(compSize)/arrSize))\n",
    "\n",
    "# Decompress data\n",
    "assert(arrSize == codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize))\n",
    "# Reverse differencing by computing the prefix sum\n",
    "prefixSum1(inpCompDecomp, arrSize)\n",
    "\n",
    "assert(np.all(inpCompDecomp == inp))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Compression ratio: 0.72168\n"
     ]
    }
   ],
   "source": [
    "arrSize = 128 * 32\n",
    "maxVal = 1024 * 1024 * 1024 * 2\n",
    "\n",
    "# 3. Example with faster but coarser data differencing\n",
    "\n",
    "# All arrays the library uses must be contiguous-memory C-style numpy arrays\n",
    "inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')\n",
    "inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')\n",
    "\n",
    "inp.sort()\n",
    "inpCopy = np.array(inp, copy = True, dtype = np.uint32, order = 'C')\n",
    "\n",
    "# To be on the safe side, let's reserve plenty of additional memory:\n",
    "# sometimes the size of the compressed data is not smaller than the size\n",
    "# of the original one\n",
    "inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')\n",
    "\n",
    "# Carry out data differencing to convert a sorted sequence of large numbers\n",
    "# into a sequence of small numbers (differences between numbers that are 4 indices apart)\n",
    "delta4(inpCopy, arrSize)\n",
    "\n",
    "# Obtain a codec by name\n",
    "codec = getCodec('simdbinarypacking')\n",
    "\n",
    "# Compress data\n",
    "compSize = codec.encodeArray(inpCopy, arrSize, inpComp, len(inpComp))\n",
    "\n",
    "print('Compression ratio: %g' % (float(compSize)/arrSize))\n",
    "\n",
    "# Decompress data\n",
    "assert(arrSize == codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize))\n",
    "# Reverse differencing by computing the prefix sum\n",
    "prefixSum4(inpCompDecomp, arrSize)\n",
    "\n",
    "assert(np.all(inpCompDecomp == inp))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
