1616// under the License.
1717
1818// Use the ES5 UMD target as perf baseline
19- // const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
20- // const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21- // const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22- const { lit , col, DataFrame, Table, readVectors } = require ( '../targets/es2015/cjs' ) ;
19+ const { col, DataFrame, Table, readVectors } = require ( '../targets/es5/umd' ) ;
20+ // const { col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21+ // const { col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22+ // const { col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
2323
2424const config = require ( './config' ) ;
2525const Benchmark = require ( 'benchmark' ) ;
2626
2727const suites = [ ] ;
2828
29- // for (let { name, buffers} of config) {
30- // const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
31- // const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
32- // const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
33- // const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
34- // parseSuite.add(createFromTableTest(name, buffers));
35- // parseSuite.add(createReadVectorsTest(name, buffers));
36- // for (const vector of Table.from(buffers).columns) {
37- // sliceSuite.add(createSliceTest(vector));
38- // iterateSuite.add(createIterateTest(vector));
39- // getByIndexSuite.add(createGetByIndexTest(vector));
40- // }
41- // suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
42- // }
// For every dataset listed in ./config, register four suites: one that times
// the two parse factories on the raw buffers, and one each for slicing,
// iterating and get-by-index over the columns of the parsed table.
for (const { name, buffers } of config) {
    const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
    const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
    const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
    const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });

    parseSuite.add(createFromTableTest(name, buffers));
    parseSuite.add(createReadVectorsTest(name, buffers));

    // One benchmark per column vector of the table built from these buffers.
    for (const vector of Table.from(buffers).columns) {
        sliceSuite.add(createSliceTest(vector));
        iterateSuite.add(createIterateTest(vector));
        getByIndexSuite.add(createGetByIndexTest(vector));
    }

    // Push order is preserved from the original: get-by-index first, parse last.
    suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
}
4343
// For every dataset listed in ./table_config, register two DataFrame counting
// suites: one that counts through a filter predicate and one that walks the
// record batches directly. Each entry of `tests` supplies the column name
// (`col`), the comparison kind (`test`) and the comparison `value`.
for (const { name, buffers, tests } of require('./table_config')) {
    const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter-Scan Count "${name}"`, { async: true });
    const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name}"`, { async: true });
    const table = Table.from(buffers);

    // `const test` keeps the loop variable local; the bare `for (test of tests)`
    // assigned to an undeclared identifier (an implicit global, and a
    // ReferenceError under strict mode).
    for (const test of tests) {
        dfFilterCountSuite.add(createDataFrameFilterCountTest(table, test.col, test.test, test.value));
        dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value));
    }

    suites.push(dfFilterCountSuite, dfDirectCountSuite);
}
6856
6957console . log ( 'Running apache-arrow performance tests...\n' ) ;
@@ -135,81 +123,9 @@ function createGetByIndexTest(vector) {
135123 } ;
136124}
137125
138- function createVectorCountTest ( vector , test , value ) {
139- let op ;
140- if ( test == 'gteq' ) {
141- op = function ( ) {
142- sum = 0 ;
143- for ( cell of vector ) {
144- sum += ( cell >= value )
145- }
146- }
147- } else if ( test == 'eq' ) {
148- op = function ( ) {
149- sum = 0 ;
150- for ( cell of vector ) {
151- sum += ( cell == value )
152- }
153- }
154- } else {
155- throw new Error ( `Unrecognized test "$test"` ) ;
156- }
157-
158- return {
159- async : true ,
160- name : `name: '${ vector . name } ', length: ${ vector . length } , type: ${ vector . type } , test: ${ test } , value: ${ value } ` ,
161- fn : op
162- } ;
163- }
164-
165- function createTableIteratorTest ( table ) {
166- let row ;
167- return {
168- async : true ,
169- name : `length: ${ table . length } ` ,
170- fn ( ) { for ( row of table ) { } }
171- } ;
172- }
173-
174- function createTableCountTest ( table , column , test , value ) {
175- let op ;
176- if ( test == 'gteq' ) {
177- op = function ( ) {
178- sum = 0 ;
179- for ( row of table ) {
180- sum += ( row . get ( column ) >= value )
181- }
182- }
183- } else if ( test == 'eq' ) {
184- op = function ( ) {
185- sum = 0 ;
186- for ( row of table ) {
187- sum += ( row . get ( column ) == value )
188- }
189- }
190- } else {
191- throw new Error ( `Unrecognized test "${ test } "` ) ;
192- }
193-
194- return {
195- async : true ,
196- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
197- fn : op
198- } ;
199- }
200-
201- function createDataFrameIteratorTest ( table ) {
202- let df = DataFrame . from ( table ) ;
203- let idx ;
204- return {
205- async : true ,
206- name : `length: ${ table . length } ` ,
207- fn ( ) { for ( idx of table ) { } }
208- } ;
209- }
210-
211126function createDataFrameDirectCountTest ( table , column , test , value ) {
212127 let df = DataFrame . from ( table ) ;
128+ let colidx = table . columns . findIndex ( ( c ) => c . name === column ) ;
213129
214130 if ( test == 'gteq' ) {
215131 op = function ( ) {
@@ -218,11 +134,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
218134 const length = df . lengths [ batch ] ;
219135
220136 // load batches
221- const columns = df . getBatch ( batch ) ;
137+ const columns = df . batches [ batch ] ;
222138
223139 // yield all indices
224140 for ( let idx = - 1 ; ++ idx < length ; ) {
225- sum += ( columns [ column ] . get ( idx ) >= value ) ;
141+ sum += ( columns [ colidx ] . get ( idx ) >= value ) ;
226142 }
227143 }
228144 }
@@ -233,11 +149,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
233149 const length = df . lengths [ batch ] ;
234150
235151 // load batches
236- const columns = df . getBatch ( batch ) ;
152+ const columns = df . batches [ batch ]
237153
238154 // yield all indices
239155 for ( let idx = - 1 ; ++ idx < length ; ) {
240- sum += ( columns [ column ] . get ( idx ) == value ) ;
156+ sum += ( columns [ colidx ] . get ( idx ) == value ) ;
241157 }
242158 }
243159 }
@@ -247,79 +163,28 @@ function createDataFrameDirectCountTest(table, column, test, value) {
247163
248164 return {
249165 async : true ,
250- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
251- fn : op
252- } ;
253- }
254-
255- function createDataFrameScanCountTest ( table , column , test , value ) {
256- let df = DataFrame . from ( table ) ;
257-
258- if ( test == 'gteq' ) {
259- op = function ( ) {
260- sum = 0 ;
261- df . scan ( ( idx , cols ) => { sum += cols [ column ] . get ( idx ) >= value } ) ;
262- }
263- } else if ( test == 'eq' ) {
264- op = function ( ) {
265- sum = 0 ;
266- df . scan ( ( idx , cols ) => { sum += cols [ column ] . get ( idx ) == value } ) ;
267- console . log ( sum ) ;
268- }
269- } else {
270- throw new Error ( `Unrecognized test "${ test } "` ) ;
271- }
272-
273- return {
274- async : true ,
275- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
166+ name : `name: '${ column } ', length: ${ table . length } , type: ${ table . columns [ colidx ] . type } , test: ${ test } , value: ${ value } ` ,
276167 fn : op
277168 } ;
278169}
279170
/**
 * Build a benchmark descriptor that counts the rows of `table` matching a
 * single-column predicate, using a filtered DataFrame.
 *
 * The filtered DataFrame is constructed once, up front; only `count()` runs
 * inside the timed function.
 *
 * @param {object} table  Table whose `columns` entries expose `name` and `type`.
 * @param {string} column Name of the column to test.
 * @param {string} test   Comparison kind: 'gteq' or 'eq'.
 * @param {*} value       Value each cell is compared against.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test descriptor.
 * @throws {Error} If `column` is not a column of `table`, or `test` is not recognized.
 */
function createDataFrameFilterCountTest(table, column, test, value) {
    const frame = DataFrame.from(table);

    // Resolve the column by name. A miss would otherwise surface later as an
    // opaque TypeError when reading `.type` of `undefined`.
    const colidx = table.columns.findIndex((c) => c.name === column);
    if (colidx === -1) {
        throw new Error(`Unrecognized column "${column}"`);
    }

    let df;
    if (test === 'gteq') {
        df = frame.filter(col(column).gteq(value));
    } else if (test === 'eq') {
        df = frame.filter(col(column).eq(value));
    } else {
        throw new Error(`Unrecognized test "${test}"`);
    }

    return {
        async: true,
        name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}`,
        fn() {
            df.count();
        }
    };
}
298-
299- function createDataFrameIteratorCountTest ( table , column , test , value ) {
300- let df = DataFrame . from ( table ) ;
301-
302- if ( test == 'gteq' ) {
303- op = function ( ) {
304- sum = 0 ;
305- for ( idx of df ) {
306- sum += ( df . columns [ column ] . get ( idx ) >= value ) ;
307- }
308- }
309- } else if ( test == 'eq' ) {
310- op = function ( ) {
311- sum = 0 ;
312- for ( idx of df ) {
313- sum += ( df . columns [ column ] . get ( idx ) == value ) ;
314- }
315- }
316- } else {
317- throw new Error ( `Unrecognized test "${ test } "` ) ;
318- }
319-
320- return {
321- async : true ,
322- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
323- fn : op
324- } ;
325- }
0 commit comments