@@ -3,10 +3,10 @@ Given a data set of an unknown size,
Get a random sample in a random order.
It's used in data analytics, often as a way to get a small random sample from a data lake or warehouse, or from a large CSV file.
*/
/**
 * Draws a random sample of at most `sampleSize` items, in random order, from
 * a data source of unknown length. The source is read in a single pass: the
 * base sample is filled from its first items, then every remaining item gets
 * a chance to replace one of them.
 *
 * @param {Iterator<T>|Iterable<T>} datasetSource The data to sample from:
 *   an iterator (anything with `next()`, e.g. a generator object) or any
 *   iterable such as an array or a Set
 * @param {number} sampleSize The maximum number of items to return
 * @returns {Array.<T>} The random sample, as an array
 * @template T
 */
function shuf (datasetSource, sampleSize) {
  // Both helpers pull items with `.next()`, so a plain iterable has to be
  // turned into an iterator first. A generator object returns itself from
  // `[Symbol.iterator]()`, so callers that already pass one see no change.
  const isIterable = datasetSource != null &&
    typeof datasetSource[Symbol.iterator] === 'function'
  const iterator = isIterable ? datasetSource[Symbol.iterator]() : datasetSource

  // The same iterator is handed to both steps so that the second one resumes
  // exactly where the first one stopped reading.
  const output = fillBaseSample(iterator, sampleSize)

  return randomizeOutputFromDataset(iterator, output)
}
1111
1212/**
@@ -16,39 +16,39 @@ function shuf(datasetSource, sampleSize) {
1616 * @returns {Array.<T> } The random sample, as an array
1717 * @template T
1818 */
function fillBaseSample (datasetSource, sampleSize) {
  // Reads at most `sampleSize` items from the `datasetSource` iterator and
  // returns them in a uniformly random order. When the source runs out early
  // the result simply holds every item that was read. Items past the first
  // `sampleSize` are left unread in the iterator.
  //
  // Throws a RangeError when `sampleSize` is not a non-negative integer.
  // Without this guard a size of 0 would never satisfy the "sample is full"
  // check and the read loop could not terminate correctly.
  if (!Number.isInteger(sampleSize) || sampleSize < 0) {
    throw new RangeError(
      `sampleSize must be a non-negative integer, got ${String(sampleSize)}`
    )
  }

  // Collect the first items in arrival order; one pass, no index bookkeeping.
  const sample = []
  while (sample.length < sampleSize) {
    const step = datasetSource.next()
    if (step.done) break
    sample.push(step.value)
  }

  // Fisher-Yates shuffle: walking backwards, swap each position with a
  // uniformly chosen position at or before it. Every ordering is equally
  // likely, which probing for the next free slot does not guarantee.
  for (let last = sample.length - 1; last > 0; last--) {
    const pick = Math.floor(Math.random() * (last + 1))
    const held = sample[last]
    sample[last] = sample[pick]
    sample[pick] = held
  }

  return sample
}
5353
5454/**
@@ -58,22 +58,22 @@ function fillBaseSample(datasetSource, sampleSize) {
5858 * @returns {Array.<T> } The random sample, as an array
5959 * @template T
6060 */
function randomizeOutputFromDataset (datasetSource, output) {
  // Reservoir-style replacement pass: starting from a copy of `output`, read
  // the rest of the `datasetSource` iterator and let each item overwrite a
  // randomly chosen slot of the copy. `output` itself is never modified.
  const reservoir = [...output]
  // Total number of items seen so far, counting those already in `output`.
  let seen = output.length

  for (let step = datasetSource.next(); !step.done; step = datasetSource.next()) {
    seen += 1

    // Choose a position among everything seen so far; only positions that
    // fall inside the reservoir lead to a replacement, others drop the item.
    const slot = Math.floor(Math.random() * seen)
    if (slot >= reservoir.length) continue

    reservoir[slot] = step.value
  }

  return reservoir
}
7878
// Demo entry point: samples 10 values out of 1000 generated numbers and
// prints the resulting array.
const main = () => {
  /**
   * Lazily produces `length` pseudo-random whole numbers, each at least 0
   * and below 2^31 - 1.
   * @param {number} length The number of data items to generate
   * @returns {Iterable<number>} Random iterable data
   */
  function * generateRandomData (length) {
    const upperBound = Math.pow(2, 31) - 1
    let produced = 0
    while (produced < length) {
      yield Math.floor(Math.random() * upperBound)
      produced += 1
    }
  }

  const sample = shuf(generateRandomData(1000), 10)
  console.log(sample)
}
main()
0 commit comments