@@ -390,9 +390,21 @@ public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
390390 Columns = _loader . _columnsLoaded . Select ( i => i . Name ) . ToArray ( )
391391 } ;
392392
393- int numBlocks = ( int ) Math . Ceiling ( ( ( decimal ) parent . GetRowCount ( ) / _readerOptions . Count ) ) ;
394- int [ ] blockOrder = _rand == null ? Utils . GetIdentityPermutation ( numBlocks ) : Utils . GetRandomPermutation ( rand , numBlocks ) ;
395- _blockEnumerator = blockOrder . GetEnumerator ( ) ;
393+ try
394+ {
395+ int numBlocks = checked ( ( int ) Math . Ceiling ( ( ( decimal ) parent . GetRowCount ( ) / _readerOptions . Count ) ) ) ;
396+ int [ ] blockOrder = _rand == null ? Utils . GetIdentityPermutation ( numBlocks ) : Utils . GetRandomPermutation ( rand , numBlocks ) ;
397+ _blockEnumerator = blockOrder . GetEnumerator ( ) ;
398+ }
399+ catch ( Exception e )
400+ {
401+ if ( e is OutOfMemoryException || e is OverflowException )
402+ {
403+ throw new InvalidDataException ( "Error due to too many blocks. Try increasing block size." , e ) ;
404+ }
405+
406+ throw ;
407+ }
396408
397409 _dataSetEnumerator = new int [ 0 ] . GetEnumerator ( ) ; // Initialize an empty enumerator to get started
398410 _columnValues = new IList [ _actives . Length ] ;
@@ -477,7 +489,7 @@ protected override bool MoveNextCore()
477489 }
478490 else if ( _blockEnumerator . MoveNext ( ) )
479491 {
480- _readerOptions . Offset = ( int ) _blockEnumerator . Current * _readerOptions . Count ;
492+ _readerOptions . Offset = ( long ) _blockEnumerator . Current * _readerOptions . Count ;
481493
482494 // When current dataset runs out, read the next portion of the parquet file.
483495 DataSet ds ;
@@ -486,9 +498,21 @@ protected override bool MoveNextCore()
486498 ds = ParquetReader . Read ( _loader . _parquetStream , _loader . _parquetOptions , _readerOptions ) ;
487499 }
488500
489- int [ ] dataSetOrder = _rand == null ? Utils . GetIdentityPermutation ( ds . RowCount ) : Utils . GetRandomPermutation ( _rand , ds . RowCount ) ;
490- _dataSetEnumerator = dataSetOrder . GetEnumerator ( ) ;
491- _curDataSetRow = dataSetOrder [ 0 ] ;
501+ try
502+ {
503+ int [ ] dataSetOrder = _rand == null ? Utils . GetIdentityPermutation ( ds . RowCount ) : Utils . GetRandomPermutation ( _rand , ds . RowCount ) ;
504+ _dataSetEnumerator = dataSetOrder . GetEnumerator ( ) ;
505+ _curDataSetRow = dataSetOrder [ 0 ] ;
506+ }
507+ catch ( Exception e )
508+ {
509+ if ( e is OutOfMemoryException )
510+ {
511+ throw new InvalidDataException ( "Error caused because block size too big. Try decreasing block size." , e ) ;
512+ }
513+
514+ throw ;
515+ }
492516
493517 // Cache list for each active column
494518 for ( int i = 0 ; i < _actives . Length ; i ++ )
@@ -671,4 +695,4 @@ private string ConvertListToString(IList list)
671695 }
672696 }
673697 }
674- }
698+ }
0 commit comments