Skip to content

Commit 49bb968

Browse files
committed
block size exception
1 parent f0e639a commit 49bb968

File tree

1 file changed

+32
-8
lines changed

1 file changed

+32
-8
lines changed

src/Microsoft.ML.Parquet/ParquetLoader.cs

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -390,9 +390,21 @@ public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
390390
Columns = _loader._columnsLoaded.Select(i => i.Name).ToArray()
391391
};
392392

393-
int numBlocks = (int)Math.Ceiling(((decimal)parent.GetRowCount() / _readerOptions.Count));
394-
int[] blockOrder = _rand == null ? Utils.GetIdentityPermutation(numBlocks) : Utils.GetRandomPermutation(rand, numBlocks);
395-
_blockEnumerator = blockOrder.GetEnumerator();
393+
try
394+
{
395+
int numBlocks = checked((int)Math.Ceiling(((decimal)parent.GetRowCount() / _readerOptions.Count)));
396+
int[] blockOrder = _rand == null ? Utils.GetIdentityPermutation(numBlocks) : Utils.GetRandomPermutation(rand, numBlocks);
397+
_blockEnumerator = blockOrder.GetEnumerator();
398+
}
399+
catch (Exception e)
400+
{
401+
if (e is OutOfMemoryException || e is OverflowException)
402+
{
403+
throw new InvalidDataException("Error due to too many blocks. Try increasing block size.", e);
404+
}
405+
406+
throw;
407+
}
396408

397409
_dataSetEnumerator = new int[0].GetEnumerator(); // Initialize an empty enumerator to get started
398410
_columnValues = new IList[_actives.Length];
@@ -477,7 +489,7 @@ protected override bool MoveNextCore()
477489
}
478490
else if (_blockEnumerator.MoveNext())
479491
{
480-
_readerOptions.Offset = (int)_blockEnumerator.Current * _readerOptions.Count;
492+
_readerOptions.Offset = (long)_blockEnumerator.Current * _readerOptions.Count;
481493

482494
// When current dataset runs out, read the next portion of the parquet file.
483495
DataSet ds;
@@ -486,9 +498,21 @@ protected override bool MoveNextCore()
486498
ds = ParquetReader.Read(_loader._parquetStream, _loader._parquetOptions, _readerOptions);
487499
}
488500

489-
int[] dataSetOrder = _rand == null ? Utils.GetIdentityPermutation(ds.RowCount) : Utils.GetRandomPermutation(_rand, ds.RowCount);
490-
_dataSetEnumerator = dataSetOrder.GetEnumerator();
491-
_curDataSetRow = dataSetOrder[0];
501+
try
502+
{
503+
int[] dataSetOrder = _rand == null ? Utils.GetIdentityPermutation(ds.RowCount) : Utils.GetRandomPermutation(_rand, ds.RowCount);
504+
_dataSetEnumerator = dataSetOrder.GetEnumerator();
505+
_curDataSetRow = dataSetOrder[0];
506+
}
507+
catch (Exception e)
508+
{
509+
if (e is OutOfMemoryException)
510+
{
511+
throw new InvalidDataException("Error caused because block size too big. Try decreasing block size.", e);
512+
}
513+
514+
throw;
515+
}
492516

493517
// Cache list for each active column
494518
for (int i = 0; i < _actives.Length; i++)
@@ -671,4 +695,4 @@ private string ConvertListToString(IList list)
671695
}
672696
}
673697
}
674-
}
698+
}

0 commit comments

Comments
 (0)