Skip to content

Commit 66b1859

Browse files
authored
[ENG-8488] MFR: .xlsx file not rendering correctly in OSF Storage (#396)
1 parent dc2a053 commit 66b1859

File tree

6 files changed

+28
-25
lines changed

6 files changed

+28
-25
lines changed

mfr/core/exceptions.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,13 @@ def __init__(self, message, *args, metadata_url: str = '', response: str = '', *
146146
'response': self.response
147147
}])
148148

149+
class CorruptedError(RendererError):
    """Raised when a file is corrupted and therefore cannot be rendered.

    The user-facing message is fixed; callers may pass extra positional arguments and
    the name of the renderer class that detected the problem.

    :param args: extra positional arguments forwarded to ``RendererError``
    :param renderer_class: name of the renderer reporting the corruption
    :param kwargs: extra keyword arguments forwarded to ``RendererError``
    """

    __TYPE = 'corrupted'

    def __init__(self, *args, renderer_class: str = '', **kwargs):
        # Forward ``renderer_class`` by keyword: passed positionally after ``*args`` it
        # is appended to the positional arguments rather than bound to the parent's
        # ``renderer_class`` parameter.
        # NOTE(review): assumes RendererError.__init__ takes ``renderer_class`` as a
        # keyword argument, mirroring this signature -- confirm against the base class.
        super().__init__(
            "File is corrupted, impossible to render, please check it's integrity",
            *args,
            renderer_class=renderer_class,
            **kwargs
        )
155+
149156
class TooBigToRenderError(ProviderError):
150157
"""If the user tries to render a file larger than a server specified maximum, throw a
151158
TooBigToRenderError.

mfr/extensions/tabular/libs/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,14 @@ def sav_pandas():
3131
from ..libs.panda_tools import sav_pandas
3232
return sav_pandas
3333

34+
def xls():
    """Import ``xlrd_tools`` on demand and return its ``xls`` function."""
    # Deferred import: the module is only loaded when this loader is called.
    from ..libs.xlrd_tools import xls as xls_reader
    return xls_reader
3437

35-
def xlsx_xlrd():
36-
from ..libs.xlrd_tools import xlsx_xlrd
37-
return xlsx_xlrd
38+
39+
def xlsx():
    """Import ``xlrd_tools`` on demand and return its ``xlsx`` function."""
    # Deferred import: the module is only loaded when this loader is called.
    from ..libs.xlrd_tools import xlsx as xlsx_reader
    return xlsx_reader
3842

3943
def mat_h5py_scipy():
4044
from ..libs.h5py_scipy_tools import mat_h5py_scipy

mfr/extensions/tabular/libs/xlrd_tools.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
)
1212

1313

14-
def xlsx_xlrd(fp):
14+
def xls(fp):
1515
"""
1616
• .xls → xlrd
1717
• .xlsx → openpyxl (xlrd ≥2.0 dropped xlsx support)
@@ -21,13 +21,11 @@ def xlsx_xlrd(fp):
2121
ZipFile) can seek inside safely.
2222
"""
2323
sheets = OrderedDict()
24+
wb = xlrd.open_workbook(file_contents=to_bytes(fp))
25+
return parse_xls(wb, sheets)
2426

25-
try:
26-
wb = xlrd.open_workbook(file_contents=to_bytes(fp))
27-
return parse_xls(wb, sheets)
28-
except xlrd.biffh.XLRDError:
29-
pass
30-
27+
def xlsx(fp):
28+
sheets = OrderedDict()
3129
try:
3230
wb = load_workbook(BytesIO(to_bytes(fp)), data_only=True, read_only=True)
3331
except zipfile.BadZipFile as exc:

mfr/extensions/tabular/settings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
LIBS = config.get_object('LIBS', {
1313
'.csv': [libs.csv_stdlib],
1414
'.tsv': [libs.csv_stdlib],
15-
'.gsheet': [libs.xlsx_xlrd],
16-
'.xlsx': [libs.xlsx_xlrd],
17-
'.xls': [libs.xlsx_xlrd],
15+
'.gsheet': [libs.xlsx],
16+
'.xlsx': [libs.xlsx],
17+
'.xls': [libs.xls],
1818
'.dta': [libs.dta_pandas],
1919
'.sav': [libs.sav_stdlib],
2020
'.mat': [libs.mat_h5py_scipy],

mfr/extensions/tabular/utilities.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from tempfile import NamedTemporaryFile
99

1010
from mfr.extensions.tabular import compat
11-
from mfr.core.exceptions import SubprocessError, TooBigToRenderError
11+
from mfr.core.exceptions import SubprocessError, TooBigToRenderError, CorruptedError
1212
from mfr.extensions.tabular.settings import (PSPP_CONVERT_BIN,
1313
PSPP_CONVERT_TIMEOUT)
1414

@@ -143,28 +143,22 @@ def parse_xls(wb, sheets):
143143
def parse_xlsx(wb, sheets):
144144
for name in wb.sheetnames:
145145
ws = wb[name]
146-
max_row = ws.max_row or 0
147-
max_col = ws.max_column or 0
148-
verify_size(max_row, max_col, '.xlsx')
149-
150-
if max_row == 0 or max_col == 0:
151-
sheets[name] = ([], [])
152-
continue
153-
154146
header_row = next(ws.iter_rows(max_row=1, values_only=True), [])
155147
fields = fix_headers(header_row)
156148
rows = [
157149
dict(zip(fields, row))
158150
for row in ws.iter_rows(min_row=2,
159-
max_row=max_row,
160-
max_col=max_col,
151+
max_row=MAX_SIZE,
152+
max_col=MAX_SIZE,
161153
values_only=True)
162154
]
163155
sheets[name] = (header_population(fields), rows)
164156
return sheets
165157

166158

167159
def verify_size(rows, cols, ext):
    """Validate table dimensions before rendering.

    :param rows: number of rows in the table, or ``None`` if unknown
    :param cols: number of columns in the table, or ``None`` if unknown
    :param ext: file extension, passed along to ``TooBigToRenderError``
    :raises CorruptedError: if either dimension is ``None``
    :raises TooBigToRenderError: if either dimension exceeds ``MAX_SIZE``
    """
    dimensions = (rows, cols)

    # A missing dimension is treated as a corrupted file.
    if any(dimension is None for dimension in dimensions):
        raise CorruptedError

    if any(dimension > MAX_SIZE for dimension in dimensions):
        raise TooBigToRenderError('Table is too large to render.', ext,
                                  nbr_cols=cols, nbr_rows=rows)

tests/extensions/tabular/test_xlsx_tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class TestTabularPandaTools:
99

1010
def test_xlsx_xlrd(self):
1111
with open(os.path.join(BASE, 'files', 'test.xlsx'), 'rb') as fp:
12-
sheets = xlrd_tools.xlsx_xlrd(fp)
12+
sheets = xlrd_tools.xlsx(fp)
1313

1414
sheet = sheets.popitem()[1]
1515
assert sheet[0][0] == {'field': 'one', 'id': 'one', 'name': 'one', 'sortable': True}

0 commit comments

Comments
 (0)