Skip to content

Commit 8d3c485

Browse files
committed
stop using rpy2; shell to pspp-convert
* rpy2 was only being used to convert SPSS .sav files to a format pandas can handle. pspp-convert is a program that can convert .sav to .csv. Update the tabular renderer to use that instead. As a side effect, it turns out the standard csv library is better at detecting data types than pandas is, so use that to render the resulting csv file
1 parent 16c64c4 commit 8d3c485

File tree

10 files changed

+61
-26
lines changed

10 files changed

+61
-26
lines changed

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ install:
1616
- travis_retry pip install --upgrade pip
1717
- travis_retry pip install wheel==0.26.0
1818
- travis_retry pip install invoke==0.11.1
19-
- sed 's/^rpy2==/#rpy2==/' -i requirements.txt
2019
- travis_retry invoke wheelhouse --develop
2120
- travis_retry invoke install --develop
2221

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ RUN apt-get update \
2222
# unoconv dependencies
2323
&& apt-get install -y \
2424
unoconv \
25+
# pspp dependencies
26+
&& apt-get install -y \
27+
pspp \
2528
&& apt-get clean \
2629
&& apt-get autoremove -y \
2730
&& rm -rf /var/lib/apt/lists/*

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ For MacOSX users:
1414

1515
```bash
1616
brew install python3
17-
brew install r
17+
brew install pspp
1818
```
1919
For Ubuntu users:
2020

2121
```bash
2222
apt-get install python3
23-
apt-get install r-base
23+
apt-get install pspp
2424
```
2525

2626
After installing python3.5, create the virtual environment with the following commands:

docs/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Or download one of the following:
1414
* tarball_
1515
* zipball_
1616

17-
Make sure that you have installed R, are using python3.5, and have installed invoke for your current python3 version.
17+
Make sure that you have installed pspp, are using python3.5, and have installed invoke for your current python3 version.
1818

1919
Install the version of invoke found in the requirements.txt file. Currently 0.11.1
2020

mfr/extensions/tabular/libs/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def dta_pandas():
2323
return dta_pandas
2424

2525

26+
def sav_stdlib():
    """Lazily import and return the standard-library based ``.sav`` renderer.

    The import is performed inside the function body, so ``stdlib_tools`` is
    only loaded when this loader is actually called.

    :return: the ``sav_stdlib`` function defined in ``libs.stdlib_tools``
    """
    from ..libs import stdlib_tools
    return stdlib_tools.sav_stdlib
29+
2630
def sav_pandas():
    """Lazily import and return the pandas based ``.sav`` renderer.

    The import is performed inside the function body, so ``panda_tools`` is
    only loaded when this loader is actually called.

    :return: the ``sav_pandas`` function defined in ``libs.panda_tools``
    """
    from ..libs import panda_tools
    return panda_tools.sav_pandas

mfr/extensions/tabular/libs/panda_tools.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
from tempfile import NamedTemporaryFile
2+
13
import numpy
24
import pandas
3-
from tempfile import NamedTemporaryFile
4-
from ..utilities import header_population, strip_comments
5+
6+
from mfr.extensions.tabular.utilities import header_population, strip_comments, sav_to_csv
57

68

79
def csv_pandas(fp):
@@ -36,11 +38,14 @@ def dta_pandas(fp):
3638

3739

3840
def sav_pandas(fp):
    """Read and convert a .sav file to a .csv file via pspp, then convert that to JSON format
    using the pandas library

    :param fp: File pointer object to a .sav file
    :return: the result of ``data_from_dataframe``: a dict mapping the sheet name to a
        tuple of table headers and data
    """
    csv_file = sav_to_csv(fp)
    try:
        dataframe = pandas.read_csv(csv_file.name, low_memory=False)
    finally:
        # sav_to_csv makes the caller responsible for closing the temporary
        # csv file. The dataframe is fully loaded by now, so release the file
        # here whether or not parsing succeeded.
        csv_file.close()
    return data_from_dataframe(dataframe)
4550

4651

@@ -64,15 +69,3 @@ def data_from_dataframe(dataframe):
6469
data_row[name] = value
6570
data.append(data_row)
6671
return {'Sheet 1': (header, data)}
67-
68-
69-
def robjectify(fp):
70-
"""Create a dataframe object using R"""
71-
72-
import pandas.rpy.common as common
73-
import rpy2.robjects as robjects
74-
r = robjects
75-
r.r("require(foreign)")
76-
r.r('x <- read.spss("{}",to.data.frame=T)'.format(fp.name))
77-
r.r('row.names(x) = 0:(nrow(x)-1)')
78-
return common.load_data('x')

mfr/extensions/tabular/libs/stdlib_tools.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
import csv
21
import re
2+
import csv
3+
34
from ..exceptions import EmptyTableException
5+
from mfr.extensions.tabular import utilities
46

57

68
def csv_stdlib(fp):
@@ -42,6 +44,19 @@ def csv_stdlib(fp):
4244
return {'Sheet 1': (columns, rows)}
4345

4446

47+
def sav_stdlib(fp):
    """Read and convert a .sav file to .csv with pspp, then convert that to JSON format using
    the python standard library

    :param fp: File pointer object to a .sav file
    :return: the result of ``csv_stdlib`` for the converted file (sheet name mapped to a
        tuple of table headers and data)
    """
    csv_file = utilities.sav_to_csv(fp)
    try:
        # Re-open the converted file by name in text mode; the handle returned
        # by sav_to_csv is binary and csv_stdlib is handed a text-mode file.
        with open(csv_file.name, 'r') as file:
            return csv_stdlib(file)
    finally:
        # Close the temporary file only after parsing has finished, so reading
        # never depends on the file surviving an early close, and so it is
        # also released when open() or csv_stdlib() raises.
        csv_file.close()
58+
59+
4560
def _set_dialect_quote_attrs(dialect, data):
4661
"""Set quote-related dialect attributes based on up to 2kb of csv data.
4762

mfr/extensions/tabular/settings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
'.xlsx': [libs.xlsx_xlrd],
2121
'.xls': [libs.xlsx_xlrd],
2222
'.dta': [libs.dta_pandas],
23-
'.sav': [libs.sav_pandas],
23+
'.sav': [libs.sav_stdlib],
2424
# '.ods': [libs.ods_ezodf],
2525
})
2626

@@ -38,3 +38,5 @@
3838
'syncColumnCellResize': True,
3939
'multiColumnSort': True,
4040
})
41+
42+
PSPP_CONVERT_BIN = config.get('PSPP_CONVERT_BIN', '/usr/bin/pspp-convert')

mfr/extensions/tabular/utilities.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import re
2+
import subprocess
3+
from tempfile import NamedTemporaryFile
24

3-
from mfr.extensions.tabular import compat
5+
from mfr.core import exceptions
6+
from mfr.extensions.tabular import compat, settings
47

58

69
def header_population(headers):
@@ -35,3 +38,22 @@ def strip_comments(src, dest):
3538
data = data.encode('utf-8', 'ignore')
3639
dest.write(data)
3740
dest.seek(0)
41+
42+
43+
def sav_to_csv(fp):
    """Converts a SPSS .sav to a .csv file by calling out to ``pspp-convert``.

    The converter binary is taken from ``settings.PSPP_CONVERT_BIN`` and is invoked with
    the input file name and the name of a freshly created temporary ``.csv`` file.

    :param fp: file pointer object to .sav file
    :return: file pointer to .csv file. You are responsible for closing this.
    :raises exceptions.ExporterError: if ``pspp-convert`` exits with a non-zero status
    """
    csv_file = NamedTemporaryFile(mode='w+b', suffix='.csv')
    command = [
        settings.PSPP_CONVERT_BIN,
        fp.name,
        csv_file.name,
    ]
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError as exc:
        # The caller never receives the temporary file on failure, so it has
        # to be released here instead of being left open.
        csv_file.close()
        raise exceptions.ExporterError(
            'Unable to convert the SPSS file to CSV, please try again later.',
            code=400) from exc
    except OSError:
        # The converter could not be launched at all (e.g. the binary is
        # missing). Release the temporary file and let the error propagate
        # unchanged.
        csv_file.close()
        raise
    return csv_file

requirements.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ docutils==0.12
3939
pandas==0.17.1
4040
git+https://github.com/icereval/xlrd.git
4141

42-
# Rpy
43-
rpy2==2.7.8
44-
4542
# Md
4643
markdown==2.6.2
4744

0 commit comments

Comments
 (0)