Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/r_interface.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ its release 2.3, while the current interface is
designed for the 2.2.x series. We recommend to use 2.2.x over other series
unless you are prepared to fix parts of the code, yet the rpy2-2.3.0
introduces improvements such as a better R-Python bridge memory management
layer so I might be a good idea to bite the bullet and submit patches for
layer so it might be a good idea to bite the bullet and submit patches for
the few minor differences that need to be fixed.


Expand Down
4 changes: 4 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ Improvements to existing features
:issue:`4998`)
- ``to_dict`` now takes ``records`` as a possible outtype. Returns an array
of column-keyed dictionaries. (:issue:`4936`)
- Improve support for converting R datasets to pandas objects (more
informative index for timeseries and numeric, support for factors, dist, and
high-dimensional arrays).


API Changes
~~~~~~~~~~~
Expand Down
6 changes: 6 additions & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,12 @@ Enhancements
dfi[mask.any(1)]

:ref:`See the docs<indexing.basics.indexing_isin>` for more.
- All R datasets listed here http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html can now be loaded into Pandas objects

.. code-block:: python

import pandas.rpy.common as com
com.load_data('Titanic')

.. _whatsnew_0130.experimental:

Expand Down
194 changes: 53 additions & 141 deletions pandas/rpy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from rpy2.robjects import r
import rpy2.robjects as robj

import itertools as IT


__all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe',
'convert_to_r_matrix']

Expand Down Expand Up @@ -46,47 +49,69 @@ def _is_null(obj):

def _convert_list(obj):
"""
Convert named Vector to dict
Convert named Vector to dict, factors to list
"""
values = [convert_robj(x) for x in obj]
return dict(zip(obj.names, values))
try:
values = [convert_robj(x) for x in obj]
keys = r['names'](obj)
return dict(zip(keys, values))
except TypeError:
# For state.division and state.region
factors = list(r['factor'](obj))
level = list(r['levels'](obj))
result = [level[index-1] for index in factors]
return result


def _convert_array(obj):
"""
Convert Array to ndarray
Convert Array to DataFrame
"""
# this royally sucks. "Matrices" (arrays) with dimension > 3 in R aren't
# really matrices-- things come out Fortran order in the first two
# dimensions. Maybe I'm wrong?

def _list(item):
try:
return list(item)
except TypeError:
return []

# For iris3, HairEyeColor, UCBAdmissions, Titanic
dim = list(obj.dim)
values = np.array(list(obj))

if len(dim) == 3:
arr = values.reshape(dim[-1:] + dim[:-1]).swapaxes(1, 2)

if obj.names is not None:
name_list = [list(x) for x in obj.names]
if len(dim) == 2:
return pd.DataFrame(arr, index=name_list[0], columns=name_list[1])
elif len(dim) == 3:
return pd.Panel(arr, items=name_list[2],
major_axis=name_list[0],
minor_axis=name_list[1])
else:
print('Cannot handle dim=%d' % len(dim))
else:
return arr
names = r['dimnames'](obj)
try:
columns = list(r['names'](names))[::-1]
except TypeError:
columns = ['X{:d}'.format(i) for i in range(len(names))][::-1]
columns.append('value')
name_list = [(_list(x) or range(d)) for x, d in zip(names, dim)][::-1]
arr = np.array(list(IT.product(*name_list)))
arr = np.column_stack([arr,values])
df = pd.DataFrame(arr, columns=columns)
return df


def _convert_vector(obj):
if isinstance(obj, robj.IntVector):
return _convert_int_vector(obj)
elif isinstance(obj, robj.StrVector):
return _convert_str_vector(obj)

return list(obj)
# Check if the vector has extra information attached to it that can be used
# as an index
try:
attributes = set(r['attributes'](obj).names)
except AttributeError:
return list(obj)
if 'names' in attributes:
return pd.Series(list(obj), index=r['names'](obj))
elif 'tsp' in attributes:
return pd.Series(list(obj), index=r['time'](obj))
elif 'labels' in attributes:
return pd.Series(list(obj), index=r['labels'](obj))
if _rclass(obj) == 'dist':
# For 'eurodist'. WARNING: This results in a DataFrame, not a Series or list.
matrix = r['as.matrix'](obj)
return convert_robj(matrix)
else:
return list(obj)

NA_INTEGER = -2147483648

Expand Down Expand Up @@ -141,8 +166,7 @@ def _convert_Matrix(mat):
rows = mat.rownames

columns = None if _is_null(columns) else list(columns)
index = None if _is_null(rows) else list(rows)

index = r['time'](mat) if _is_null(rows) else list(rows)
return pd.DataFrame(np.array(mat), index=_check_int(index),
columns=columns)

Expand Down Expand Up @@ -197,7 +221,7 @@ def convert_robj(obj, use_pandas=True):
if isinstance(obj, rpy_type):
return converter(obj)

raise Exception('Do not know what to do with %s object' % type(obj))
raise TypeError('Do not know what to do with %s object' % type(obj))


def convert_to_r_posixct(obj):
Expand Down Expand Up @@ -329,117 +353,5 @@ def convert_to_r_matrix(df, strings_as_factors=False):

return r_matrix


def test_convert_list():
obj = r('list(a=1, b=2, c=3)')

converted = convert_robj(obj)
expected = {'a': [1], 'b': [2], 'c': [3]}

_test.assert_dict_equal(converted, expected)


def test_convert_nested_list():
obj = r('list(a=list(foo=1, bar=2))')

converted = convert_robj(obj)
expected = {'a': {'foo': [1], 'bar': [2]}}

_test.assert_dict_equal(converted, expected)


def test_convert_frame():
# built-in dataset
df = r['faithful']

converted = convert_robj(df)

assert np.array_equal(converted.columns, ['eruptions', 'waiting'])
assert np.array_equal(converted.index, np.arange(1, 273))


def _test_matrix():
r('mat <- matrix(rnorm(9), ncol=3)')
r('colnames(mat) <- c("one", "two", "three")')
r('rownames(mat) <- c("a", "b", "c")')

return r['mat']


def test_convert_matrix():
mat = _test_matrix()

converted = convert_robj(mat)

assert np.array_equal(converted.index, ['a', 'b', 'c'])
assert np.array_equal(converted.columns, ['one', 'two', 'three'])


def test_convert_r_dataframe():

is_na = robj.baseenv.get("is.na")

seriesd = _test.getSeriesData()
frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

# Null data
frame["E"] = [np.nan for item in frame["A"]]
# Some mixed type data
frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

r_dataframe = convert_to_r_dataframe(frame)

assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
assert all(is_na(item) for item in r_dataframe.rx2("E"))

for column in frame[["A", "B", "C", "D"]]:
coldata = r_dataframe.rx2(column)
original_data = frame[column]
assert np.array_equal(convert_robj(coldata), original_data)

for column in frame[["D", "E"]]:
for original, converted in zip(frame[column],
r_dataframe.rx2(column)):

if pd.isnull(original):
assert is_na(converted)
else:
assert original == converted


def test_convert_r_matrix():

is_na = robj.baseenv.get("is.na")

seriesd = _test.getSeriesData()
frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
# Null data
frame["E"] = [np.nan for item in frame["A"]]

r_dataframe = convert_to_r_matrix(frame)

assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

for column in frame[["A", "B", "C", "D"]]:
coldata = r_dataframe.rx(True, column)
original_data = frame[column]
assert np.array_equal(convert_robj(coldata),
original_data)

# Pandas bug 1282
frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

# FIXME: Ugly, this whole module needs to be ported to nose/unittest
try:
wrong_matrix = convert_to_r_matrix(frame)
except TypeError:
pass
except Exception:
raise


if __name__ == '__main__':
pass
Empty file added pandas/rpy/tests/__init__.py
Empty file.
Loading