-
- Notifications
You must be signed in to change notification settings - Fork 19.3k
BUG in clipboard (linux, python2) with unicode and separator (GH13747) #14599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fb922d6 c83d000 edb8553 66d8ebf 14d94a0 d565b1f f708c2e c5a87d8 825bbe2 02f87b0 71d58d0 dd57ae3 d202fd0 0665fd4 ed1375f 9946fb7 c0aafd7 b03ed56 ac8ae60 7af95da cedb690 98b61e8 1dca292 9db42d8 b74fbc1 2aafb66 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| """ io on the clipboard """ | ||
| from pandas import compat, get_option, option_context, DataFrame | ||
| from pandas.compat import StringIO | ||
| from pandas.compat import StringIO, PY2 | ||
| | ||
| | ||
| def read_clipboard(sep='\s+', **kwargs): # pragma: no cover | ||
| | @@ -18,6 +18,14 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover | |
| ------- | ||
| parsed : DataFrame | ||
| """ | ||
| encoding = kwargs.pop('encoding', 'utf-8') | ||
| | ||
| # only utf-8 is valid for passed value because that's what clipboard | ||
| # supports | ||
| if encoding is not None and encoding.lower().replace('-', '') != 'utf8': | ||
| raise NotImplementedError( | ||
| 'reading from clipboard only supports utf-8 encoding') | ||
| | ||
| from pandas.util.clipboard import clipboard_get | ||
| from pandas.io.parsers import read_table | ||
| text = clipboard_get() | ||
| | @@ -78,6 +86,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover | |
| - Windows: | ||
| - OS X: | ||
| """ | ||
| encoding = kwargs.pop('encoding', 'utf-8') | ||
| | ||
| # testing if an invalid encoding is passed to clipboard | ||
| if encoding is not None and encoding.lower().replace('-', '') != 'utf8': | ||
| raise ValueError('clipboard only supports utf-8 encoding') | ||
| | ||
| from pandas.util.clipboard import clipboard_set | ||
| if excel is None: | ||
| excel = True | ||
| | @@ -87,8 +101,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover | |
| if sep is None: | ||
| sep = '\t' | ||
| buf = StringIO() | ||
| obj.to_csv(buf, sep=sep, **kwargs) | ||
| clipboard_set(buf.getvalue()) | ||
| # clipboard_set (pyperclip) expects unicode | ||
| obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so Then check for encoding, if its not Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback the changes that i have made earlier (show above) are in the to_clipboard method. I am trying to understand your review comment, are you suggesting that instead of explicitly passing encoding = 'utf-8' to obj.to_csv i should check before hand if the encoding is None or utf-8 and raise an exception if it is not one of them Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so you still should pass Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback oh ok, i get it now, let me make that change | ||
| text = buf.getvalue() | ||
| if PY2: | ||
| text = text.decode('utf-8') | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't we always decode here? Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
| ||
| clipboard_set(text) | ||
| return | ||
| except: | ||
| pass | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -9,16 +9,16 @@ | |
| from pandas import read_clipboard | ||
| from pandas import get_option | ||
| from pandas.util import testing as tm | ||
| from pandas.util.testing import makeCustomDataframe as mkdf, disabled | ||
| from pandas.util.testing import makeCustomDataframe as mkdf | ||
| from pandas.util.clipboard.exceptions import PyperclipException | ||
| | ||
| | ||
| try: | ||
| import pandas.util.clipboard # noqa | ||
| except OSError: | ||
| raise nose.SkipTest("no clipboard found") | ||
| DataFrame({'A': [1, 2]}).to_clipboard() | ||
| except PyperclipException: | ||
| raise nose.SkipTest("clipboard primitives not installed") | ||
| | ||
| | ||
| @disabled | ||
| class TestClipboard(tm.TestCase): | ||
| | ||
| @classmethod | ||
| | @@ -52,20 +52,24 @@ def setUpClass(cls): | |
| # Test for non-ascii text: GH9263 | ||
| cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(), | ||
| 'es': 'en español'.split()}) | ||
| # unicode round trip test for GH 13747, GH 12529 | ||
| cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], | ||
| 'b': ['øπ∆˚¬', 'œ∑´®']}) | ||
| cls.data_types = list(cls.data.keys()) | ||
| | ||
| @classmethod | ||
| def tearDownClass(cls): | ||
| super(TestClipboard, cls).tearDownClass() | ||
| del cls.data_types, cls.data | ||
| | ||
| def check_round_trip_frame(self, data_type, excel=None, sep=None): | ||
| def check_round_trip_frame(self, data_type, excel=None, sep=None, | ||
| encoding=None): | ||
| data = self.data[data_type] | ||
| data.to_clipboard(excel=excel, sep=sep) | ||
| data.to_clipboard(excel=excel, sep=sep, encoding=encoding) | ||
| if sep is not None: | ||
| result = read_clipboard(sep=sep, index_col=0) | ||
| result = read_clipboard(sep=sep, index_col=0, encoding=encoding) | ||
| else: | ||
| result = read_clipboard() | ||
| result = read_clipboard(encoding=encoding) | ||
| tm.assert_frame_equal(data, result, check_dtype=False) | ||
| | ||
| def test_round_trip_frame_sep(self): | ||
| | @@ -115,3 +119,16 @@ def test_read_clipboard_infer_excel(self): | |
| exp = pd.read_clipboard() | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add tests for other issues that we mentioned (or attach the issue number to an existing tests as appropriate) Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. | ||
| | ||
| tm.assert_frame_equal(res, exp) | ||
| | ||
| def test_invalid_encoding(self): | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. put comments inside the test Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. implemented the suggested change | ||
| # test case for testing invalid encoding | ||
| data = self.data['string'] | ||
| with tm.assertRaises(ValueError): | ||
| data.to_clipboard(encoding='ascii') | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i have not modified read_clipboard, so that check for encoding is not present in read_clipboard. Should i add the check in read_clipboard and the test backing it Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also read_clipboard later on uses the encoding from kwargs**, if i pop the kwargs in read clipboard before this code, will that not cause any issues? if compat.PY3: Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right in reading you still need to strip the passed encoding (and read from the clipboard as utc8) then i guess we could reencode as the encoding (if it's not None) put a test for this but for now I would just raise NotImplementError if an encoding is passed in and it's not utf8 or none | ||
| with tm.assertRaises(NotImplementedError): | ||
| pd.read_clipboard(encoding='ascii') | ||
| | ||
| def test_round_trip_valid_encodings(self): | ||
| for enc in ['UTF-8', 'utf-8', 'utf8']: | ||
| for dt in self.data_types: | ||
| self.check_round_trip_frame(dt, encoding=enc) | ||
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
list all of the isues this is expected to close
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
implemented this change now