Skip to content

Commit f157a35

Browse files
author
Mike Dirolf
committed
allow writing unicode to GridFS if an encoding attribute has been specified PYTHON-100
1 parent 9ddabb9 commit f157a35

File tree

4 files changed

+66
-5
lines changed

4 files changed

+66
-5
lines changed

gridfs/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,11 @@ def put(self, data, **kwargs):
8787
>>> f.close()
8888
8989
`data` can be either an instance of :class:`str` or a
90-
file-like object providing a :meth:`read` method. Any keyword
91-
arguments will be passed through to the created file - see
90+
file-like object providing a :meth:`read` method. If an
91+
`encoding` keyword argument is passed, `data` can also be a
92+
:class:`unicode` instance, which will be encoded as `encoding`
93+
before being written. Any keyword arguments will be passed
94+
through to the created file - see
9295
:meth:`~gridfs.grid_file.GridIn` for possible
9396
arguments. Returns the ``"_id"`` of the created file.
9497
@@ -100,6 +103,10 @@ def put(self, data, **kwargs):
100103
- `data`: data to be written as a file.
101104
- `**kwargs` (optional): keyword arguments for file creation
102105
106+
.. versionadded:: 1.8.1+
107+
The ability to write :class:`unicode`, if an `encoding` has
108+
been specified as a keyword argument.
109+
103110
.. versionadded:: 1.6
104111
"""
105112
grid_file = GridIn(self.__collection, **kwargs)

gridfs/grid_file.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ def __init__(self, root_collection, **kwargs):
109109
- ``"chunkSize"`` or ``"chunk_size"``: size of each of the
110110
chunks, in bytes (default: 256 kb)
111111
112+
- ``"encoding"``: encoding used for this file - any
113+
:class:`unicode` that is written to the file will be
114+
converted to a :class:`str` with this encoding
115+
112116
:Parameters:
113117
- `root_collection`: root collection to write to
114118
- `**kwargs` (optional): file level options (see above)
@@ -224,17 +228,26 @@ def write(self, data):
224228
"""Write data to the file. There is no return value.
225229
226230
`data` can be either a string of bytes or a file-like object
227-
(implementing :meth:`read`).
231+
(implementing :meth:`read`). If the file has an
232+
:attr:`encoding` attribute, `data` can also be a
233+
:class:`unicode` instance, which will be encoded as
234+
:attr:`encoding` before being written.
228235
229236
Due to buffering, the data may not actually be written to the
230237
database until the :meth:`close` method is called. Raises
231238
:class:`ValueError` if this file is already closed. Raises
232239
:class:`TypeError` if `data` is not an instance of
233-
:class:`str` or a file-like object.
240+
:class:`str`, a file-like object, or an instance of
241+
:class:`unicode` (only allowed if the file has an
242+
:attr:`encoding` attribute).
234243
235244
:Parameters:
236245
- `data`: string of bytes or file-like object to be written
237246
to the file
247+
248+
.. versionadded:: 1.8.1+
249+
The ability to write :class:`unicode`, if the file has an
250+
:attr:`encoding` attribute.
238251
"""
239252
if self._closed:
240253
raise ValueError("cannot write to a closed file")
@@ -252,9 +265,16 @@ def write(self, data):
252265
self._buffer.write(to_write)
253266
# string
254267
except AttributeError:
255-
if not isinstance(data, str):
268+
if not isinstance(data, basestring):
256269
raise TypeError("can only write strings or file-like objects")
257270

271+
if isinstance(data, unicode):
272+
try:
273+
data = data.encode(self.encoding)
274+
except AttributeError:
275+
raise TypeError("must specify an encoding for file in "
276+
"order to write unicode")
277+
258278
while data:
259279
space = self.chunk_size - self._buffer.tell()
260280

test/test_grid_file.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
#
13
# Copyright 2009-2010 10gen, Inc.
24
#
35
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -422,5 +424,24 @@ def test_read_chunks_unaligned_buffer_size(self):
422424

423425
self.assertEqual(in_data, out_data)
424426

427+
def test_write_unicode(self):
428+
f = GridIn(self.db.fs)
429+
self.assertRaises(TypeError, f.write, u"foo")
430+
431+
f = GridIn(self.db.fs, encoding="utf-8")
432+
f.write(u"foo")
433+
f.close()
434+
435+
g = GridOut(self.db.fs, f._id)
436+
self.assertEqual("foo", g.read())
437+
438+
f = GridIn(self.db.fs, encoding="iso-8859-1")
439+
f.write(u"aé")
440+
f.close()
441+
442+
g = GridOut(self.db.fs, f._id)
443+
self.assertEqual(u"aé".encode("iso-8859-1"), g.read())
444+
445+
425446
if __name__ == "__main__":
426447
unittest.main()

test/test_gridfs.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
#
13
# Copyright 2009-2010 10gen, Inc.
24
#
35
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -231,6 +233,17 @@ def test_exists(self):
231233
self.assertFalse(self.fs.exists(foo={"$gt": 12}))
232234
self.assertFalse(self.fs.exists({"foo": {"$gt": 12}}))
233235

236+
def test_put_unicode(self):
237+
self.assertRaises(TypeError, self.fs.put, u"hello")
238+
239+
oid = self.fs.put(u"hello", encoding="utf-8")
240+
self.assertEqual("hello", self.fs.get(oid).read())
241+
self.assertEqual("utf-8", self.fs.get(oid).encoding)
242+
243+
oid = self.fs.put(u"aé", encoding="iso-8859-1")
244+
self.assertEqual(u"aé".encode("iso-8859-1"), self.fs.get(oid).read())
245+
self.assertEqual("iso-8859-1", self.fs.get(oid).encoding)
246+
234247

235248
if __name__ == "__main__":
236249
unittest.main()

0 commit comments

Comments
 (0)