Skip to content

Commit 3649a3a

Browse files
Curtis Vogt authored and A. Jesse Jiryu Davis committed
Improved performance of GridOut readline by creating readchunk method.
Previously, readline would read one byte at a time using the read method. Reading one byte at a time meant slicing the buffer over and over again, causing readline to use more CPU than necessary.
1 parent 6ee6777 commit 3649a3a

File tree

1 file changed

+56
-27
lines changed

1 file changed

+56
-27
lines changed

gridfs/grid_file.py

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,35 @@ def __getattr__(self, name):
432432
return self._file[name]
433433
raise AttributeError("GridOut object has no attribute '%s'" % name)
434434

435+
def readchunk(self):
436+
"""Reads a chunk at a time. If the current position is within a
437+
chunk the remainder of the chunk is returned.
438+
"""
439+
size = int(self.length) - self.__position
440+
441+
received = len(self.__buffer)
442+
chunk_data = EMPTY
443+
444+
if received > 0:
445+
chunk_data = self.__buffer
446+
elif received < size:
447+
chunk_number = int((received + self.__position) / self.chunk_size)
448+
449+
450+
chunk = self.__chunks.find_one({"files_id": self._id,
451+
"n": chunk_number})
452+
if not chunk:
453+
raise CorruptGridFile("no chunk #%d" % chunk_number)
454+
455+
if received:
456+
chunk_data = chunk["data"]
457+
else:
458+
chunk_data = chunk["data"][self.__position % self.chunk_size:]
459+
460+
self.__position += len(chunk_data)
461+
self.__buffer = EMPTY
462+
return chunk_data
463+
435464
def read(self, size=-1):
436465
"""Read at most `size` bytes from the file (less if there
437466
isn't enough data).
@@ -451,30 +480,16 @@ def read(self, size=-1):
451480
if size < 0 or size > remainder:
452481
size = remainder
453482

454-
received = len(self.__buffer)
455-
chunk_number = int((received + self.__position) / self.chunk_size)
456-
chunks = []
457-
483+
received = 0
484+
data = EMPTY
458485
while received < size:
459-
chunk = self.__chunks.find_one({"files_id": self._id,
460-
"n": chunk_number})
461-
if not chunk:
462-
raise CorruptGridFile("no chunk #%d" % chunk_number)
463-
464-
if received:
465-
chunk_data = chunk["data"]
466-
else:
467-
chunk_data = chunk["data"][self.__position % self.chunk_size:]
468-
486+
chunk_data = self.readchunk()
469487
received += len(chunk_data)
470-
chunks.append(chunk_data)
471-
chunk_number += 1
488+
data += chunk_data
472489

473-
data = EMPTY.join([self.__buffer] + chunks)
474-
self.__position += size
475-
to_return = data[:size]
490+
self.__position -= received - size
476491
self.__buffer = data[size:]
477-
return to_return
492+
return data[:size]
478493

479494
def readline(self, size=-1):
480495
"""Read one line or up to `size` bytes from the file.
@@ -484,13 +499,27 @@ def readline(self, size=-1):
484499
485500
.. versionadded:: 1.9
486501
"""
487-
bytes = EMPTY
488-
while len(bytes) != size:
489-
byte = self.read(1)
490-
bytes += byte
491-
if byte == EMPTY or byte == NEWLN:
492-
break
493-
return bytes
502+
remainder = int(self.length) - self.__position
503+
if size < 0 or size > remainder:
504+
size = remainder
505+
506+
received = 0
507+
data = EMPTY
508+
while received < size:
509+
chunk_data = self.readchunk()
510+
511+
for pos in xrange(len(chunk_data)):
512+
byte = chunk_data[pos]
513+
if byte == EMPTY or byte == NEWLN:
514+
size = received + pos
515+
break
516+
517+
received += len(chunk_data)
518+
data += chunk_data
519+
520+
self.__position -= received - size
521+
self.__buffer = data[size:]
522+
return data[:size]
494523

495524
def tell(self):
496525
"""Return the current position of this file.

0 commit comments

Comments
 (0)