aboutsummaryrefslogtreecommitdiffstats
path: root/mediagoblin/storage
diff options
context:
space:
mode:
author: Sebastian Spaeth <Sebastian@SSpaeth.de> 2012-12-19 11:06:51 +0100
committer: Christopher Allan Webber <cwebber@dustycloud.org> 2013-03-03 14:40:06 -0600
commit: 99a54c0095ccadcebeb640cb20cb6eadb8b9a39d (patch)
tree: 8eb9aa91ffdecddbb1aca9780fe5ba9a9e3bded9 /mediagoblin/storage
parent: 3ff4f75203295550a823e3651ead3340a690b7de (diff)
downloadmediagoblin-99a54c0095ccadcebeb640cb20cb6eadb8b9a39d.tar.lz
mediagoblin-99a54c0095ccadcebeb640cb20cb6eadb8b9a39d.tar.xz
mediagoblin-99a54c0095ccadcebeb640cb20cb6eadb8b9a39d.zip
Make copying to/from storage systems memory efficient (#419)
The copy_locally and copy_local_to_storage methods (very inconsistent terms, BTW) were simply slurping the entire file into RAM and writing it out at once. (copy_locally was actually memory efficient if the remote system was local.) Use shutil.copyfileobj instead, which does chunked reads/writes on file objects. Its default buffer size is 16 KB, and as each chunk means a separate HTTP request for e.g. cloudfiles, we use a chunk size of 4 MB here (a value I set arbitrarily, without tests). This should help with the failure to upload large files, issue #419.
Diffstat (limited to 'mediagoblin/storage')
-rw-r--r-- mediagoblin/storage/__init__.py 10
-rw-r--r-- mediagoblin/storage/filestorage.py 5
2 files changed, 8 insertions, 7 deletions
diff --git a/mediagoblin/storage/__init__.py b/mediagoblin/storage/__init__.py
index 2db4c37d..5c1d7d36 100644
--- a/mediagoblin/storage/__init__.py
+++ b/mediagoblin/storage/__init__.py
@@ -160,12 +160,13 @@ class StorageInterface(object):
appropriate.
"""
if self.local_storage:
- shutil.copy(
- self.get_local_path(filepath), dest_path)
+ # Note: this will copy in small chunks
+ shutil.copy(self.get_local_path(filepath), dest_path)
else:
with self.get_file(filepath, 'rb') as source_file:
with file(dest_path, 'wb') as dest_file:
- dest_file.write(source_file.read())
+ # Copy from remote storage in 4M chunks
+ shutil.copyfileobj(source_file, dest_file, length=4*1048576)
def copy_local_to_storage(self, filename, filepath):
"""
@@ -177,7 +178,8 @@ class StorageInterface(object):
"""
with self.get_file(filepath, 'wb') as dest_file:
with file(filename, 'rb') as source_file:
- dest_file.write(source_file.read())
+ # Copy to storage system in 4M chunks
+ shutil.copyfileobj(source_file, dest_file, length=4*1048576)
###########
diff --git a/mediagoblin/storage/filestorage.py b/mediagoblin/storage/filestorage.py
index 00d6335e..ef786b61 100644
--- a/mediagoblin/storage/filestorage.py
+++ b/mediagoblin/storage/filestorage.py
@@ -87,6 +87,5 @@ class BasicFileStorage(StorageInterface):
directory = self._resolve_filepath(filepath[:-1])
if not os.path.exists(directory):
os.makedirs(directory)
-
- shutil.copy(
- filename, self.get_local_path(filepath))
+ # This uses chunked copying of 16kb buffers (Py2.7):
+ shutil.copy(filename, self.get_local_path(filepath))