diff options
author | Sebastian Spaeth <Sebastian@SSpaeth.de> | 2012-12-19 11:06:51 +0100 |
---|---|---|
committer | Christopher Allan Webber <cwebber@dustycloud.org> | 2013-03-03 14:40:06 -0600 |
commit | 99a54c0095ccadcebeb640cb20cb6eadb8b9a39d (patch) | |
tree | 8eb9aa91ffdecddbb1aca9780fe5ba9a9e3bded9 /mediagoblin/storage | |
parent | 3ff4f75203295550a823e3651ead3340a690b7de (diff) | |
download | mediagoblin-99a54c0095ccadcebeb640cb20cb6eadb8b9a39d.tar.lz mediagoblin-99a54c0095ccadcebeb640cb20cb6eadb8b9a39d.tar.xz mediagoblin-99a54c0095ccadcebeb640cb20cb6eadb8b9a39d.zip |
Make copying to/from storage systems memory efficient (#419)
The copy_locally and copy_local_to_storage (very inconsistent terms BTW)
were simply slurping in everything in RAM and writing it out at once.
(the copy_locally was actually memory efficient if the remote system was local)
Use shutil.copyfileobj which does chunked reads/writes on file objects.
The default buffer size is 16kb, and as each chunk means a separate HTTP
request for e.g. cloudfiles, we use a chunksize of 4MB here (which has
just been arbitrarily set by me without tests).
This should help with the failure to upload large files issue #419.
Diffstat (limited to 'mediagoblin/storage')
-rw-r--r-- | mediagoblin/storage/__init__.py | 10 | ||||
-rw-r--r-- | mediagoblin/storage/filestorage.py | 5 |
2 files changed, 8 insertions, 7 deletions
diff --git a/mediagoblin/storage/__init__.py b/mediagoblin/storage/__init__.py index 2db4c37d..5c1d7d36 100644 --- a/mediagoblin/storage/__init__.py +++ b/mediagoblin/storage/__init__.py @@ -160,12 +160,13 @@ class StorageInterface(object): appropriate. """ if self.local_storage: - shutil.copy( - self.get_local_path(filepath), dest_path) + # Note: this will copy in small chunks + shutil.copy(self.get_local_path(filepath), dest_path) else: with self.get_file(filepath, 'rb') as source_file: with file(dest_path, 'wb') as dest_file: - dest_file.write(source_file.read()) + # Copy from remote storage in 4M chunks + shutil.copyfileobj(source_file, dest_file, length=4*1048576) def copy_local_to_storage(self, filename, filepath): """ @@ -177,7 +178,8 @@ class StorageInterface(object): """ with self.get_file(filepath, 'wb') as dest_file: with file(filename, 'rb') as source_file: - dest_file.write(source_file.read()) + # Copy to storage system in 4M chunks + shutil.copyfileobj(source_file, dest_file, length=4*1048576) ########### diff --git a/mediagoblin/storage/filestorage.py b/mediagoblin/storage/filestorage.py index 00d6335e..ef786b61 100644 --- a/mediagoblin/storage/filestorage.py +++ b/mediagoblin/storage/filestorage.py @@ -87,6 +87,5 @@ class BasicFileStorage(StorageInterface): directory = self._resolve_filepath(filepath[:-1]) if not os.path.exists(directory): os.makedirs(directory) - - shutil.copy( - filename, self.get_local_path(filepath)) + # This uses chunked copying of 16kb buffers (Py2.7): + shutil.copy(filename, self.get_local_path(filepath)) |