diff options
Diffstat (limited to 'mediagoblin/storage')
-rw-r--r-- | mediagoblin/storage/__init__.py | 26 | ||||
-rw-r--r-- | mediagoblin/storage/cloudfiles.py | 58 | ||||
-rw-r--r-- | mediagoblin/storage/filestorage.py | 31 |
3 files changed, 102 insertions, 13 deletions
diff --git a/mediagoblin/storage/__init__.py b/mediagoblin/storage/__init__.py index 2db4c37d..bbe134a7 100644 --- a/mediagoblin/storage/__init__.py +++ b/mediagoblin/storage/__init__.py @@ -101,10 +101,20 @@ class StorageInterface(object): def delete_file(self, filepath): """ - Delete or dereference the file at filepath. + Delete or dereference the file (not directory) at filepath. + """ + # Subclasses should override this method. + self.__raise_not_implemented() + + def delete_dir(self, dirpath, recursive=False): + """Delete the directory at dirpath + + :param recursive: Usually, a directory must not contain any + files for the delete to succeed. If True, containing files + and subdirectories within dirpath will be recursively + deleted. - This might need to delete directories, buckets, whatever, for - cleanliness. (Be sure to avoid race conditions on that though) + :returns: True in case of success, False otherwise. """ # Subclasses should override this method. self.__raise_not_implemented() @@ -160,12 +170,13 @@ class StorageInterface(object): appropriate. """ if self.local_storage: - shutil.copy( - self.get_local_path(filepath), dest_path) + # Note: this will copy in small chunks + shutil.copy(self.get_local_path(filepath), dest_path) else: with self.get_file(filepath, 'rb') as source_file: with file(dest_path, 'wb') as dest_file: - dest_file.write(source_file.read()) + # Copy from remote storage in 4M chunks + shutil.copyfileobj(source_file, dest_file, length=4*1048576) def copy_local_to_storage(self, filename, filepath): """ @@ -177,7 +188,8 @@ class StorageInterface(object): """ with self.get_file(filepath, 'wb') as dest_file: with file(filename, 'rb') as source_file: - dest_file.write(source_file.read()) + # Copy to storage system in 4M chunks + shutil.copyfileobj(source_file, dest_file, length=4*1048576) ########### diff --git a/mediagoblin/storage/cloudfiles.py b/mediagoblin/storage/cloudfiles.py index 1b5a6363..b6e57c91 100644 --- a/mediagoblin/storage/cloudfiles.py +++ b/mediagoblin/storage/cloudfiles.py @@ -131,6 +131,43 @@ class CloudFilesStorage(StorageInterface): self._resolve_filepath(filepath)]) + def copy_locally(self, filepath, dest_path): + """ + Copy this file locally. + + A basic working method for this is provided that should + function both for local_storage systems and remote storge + systems, but if more efficient systems for copying locally + apply to your system, override this method with something more + appropriate. + """ + # Override this method, using the "stream" iterator for efficient streaming + with self.get_file(filepath, 'rb') as source_file: + with file(dest_path, 'wb') as dest_file: + for data in source_file: + dest_file.write(data) + + def copy_local_to_storage(self, filename, filepath): + """ + Copy this file from locally to the storage system. + + This is kind of the opposite of copy_locally. It's likely you + could override this method with something more appropriate to + your storage system. + """ + # It seems that (our implementation of) cloudfiles.write() takes + # all existing data and appends write(data) to it, sending the + # full monty over the wire everytime. This would of course + # absolutely kill chunked writes with some O(1^n) performance + # and bandwidth usage. So, override this method and use the + # Cloudfile's "send" interface instead. + # TODO: Fixing write() still seems worthwhile though. + _log.debug('Sending {0} to cloudfiles...'.format(filepath)) + with self.get_file(filepath, 'wb') as dest_file: + with file(filename, 'rb') as source_file: + # Copy to storage system in 4096 byte chunks + dest_file.send(source_file) + class CloudFilesStorageObjectWrapper(): """ Wrapper for python-cloudfiles's cloudfiles.storage_object.Object @@ -160,6 +197,10 @@ class CloudFilesStorageObjectWrapper(): Currently this method does not support any write modes except "append". However if we should need it it would be easy implement. """ + _log.warn( + '{0}.write() has bad performance! Use .send instead for now'\ + .format(self.__class__.__name__)) + if self.storage_object.size and type(data) == str: _log.debug('{0} is > 0 in size, appending data'.format( self.storage_object.name)) @@ -169,9 +210,12 @@ class CloudFilesStorageObjectWrapper(): self.storage_object.name)) self.storage_object.write(data, *args, **kwargs) + def send(self, *args, **kw): + self.storage_object.send(*args, **kw) + def close(self): """ - Not implemented. + Not sure we need anything here. """ pass @@ -188,3 +232,15 @@ class CloudFilesStorageObjectWrapper(): see self.__enter__() """ self.close() + + + def __iter__(self, **kwargs): + """Make CloudFile an iterator, yielding 8192 bytes by default + + This returns a generator object that can be used to getting the + object's content in a memory efficient way. + + Warning: The HTTP response is only complete after this generator + has raised a StopIteration. No other methods can be called until + this has occurred.""" + return self.storage_object.stream(**kwargs) diff --git a/mediagoblin/storage/filestorage.py b/mediagoblin/storage/filestorage.py index 00d6335e..3d6e0753 100644 --- a/mediagoblin/storage/filestorage.py +++ b/mediagoblin/storage/filestorage.py @@ -62,10 +62,32 @@ class BasicFileStorage(StorageInterface): return open(self._resolve_filepath(filepath), mode) def delete_file(self, filepath): - # TODO: Also delete unused directories if empty (safely, with - # checks to avoid race conditions). + """Delete file at filepath + + Raises OSError in case filepath is a directory.""" + #TODO: log error os.remove(self._resolve_filepath(filepath)) + def delete_dir(self, dirpath, recursive=False): + """returns True on succes, False on failure""" + + dirpath = self._resolve_filepath(dirpath) + + # Shortcut the default and simple case of nonempty=F, recursive=F + if recursive: + try: + shutil.rmtree(dirpath) + except OSError as e: + #TODO: log something here + return False + else: # recursively delete everything + try: + os.rmdir(dirpath) + except OSError as e: + #TODO: log something here + return False + return True + def file_url(self, filepath): if not self.base_url: raise NoWebServing( @@ -87,6 +109,5 @@ class BasicFileStorage(StorageInterface): directory = self._resolve_filepath(filepath[:-1]) if not os.path.exists(directory): os.makedirs(directory) - - shutil.copy( - filename, self.get_local_path(filepath)) + # This uses chunked copying of 16kb buffers (Py2.7): + shutil.copy(filename, self.get_local_path(filepath)) |