# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''
Make it so that ``import cloudfiles`` does not pick THIS file, but the
python-cloudfiles one.

http://docs.python.org/whatsnew/2.5.html#pep-328-absolute-and-relative-imports
'''
from __future__ import absolute_import

import logging
import mimetypes

import cloudfiles

from mediagoblin.storage import StorageInterface, clean_listy_filepath

_log = logging.getLogger(__name__)


class CloudFilesStorage(StorageInterface):
    '''
    OpenStack/Rackspace Cloud's Swift/CloudFiles support
    '''
    local_storage = False

    def __init__(self, **kwargs):
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        # The webm MIME type doesn't exist in the standard mimetypes
        # table, so add it.  Note that add_type() expects the extension
        # to include the leading dot.
        mimetypes.add_type("video/webm", ".webm")

        if not self.param_host:
            _log.info('No CloudFiles host URL specified, '
                      'defaulting to Rackspace US')

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        _log.debug('Connected to {} (auth: {})'.format(
            self.connection.connection.host,
            self.connection.auth.host))

        # Fetch the container if it exists; otherwise create it and
        # make it publicly readable with a two-hour CDN TTL.  (Comparing
        # the container name against get_container()'s return value is
        # always unequal, since that returns a Container object rather
        # than a name, so we check existence via the exception instead.)
        try:
            self.container = self.connection.get_container(
                self.param_container)
        except cloudfiles.errors.NoSuchContainer:
            self.container = self.connection.create_container(
                self.param_container)
            self.container.make_public(
                ttl=60 * 60 * 2)

        _log.debug('Container: {}'.format(
            self.container.name))

        self.container_uri = self.container.public_ssl_uri()

    def _resolve_filepath(self, filepath):
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        try:
            self.container.get_object(self._resolve_filepath(filepath))
            return True
        except cloudfiles.errors.NoSuchObject:
            return False

    def get_file(self, filepath, *args, **kwargs):
        """
        Get a file-like wrapper around the remote object, creating the
        object if it doesn't exist yet.

        - Doesn't care about the "mode" argument.
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

            # Detect the mimetype ourselves, since some extensions
            # (webm) may not be universally accepted as video/webm
            mimetype = mimetypes.guess_type(
                filepath[-1])

            if mimetype[0]:
                # Set the mimetype on the CloudFiles object
                obj.content_type = mimetype[0]
                obj.metadata = {'mime-type': mimetype[0]}
            else:
                obj.content_type = 'application/octet-stream'
                obj.metadata = {'mime-type': 'application/octet-stream'}

        return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        try:
            self.container.delete_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.ResponseError:
            # Treat a missing object as already deleted.
            pass

    def file_url(self, filepath):
        return '/'.join([
            self.container_uri,
            self._resolve_filepath(filepath)])

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.
        """
        # We override the generic implementation here, using the
        # "stream" iterator (via __iter__ below) for efficient
        # streaming.
        with self.get_file(filepath, 'rb') as source_file:
            with open(dest_path, 'wb') as dest_file:
                for data in source_file:
                    dest_file.write(data)

    def copy_local_to_storage(self, filename, filepath):
        """
        Copy this file from locally to the storage system.

        This is kind of the opposite of copy_locally.  It's likely you
        could override this method with something more appropriate to
        your storage system.
        """
        # It seems that (our implementation of) cloudfiles.write()
        # takes all existing data and appends write(data) to it,
        # sending the full monty over the wire every time.  That gives
        # chunked writes quadratic (O(n^2)) time and bandwidth usage,
        # which would of course absolutely kill them.  So, override
        # this method and use the Cloudfile's "send" interface instead.
        # TODO: Fixing write() still seems worthwhile though.
        _log.debug('Sending {} to cloudfiles...'.format(filepath))
        with self.get_file(filepath, 'wb') as dest_file:
            with open(filename, 'rb') as source_file:
                # Copy to storage system in 4096 byte chunks
                dest_file.send(source_file)

    def get_file_size(self, filepath):
        """Returns the file size in bytes"""
        obj = self.container.get_object(
            self._resolve_filepath(filepath))
        return obj.total_bytes


class CloudFilesStorageObjectWrapper():
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mystic `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both ends
    and causing breakage.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.
    """
    def __init__(self, storage_object, *args, **kwargs):
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        _log.debug('Reading {}'.format(
            self.storage_object.name))
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        self.storage_object.write(data, *args, **kwargs)

    def send(self, *args, **kw):
        self.storage_object.send(*args, **kw)

    def close(self):
        """
        Nothing to clean up here; provided so the wrapper behaves like
        a regular file object.
        """
        pass

    def __enter__(self):
        """
        Context manager API implementation
        http://docs.python.org/library/stdtypes.html#context-manager-types
        """
        return self

    def __exit__(self, *exc_info):
        """
        Context manager API implementation
        see self.__enter__()
        """
        self.close()

    def __iter__(self, **kwargs):
        """Make CloudFile an iterator, yielding 8192 bytes by default.

        This returns a generator object that can be used to fetch the
        object's content in a memory-efficient way.

        Warning: The HTTP response is only complete after this
        generator has raised a StopIteration.  No other methods can be
        called until this has occurred."""
        return self.storage_object.stream(**kwargs)
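

# A minimal usage sketch, assuming valid Rackspace/Swift credentials.
# Everything below is illustrative: the credential values and file
# paths are placeholders, and in a real deployment MediaGoblin passes
# these keyword arguments in from its storage configuration rather
# than hardcoding them.  It shows the round trip this backend
# supports: upload a local file, resolve its public URI, then stream
# it back down.
if __name__ == '__main__':
    storage = CloudFilesStorage(
        cloudfiles_container='mediagoblin',   # placeholder container name
        cloudfiles_user='youruser',           # placeholder username
        cloudfiles_api_key='yourapikey',      # placeholder API key
        cloudfiles_use_servicenet='false')

    # Storage filepaths are "listy": each element is one path segment.
    filepath = ['media_entries', 'sample', 'medium.jpg']

    # Upload a local file, then print the public (CDN) URL it is
    # served from.
    storage.copy_local_to_storage('/tmp/medium.jpg', filepath)
    print(storage.file_url(filepath))

    # Stream the object back down to disk to verify the round trip.
    storage.copy_locally(filepath, '/tmp/medium-roundtrip.jpg')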