aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoar Wandborg <git@wandborg.com>2011-09-12 02:32:03 +0200
committerJoar Wandborg <git@wandborg.com>2011-09-12 02:32:03 +0200
commita2468d18ca39afed9102d707e7aea6a613ff2dab (patch)
treeefede9c515269b6ce56b8afcf8b472a1d2dde48b
parent55376ff403496a3271154627c45fe59dc2b44ca7 (diff)
downloadmediagoblin-a2468d18ca39afed9102d707e7aea6a613ff2dab.tar.lz
mediagoblin-a2468d18ca39afed9102d707e7aea6a613ff2dab.tar.xz
mediagoblin-a2468d18ca39afed9102d707e7aea6a613ff2dab.zip
Feature #587 - Split storage.py into submodules
* Removed storage.py * Created submodules for filestorage, cloudfiles, mountstorage * Changed test_storage to reflect the changes made in the storage module structure * Added mediagoblin.storage.filestorage.BasicFileStorage as a default for both publicstore and queuestore's `storage_class`
-rw-r--r--mediagoblin/config_spec.ini2
-rw-r--r--mediagoblin/storage.py568
-rw-r--r--mediagoblin/storage/__init__.py240
-rw-r--r--mediagoblin/storage/cloudfiles.py156
-rw-r--r--mediagoblin/storage/filestorage.py78
-rw-r--r--mediagoblin/storage/mountstorage.py156
-rw-r--r--mediagoblin/tests/test_storage.py6
7 files changed, 635 insertions, 571 deletions
diff --git a/mediagoblin/config_spec.ini b/mediagoblin/config_spec.ini
index 6fefb581..0801b39e 100644
--- a/mediagoblin/config_spec.ini
+++ b/mediagoblin/config_spec.ini
@@ -43,10 +43,12 @@ allow_attachments = boolean(default=False)
[storage:publicstore]
+storage_class = string(default="mediagoblin.storage.filestorage:BasicFileStorage")
base_dir = string(default="%(here)s/user_dev/media/public")
base_url = string(default="/mgoblin_media/")
[storage:queuestore]
+storage_class = string(default="mediagoblin.storage.filestorage:BasicFileStorage")
base_dir = string(default="%(here)s/user_dev/media/queue")
diff --git a/mediagoblin/storage.py b/mediagoblin/storage.py
deleted file mode 100644
index f9563031..00000000
--- a/mediagoblin/storage.py
+++ /dev/null
@@ -1,568 +0,0 @@
-# GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import shutil
-import urlparse
-import uuid
-import cloudfiles
-import mimetypes
-import tempfile
-
-from werkzeug.utils import secure_filename
-
-from mediagoblin import util
-
-########
-# Errors
-########
-
-
-class Error(Exception):
- pass
-
-
-class InvalidFilepath(Error):
- pass
-
-
-class NoWebServing(Error):
- pass
-
-
-class NotImplementedError(Error):
- pass
-
-
-###############################################
-# Storage interface & basic file implementation
-###############################################
-
-class StorageInterface(object):
- """
- Interface for the storage API.
-
- This interface doesn't actually provide behavior, but it defines
- what kind of storage patterns subclasses should provide.
-
- It is important to note that the storage API idea of a "filepath"
- is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
- mind while reading method documentation.
-
- You should set up your __init__ method with whatever keyword
- arguments are appropriate to your storage system, but you should
- also passively accept all extraneous keyword arguments like:
-
- def __init__(self, **kwargs):
- pass
-
- See BasicFileStorage as a simple implementation of the
- StorageInterface.
- """
-
- # Whether this file store is on the local filesystem.
- local_storage = False
-
- def __raise_not_implemented(self):
- """
- Raise a warning about some component not implemented by a
- subclass of this interface.
- """
- raise NotImplementedError(
- "This feature not implemented in this storage API implementation.")
-
- def file_exists(self, filepath):
- """
- Return a boolean asserting whether or not file at filepath
- exists in our storage system.
-
- Returns:
- True / False depending on whether file exists or not.
- """
- # Subclasses should override this method.
- self.__raise_not_implemented()
-
- def get_file(self, filepath, mode='r'):
- """
- Return a file-like object for reading/writing from this filepath.
-
- Should create directories, buckets, whatever, as necessary.
- """
- # Subclasses should override this method.
- self.__raise_not_implemented()
-
- def delete_file(self, filepath):
- """
- Delete or dereference the file at filepath.
-
- This might need to delete directories, buckets, whatever, for
- cleanliness. (Be sure to avoid race conditions on that though)
- """
- # Subclasses should override this method.
- self.__raise_not_implemented()
-
- def file_url(self, filepath):
- """
- Get the URL for this file. This assumes our storage has been
- mounted with some kind of URL which makes this possible.
- """
- # Subclasses should override this method.
- self.__raise_not_implemented()
-
- def get_unique_filepath(self, filepath):
- """
- If a filename at filepath already exists, generate a new name.
-
- Eg, if the filename doesn't exist:
- >>> storage_handler.get_unique_filename(['dir1', 'dir2', 'fname.jpg'])
- [u'dir1', u'dir2', u'fname.jpg']
-
- But if a file does exist, let's get one back with at uuid tacked on:
- >>> storage_handler.get_unique_filename(['dir1', 'dir2', 'fname.jpg'])
- [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
- """
- # Make sure we have a clean filepath to start with, since
- # we'll be possibly tacking on stuff to the filename.
- filepath = clean_listy_filepath(filepath)
-
- if self.file_exists(filepath):
- return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
- else:
- return filepath
-
- def get_local_path(self, filepath):
- """
- If this is a local_storage implementation, give us a link to
- the local filesystem reference to this file.
-
- >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
- u'/path/to/mounting/foo/bar/baz.jpg'
- """
- # Subclasses should override this method, if applicable.
- self.__raise_not_implemented()
-
- def copy_locally(self, filepath, dest_path):
- """
- Copy this file locally.
-
- A basic working method for this is provided that should
- function both for local_storage systems and remote storge
- systems, but if more efficient systems for copying locally
- apply to your system, override this method with something more
- appropriate.
- """
- if self.local_storage:
- shutil.copy(
- self.get_local_path(filepath), dest_path)
- else:
- with self.get_file(filepath, 'rb') as source_file:
- with file(dest_path, 'wb') as dest_file:
- dest_file.write(source_file.read())
-
-
-class BasicFileStorage(StorageInterface):
- """
- Basic local filesystem implementation of storage API
- """
-
- local_storage = True
-
- def __init__(self, base_dir, base_url=None, **kwargs):
- """
- Keyword arguments:
- - base_dir: Base directory things will be served out of. MUST
- be an absolute path.
- - base_url: URL files will be served from
- """
- self.base_dir = base_dir
- self.base_url = base_url
-
- def _resolve_filepath(self, filepath):
- """
- Transform the given filepath into a local filesystem filepath.
- """
- return os.path.join(
- self.base_dir, *clean_listy_filepath(filepath))
-
- def file_exists(self, filepath):
- return os.path.exists(self._resolve_filepath(filepath))
-
- def get_file(self, filepath, mode='r'):
- # Make directories if necessary
- if len(filepath) > 1:
- directory = self._resolve_filepath(filepath[:-1])
- if not os.path.exists(directory):
- os.makedirs(directory)
-
- # Grab and return the file in the mode specified
- return open(self._resolve_filepath(filepath), mode)
-
- def delete_file(self, filepath):
- # TODO: Also delete unused directories if empty (safely, with
- # checks to avoid race conditions).
- os.remove(self._resolve_filepath(filepath))
-
- def file_url(self, filepath):
- if not self.base_url:
- raise NoWebServing(
- "base_url not set, cannot provide file urls")
-
- return urlparse.urljoin(
- self.base_url,
- '/'.join(clean_listy_filepath(filepath)))
-
- def get_local_path(self, filepath):
- return self._resolve_filepath(filepath)
-
-
-# ----------------------------------------------------
-# OpenStack/Rackspace Cloud's Swift/CloudFiles support
-# ----------------------------------------------------
-
-class CloudFilesStorage(StorageInterface):
- def __init__(self, **kwargs):
- self.param_container = kwargs.get('cloudfiles_container')
- self.param_user = kwargs.get('cloudfiles_user')
- self.param_api_key = kwargs.get('cloudfiles_api_key')
- self.param_host = kwargs.get('cloudfiles_host')
- self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')
-
- if not self.param_host:
- print('No CloudFiles host URL specified, '
- 'defaulting to Rackspace US')
-
- self.connection = cloudfiles.get_connection(
- username=self.param_user,
- api_key=self.param_api_key,
- servicenet=True if self.param_use_servicenet == 'true' or \
- self.param_use_servicenet == True else False)
-
- if not self.param_container == \
- self.connection.get_container(self.param_container):
- self.container = self.connection.create_container(
- self.param_container)
- self.container.make_public(
- ttl=60 * 60 * 2)
- else:
- self.container = self.connection.get_container(
- self.param_container)
-
- self.container_uri = self.container.public_uri()
-
- def _resolve_filepath(self, filepath):
- return '/'.join(
- clean_listy_filepath(filepath))
-
- def file_exists(self, filepath):
- try:
- object = self.container.get_object(
- self._resolve_filepath(filepath))
- return True
- except cloudfiles.errors.NoSuchObject:
- return False
-
- def get_file(self, filepath, *args, **kwargs):
- """
- - Doesn't care about the "mode" argument
- """
- try:
- obj = self.container.get_object(
- self._resolve_filepath(filepath))
- except cloudfiles.errors.NoSuchObject:
- obj = self.container.create_object(
- self._resolve_filepath(filepath))
-
- mimetype = mimetypes.guess_type(
- filepath[-1])
-
- if mimetype:
- obj.content_type = mimetype[0]
-
- return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)
-
- def delete_file(self, filepath):
- # TODO: Also delete unused directories if empty (safely, with
- # checks to avoid race conditions).
- self.container.delete_object(
- self._resolve_filepath(filepath))
-
- def file_url(self, filepath):
- return '/'.join([
- self.container_uri,
- self._resolve_filepath(filepath)])
-
-
-class CloudFilesStorageObjectWrapper():
- """
- Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
- used to circumvent the mystic `medium.jpg` corruption issue, where
- we had both python-cloudfiles and PIL doing buffering on both
- ends and that breaking things.
-
- This wrapper currently meets mediagoblin's needs for a public_store
- file-like object.
- """
- def __init__(self, storage_object, *args, **kwargs):
- self.storage_object = storage_object
-
- def read(self, *args, **kwargs):
- return self.storage_object.read(*args, **kwargs)
-
- def write(self, data, *args, **kwargs):
- """
- write data to the cloudfiles storage object
-
- The original motivation for this wrapper is to ensure
- that buffered writing to a cloudfiles storage object does not overwrite
- any preexisting data.
-
- Currently this method does not support any write modes except "append".
- However if we should need it it would be easy implement.
- """
- if self.storage_object.size and type(data) == str:
- data = self.read() + data
-
- self.storage_object.write(data, *args, **kwargs)
-
- def close(self):
- pass
-
- def __enter__(self):
- """
- Context Manager API implementation
- http://docs.python.org/library/stdtypes.html#context-manager-types
- """
- return self
-
- def __exit__(self, *exc_info):
- """
- Context Manger API implementation
- see self.__enter__()
- """
- self.close()
-
-
-# ------------
-# MountStorage
-# ------------
-
-class MountStorage(StorageInterface):
- """
- Experimental "Mount" virtual Storage Interface
-
- This isn't an interface to some real storage, instead it's a
- redirecting interface, that redirects requests to other
- "StorageInterface"s.
-
- For example, say you have the paths:
-
- 1. ['user_data', 'cwebber', 'avatar.jpg']
- 2. ['user_data', 'elrond', 'avatar.jpg']
- 3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']
-
- You could mount media_entries under CloudFileStorage and user_data
- under BasicFileStorage. Then 1 would be passed to
- BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
- would be passed to CloudFileStorage under
- ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].
-
- In other words, this is kind of like mounting /home/ and /etc/
- under different filesystems on your operating system... but with
- mediagoblin filestorages :)
-
- To set this up, you currently need to call the mount() method with
- the target path and a backend, that shall be available under that
- target path. You have to mount things in a sensible order,
- especially you can't mount ["a", "b"] before ["a"].
- """
- def __init__(self, **kwargs):
- self.mounttab = {}
-
- def mount(self, dirpath, backend):
- """
- Mount a new backend under dirpath
- """
- new_ent = clean_listy_filepath(dirpath)
-
- print "Mounting:", repr(new_ent)
- already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
- print "===", repr(already), repr(rem_1), repr(rem_2), len(table)
-
- assert (len(rem_2) > 0) or (None not in table), \
- "That path is already mounted"
- assert (len(rem_2) > 0) or (len(table)==0), \
- "A longer path is already mounted here"
-
- for part in rem_2:
- table[part] = {}
- table = table[part]
- table[None] = backend
-
- def _resolve_to_backend(self, filepath, extra_info = False):
- """
- extra_info = True is for internal use!
-
- Normally, returns the backend and the filepath inside that backend.
-
- With extra_info = True it returns the last directory node and the
- remaining filepath from there in addition.
- """
- table = self.mounttab
- filepath = filepath[:]
- res_fp = None
- while True:
- new_be = table.get(None)
- if (new_be is not None) or res_fp is None:
- res_be = new_be
- res_fp = filepath[:]
- res_extra = (table, filepath[:])
- # print "... New res: %r, %r, %r" % (res_be, res_fp, res_extra)
- if len(filepath) == 0:
- break
- query = filepath.pop(0)
- entry = table.get(query)
- if entry is not None:
- table = entry
- res_extra = (table, filepath[:])
- else:
- break
- if extra_info:
- return (res_be, res_fp) + res_extra
- else:
- return (res_be, res_fp)
-
- def resolve_to_backend(self, filepath):
- backend, filepath = self._resolve_to_backend(filepath)
- if backend is None:
- raise Error("Path not mounted")
- return backend, filepath
-
- def __repr__(self, table = None, indent = []):
- res = []
- if table is None:
- res.append("MountStorage<")
- table = self.mounttab
- v = table.get(None)
- if v:
- res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
- for k, v in table.iteritems():
- if k == None:
- continue
- res.append(" " * len(indent) + repr(k) + ":")
- res += self.__repr__(v, indent + [k])
- if table is self.mounttab:
- res.append(">")
- return "\n".join(res)
- else:
- return res
-
- def file_exists(self, filepath):
- backend, filepath = self.resolve_to_backend(filepath)
- return backend.file_exists(filepath)
-
- def get_file(self, filepath, mode='r'):
- backend, filepath = self.resolve_to_backend(filepath)
- return backend.get_file(filepath, mode)
-
- def delete_file(self, filepath):
- backend, filepath = self.resolve_to_backend(filepath)
- return backend.delete_file(filepath)
-
- def file_url(self, filepath):
- backend, filepath = self.resolve_to_backend(filepath)
- return backend.file_url(filepath)
-
- def get_local_path(self, filepath):
- backend, filepath = self.resolve_to_backend(filepath)
- return backend.get_local_path(filepath)
-
- def copy_locally(self, filepath, dest_path):
- """
- Need to override copy_locally, because the local_storage
- attribute is not correct.
- """
- backend, filepath = self.resolve_to_backend(filepath)
- backend.copy_locally(filepath, dest_path)
-
-
-###########
-# Utilities
-###########
-
-def clean_listy_filepath(listy_filepath):
- """
- Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and
- clean out any nastiness from it.
-
-
- >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
- [u'dir1', u'foo_.._nasty', u'linooks.jpg']
-
- Args:
- - listy_filepath: a list of filepath components, mediagoblin
- storage API style.
-
- Returns:
- A cleaned list of unicode objects.
- """
- cleaned_filepath = [
- unicode(secure_filename(filepath))
- for filepath in listy_filepath]
-
- if u'' in cleaned_filepath:
- raise InvalidFilepath(
- "A filename component could not be resolved into a usable name.")
-
- return cleaned_filepath
-
-
-def storage_system_from_config(config_section):
- """
- Utility for setting up a storage system from a config section.
-
- Note that a special argument may be passed in to
- the config_section which is "storage_class" which will provide an
- import path to a storage system. This defaults to
- "mediagoblin.storage:BasicFileStorage" if otherwise undefined.
-
- Arguments:
- - config_section: dictionary of config parameters
-
- Returns:
- An instantiated storage system.
-
- Example:
- storage_system_from_config(
- {'base_url': '/media/',
- 'base_dir': '/var/whatever/media/'})
-
- Will return:
- BasicFileStorage(
- base_url='/media/',
- base_dir='/var/whatever/media')
- """
- # This construct is needed, because dict(config) does
- # not replace the variables in the config items.
- config_params = dict(config_section.iteritems())
-
- if 'storage_class' in config_params:
- storage_class = config_params['storage_class']
- config_params.pop('storage_class')
- else:
- storage_class = "mediagoblin.storage:BasicFileStorage"
-
- storage_class = util.import_component(storage_class)
- return storage_class(**config_params)
diff --git a/mediagoblin/storage/__init__.py b/mediagoblin/storage/__init__.py
new file mode 100644
index 00000000..8665d9e5
--- /dev/null
+++ b/mediagoblin/storage/__init__.py
@@ -0,0 +1,240 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import shutil
+import urlparse
+import uuid
+
+from werkzeug.utils import secure_filename
+
+from mediagoblin import util
+
+########
+# Errors
+########
+
+
+class Error(Exception):
+ pass
+
+
+class InvalidFilepath(Error):
+ pass
+
+
+class NoWebServing(Error):
+ pass
+
+
+class NotImplementedError(Error):
+ pass
+
+
+###############################################
+# Storage interface & basic file implementation
+###############################################
+
+class StorageInterface(object):
+ """
+ Interface for the storage API.
+
+ This interface doesn't actually provide behavior, but it defines
+ what kind of storage patterns subclasses should provide.
+
+ It is important to note that the storage API idea of a "filepath"
+ is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
+ mind while reading method documentation.
+
+ You should set up your __init__ method with whatever keyword
+ arguments are appropriate to your storage system, but you should
+ also passively accept all extraneous keyword arguments like:
+
+ def __init__(self, **kwargs):
+ pass
+
+ See BasicFileStorage as a simple implementation of the
+ StorageInterface.
+ """
+
+ # Whether this file store is on the local filesystem.
+ local_storage = False
+
+ def __raise_not_implemented(self):
+ """
+ Raise an error about some component not implemented by a
+ subclass of this interface.
+ """
+ raise NotImplementedError(
+ "This feature not implemented in this storage API implementation.")
+
+ def file_exists(self, filepath):
+ """
+ Return a boolean asserting whether or not file at filepath
+ exists in our storage system.
+
+ Returns:
+ True / False depending on whether file exists or not.
+ """
+ # Subclasses should override this method.
+ self.__raise_not_implemented()
+
+ def get_file(self, filepath, mode='r'):
+ """
+ Return a file-like object for reading/writing from this filepath.
+
+ Should create directories, buckets, whatever, as necessary.
+ """
+ # Subclasses should override this method.
+ self.__raise_not_implemented()
+
+ def delete_file(self, filepath):
+ """
+ Delete or dereference the file at filepath.
+
+ This might need to delete directories, buckets, whatever, for
+ cleanliness. (Be sure to avoid race conditions on that though)
+ """
+ # Subclasses should override this method.
+ self.__raise_not_implemented()
+
+ def file_url(self, filepath):
+ """
+ Get the URL for this file. This assumes our storage has been
+ mounted with some kind of URL which makes this possible.
+ """
+ # Subclasses should override this method.
+ self.__raise_not_implemented()
+
+ def get_unique_filepath(self, filepath):
+ """
+ If a filename at filepath already exists, generate a new name.
+
+ Eg, if the filename doesn't exist:
+ >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
+ [u'dir1', u'dir2', u'fname.jpg']
+
+ But if a file does exist, let's get one back with a uuid tacked on:
+ >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
+ [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
+ """
+ # Make sure we have a clean filepath to start with, since
+ # we'll be possibly tacking on stuff to the filename.
+ filepath = clean_listy_filepath(filepath)
+
+ if self.file_exists(filepath):
+ return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
+ else:
+ return filepath
+
+ def get_local_path(self, filepath):
+ """
+ If this is a local_storage implementation, give us a link to
+ the local filesystem reference to this file.
+
+ >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
+ u'/path/to/mounting/foo/bar/baz.jpg'
+ """
+ # Subclasses should override this method, if applicable.
+ self.__raise_not_implemented()
+
+ def copy_locally(self, filepath, dest_path):
+ """
+ Copy this file locally.
+
+ A basic working method for this is provided that should
+ function both for local_storage systems and remote storage
+ systems, but if more efficient systems for copying locally
+ apply to your system, override this method with something more
+ appropriate.
+ """
+ if self.local_storage:
+ shutil.copy(
+ self.get_local_path(filepath), dest_path)
+ else:
+ with self.get_file(filepath, 'rb') as source_file:
+ with file(dest_path, 'wb') as dest_file:
+ dest_file.write(source_file.read())
+
+
+###########
+# Utilities
+###########
+
+def clean_listy_filepath(listy_filepath):
+ """
+ Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and
+ clean out any nastiness from it.
+
+
+ >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
+ [u'dir1', u'foo_.._nasty', u'linooks.jpg']
+
+ Args:
+ - listy_filepath: a list of filepath components, mediagoblin
+ storage API style.
+
+ Returns:
+ A cleaned list of unicode objects.
+ """
+ cleaned_filepath = [
+ unicode(secure_filename(filepath))
+ for filepath in listy_filepath]
+
+ if u'' in cleaned_filepath:
+ raise InvalidFilepath(
+ "A filename component could not be resolved into a usable name.")
+
+ return cleaned_filepath
+
+
+def storage_system_from_config(config_section):
+ """
+ Utility for setting up a storage system from a config section.
+
+ Note that a special argument may be passed in to
+ the config_section which is "storage_class" which will provide an
+ import path to a storage system. This defaults to
+ "mediagoblin.storage.filestorage:BasicFileStorage" if otherwise undefined.
+
+ Arguments:
+ - config_section: dictionary of config parameters
+
+ Returns:
+ An instantiated storage system.
+
+ Example:
+ storage_system_from_config(
+ {'base_url': '/media/',
+ 'base_dir': '/var/whatever/media/'})
+
+ Will return:
+ BasicFileStorage(
+ base_url='/media/',
+ base_dir='/var/whatever/media')
+ """
+ # This construct is needed, because dict(config) does
+ # not replace the variables in the config items.
+ config_params = dict(config_section.iteritems())
+
+ if 'storage_class' in config_params:
+ storage_class = config_params['storage_class']
+ config_params.pop('storage_class')
+ else:
+ storage_class = 'mediagoblin.storage.filestorage:BasicFileStorage'
+
+ storage_class = util.import_component(storage_class)
+ return storage_class(**config_params)
diff --git a/mediagoblin/storage/cloudfiles.py b/mediagoblin/storage/cloudfiles.py
new file mode 100644
index 00000000..b1dd9450
--- /dev/null
+++ b/mediagoblin/storage/cloudfiles.py
@@ -0,0 +1,156 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+'''
+Make it so that ``import cloudfiles`` does not pick THIS file, but the
+python-cloudfiles one.
+
+http://docs.python.org/whatsnew/2.5.html#pep-328-absolute-and-relative-imports
+'''
+from __future__ import absolute_import
+
+from mediagoblin.storage import StorageInterface, clean_listy_filepath
+
+import cloudfiles
+import mimetypes
+
+class CloudFilesStorage(StorageInterface):
+ '''
+ OpenStack/Rackspace Cloud's Swift/CloudFiles support
+ '''
+
+ local_storage = False
+
+ def __init__(self, **kwargs):
+ self.param_container = kwargs.get('cloudfiles_container')
+ self.param_user = kwargs.get('cloudfiles_user')
+ self.param_api_key = kwargs.get('cloudfiles_api_key')
+ self.param_host = kwargs.get('cloudfiles_host')
+ self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')
+
+ if not self.param_host:
+ print('No CloudFiles host URL specified, '
+ 'defaulting to Rackspace US')
+
+ self.connection = cloudfiles.get_connection(
+ username=self.param_user,
+ api_key=self.param_api_key,
+ servicenet=True if self.param_use_servicenet == 'true' or \
+ self.param_use_servicenet == True else False)
+
+ if not self.param_container == \
+ self.connection.get_container(self.param_container):
+ self.container = self.connection.create_container(
+ self.param_container)
+ self.container.make_public(
+ ttl=60 * 60 * 2)
+ else:
+ self.container = self.connection.get_container(
+ self.param_container)
+
+ self.container_uri = self.container.public_uri()
+
+ def _resolve_filepath(self, filepath):
+ return '/'.join(
+ clean_listy_filepath(filepath))
+
+ def file_exists(self, filepath):
+ try:
+ object = self.container.get_object(
+ self._resolve_filepath(filepath))
+ return True
+ except cloudfiles.errors.NoSuchObject:
+ return False
+
+ def get_file(self, filepath, *args, **kwargs):
+ """
+ - Doesn't care about the "mode" argument
+ """
+ try:
+ obj = self.container.get_object(
+ self._resolve_filepath(filepath))
+ except cloudfiles.errors.NoSuchObject:
+ obj = self.container.create_object(
+ self._resolve_filepath(filepath))
+
+ mimetype = mimetypes.guess_type(
+ filepath[-1])
+
+ if mimetype:
+ obj.content_type = mimetype[0]
+
+ return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)
+
+ def delete_file(self, filepath):
+ # TODO: Also delete unused directories if empty (safely, with
+ # checks to avoid race conditions).
+ self.container.delete_object(
+ self._resolve_filepath(filepath))
+
+ def file_url(self, filepath):
+ return '/'.join([
+ self.container_uri,
+ self._resolve_filepath(filepath)])
+
+
+class CloudFilesStorageObjectWrapper():
+ """
+ Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
+ used to circumvent the mystic `medium.jpg` corruption issue, where
+ we had both python-cloudfiles and PIL doing buffering on both
+ ends and that breaking things.
+
+ This wrapper currently meets mediagoblin's needs for a public_store
+ file-like object.
+ """
+ def __init__(self, storage_object, *args, **kwargs):
+ self.storage_object = storage_object
+
+ def read(self, *args, **kwargs):
+ return self.storage_object.read(*args, **kwargs)
+
+ def write(self, data, *args, **kwargs):
+ """
+ write data to the cloudfiles storage object
+
+ The original motivation for this wrapper is to ensure
+ that buffered writing to a cloudfiles storage object does not overwrite
+ any preexisting data.
+
+ Currently this method does not support any write modes except "append".
+ However, if we should need it, it would be easy to implement.
+ """
+ if self.storage_object.size and type(data) == str:
+ data = self.read() + data
+
+ self.storage_object.write(data, *args, **kwargs)
+
+ def close(self):
+ pass
+
+ def __enter__(self):
+ """
+ Context Manager API implementation
+ http://docs.python.org/library/stdtypes.html#context-manager-types
+ """
+ return self
+
+ def __exit__(self, *exc_info):
+ """
+ Context Manager API implementation
+ see self.__enter__()
+ """
+ self.close()
diff --git a/mediagoblin/storage/filestorage.py b/mediagoblin/storage/filestorage.py
new file mode 100644
index 00000000..22d6eb5a
--- /dev/null
+++ b/mediagoblin/storage/filestorage.py
@@ -0,0 +1,78 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from mediagoblin.storage import (
+ StorageInterface,
+ clean_listy_filepath,
+ NoWebServing)
+
+import os
+import urlparse
+
+
+class BasicFileStorage(StorageInterface):
+ """
+ Basic local filesystem implementation of storage API
+ """
+
+ local_storage = True
+
+ def __init__(self, base_dir, base_url=None, **kwargs):
+ """
+ Keyword arguments:
+ - base_dir: Base directory things will be served out of. MUST
+ be an absolute path.
+ - base_url: URL files will be served from
+ """
+ self.base_dir = base_dir
+ self.base_url = base_url
+
+ def _resolve_filepath(self, filepath):
+ """
+ Transform the given filepath into a local filesystem filepath.
+ """
+ return os.path.join(
+ self.base_dir, *clean_listy_filepath(filepath))
+
+ def file_exists(self, filepath):
+ return os.path.exists(self._resolve_filepath(filepath))
+
+ def get_file(self, filepath, mode='r'):
+ # Make directories if necessary
+ if len(filepath) > 1:
+ directory = self._resolve_filepath(filepath[:-1])
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+
+ # Grab and return the file in the mode specified
+ return open(self._resolve_filepath(filepath), mode)
+
+ def delete_file(self, filepath):
+ # TODO: Also delete unused directories if empty (safely, with
+ # checks to avoid race conditions).
+ os.remove(self._resolve_filepath(filepath))
+
+ def file_url(self, filepath):
+ if not self.base_url:
+ raise NoWebServing(
+ "base_url not set, cannot provide file urls")
+
+ return urlparse.urljoin(
+ self.base_url,
+ '/'.join(clean_listy_filepath(filepath)))
+
+ def get_local_path(self, filepath):
+ return self._resolve_filepath(filepath)
diff --git a/mediagoblin/storage/mountstorage.py b/mediagoblin/storage/mountstorage.py
new file mode 100644
index 00000000..6adb7a0d
--- /dev/null
+++ b/mediagoblin/storage/mountstorage.py
@@ -0,0 +1,156 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import logging
+
+from mediagoblin.storage import Error, StorageInterface, clean_listy_filepath
+
+
+class MountStorage(StorageInterface):
+ """
+ Experimental "Mount" virtual Storage Interface
+
+ This isn't an interface to some real storage, instead it's a
+ redirecting interface, that redirects requests to other
+ "StorageInterface"s.
+
+ For example, say you have the paths:
+
+ 1. ['user_data', 'cwebber', 'avatar.jpg']
+ 2. ['user_data', 'elrond', 'avatar.jpg']
+ 3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']
+
+ You could mount media_entries under CloudFileStorage and user_data
+ under BasicFileStorage. Then 1 would be passed to
+ BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
+ would be passed to CloudFileStorage under
+ ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].
+
+ In other words, this is kind of like mounting /home/ and /etc/
+ under different filesystems on your operating system... but with
+ mediagoblin filestorages :)
+
+ To set this up, you currently need to call the mount() method with
+ the target path and a backend, that shall be available under that
+ target path. You have to mount things in a sensible order,
+ especially you can't mount ["a", "b"] before ["a"].
+ """
+ def __init__(self, **kwargs):
+ self.mounttab = {}
+
+ def mount(self, dirpath, backend):
+ """
+ Mount a new backend under dirpath
+ """
+ new_ent = clean_listy_filepath(dirpath)
+
+ print "Mounting:", repr(new_ent)
+ already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
+ print "===", repr(already), repr(rem_1), repr(rem_2), len(table)
+
+ assert (len(rem_2) > 0) or (None not in table), \
+ "That path is already mounted"
+ assert (len(rem_2) > 0) or (len(table) == 0), \
+ "A longer path is already mounted here"
+
+ for part in rem_2:
+ table[part] = {}
+ table = table[part]
+ table[None] = backend
+
+ def _resolve_to_backend(self, filepath, extra_info=False):
+ """
+ extra_info = True is for internal use!
+
+ Normally, returns the backend and the filepath inside that backend.
+
+ With extra_info = True it returns the last directory node and the
+ remaining filepath from there in addition.
+ """
+ table = self.mounttab
+ filepath = filepath[:]
+ res_fp = None
+ while True:
+ new_be = table.get(None)
+ if (new_be is not None) or res_fp is None:
+ res_be = new_be
+ res_fp = filepath[:]
+ res_extra = (table, filepath[:])
+ # print "... New res: %r, %r, %r" % (res_be, res_fp, res_extra)
+ if len(filepath) == 0:
+ break
+ query = filepath.pop(0)
+ entry = table.get(query)
+ if entry is not None:
+ table = entry
+ res_extra = (table, filepath[:])
+ else:
+ break
+ if extra_info:
+ return (res_be, res_fp) + res_extra
+ else:
+ return (res_be, res_fp)
+
+ def resolve_to_backend(self, filepath):
+ backend, filepath = self._resolve_to_backend(filepath)
+ if backend is None:
+ raise Error("Path not mounted")
+ return backend, filepath
+
+ def __repr__(self, table=None, indent=[]):
+ res = []
+ if table is None:
+ res.append("MountStorage<")
+ table = self.mounttab
+ v = table.get(None)
+ if v:
+ res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
+ for k, v in table.iteritems():
+ if k == None:
+ continue
+ res.append(" " * len(indent) + repr(k) + ":")
+ res += self.__repr__(v, indent + [k])
+ if table is self.mounttab:
+ res.append(">")
+ return "\n".join(res)
+ else:
+ return res
+
+ def file_exists(self, filepath):
+ backend, filepath = self.resolve_to_backend(filepath)
+ return backend.file_exists(filepath)
+
+ def get_file(self, filepath, mode='r'):
+ backend, filepath = self.resolve_to_backend(filepath)
+ return backend.get_file(filepath, mode)
+
+ def delete_file(self, filepath):
+ backend, filepath = self.resolve_to_backend(filepath)
+ return backend.delete_file(filepath)
+
+ def file_url(self, filepath):
+ backend, filepath = self.resolve_to_backend(filepath)
+ return backend.file_url(filepath)
+
+ def get_local_path(self, filepath):
+ backend, filepath = self.resolve_to_backend(filepath)
+ return backend.get_local_path(filepath)
+
+ def copy_locally(self, filepath, dest_path):
+ """
+ Need to override copy_locally, because the local_storage
+ attribute is not correct.
+ """
+ backend, filepath = self.resolve_to_backend(filepath)
+ backend.copy_locally(filepath, dest_path)
diff --git a/mediagoblin/tests/test_storage.py b/mediagoblin/tests/test_storage.py
index 9c96f6ca..46ecb2ec 100644
--- a/mediagoblin/tests/test_storage.py
+++ b/mediagoblin/tests/test_storage.py
@@ -52,7 +52,7 @@ class FakeStorageSystem():
self.foobie = foobie
self.blech = blech
-class FakeRemoteStorage(storage.BasicFileStorage):
+class FakeRemoteStorage(storage.filestorage.BasicFileStorage):
# Theoretically despite this, all the methods should work but it
# should force copying to the workbench
local_storage = False
@@ -66,7 +66,7 @@ def test_storage_system_from_config():
'garbage_arg': 'trash'})
assert this_storage.base_url == 'http://example.org/moodia/'
assert this_storage.base_dir == '/tmp/'
- assert this_storage.__class__ is storage.BasicFileStorage
+ assert this_storage.__class__ is storage.filestorage.BasicFileStorage
this_storage = storage.storage_system_from_config(
{'foobie': 'eiboof',
@@ -88,7 +88,7 @@ def get_tmp_filestorage(mount_url=None, fake_remote=False):
if fake_remote:
this_storage = FakeRemoteStorage(tmpdir, mount_url)
else:
- this_storage = storage.BasicFileStorage(tmpdir, mount_url)
+ this_storage = storage.filestorage.BasicFileStorage(tmpdir, mount_url)
return tmpdir, this_storage