Diffstat (limited to 'mediagoblin/db/mongo')
-rw-r--r--  mediagoblin/db/mongo/__init__.py     15
-rw-r--r--  mediagoblin/db/mongo/indexes.py     146
-rw-r--r--  mediagoblin/db/mongo/migrations.py  110
-rw-r--r--  mediagoblin/db/mongo/models.py      363
-rw-r--r--  mediagoblin/db/mongo/open.py         78
-rw-r--r--  mediagoblin/db/mongo/util.py        292
6 files changed, 1004 insertions(+), 0 deletions(-)
diff --git a/mediagoblin/db/mongo/__init__.py b/mediagoblin/db/mongo/__init__.py
new file mode 100644
index 00000000..ba347c69
--- /dev/null
+++ b/mediagoblin/db/mongo/__init__.py
@@ -0,0 +1,15 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
diff --git a/mediagoblin/db/mongo/indexes.py b/mediagoblin/db/mongo/indexes.py
new file mode 100644
index 00000000..1dd73f2b
--- /dev/null
+++ b/mediagoblin/db/mongo/indexes.py
@@ -0,0 +1,146 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Indexes for the local database.
+
+To add new indexes
+------------------
+
+Indexes are recorded in the following format:
+
+ACTIVE_INDEXES = {
+    'collection_name': {
+        'identifier': {  # key identifier used for possibly deprecating later
+            'index': [index_foo_goes_here]}}}
+
+Any other keys in the identifier's mapping are passed through as
+keyword arguments to create_index (unique=True, etc).
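+
+For illustration, the 'uploader_slug_unique' entry registered below
+ends up being applied by add_new_indexes() in
+mediagoblin/db/mongo/util.py as roughly:
+
+    collection.create_index(
+        [('uploader', ASCENDING), ('slug', ASCENDING)],
+        name='uploader_slug_unique', unique=True)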
+
+Current indexes must be registered in ACTIVE_INDEXES... deprecated
+indexes should be marked in DEPRECATED_INDEXES.
+
+Remember, ordering of compound indexes MATTERS. Read below for more.
+
+REQUIRED READING:
+ - http://kylebanker.com/blog/2010/09/21/the-joy-of-mongodb-indexes/
+
+ - http://www.mongodb.org/display/DOCS/Indexes
+ - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ
+
+
+To remove deprecated indexes
+----------------------------
+
+Removing deprecated indexes works the same way: just move the index
+into the deprecated indexes mapping.
+
+DEPRECATED_INDEXES = {
+    'collection_name': {
+        'deprecated_index_identifier1': {
+            'index': [index_foo_goes_here]}}}
+
+... etc.
+
+If an index has been deprecated that identifier should NEVER BE USED
+AGAIN. Eg, if you previously had 'awesomepants_unique', you shouldn't
+use 'awesomepants_unique' again, you should create a totally new name
+or at worst use 'awesomepants_unique2'.
+"""
+
+from pymongo import ASCENDING, DESCENDING
+
+
+################
+# Active indexes
+################
+ACTIVE_INDEXES = {}
+
+# MediaEntry indexes
+# ------------------
+
+MEDIAENTRY_INDEXES = {
+ 'uploader_slug_unique': {
+ # Matching an object to an uploader + slug.
+ # MediaEntries are unique on these two combined, eg:
+ # /u/${myuser}/m/${myslugname}/
+ 'index': [('uploader', ASCENDING),
+ ('slug', ASCENDING)],
+ 'unique': True},
+
+ 'created': {
+ # A global index for all media entries created, in descending
+ # order. This is used for the site's frontpage.
+ 'index': [('created', DESCENDING)]},
+
+ 'uploader_created': {
+ # Indexing on uploaders and when media entries are created.
+ # Used for showing a user gallery, etc.
+ 'index': [('uploader', ASCENDING),
+ ('created', DESCENDING)]},
+
+ 'state_uploader_tags_created': {
+ # Indexing on processed?, media uploader, associated tags, and
+        # timestamp.  Used for showing media items matching a tag
+ # search, most recent first.
+ 'index': [('state', ASCENDING),
+ ('uploader', ASCENDING),
+ ('tags.slug', DESCENDING),
+ ('created', DESCENDING)]},
+
+ 'state_tags_created': {
+ # Indexing on processed?, media tags, and timestamp (across all users)
+ # This is used for a front page tag search.
+ 'index': [('state', ASCENDING),
+ ('tags.slug', DESCENDING),
+ ('created', DESCENDING)]}}
+
+
+ACTIVE_INDEXES['media_entries'] = MEDIAENTRY_INDEXES
+
+
+# User indexes
+# ------------
+
+USER_INDEXES = {
+ 'username_unique': {
+ # Index usernames, and make sure they're unique.
+ # ... I guess we might need to adjust this once we're federated :)
+ 'index': 'username',
+ 'unique': True},
+ 'created': {
+ # All most recently created users
+ 'index': 'created'}}
+
+
+ACTIVE_INDEXES['users'] = USER_INDEXES
+
+
+# MediaComment indexes
+
+MEDIA_COMMENT_INDEXES = {
+ 'mediaentry_created': {
+ 'index': [('media_entry', ASCENDING),
+ ('created', DESCENDING)]}}
+
+ACTIVE_INDEXES['media_comments'] = MEDIA_COMMENT_INDEXES
+
+
+####################
+# Deprecated indexes
+####################
+
+DEPRECATED_INDEXES = {}
diff --git a/mediagoblin/db/mongo/migrations.py b/mediagoblin/db/mongo/migrations.py
new file mode 100644
index 00000000..cf4e94ae
--- /dev/null
+++ b/mediagoblin/db/mongo/migrations.py
@@ -0,0 +1,110 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from mediagoblin.db.mongo.util import RegisterMigration
+from mediagoblin.tools.text import cleaned_markdown_conversion
+
+
+def add_table_field(db, table_name, field_name, default_value):
+ """
+ Add a new field to the table/collection named table_name.
+ The field will have the name field_name and the value default_value
+ """
+ db[table_name].update(
+ {field_name: {'$exists': False}},
+ {'$set': {field_name: default_value}},
+ multi=True)
+
+
+# Please see mediagoblin/tests/test_migrations.py for some examples of
+# basic migrations.
+
+
+@RegisterMigration(1)
+def user_add_bio_html(database):
+ """
+ Users now have richtext bios via Markdown, reflect appropriately.
+ """
+ collection = database['users']
+
+ target = collection.find(
+ {'bio_html': {'$exists': False}})
+
+ for document in target:
+ document['bio_html'] = cleaned_markdown_conversion(
+ document['bio'])
+ collection.save(document)
+
+
+@RegisterMigration(2)
+def mediaentry_mediafiles_main_to_original(database):
+ """
+ Rename "main" media file to "original".
+ """
+ collection = database['media_entries']
+ target = collection.find(
+ {'media_files.main': {'$exists': True}})
+
+ for document in target:
+ original = document['media_files'].pop('main')
+ document['media_files']['original'] = original
+
+ collection.save(document)
+
+
+@RegisterMigration(3)
+def mediaentry_remove_thumbnail_file(database):
+ """
+ Use media_files['thumb'] instead of media_entries['thumbnail_file']
+ """
+ database['media_entries'].update(
+ {'thumbnail_file': {'$exists': True}},
+ {'$unset': {'thumbnail_file': 1}},
+ multi=True)
+
+
+@RegisterMigration(4)
+def mediaentry_add_queued_task_id(database):
+ """
+ Add the 'queued_task_id' field for entries that don't have it.
+ """
+ add_table_field(database, 'media_entries', 'queued_task_id', None)
+
+
+@RegisterMigration(5)
+def mediaentry_add_fail_error_and_metadata(database):
+ """
+ Add 'fail_error' and 'fail_metadata' fields to media entries
+ """
+ add_table_field(database, 'media_entries', 'fail_error', None)
+ add_table_field(database, 'media_entries', 'fail_metadata', {})
+
+
+@RegisterMigration(6)
+def user_add_forgot_password_token_and_expires(database):
+ """
+ Add token and expiration fields to help recover forgotten passwords
+ """
+ add_table_field(database, 'users', 'fp_verification_key', None)
+ add_table_field(database, 'users', 'fp_token_expire', None)
+
+
+@RegisterMigration(7)
+def media_type_image_to_multimedia_type_image(database):
+ database['media_entries'].update(
+ {'media_type': 'image'},
+ {'$set': {'media_type': 'mediagoblin.media_types.image'}},
+ multi=True)
diff --git a/mediagoblin/db/mongo/models.py b/mediagoblin/db/mongo/models.py
new file mode 100644
index 00000000..e085840e
--- /dev/null
+++ b/mediagoblin/db/mongo/models.py
@@ -0,0 +1,363 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import uuid
+
+from mongokit import Document
+
+from mediagoblin.auth import lib as auth_lib
+from mediagoblin import mg_globals
+from mediagoblin.db import migrations
+from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
+from mediagoblin.tools.pagination import Pagination
+from mediagoblin.tools import url, common
+from mediagoblin.tools import licenses
+
+###################
+# Custom validators
+###################
+
+########
+# Models
+########
+
+
+class User(Document):
+ """
+ A user of MediaGoblin.
+
+ Structure:
+ - username: The username of this user, should be unique to this instance.
+ - email: Email address of this user
+ - created: When the user was created
+ - plugin_data: a mapping of extra plugin information for this User.
+ Nothing uses this yet as we don't have plugins, but someday we
+ might... :)
+ - pw_hash: Hashed version of user's password.
+    - email_verified: Whether or not the user has verified their email.
+ Most parts of the site are disabled for users who haven't yet.
+ - status: whether or not the user is active, etc. Currently only has two
+ values, 'needs_email_verification' or 'active'. (In the future, maybe
+ we'll change this to a boolean with a key of 'active' and have a
+ separate field for a reason the user's been disabled if that's
+ appropriate... email_verified is already separate, after all.)
+ - verification_key: If the user is awaiting email verification, the user
+ will have to provide this key (which will be encoded in the presented
+ URL) in order to confirm their email as active.
+    - is_admin: Whether or not this user is an administrator.
+ - url: this user's personal webpage/website, if appropriate.
+ - bio: biography of this user (plaintext, in markdown)
+ - bio_html: biography of the user converted to proper HTML.
+ """
+ __collection__ = 'users'
+
+ structure = {
+ 'username': unicode,
+ 'email': unicode,
+ 'created': datetime.datetime,
+ 'plugin_data': dict, # plugins can dump stuff here.
+ 'pw_hash': unicode,
+ 'email_verified': bool,
+ 'status': unicode,
+ 'verification_key': unicode,
+ 'is_admin': bool,
+        'url': unicode,
+        'bio': unicode,  # May contain markdown
+ 'bio_html': unicode, # May contain plaintext, or HTML
+ 'fp_verification_key': unicode, # forgotten password verification key
+ 'fp_token_expire': datetime.datetime
+ }
+
+ required_fields = ['username', 'created', 'pw_hash', 'email']
+
+ default_values = {
+ 'created': datetime.datetime.utcnow,
+ 'email_verified': False,
+ 'status': u'needs_email_verification',
+ 'verification_key': lambda: unicode(uuid.uuid4()),
+ 'is_admin': False}
+
+ def check_login(self, password):
+ """
+ See if a user can login with this password
+ """
+ return auth_lib.bcrypt_check_password(
+ password, self['pw_hash'])
+
+
+class MediaEntry(Document):
+ """
+ Record of a piece of media.
+
+ Structure:
+ - uploader: A reference to a User who uploaded this.
+
+ - title: Title of this work
+
+    - slug: A normalized "slug" which can be used as part of a URL to retrieve
+        this work.  For example, a slug of 'my-works-name-in-slug-form' may be
+        viewable at 'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'.
+ Note that since URLs are constructed this way, slugs must be unique
+ per-uploader. (An index is provided to enforce that but code should be
+ written on the python side to ensure this as well.)
+
+ - created: Date and time of when this piece of work was uploaded.
+
+ - description: Uploader-set description of this work. This can be marked
+        up with Markdown for slight fanciness (links, boldness, italics,
+ paragraphs...)
+
+ - description_html: Rendered version of the description, run through
+ Markdown and cleaned with our cleaning tool.
+
+ - media_type: What type of media is this? Currently we only support
+ 'image' ;)
+
+ - media_data: Extra information that's media-format-dependent.
+ For example, images might contain some EXIF data that's not appropriate
+ to other formats. You might store it like:
+
+ mediaentry['media_data']['exif'] = {
+ 'manufacturer': 'CASIO',
+ 'model': 'QV-4000',
+ 'exposure_time': .659}
+
+ Alternately for video you might store:
+
+ # play length in seconds
+ mediaentry['media_data']['play_length'] = 340
+
+ ... so what's appropriate here really depends on the media type.
+
+    - plugin_data: a mapping of extra plugin information for this MediaEntry.
+ Nothing uses this yet as we don't have plugins, but someday we
+ might... :)
+
+ - tags: A list of tags. Each tag is stored as a dictionary that has a key
+ for the actual name and the normalized name-as-slug, so ultimately this
+ looks like:
+ [{'name': 'Gully Gardens',
+ 'slug': 'gully-gardens'},
+         {'name': 'Castle Adventure Time?!',
+ 'slug': 'castle-adventure-time'}]
+
+ - state: What's the state of this file? Active, inactive, disabled, etc...
+ But really for now there are only two states:
+ "unprocessed": uploaded but needs to go through processing for display
+ "processed": processed and able to be displayed
+
+ - license: URI for entry's license
+
+ - queued_media_file: storage interface style filepath describing a file
+ queued for processing. This is stored in the mg_globals.queue_store
+ storage system.
+
+ - queued_task_id: celery task id. Use this to fetch the task state.
+
+ - media_files: Files relevant to this that have actually been processed
+ and are available for various types of display. Stored like:
+        {'thumb': ['dir1', 'dir2', 'pic.png']}
+
+ - attachment_files: A list of "attachment" files, ones that aren't
+ critical to this piece of media but may be usefully relevant to people
+ viewing the work. (currently unused.)
+
+    - fail_error: importable path of the exception raised during processing
+    - fail_metadata: a dict of extra information recorded about that failure,
+        useful for debugging
+
+ """
+ __collection__ = 'media_entries'
+
+ structure = {
+ 'uploader': ObjectId,
+ 'title': unicode,
+ 'slug': unicode,
+ 'created': datetime.datetime,
+ 'description': unicode, # May contain markdown/up
+ 'description_html': unicode, # May contain plaintext, or HTML
+ 'media_type': unicode,
+ 'media_data': dict, # extra data relevant to this media_type
+ 'plugin_data': dict, # plugins can dump stuff here.
+ 'tags': [dict],
+ 'state': unicode,
+ 'license': unicode, # License URI
+
+ # For now let's assume there can only be one main file queued
+ # at a time
+ 'queued_media_file': [unicode],
+ 'queued_task_id': unicode,
+
+ # A dictionary of logical names to filepaths
+ 'media_files': dict,
+
+ # The following should be lists of lists, in appropriate file
+ # record form
+ 'attachment_files': list,
+
+ # If things go badly in processing things, we'll store that
+ # data here
+ 'fail_error': unicode,
+ 'fail_metadata': dict}
+
+ required_fields = [
+ 'uploader', 'created', 'media_type', 'slug']
+
+ default_values = {
+ 'created': datetime.datetime.utcnow,
+ 'state': u'unprocessed'}
+
+ def get_comments(self):
+ return self.db.MediaComment.find({
+ 'media_entry': self['_id']}).sort('created', DESCENDING)
+
+    def get_display_media(self, media_map,
+                          fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER):
+        """
+        Find the best media for display.
+
+        Args:
+        - media_map: a dict like
+          {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
+        - fetch_order: the order we should try fetching images in
+
+        Returns:
+          The media_map entry (a file path list) for the first size found
+          in fetch_order, or None if no listed size is available.
+        """
+        media_sizes = media_map.keys()
+
+        for media_size in fetch_order:
+            if media_size in media_sizes:
+                return media_map[media_size]
+
+ def main_mediafile(self):
+ pass
+
+ def generate_slug(self):
+ self['slug'] = url.slugify(self['title'])
+
+ duplicate = mg_globals.database.media_entries.find_one(
+ {'slug': self['slug']})
+
+ if duplicate:
+ self['slug'] = "%s-%s" % (self['_id'], self['slug'])
+
+ def url_for_self(self, urlgen):
+ """
+ Generate an appropriate url for ourselves
+
+ Use a slug if we have one, else use our '_id'.
+ """
+ uploader = self.uploader()
+
+ if self.get('slug'):
+ return urlgen(
+ 'mediagoblin.user_pages.media_home',
+ user=uploader['username'],
+ media=self['slug'])
+ else:
+ return urlgen(
+ 'mediagoblin.user_pages.media_home',
+ user=uploader['username'],
+ media=unicode(self['_id']))
+
+ def url_to_prev(self, urlgen):
+ """
+ Provide a url to the previous entry from this user, if there is one
+ """
+ cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']},
+ 'uploader': self['uploader'],
+ 'state': 'processed'}).sort(
+ '_id', ASCENDING).limit(1)
+ if cursor.count():
+ return urlgen('mediagoblin.user_pages.media_home',
+ user=self.uploader()['username'],
+ media=unicode(cursor[0]['slug']))
+
+ def url_to_next(self, urlgen):
+ """
+ Provide a url to the next entry from this user, if there is one
+ """
+ cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']},
+ 'uploader': self['uploader'],
+ 'state': 'processed'}).sort(
+ '_id', DESCENDING).limit(1)
+
+ if cursor.count():
+ return urlgen('mediagoblin.user_pages.media_home',
+ user=self.uploader()['username'],
+ media=unicode(cursor[0]['slug']))
+
+ def uploader(self):
+ return self.db.User.find_one({'_id': self['uploader']})
+
+ def get_fail_exception(self):
+ """
+ Get the exception that's appropriate for this error
+ """
+ if self['fail_error']:
+ return common.import_component(self['fail_error'])
+
+ def get_license_data(self):
+ """Return license dict for requested license"""
+ return licenses.SUPPORTED_LICENSES[self['license']]
+
+
+class MediaComment(Document):
+ """
+ A comment on a MediaEntry.
+
+ Structure:
+ - media_entry: The media entry this comment is attached to
+ - author: user who posted this comment
+ - created: when the comment was created
+ - content: plaintext (but markdown'able) version of the comment's content.
+ - content_html: the actual html-rendered version of the comment displayed.
+ Run through Markdown and the HTML cleaner.
+ """
+
+ __collection__ = 'media_comments'
+
+ structure = {
+ 'media_entry': ObjectId,
+ 'author': ObjectId,
+ 'created': datetime.datetime,
+ 'content': unicode,
+ 'content_html': unicode}
+
+ required_fields = [
+ 'media_entry', 'author', 'created', 'content']
+
+ default_values = {
+ 'created': datetime.datetime.utcnow}
+
+ def media_entry(self):
+ return self.db.MediaEntry.find_one({'_id': self['media_entry']})
+
+ def author(self):
+ return self.db.User.find_one({'_id': self['author']})
+
+
+REGISTER_MODELS = [
+ MediaEntry,
+ User,
+ MediaComment]
+
+
+def register_models(connection):
+ """
+ Register all models in REGISTER_MODELS with this connection.
+ """
+ connection.register(REGISTER_MODELS)
+
diff --git a/mediagoblin/db/mongo/open.py b/mediagoblin/db/mongo/open.py
new file mode 100644
index 00000000..48c909d9
--- /dev/null
+++ b/mediagoblin/db/mongo/open.py
@@ -0,0 +1,78 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import pymongo
+import mongokit
+from paste.deploy.converters import asint
+from mediagoblin.db.mongo import models
+from mediagoblin.db.mongo.util import MigrationManager
+
+
+def connect_database_from_config(app_config, use_pymongo=False):
+ """
+    Connect to the main database, taking connection settings from app_config.
+
+ Optionally use pymongo instead of mongokit for the connection.
+ """
+ port = app_config.get('db_port')
+ if port:
+ port = asint(port)
+
+ if use_pymongo:
+ connection = pymongo.Connection(
+ app_config.get('db_host'), port)
+ else:
+ connection = mongokit.Connection(
+ app_config.get('db_host'), port)
+ return connection
+
+
+def setup_connection_and_db_from_config(app_config, use_pymongo=False):
+ """
+    Set up the connection and database from config.
+
+ Optionally use pymongo instead of mongokit.
+ """
+ connection = connect_database_from_config(app_config, use_pymongo)
+ database_path = app_config['db_name']
+ db = connection[database_path]
+
+ if not use_pymongo:
+ models.register_models(connection)
+
+ return (connection, db)
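+
+# Illustrative usage (a sketch; the config keys shown are the ones this
+# module reads, the values are examples only):
+#
+#     connection, db = setup_connection_and_db_from_config(
+#         {'db_host': 'localhost', 'db_port': '27017', 'db_name': 'mediagoblin'})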
+
+
+def check_db_migrations_current(db):
+ # This MUST be imported so as to set up the appropriate migrations!
+ from mediagoblin.db.mongo import migrations
+
+ # Init the migration number if necessary
+ migration_manager = MigrationManager(db)
+ migration_manager.install_migration_version_if_missing()
+
+ # Tiny hack to warn user if our migration is out of date
+ if not migration_manager.database_at_latest_migration():
+ db_migration_num = migration_manager.database_current_migration()
+ latest_migration_num = migration_manager.latest_migration()
+ if db_migration_num < latest_migration_num:
+ print (
+ "*WARNING:* Your migrations are out of date, "
+ "maybe run ./bin/gmg migrate?")
+ elif db_migration_num > latest_migration_num:
+ print (
+ "*WARNING:* Your migrations are out of date... "
+ "in fact they appear to be from the future?!")
diff --git a/mediagoblin/db/mongo/util.py b/mediagoblin/db/mongo/util.py
new file mode 100644
index 00000000..e2065693
--- /dev/null
+++ b/mediagoblin/db/mongo/util.py
@@ -0,0 +1,292 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Utilities for database operations.
+
+Some notes on the migration and indexing tools:
+
+We store information about what the state of the database is in the
+'mediagoblin' document of the 'app_metadata' collection.  Keys in that
+document relevant here:
+
+ - 'current_migration': The integer representing the current state of
+   the migrations
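+
+So the metadata document, once set, looks roughly like:
+
+    {'_id': 'mediagoblin', 'current_migration': 7}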
+"""
+
+import copy
+
+# Imports that other modules might use
+from pymongo import ASCENDING, DESCENDING
+from pymongo.errors import InvalidId
+from mongokit import ObjectId
+
+from mediagoblin.db.mongo.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES
+
+
+################
+# Indexing tools
+################
+
+
+def add_new_indexes(database, active_indexes=ACTIVE_INDEXES):
+ """
+ Add any new indexes to the database.
+
+ Args:
+ - database: pymongo or mongokit database instance.
+ - active_indexes: indexes to possibly add in the pattern of:
+ {'collection_name': {
+ 'identifier': {
+ 'index': [index_foo_goes_here],
+ 'unique': True}}
+ where 'index' is the index to add and all other options are
+ arguments for collection.create_index.
+
+ Returns:
+ A list of indexes added in form ('collection', 'index_name')
+ """
+ indexes_added = []
+
+ for collection_name, indexes in active_indexes.iteritems():
+ collection = database[collection_name]
+ collection_indexes = collection.index_information().keys()
+
+ for index_name, index_data in indexes.iteritems():
+            if index_name not in collection_indexes:
+ # Get a copy actually so we don't modify the actual
+ # structure
+ index_data = copy.copy(index_data)
+ index = index_data.pop('index')
+ collection.create_index(
+ index, name=index_name, **index_data)
+
+ indexes_added.append((collection_name, index_name))
+
+ return indexes_added
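+
+# Illustrative usage (a sketch; `db` stands for any pymongo/mongokit
+# database instance):
+#
+#     added = add_new_indexes(db)
+#     # e.g. [('media_entries', 'uploader_slug_unique'), ...]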
+
+
+def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES):
+ """
+ Remove any deprecated indexes from the database.
+
+ Args:
+ - database: pymongo or mongokit database instance.
+ - deprecated_indexes: the indexes to deprecate in the pattern of:
+ {'collection_name': {
+ 'identifier': {
+ 'index': [index_foo_goes_here],
+ 'unique': True}}
+
+ (... although we really only need the 'identifier' here, as the
+ rest of the information isn't used in this case. But it's kept
+ around so we can remember what it was)
+
+ Returns:
+ A list of indexes removed in form ('collection', 'index_name')
+ """
+ indexes_removed = []
+
+ for collection_name, indexes in deprecated_indexes.iteritems():
+ collection = database[collection_name]
+ collection_indexes = collection.index_information().keys()
+
+ for index_name, index_data in indexes.iteritems():
+ if index_name in collection_indexes:
+ collection.drop_index(index_name)
+
+ indexes_removed.append((collection_name, index_name))
+
+ return indexes_removed
+
+
+#################
+# Migration tools
+#################
+
+class MissingCurrentMigration(Exception):
+    pass
+
+
+# The default migration registry...
+#
+# Don't set this yourself!  RegisterMigration will automatically fill
+# this with stuff via decorating functions in migrations.py
+MIGRATIONS = {}
+
+
+class RegisterMigration(object):
+ """
+ Tool for registering migrations
+
+ Call like:
+
+ @RegisterMigration(33)
+ def update_dwarves(database):
+ [...]
+
+ This will register your migration with the default migration
+ registry. Alternately, to specify a very specific
+ migration_registry, you can pass in that as the second argument.
+
+    Note: your migration number should NEVER be 0 or negative;
+    0 is the default "no migrations" state!
+ """
+ def __init__(self, migration_number, migration_registry=MIGRATIONS):
+ assert migration_number > 0, "Migration number must be > 0!"
+ assert migration_number not in migration_registry, \
+ "Duplicate migration numbers detected! That's not allowed!"
+
+ self.migration_number = migration_number
+ self.migration_registry = migration_registry
+
+ def __call__(self, migration):
+ self.migration_registry[self.migration_number] = migration
+ return migration
+
+
+class MigrationManager(object):
+ """
+ Migration handling tool.
+
+ Takes information about a database, lets you update the database
+ to the latest migrations, etc.
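+
+    A typical sequence (an illustrative sketch using only methods defined
+    on this class):
+
+        manager = MigrationManager(db)
+        manager.install_migration_version_if_missing()
+        if not manager.database_at_latest_migration():
+            manager.migrate_new()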
+ """
+ def __init__(self, database, migration_registry=MIGRATIONS):
+ """
+ Args:
+ - database: database we're going to migrate
+ - migration_registry: where we should find all migrations to
+ run
+ """
+ self.database = database
+ self.migration_registry = migration_registry
+ self._sorted_migrations = None
+
+ def _ensure_current_migration_record(self):
+ """
+ If there isn't a database[u'app_metadata'] mediagoblin entry
+ with the 'current_migration', throw an error.
+ """
+ if self.database_current_migration() is None:
+ raise MissingCurrentMigration(
+ "Tried to call function which requires "
+ "'current_migration' set in database")
+
+ @property
+ def sorted_migrations(self):
+ """
+ Sort migrations if necessary and store in self._sorted_migrations
+ """
+ if not self._sorted_migrations:
+ self._sorted_migrations = sorted(
+ self.migration_registry.items(),
+ # sort on the key... the migration number
+ key=lambda migration_tuple: migration_tuple[0])
+
+ return self._sorted_migrations
+
+ def latest_migration(self):
+ """
+ Return a migration number for the latest migration, or 0 if
+ there are no migrations.
+ """
+ if self.sorted_migrations:
+ return self.sorted_migrations[-1][0]
+ else:
+ # If no migrations have been set, we start at 0.
+ return 0
+
+ def set_current_migration(self, migration_number):
+ """
+ Set the migration in the database to migration_number
+ """
+ # Add the mediagoblin migration if necessary
+ self.database[u'app_metadata'].update(
+ {u'_id': u'mediagoblin'},
+ {u'$set': {u'current_migration': migration_number}},
+ upsert=True)
+
+ def install_migration_version_if_missing(self):
+ """
+ Sets the migration to the latest version if no migration
+ version at all is set.
+ """
+ mgoblin_metadata = self.database[u'app_metadata'].find_one(
+ {u'_id': u'mediagoblin'})
+ if not mgoblin_metadata:
+ latest_migration = self.latest_migration()
+ self.set_current_migration(latest_migration)
+
+ def database_current_migration(self):
+ """
+ Return the current migration in the database.
+ """
+ mgoblin_metadata = self.database[u'app_metadata'].find_one(
+ {u'_id': u'mediagoblin'})
+ if not mgoblin_metadata:
+ return None
+ else:
+ return mgoblin_metadata[u'current_migration']
+
+ def database_at_latest_migration(self):
+ """
+ See if the database is at the latest migration.
+ Returns a boolean.
+ """
+ current_migration = self.database_current_migration()
+ return current_migration == self.latest_migration()
+
+ def migrations_to_run(self):
+ """
+ Get a list of migrations to run still, if any.
+
+        Note that this raises MissingCurrentMigration if the database
+        doesn't have a 'current_migration' recorded yet; run
+        install_migration_version_if_missing() first in that case.
+ """
+ self._ensure_current_migration_record()
+
+ db_current_migration = self.database_current_migration()
+
+ return [
+ (migration_number, migration_func)
+ for migration_number, migration_func in self.sorted_migrations
+ if migration_number > db_current_migration]
+
+ def migrate_new(self, pre_callback=None, post_callback=None):
+ """
+ Run all migrations.
+
+        Includes two optional args:
+        - pre_callback: if provided, called before each migration runs.
+          Takes (migration_number, migration_func) as arguments.
+        - post_callback: if provided, called after each migration runs.
+          Takes (migration_number, migration_func) as arguments.
+ """
+ # If we aren't set to any version number, presume we're at the
+ # latest (which means we'll do nothing here...)
+ self.install_migration_version_if_missing()
+
+ for migration_number, migration_func in self.migrations_to_run():
+ if pre_callback:
+ pre_callback(migration_number, migration_func)
+ migration_func(self.database)
+ self.set_current_migration(migration_number)
+ if post_callback:
+ post_callback(migration_number, migration_func)
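+
+
+# An illustrative sketch of driving a migration run with a callback
+# (the reporting callback below is hypothetical, not part of this module):
+#
+#     def report(migration_number, migration_func):
+#         print "Running migration %d ..." % migration_number
+#
+#     manager = MigrationManager(db)
+#     manager.migrate_new(pre_callback=report)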