Diffstat (limited to 'mediagoblin/db/mongo')
-rw-r--r--  mediagoblin/db/mongo/__init__.py   |  15
-rw-r--r--  mediagoblin/db/mongo/indexes.py    | 146
-rw-r--r--  mediagoblin/db/mongo/migrations.py | 110
-rw-r--r--  mediagoblin/db/mongo/models.py     | 359
-rw-r--r--  mediagoblin/db/mongo/open.py       |  78
-rw-r--r--  mediagoblin/db/mongo/util.py       | 292
6 files changed, 1000 insertions, 0 deletions
diff --git a/mediagoblin/db/mongo/__init__.py b/mediagoblin/db/mongo/__init__.py
new file mode 100644
index 00000000..ba347c69
--- /dev/null
+++ b/mediagoblin/db/mongo/__init__.py
@@ -0,0 +1,15 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
diff --git a/mediagoblin/db/mongo/indexes.py b/mediagoblin/db/mongo/indexes.py
new file mode 100644
index 00000000..1dd73f2b
--- /dev/null
+++ b/mediagoblin/db/mongo/indexes.py
@@ -0,0 +1,146 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Indexes for the local database.
+
+To add new indexes
+------------------
+
+Indexes are recorded in the following format:
+
+ACTIVE_INDEXES = {
+    'collection_name': {
+        'identifier': {  # key identifier used for possibly deprecating later
+            'index': [index_foo_goes_here]}}
+
+... and anything else being parameters to the create_index function
+(including unique=True, etc)
+
+Current indexes must be registered in ACTIVE_INDEXES... deprecated
+indexes should be marked in DEPRECATED_INDEXES.
+
+Remember, ordering of compound indexes MATTERS.  Read below for more.
+
+REQUIRED READING:
+ - http://kylebanker.com/blog/2010/09/21/the-joy-of-mongodb-indexes/
+
+ - http://www.mongodb.org/display/DOCS/Indexes
+ - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ
+
+
+To remove deprecated indexes
+----------------------------
+
+Removing deprecated indexes is the same, just move the index into the
+deprecated indexes mapping.
+
+DEPRECATED_INDEXES = {
+    'collection_name': {
+        'deprecated_index_identifier1': {
+            'index': [index_foo_goes_here]}}
+
+... etc.
+
+If an index has been deprecated that identifier should NEVER BE USED
+AGAIN.  Eg, if you previously had 'awesomepants_unique', you shouldn't
+use 'awesomepants_unique' again, you should create a totally new name
+or at worst use 'awesomepants_unique2'.
+""" + +from pymongo import ASCENDING, DESCENDING + + +################ +# Active indexes +################ +ACTIVE_INDEXES = {} + +# MediaEntry indexes +# ------------------ + +MEDIAENTRY_INDEXES = { +    'uploader_slug_unique': { +        # Matching an object to an uploader + slug. +        # MediaEntries are unique on these two combined, eg: +        #   /u/${myuser}/m/${myslugname}/ +        'index': [('uploader', ASCENDING), +                  ('slug', ASCENDING)], +        'unique': True}, + +    'created': { +        # A global index for all media entries created, in descending +        # order.  This is used for the site's frontpage. +        'index': [('created', DESCENDING)]}, + +    'uploader_created': { +        # Indexing on uploaders and when media entries are created. +        # Used for showing a user gallery, etc. +        'index': [('uploader', ASCENDING), +                  ('created', DESCENDING)]}, + +    'state_uploader_tags_created': { +        # Indexing on processed?, media uploader, associated tags, and +        # timestamp Used for showing media items matching a tag +        # search, most recent first. +        'index': [('state', ASCENDING), +                  ('uploader', ASCENDING), +                  ('tags.slug', DESCENDING), +                  ('created', DESCENDING)]}, + +    'state_tags_created': { +        # Indexing on processed?, media tags, and timestamp (across all users) +        # This is used for a front page tag search. +        'index': [('state', ASCENDING), +                  ('tags.slug', DESCENDING), +                  ('created', DESCENDING)]}} + + +ACTIVE_INDEXES['media_entries'] = MEDIAENTRY_INDEXES + + +# User indexes +# ------------ + +USER_INDEXES = { +    'username_unique': { +        # Index usernames, and make sure they're unique. +        # ... I guess we might need to adjust this once we're federated :) +        'index': 'username', +        'unique': True}, +    'created': { +        # All most recently created users +        'index': 'created'}} + + +ACTIVE_INDEXES['users'] = USER_INDEXES + + +# MediaComment indexes + +MEDIA_COMMENT_INDEXES = { +   'mediaentry_created': { +        'index': [('media_entry', ASCENDING), +                  ('created', DESCENDING)]}} + +ACTIVE_INDEXES['media_comments'] = MEDIA_COMMENT_INDEXES + + +#################### +# Deprecated indexes +#################### + +DEPRECATED_INDEXES = {} diff --git a/mediagoblin/db/mongo/migrations.py b/mediagoblin/db/mongo/migrations.py new file mode 100644 index 00000000..cf4e94ae --- /dev/null +++ b/mediagoblin/db/mongo/migrations.py @@ -0,0 +1,110 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+
+from mediagoblin.db.mongo.util import RegisterMigration
+from mediagoblin.tools.text import cleaned_markdown_conversion
+
+
+def add_table_field(db, table_name, field_name, default_value):
+    """
+    Add a new field to the table/collection named table_name.
+    The field will have the name field_name and the value default_value
+    """
+    db[table_name].update(
+        {field_name: {'$exists': False}},
+        {'$set': {field_name: default_value}},
+        multi=True)
+
+
+# Please see mediagoblin/tests/test_migrations.py for some examples of
+# basic migrations.
+
+
+@RegisterMigration(1)
+def user_add_bio_html(database):
+    """
+    Users now have richtext bios via Markdown, reflect appropriately.
+    """
+    collection = database['users']
+
+    target = collection.find(
+        {'bio_html': {'$exists': False}})
+
+    for document in target:
+        document['bio_html'] = cleaned_markdown_conversion(
+            document['bio'])
+        collection.save(document)
+
+
+@RegisterMigration(2)
+def mediaentry_mediafiles_main_to_original(database):
+    """
+    Rename "main" media file to "original".
+    """
+    collection = database['media_entries']
+    target = collection.find(
+        {'media_files.main': {'$exists': True}})
+
+    for document in target:
+        original = document['media_files'].pop('main')
+        document['media_files']['original'] = original
+
+        collection.save(document)
+
+
+@RegisterMigration(3)
+def mediaentry_remove_thumbnail_file(database):
+    """
+    Use media_files['thumb'] instead of media_entries['thumbnail_file']
+    """
+    database['media_entries'].update(
+        {'thumbnail_file': {'$exists': True}},
+        {'$unset': {'thumbnail_file': 1}},
+        multi=True)
+
+
+@RegisterMigration(4)
+def mediaentry_add_queued_task_id(database):
+    """
+    Add the 'queued_task_id' field for entries that don't have it.
+    """
+    add_table_field(database, 'media_entries', 'queued_task_id', None)
+
+
+@RegisterMigration(5)
+def mediaentry_add_fail_error_and_metadata(database):
+    """
+    Add 'fail_error' and 'fail_metadata' fields to media entries
+    """
+    add_table_field(database, 'media_entries', 'fail_error', None)
+    add_table_field(database, 'media_entries', 'fail_metadata', {})
+
+
+@RegisterMigration(6)
+def user_add_forgot_password_token_and_expires(database):
+    """
+    Add token and expiration fields to help recover forgotten passwords
+    """
+    add_table_field(database, 'users', 'fp_verification_key', None)
+    add_table_field(database, 'users', 'fp_token_expire', None)
+
+
+@RegisterMigration(7)
+def media_type_image_to_multimedia_type_image(database):
+    database['media_entries'].update(
+        {'media_type': 'image'},
+        {'$set': {'media_type': 'mediagoblin.media_types.image'}},
+        multi=True)
diff --git a/mediagoblin/db/mongo/models.py b/mediagoblin/db/mongo/models.py
new file mode 100644
index 00000000..5de59c12
--- /dev/null
+++ b/mediagoblin/db/mongo/models.py
@@ -0,0 +1,359 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+
+from mongokit import Document
+
+from mediagoblin.auth import lib as auth_lib
+from mediagoblin import mg_globals
+from mediagoblin.db.mongo import migrations
+from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId
+from mediagoblin.tools.pagination import Pagination
+from mediagoblin.tools import url, common
+
+###################
+# Custom validators
+###################
+
+########
+# Models
+########
+
+
+class User(Document):
+    """
+    A user of MediaGoblin.
+
+    Structure:
+     - username: The username of this user, should be unique to this instance.
+     - email: Email address of this user
+     - created: When the user was created
+     - plugin_data: a mapping of extra plugin information for this User.
+       Nothing uses this yet as we don't have plugins, but someday we
+       might... :)
+     - pw_hash: Hashed version of user's password.
+     - email_verified: Whether or not the user has verified their email or not.
+       Most parts of the site are disabled for users who haven't yet.
+     - status: whether or not the user is active, etc.  Currently only has two
+       values, 'needs_email_verification' or 'active'.  (In the future, maybe
+       we'll change this to a boolean with a key of 'active' and have a
+       separate field for a reason the user's been disabled if that's
+       appropriate... email_verified is already separate, after all.)
+     - verification_key: If the user is awaiting email verification, the user
+       will have to provide this key (which will be encoded in the presented
+       URL) in order to confirm their email as active.
+     - is_admin: Whether or not this user is an administrator or not.
+     - url: this user's personal webpage/website, if appropriate.
+     - bio: biography of this user (plaintext, in markdown)
+     - bio_html: biography of the user converted to proper HTML.
+    """
+    __collection__ = 'users'
+    use_dot_notation = True
+
+    structure = {
+        'username': unicode,
+        'email': unicode,
+        'created': datetime.datetime,
+        'plugin_data': dict,  # plugins can dump stuff here.
+        'pw_hash': unicode,
+        'email_verified': bool,
+        'status': unicode,
+        'verification_key': unicode,
+        'is_admin': bool,
+        'url': unicode,
+        'bio': unicode,      # May contain markdown
+        'bio_html': unicode,  # May contain plaintext, or HTML
+        'fp_verification_key': unicode,  # forgotten password verification key
+        'fp_token_expire': datetime.datetime,
+        }
+
+    required_fields = ['username', 'created', 'pw_hash', 'email']
+
+    default_values = {
+        'created': datetime.datetime.utcnow,
+        'email_verified': False,
+        'status': u'needs_email_verification',
+        'is_admin': False}
+
+    def check_login(self, password):
+        """
+        See if a user can login with this password
+        """
+        return auth_lib.bcrypt_check_password(
+            password, self.pw_hash)
+
+
+class MediaEntry(Document):
+    """
+    Record of a piece of media.
+
+    Structure:
+     - uploader: A reference to a User who uploaded this.
+
+     - title: Title of this work
+
+     - slug: A normalized "slug" which can be used as part of a URL to retrieve
+       this work, such as 'my-works-name-in-slug-form' may be viewable by
+       'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
+       Note that since URLs are constructed this way, slugs must be unique
+       per-uploader.  (An index is provided to enforce that but code should be
+       written on the python side to ensure this as well.)
+
+     - created: Date and time of when this piece of work was uploaded.
+
+     - description: Uploader-set description of this work.  This can be marked
+       up with MarkDown for slight fanciness (links, boldness, italics,
+       paragraphs...)
+
+     - description_html: Rendered version of the description, run through
+       Markdown and cleaned with our cleaning tool.
+
+     - media_type: What type of media is this?  Currently we only support
+       'image' ;)
+
+     - media_data: Extra information that's media-format-dependent.
+       For example, images might contain some EXIF data that's not appropriate
+       to other formats.  You might store it like:
+
+         mediaentry.media_data['exif'] = {
+             'manufacturer': 'CASIO',
+             'model': 'QV-4000',
+             'exposure_time': .659}
+
+       Alternately for video you might store:
+
+         # play length in seconds
+         mediaentry.media_data['play_length'] = 340
+
+       ... so what's appropriate here really depends on the media type.
+
+     - plugin_data: a mapping of extra plugin information for this User.
+       Nothing uses this yet as we don't have plugins, but someday we
+       might... :)
+
+     - tags: A list of tags.  Each tag is stored as a dictionary that has a key
+       for the actual name and the normalized name-as-slug, so ultimately this
+       looks like:
+         [{'name': 'Gully Gardens',
+           'slug': 'gully-gardens'},
+          {'name': 'Castle Adventure Time?!',
+           'slug': 'castle-adventure-time'}]
+
+     - state: What's the state of this file?  Active, inactive, disabled, etc...
+       But really for now there are only two states:
+        "unprocessed": uploaded but needs to go through processing for display
+        "processed": processed and able to be displayed
+
+     - queued_media_file: storage interface style filepath describing a file
+       queued for processing.  This is stored in the mg_globals.queue_store
+       storage system.
+
+     - queued_task_id: celery task id.  Use this to fetch the task state.
+
+     - media_files: Files relevant to this that have actually been processed
+       and are available for various types of display.  Stored like:
+         {'thumb': ['dir1', 'dir2', 'pic.png']}
+
+     - attachment_files: A list of "attachment" files, ones that aren't
+       critical to this piece of media but may be usefully relevant to people
+       viewing the work.  (currently unused.)
+
+     - fail_error: path to the exception raised
+     - fail_metadata:
+    """
+    __collection__ = 'media_entries'
+    use_dot_notation = True
+
+    structure = {
+        'uploader': ObjectId,
+        'title': unicode,
+        'slug': unicode,
+        'created': datetime.datetime,
+        'description': unicode,  # May contain markdown/up
+        'description_html': unicode,  # May contain plaintext, or HTML
+        'media_type': unicode,
+        'media_data': dict,  # extra data relevant to this media_type
+        'plugin_data': dict,  # plugins can dump stuff here.
+        'tags': [dict],
+        'state': unicode,
+
+        # For now let's assume there can only be one main file queued
+        # at a time
+        'queued_media_file': [unicode],
+        'queued_task_id': unicode,
+
+        # A dictionary of logical names to filepaths
+        'media_files': dict,
+
+        # The following should be lists of lists, in appropriate file
+        # record form
+        'attachment_files': list,
+
+        # If things go badly in processing things, we'll store that
+        # data here
+        'fail_error': unicode,
+        'fail_metadata': dict}
+
+    required_fields = [
+        'uploader', 'created', 'media_type', 'slug']
+
+    default_values = {
+        'created': datetime.datetime.utcnow,
+        'state': u'unprocessed'}
+
+    def get_comments(self, ascending=False):
+        if ascending:
+            order = ASCENDING
+        else:
+            order = DESCENDING
+
+        return self.db.MediaComment.find({
+                'media_entry': self._id}).sort('created', order)
+
+    def get_display_media(self, media_map,
+                          fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER):
+        """
+        Find the best media for display.
+
+        Args:
+        - media_map: a dict like
+          {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
+        - fetch_order: the order we should try fetching images in
+
+        Returns:
+        (media_size, media_path)
+        """
+        media_sizes = media_map.keys()
+
+        for media_size in common.DISPLAY_IMAGE_FETCHING_ORDER:
+            if media_size in media_sizes:
+                return media_map[media_size]
+
+    def main_mediafile(self):
+        pass
+
+    def generate_slug(self):
+        self.slug = url.slugify(self.title)
+
+        duplicate = mg_globals.database.media_entries.find_one(
+            {'slug': self.slug})
+
+        if duplicate:
+            self.slug = "%s-%s" % (self._id, self.slug)
+
+    def url_for_self(self, urlgen):
+        """
+        Generate an appropriate url for ourselves
+
+        Use a slug if we have one, else use our '_id'.
+        """ +        uploader = self.get_uploader + +        if self.get('slug'): +            return urlgen( +                'mediagoblin.user_pages.media_home', +                user=uploader.username, +                media=self.slug) +        else: +            return urlgen( +                'mediagoblin.user_pages.media_home', +                user=uploader.username, +                media=unicode(self._id)) + +    def url_to_prev(self, urlgen): +        """ +        Provide a url to the previous entry from this user, if there is one +        """ +        cursor = self.db.MediaEntry.find({'_id': {"$gt": self._id}, +                                          'uploader': self.uploader, +                                          'state': 'processed'}).sort( +                                                    '_id', ASCENDING).limit(1) +        for media in cursor: +            return media.url_for_self(urlgen) + +    def url_to_next(self, urlgen): +        """ +        Provide a url to the next entry from this user, if there is one +        """ +        cursor = self.db.MediaEntry.find({'_id': {"$lt": self._id}, +                                          'uploader': self.uploader, +                                          'state': 'processed'}).sort( +                                                    '_id', DESCENDING).limit(1) + +        for media in cursor: +            return media.url_for_self(urlgen) + +    @property +    def get_uploader(self): +        return self.db.User.find_one({'_id': self.uploader}) + +    def get_fail_exception(self): +        """ +        Get the exception that's appropriate for this error +        """ +        if self['fail_error']: +            return common.import_component(self['fail_error']) + + +class MediaComment(Document): +    """ +    A comment on a MediaEntry. + +    Structure: +     - media_entry: The media entry this comment is attached to +     - author: user who posted this comment +     - created: when the comment was created +     - content: plaintext (but markdown'able) version of the comment's content. +     - content_html: the actual html-rendered version of the comment displayed. +       Run through Markdown and the HTML cleaner. +    """ + +    __collection__ = 'media_comments' +    use_dot_notation = True + +    structure = { +        'media_entry': ObjectId, +        'author': ObjectId, +        'created': datetime.datetime, +        'content': unicode, +        'content_html': unicode} + +    required_fields = [ +        'media_entry', 'author', 'created', 'content'] + +    default_values = { +        'created': datetime.datetime.utcnow} + +    def media_entry(self): +        return self.db.MediaEntry.find_one({'_id': self['media_entry']}) + +    @property +    def get_author(self): +        return self.db.User.find_one({'_id': self['author']}) + + +REGISTER_MODELS = [ +    MediaEntry, +    User, +    MediaComment] + + +def register_models(connection): +    """ +    Register all models in REGISTER_MODELS with this connection. +    """ +    connection.register(REGISTER_MODELS) diff --git a/mediagoblin/db/mongo/open.py b/mediagoblin/db/mongo/open.py new file mode 100644 index 00000000..48c909d9 --- /dev/null +++ b/mediagoblin/db/mongo/open.py @@ -0,0 +1,78 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS. 
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import pymongo
+import mongokit
+from paste.deploy.converters import asint
+from mediagoblin.db.mongo import models
+from mediagoblin.db.mongo.util import MigrationManager
+
+
+def connect_database_from_config(app_config, use_pymongo=False):
+    """
+    Connect to the main database, take config from app_config
+
+    Optionally use pymongo instead of mongokit for the connection.
+    """
+    port = app_config.get('db_port')
+    if port:
+        port = asint(port)
+
+    if use_pymongo:
+        connection = pymongo.Connection(
+            app_config.get('db_host'), port)
+    else:
+        connection = mongokit.Connection(
+            app_config.get('db_host'), port)
+    return connection
+
+
+def setup_connection_and_db_from_config(app_config, use_pymongo=False):
+    """
+    Setup connection and database from config.
+
+    Optionally use pymongo instead of mongokit.
+    """
+    connection = connect_database_from_config(app_config, use_pymongo)
+    database_path = app_config['db_name']
+    db = connection[database_path]
+
+    if not use_pymongo:
+        models.register_models(connection)
+
+    return (connection, db)
+
+
+def check_db_migrations_current(db):
+    # This MUST be imported so as to set up the appropriate migrations!
+    from mediagoblin.db.mongo import migrations
+
+    # Init the migration number if necessary
+    migration_manager = MigrationManager(db)
+    migration_manager.install_migration_version_if_missing()
+
+    # Tiny hack to warn user if our migration is out of date
+    if not migration_manager.database_at_latest_migration():
+        db_migration_num = migration_manager.database_current_migration()
+        latest_migration_num = migration_manager.latest_migration()
+        if db_migration_num < latest_migration_num:
+            print (
+                "*WARNING:* Your migrations are out of date, "
+                "maybe run ./bin/gmg migrate?")
+        elif db_migration_num > latest_migration_num:
+            print (
+                "*WARNING:* Your migrations are out of date... "
+                "in fact they appear to be from the future?!")
diff --git a/mediagoblin/db/mongo/util.py b/mediagoblin/db/mongo/util.py
new file mode 100644
index 00000000..e2065693
--- /dev/null
+++ b/mediagoblin/db/mongo/util.py
@@ -0,0 +1,292 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Utilities for database operations.
+
+Some note on migration and indexing tools:
+
+We store information about what the state of the database is in the
+'mediagoblin' document of the 'app_metadata' collection.  Keys in that
+document relevant to here:
+
+ - 'migration_number': The integer representing the current state of
+   the migrations
+"""
+
+import copy
+
+# Imports that other modules might use
+from pymongo import ASCENDING, DESCENDING
+from pymongo.errors import InvalidId
+from mongokit import ObjectId
+
+from mediagoblin.db.mongo.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES
+
+
+################
+# Indexing tools
+################
+
+
+def add_new_indexes(database, active_indexes=ACTIVE_INDEXES):
+    """
+    Add any new indexes to the database.
+
+    Args:
+     - database: pymongo or mongokit database instance.
+     - active_indexes: indexes to possibly add in the pattern of:
+       {'collection_name': {
+            'identifier': {
+                'index': [index_foo_goes_here],
+                'unique': True}}
+       where 'index' is the index to add and all other options are
+       arguments for collection.create_index.
+
+    Returns:
+      A list of indexes added in form ('collection', 'index_name')
+    """
+    indexes_added = []
+
+    for collection_name, indexes in active_indexes.iteritems():
+        collection = database[collection_name]
+        collection_indexes = collection.index_information().keys()
+
+        for index_name, index_data in indexes.iteritems():
+            if not index_name in collection_indexes:
+                # Get a copy actually so we don't modify the actual
+                # structure
+                index_data = copy.copy(index_data)
+                index = index_data.pop('index')
+                collection.create_index(
+                    index, name=index_name, **index_data)
+
+                indexes_added.append((collection_name, index_name))
+
+    return indexes_added
+
+
+def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES):
+    """
+    Remove any deprecated indexes from the database.
+
+    Args:
+     - database: pymongo or mongokit database instance.
+     - deprecated_indexes: the indexes to deprecate in the pattern of:
+       {'collection_name': {
+            'identifier': {
+                'index': [index_foo_goes_here],
+                'unique': True}}
+
+       (... although we really only need the 'identifier' here, as the
+       rest of the information isn't used in this case.  But it's kept
+       around so we can remember what it was)
+
+    Returns:
+      A list of indexes removed in form ('collection', 'index_name')
+    """
+    indexes_removed = []
+
+    for collection_name, indexes in deprecated_indexes.iteritems():
+        collection = database[collection_name]
+        collection_indexes = collection.index_information().keys()
+
+        for index_name, index_data in indexes.iteritems():
+            if index_name in collection_indexes:
+                collection.drop_index(index_name)
+
+                indexes_removed.append((collection_name, index_name))
+
+    return indexes_removed
+
+
+#################
+# Migration tools
+#################
+
+# The default migration registry...
+#
+# Don't set this yourself!  RegisterMigration will automatically fill
+# this with stuff via decorating methods in migrations.py
+
+class MissingCurrentMigration(Exception):
+    pass
+
+
+MIGRATIONS = {}
+
+
+class RegisterMigration(object):
+    """
+    Tool for registering migrations
+
+    Call like:
+
+    @RegisterMigration(33)
+    def update_dwarves(database):
+        [...]
+
+    This will register your migration with the default migration
+    registry.  Alternately, to specify a very specific
+    migration_registry, you can pass in that as the second argument.
+
+    Note, the number of your migration should NEVER be 0 or less than
+    0.  0 is the default "no migrations" state!
+    """
+    def __init__(self, migration_number, migration_registry=MIGRATIONS):
+        assert migration_number > 0, "Migration number must be > 0!"
+        assert migration_number not in migration_registry, \
+            "Duplicate migration numbers detected!  That's not allowed!"
+
+        self.migration_number = migration_number
+        self.migration_registry = migration_registry
+
+    def __call__(self, migration):
+        self.migration_registry[self.migration_number] = migration
+        return migration
+
+
+class MigrationManager(object):
+    """
+    Migration handling tool.
+
+    Takes information about a database, lets you update the database
+    to the latest migrations, etc.
+    """
+    def __init__(self, database, migration_registry=MIGRATIONS):
+        """
+        Args:
+         - database: database we're going to migrate
+         - migration_registry: where we should find all migrations to
+           run
+        """
+        self.database = database
+        self.migration_registry = migration_registry
+        self._sorted_migrations = None
+
+    def _ensure_current_migration_record(self):
+        """
+        If there isn't a database[u'app_metadata'] mediagoblin entry
+        with the 'current_migration', throw an error.
+        """
+        if self.database_current_migration() is None:
+            raise MissingCurrentMigration(
+                "Tried to call function which requires "
+                "'current_migration' set in database")
+
+    @property
+    def sorted_migrations(self):
+        """
+        Sort migrations if necessary and store in self._sorted_migrations
+        """
+        if not self._sorted_migrations:
+            self._sorted_migrations = sorted(
+                self.migration_registry.items(),
+                # sort on the key... the migration number
+                key=lambda migration_tuple: migration_tuple[0])
+
+        return self._sorted_migrations
+
+    def latest_migration(self):
+        """
+        Return a migration number for the latest migration, or 0 if
+        there are no migrations.
+        """
+        if self.sorted_migrations:
+            return self.sorted_migrations[-1][0]
+        else:
+            # If no migrations have been set, we start at 0.
+            return 0
+
+    def set_current_migration(self, migration_number):
+        """
+        Set the migration in the database to migration_number
+        """
+        # Add the mediagoblin migration if necessary
+        self.database[u'app_metadata'].update(
+            {u'_id': u'mediagoblin'},
+            {u'$set': {u'current_migration': migration_number}},
+            upsert=True)
+
+    def install_migration_version_if_missing(self):
+        """
+        Sets the migration to the latest version if no migration
+        version at all is set.
+        """
+        mgoblin_metadata = self.database[u'app_metadata'].find_one(
+            {u'_id': u'mediagoblin'})
+        if not mgoblin_metadata:
+            latest_migration = self.latest_migration()
+            self.set_current_migration(latest_migration)
+
+    def database_current_migration(self):
+        """
+        Return the current migration in the database.
+        """
+        mgoblin_metadata = self.database[u'app_metadata'].find_one(
+            {u'_id': u'mediagoblin'})
+        if not mgoblin_metadata:
+            return None
+        else:
+            return mgoblin_metadata[u'current_migration']
+
+    def database_at_latest_migration(self):
+        """
+        See if the database is at the latest migration.
+        Returns a boolean.
+        """
+        current_migration = self.database_current_migration()
+        return current_migration == self.latest_migration()
+
+    def migrations_to_run(self):
+        """
+        Get a list of migrations to run still, if any.
+
+        Note that calling this will set your migration version to the
+        latest version if it isn't installed to anything yet!
+        """
+        self._ensure_current_migration_record()
+
+        db_current_migration = self.database_current_migration()
+
+        return [
+            (migration_number, migration_func)
+            for migration_number, migration_func in self.sorted_migrations
+            if migration_number > db_current_migration]
+
+    def migrate_new(self, pre_callback=None, post_callback=None):
+        """
+        Run all migrations.
+
+        Includes two optional args:
+         - pre_callback: if called, this is a callback on something to
+           run pre-migration.  Takes (migration_number, migration_func)
+           as arguments
+         - post_callback: if called, this is a callback on something to
+           run post-migration.  Takes (migration_number, migration_func)
+           as arguments
+        """
+        # If we aren't set to any version number, presume we're at the
+        # latest (which means we'll do nothing here...)
+        self.install_migration_version_if_missing()
+
+        for migration_number, migration_func in self.migrations_to_run():
+            if pre_callback:
+                pre_callback(migration_number, migration_func)
+            migration_func(self.database)
+            self.set_current_migration(migration_number)
+            if post_callback:
+                post_callback(migration_number, migration_func)
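
For orientation, here is a sketch of how the modules added by this commit might be exercised together from a maintenance script.  It is not part of the diff: the config dict and the report() callback are hypothetical glue, but the imported names (setup_connection_and_db_from_config, MigrationManager, add_new_indexes, remove_deprecated_indexes) and the config keys ('db_host', 'db_port', 'db_name') are exactly the ones the files above define and read.

# Hypothetical glue code -- not part of this commit.
from mediagoblin.db.mongo.open import setup_connection_and_db_from_config
from mediagoblin.db.mongo.util import (
    MigrationManager, add_new_indexes, remove_deprecated_indexes)
# Importing this module registers migrations 1-7 via @RegisterMigration.
from mediagoblin.db.mongo import migrations


def report(migration_number, migration_func):
    # Matches the (migration_number, migration_func) signature that
    # migrate_new() passes to its callbacks.
    print 'Running migration %d: %s' % (
        migration_number, migration_func.__name__)


# Assumed local MongoDB; open.py reads 'db_host', 'db_port' and 'db_name'.
app_config = {'db_host': 'localhost', 'db_port': '27017',
              'db_name': 'mediagoblin'}

# mongokit connection; models.register_models() is called for us.
connection, db = setup_connection_and_db_from_config(app_config)

# Record a migration number in 'app_metadata' if the database has none yet,
# then run every registered migration newer than the recorded one.
manager = MigrationManager(db)
manager.migrate_new(pre_callback=report)

# Create any indexes declared in ACTIVE_INDEXES that are missing, and drop
# any listed in DEPRECATED_INDEXES (currently empty).
print add_new_indexes(db)
print remove_deprecated_indexes(db)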
