diff options
Diffstat (limited to 'mediagoblin/db/mongo')
-rw-r--r-- | mediagoblin/db/mongo/__init__.py | 15 | ||||
-rw-r--r-- | mediagoblin/db/mongo/indexes.py | 146 | ||||
-rw-r--r-- | mediagoblin/db/mongo/migrations.py | 110 | ||||
-rw-r--r-- | mediagoblin/db/mongo/models.py | 363 | ||||
-rw-r--r-- | mediagoblin/db/mongo/open.py | 78 | ||||
-rw-r--r-- | mediagoblin/db/mongo/util.py | 292 |
6 files changed, 1004 insertions, 0 deletions
diff --git a/mediagoblin/db/mongo/__init__.py b/mediagoblin/db/mongo/__init__.py new file mode 100644 index 00000000..ba347c69 --- /dev/null +++ b/mediagoblin/db/mongo/__init__.py @@ -0,0 +1,15 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. diff --git a/mediagoblin/db/mongo/indexes.py b/mediagoblin/db/mongo/indexes.py new file mode 100644 index 00000000..1dd73f2b --- /dev/null +++ b/mediagoblin/db/mongo/indexes.py @@ -0,0 +1,146 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Indexes for the local database. + +To add new indexes +------------------ + +Indexes are recorded in the following format: + +ACTIVE_INDEXES = { + 'collection_name': { + 'identifier': { # key identifier used for possibly deprecating later + 'index': [index_foo_goes_here]}} + +... and anything else being parameters to the create_index function +(including unique=True, etc) + +Current indexes must be registered in ACTIVE_INDEXES... deprecated +indexes should be marked in DEPRECATED_INDEXES. + +Remember, ordering of compound indexes MATTERS. Read below for more. + +REQUIRED READING: + - http://kylebanker.com/blog/2010/09/21/the-joy-of-mongodb-indexes/ + + - http://www.mongodb.org/display/DOCS/Indexes + - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ + + +To remove deprecated indexes +---------------------------- + +Removing deprecated indexes is the same, just move the index into the +deprecated indexes mapping. + +DEPRECATED_INDEXES = { + 'collection_name': { + 'deprecated_index_identifier1': { + 'index': [index_foo_goes_here]}} + +... etc. + +If an index has been deprecated that identifier should NEVER BE USED +AGAIN. Eg, if you previously had 'awesomepants_unique', you shouldn't +use 'awesomepants_unique' again, you should create a totally new name +or at worst use 'awesomepants_unique2'. +""" + +from pymongo import ASCENDING, DESCENDING + + +################ +# Active indexes +################ +ACTIVE_INDEXES = {} + +# MediaEntry indexes +# ------------------ + +MEDIAENTRY_INDEXES = { + 'uploader_slug_unique': { + # Matching an object to an uploader + slug. + # MediaEntries are unique on these two combined, eg: + # /u/${myuser}/m/${myslugname}/ + 'index': [('uploader', ASCENDING), + ('slug', ASCENDING)], + 'unique': True}, + + 'created': { + # A global index for all media entries created, in descending + # order. This is used for the site's frontpage. + 'index': [('created', DESCENDING)]}, + + 'uploader_created': { + # Indexing on uploaders and when media entries are created. + # Used for showing a user gallery, etc. + 'index': [('uploader', ASCENDING), + ('created', DESCENDING)]}, + + 'state_uploader_tags_created': { + # Indexing on processed?, media uploader, associated tags, and + # timestamp Used for showing media items matching a tag + # search, most recent first. + 'index': [('state', ASCENDING), + ('uploader', ASCENDING), + ('tags.slug', DESCENDING), + ('created', DESCENDING)]}, + + 'state_tags_created': { + # Indexing on processed?, media tags, and timestamp (across all users) + # This is used for a front page tag search. + 'index': [('state', ASCENDING), + ('tags.slug', DESCENDING), + ('created', DESCENDING)]}} + + +ACTIVE_INDEXES['media_entries'] = MEDIAENTRY_INDEXES + + +# User indexes +# ------------ + +USER_INDEXES = { + 'username_unique': { + # Index usernames, and make sure they're unique. + # ... I guess we might need to adjust this once we're federated :) + 'index': 'username', + 'unique': True}, + 'created': { + # All most recently created users + 'index': 'created'}} + + +ACTIVE_INDEXES['users'] = USER_INDEXES + + +# MediaComment indexes + +MEDIA_COMMENT_INDEXES = { + 'mediaentry_created': { + 'index': [('media_entry', ASCENDING), + ('created', DESCENDING)]}} + +ACTIVE_INDEXES['media_comments'] = MEDIA_COMMENT_INDEXES + + +#################### +# Deprecated indexes +#################### + +DEPRECATED_INDEXES = {} diff --git a/mediagoblin/db/mongo/migrations.py b/mediagoblin/db/mongo/migrations.py new file mode 100644 index 00000000..cf4e94ae --- /dev/null +++ b/mediagoblin/db/mongo/migrations.py @@ -0,0 +1,110 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from mediagoblin.db.mongo.util import RegisterMigration +from mediagoblin.tools.text import cleaned_markdown_conversion + + +def add_table_field(db, table_name, field_name, default_value): + """ + Add a new field to the table/collection named table_name. + The field will have the name field_name and the value default_value + """ + db[table_name].update( + {field_name: {'$exists': False}}, + {'$set': {field_name: default_value}}, + multi=True) + + +# Please see mediagoblin/tests/test_migrations.py for some examples of +# basic migrations. + + +@RegisterMigration(1) +def user_add_bio_html(database): + """ + Users now have richtext bios via Markdown, reflect appropriately. + """ + collection = database['users'] + + target = collection.find( + {'bio_html': {'$exists': False}}) + + for document in target: + document['bio_html'] = cleaned_markdown_conversion( + document['bio']) + collection.save(document) + + +@RegisterMigration(2) +def mediaentry_mediafiles_main_to_original(database): + """ + Rename "main" media file to "original". + """ + collection = database['media_entries'] + target = collection.find( + {'media_files.main': {'$exists': True}}) + + for document in target: + original = document['media_files'].pop('main') + document['media_files']['original'] = original + + collection.save(document) + + +@RegisterMigration(3) +def mediaentry_remove_thumbnail_file(database): + """ + Use media_files['thumb'] instead of media_entries['thumbnail_file'] + """ + database['media_entries'].update( + {'thumbnail_file': {'$exists': True}}, + {'$unset': {'thumbnail_file': 1}}, + multi=True) + + +@RegisterMigration(4) +def mediaentry_add_queued_task_id(database): + """ + Add the 'queued_task_id' field for entries that don't have it. + """ + add_table_field(database, 'media_entries', 'queued_task_id', None) + + +@RegisterMigration(5) +def mediaentry_add_fail_error_and_metadata(database): + """ + Add 'fail_error' and 'fail_metadata' fields to media entries + """ + add_table_field(database, 'media_entries', 'fail_error', None) + add_table_field(database, 'media_entries', 'fail_metadata', {}) + + +@RegisterMigration(6) +def user_add_forgot_password_token_and_expires(database): + """ + Add token and expiration fields to help recover forgotten passwords + """ + add_table_field(database, 'users', 'fp_verification_key', None) + add_table_field(database, 'users', 'fp_token_expire', None) + + +@RegisterMigration(7) +def media_type_image_to_multimedia_type_image(database): + database['media_entries'].update( + {'media_type': 'image'}, + {'$set': {'media_type': 'mediagoblin.media_types.image'}}, + multi=True) diff --git a/mediagoblin/db/mongo/models.py b/mediagoblin/db/mongo/models.py new file mode 100644 index 00000000..e085840e --- /dev/null +++ b/mediagoblin/db/mongo/models.py @@ -0,0 +1,363 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import datetime, uuid + +from mongokit import Document + +from mediagoblin.auth import lib as auth_lib +from mediagoblin import mg_globals +from mediagoblin.db import migrations +from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId +from mediagoblin.tools.pagination import Pagination +from mediagoblin.tools import url, common +from mediagoblin.tools import licenses + +################### +# Custom validators +################### + +######## +# Models +######## + + +class User(Document): + """ + A user of MediaGoblin. + + Structure: + - username: The username of this user, should be unique to this instance. + - email: Email address of this user + - created: When the user was created + - plugin_data: a mapping of extra plugin information for this User. + Nothing uses this yet as we don't have plugins, but someday we + might... :) + - pw_hash: Hashed version of user's password. + - email_verified: Whether or not the user has verified their email or not. + Most parts of the site are disabled for users who haven't yet. + - status: whether or not the user is active, etc. Currently only has two + values, 'needs_email_verification' or 'active'. (In the future, maybe + we'll change this to a boolean with a key of 'active' and have a + separate field for a reason the user's been disabled if that's + appropriate... email_verified is already separate, after all.) + - verification_key: If the user is awaiting email verification, the user + will have to provide this key (which will be encoded in the presented + URL) in order to confirm their email as active. + - is_admin: Whether or not this user is an administrator or not. + - url: this user's personal webpage/website, if appropriate. + - bio: biography of this user (plaintext, in markdown) + - bio_html: biography of the user converted to proper HTML. + """ + __collection__ = 'users' + + structure = { + 'username': unicode, + 'email': unicode, + 'created': datetime.datetime, + 'plugin_data': dict, # plugins can dump stuff here. + 'pw_hash': unicode, + 'email_verified': bool, + 'status': unicode, + 'verification_key': unicode, + 'is_admin': bool, + 'url' : unicode, + 'bio' : unicode, # May contain markdown + 'bio_html': unicode, # May contain plaintext, or HTML + 'fp_verification_key': unicode, # forgotten password verification key + 'fp_token_expire': datetime.datetime + } + + required_fields = ['username', 'created', 'pw_hash', 'email'] + + default_values = { + 'created': datetime.datetime.utcnow, + 'email_verified': False, + 'status': u'needs_email_verification', + 'verification_key': lambda: unicode(uuid.uuid4()), + 'is_admin': False} + + def check_login(self, password): + """ + See if a user can login with this password + """ + return auth_lib.bcrypt_check_password( + password, self['pw_hash']) + + +class MediaEntry(Document): + """ + Record of a piece of media. + + Structure: + - uploader: A reference to a User who uploaded this. + + - title: Title of this work + + - slug: A normalized "slug" which can be used as part of a URL to retrieve + this work, such as 'my-works-name-in-slug-form' may be viewable by + 'http://mg.example.org/u/username/m/my-works-name-in-slug-form/' + Note that since URLs are constructed this way, slugs must be unique + per-uploader. (An index is provided to enforce that but code should be + written on the python side to ensure this as well.) + + - created: Date and time of when this piece of work was uploaded. + + - description: Uploader-set description of this work. This can be marked + up with MarkDown for slight fanciness (links, boldness, italics, + paragraphs...) + + - description_html: Rendered version of the description, run through + Markdown and cleaned with our cleaning tool. + + - media_type: What type of media is this? Currently we only support + 'image' ;) + + - media_data: Extra information that's media-format-dependent. + For example, images might contain some EXIF data that's not appropriate + to other formats. You might store it like: + + mediaentry['media_data']['exif'] = { + 'manufacturer': 'CASIO', + 'model': 'QV-4000', + 'exposure_time': .659} + + Alternately for video you might store: + + # play length in seconds + mediaentry['media_data']['play_length'] = 340 + + ... so what's appropriate here really depends on the media type. + + - plugin_data: a mapping of extra plugin information for this User. + Nothing uses this yet as we don't have plugins, but someday we + might... :) + + - tags: A list of tags. Each tag is stored as a dictionary that has a key + for the actual name and the normalized name-as-slug, so ultimately this + looks like: + [{'name': 'Gully Gardens', + 'slug': 'gully-gardens'}, + {'name': 'Castle Adventure Time?!", + 'slug': 'castle-adventure-time'}] + + - state: What's the state of this file? Active, inactive, disabled, etc... + But really for now there are only two states: + "unprocessed": uploaded but needs to go through processing for display + "processed": processed and able to be displayed + + - license: URI for entry's license + + - queued_media_file: storage interface style filepath describing a file + queued for processing. This is stored in the mg_globals.queue_store + storage system. + + - queued_task_id: celery task id. Use this to fetch the task state. + + - media_files: Files relevant to this that have actually been processed + and are available for various types of display. Stored like: + {'thumb': ['dir1', 'dir2', 'pic.png'} + + - attachment_files: A list of "attachment" files, ones that aren't + critical to this piece of media but may be usefully relevant to people + viewing the work. (currently unused.) + + - fail_error: path to the exception raised + - fail_metadata: + + """ + __collection__ = 'media_entries' + + structure = { + 'uploader': ObjectId, + 'title': unicode, + 'slug': unicode, + 'created': datetime.datetime, + 'description': unicode, # May contain markdown/up + 'description_html': unicode, # May contain plaintext, or HTML + 'media_type': unicode, + 'media_data': dict, # extra data relevant to this media_type + 'plugin_data': dict, # plugins can dump stuff here. + 'tags': [dict], + 'state': unicode, + 'license': unicode, # License URI + + # For now let's assume there can only be one main file queued + # at a time + 'queued_media_file': [unicode], + 'queued_task_id': unicode, + + # A dictionary of logical names to filepaths + 'media_files': dict, + + # The following should be lists of lists, in appropriate file + # record form + 'attachment_files': list, + + # If things go badly in processing things, we'll store that + # data here + 'fail_error': unicode, + 'fail_metadata': dict} + + required_fields = [ + 'uploader', 'created', 'media_type', 'slug'] + + default_values = { + 'created': datetime.datetime.utcnow, + 'state': u'unprocessed'} + + def get_comments(self): + return self.db.MediaComment.find({ + 'media_entry': self['_id']}).sort('created', DESCENDING) + + def get_display_media(self, media_map, fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER): + """ + Find the best media for display. + + Args: + - media_map: a dict like + {u'image_size': [u'dir1', u'dir2', u'image.jpg']} + - fetch_order: the order we should try fetching images in + + Returns: + (media_size, media_path) + """ + media_sizes = media_map.keys() + + for media_size in common.DISPLAY_IMAGE_FETCHING_ORDER: + if media_size in media_sizes: + return media_map[media_size] + + def main_mediafile(self): + pass + + def generate_slug(self): + self['slug'] = url.slugify(self['title']) + + duplicate = mg_globals.database.media_entries.find_one( + {'slug': self['slug']}) + + if duplicate: + self['slug'] = "%s-%s" % (self['_id'], self['slug']) + + def url_for_self(self, urlgen): + """ + Generate an appropriate url for ourselves + + Use a slug if we have one, else use our '_id'. + """ + uploader = self.uploader() + + if self.get('slug'): + return urlgen( + 'mediagoblin.user_pages.media_home', + user=uploader['username'], + media=self['slug']) + else: + return urlgen( + 'mediagoblin.user_pages.media_home', + user=uploader['username'], + media=unicode(self['_id'])) + + def url_to_prev(self, urlgen): + """ + Provide a url to the previous entry from this user, if there is one + """ + cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']}, + 'uploader': self['uploader'], + 'state': 'processed'}).sort( + '_id', ASCENDING).limit(1) + if cursor.count(): + return urlgen('mediagoblin.user_pages.media_home', + user=self.uploader()['username'], + media=unicode(cursor[0]['slug'])) + + def url_to_next(self, urlgen): + """ + Provide a url to the next entry from this user, if there is one + """ + cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']}, + 'uploader': self['uploader'], + 'state': 'processed'}).sort( + '_id', DESCENDING).limit(1) + + if cursor.count(): + return urlgen('mediagoblin.user_pages.media_home', + user=self.uploader()['username'], + media=unicode(cursor[0]['slug'])) + + def uploader(self): + return self.db.User.find_one({'_id': self['uploader']}) + + def get_fail_exception(self): + """ + Get the exception that's appropriate for this error + """ + if self['fail_error']: + return common.import_component(self['fail_error']) + + def get_license_data(self): + """Return license dict for requested license""" + return licenses.SUPPORTED_LICENSES[self['license']] + + +class MediaComment(Document): + """ + A comment on a MediaEntry. + + Structure: + - media_entry: The media entry this comment is attached to + - author: user who posted this comment + - created: when the comment was created + - content: plaintext (but markdown'able) version of the comment's content. + - content_html: the actual html-rendered version of the comment displayed. + Run through Markdown and the HTML cleaner. + """ + + __collection__ = 'media_comments' + + structure = { + 'media_entry': ObjectId, + 'author': ObjectId, + 'created': datetime.datetime, + 'content': unicode, + 'content_html': unicode} + + required_fields = [ + 'media_entry', 'author', 'created', 'content'] + + default_values = { + 'created': datetime.datetime.utcnow} + + def media_entry(self): + return self.db.MediaEntry.find_one({'_id': self['media_entry']}) + + def author(self): + return self.db.User.find_one({'_id': self['author']}) + + +REGISTER_MODELS = [ + MediaEntry, + User, + MediaComment] + + +def register_models(connection): + """ + Register all models in REGISTER_MODELS with this connection. + """ + connection.register(REGISTER_MODELS) + diff --git a/mediagoblin/db/mongo/open.py b/mediagoblin/db/mongo/open.py new file mode 100644 index 00000000..48c909d9 --- /dev/null +++ b/mediagoblin/db/mongo/open.py @@ -0,0 +1,78 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import pymongo +import mongokit +from paste.deploy.converters import asint +from mediagoblin.db.mongo import models +from mediagoblin.db.mongo.util import MigrationManager + + +def connect_database_from_config(app_config, use_pymongo=False): + """ + Connect to the main database, take config from app_config + + Optionally use pymongo instead of mongokit for the connection. + """ + port = app_config.get('db_port') + if port: + port = asint(port) + + if use_pymongo: + connection = pymongo.Connection( + app_config.get('db_host'), port) + else: + connection = mongokit.Connection( + app_config.get('db_host'), port) + return connection + + +def setup_connection_and_db_from_config(app_config, use_pymongo=False): + """ + Setup connection and database from config. + + Optionally use pymongo instead of mongokit. + """ + connection = connect_database_from_config(app_config, use_pymongo) + database_path = app_config['db_name'] + db = connection[database_path] + + if not use_pymongo: + models.register_models(connection) + + return (connection, db) + + +def check_db_migrations_current(db): + # This MUST be imported so as to set up the appropriate migrations! + from mediagoblin.db.mongo import migrations + + # Init the migration number if necessary + migration_manager = MigrationManager(db) + migration_manager.install_migration_version_if_missing() + + # Tiny hack to warn user if our migration is out of date + if not migration_manager.database_at_latest_migration(): + db_migration_num = migration_manager.database_current_migration() + latest_migration_num = migration_manager.latest_migration() + if db_migration_num < latest_migration_num: + print ( + "*WARNING:* Your migrations are out of date, " + "maybe run ./bin/gmg migrate?") + elif db_migration_num > latest_migration_num: + print ( + "*WARNING:* Your migrations are out of date... " + "in fact they appear to be from the future?!") diff --git a/mediagoblin/db/mongo/util.py b/mediagoblin/db/mongo/util.py new file mode 100644 index 00000000..e2065693 --- /dev/null +++ b/mediagoblin/db/mongo/util.py @@ -0,0 +1,292 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Utilities for database operations. + +Some note on migration and indexing tools: + +We store information about what the state of the database is in the +'mediagoblin' document of the 'app_metadata' collection. Keys in that +document relevant to here: + + - 'migration_number': The integer representing the current state of + the migrations +""" + +import copy + +# Imports that other modules might use +from pymongo import ASCENDING, DESCENDING +from pymongo.errors import InvalidId +from mongokit import ObjectId + +from mediagoblin.db.mongo.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES + + +################ +# Indexing tools +################ + + +def add_new_indexes(database, active_indexes=ACTIVE_INDEXES): + """ + Add any new indexes to the database. + + Args: + - database: pymongo or mongokit database instance. + - active_indexes: indexes to possibly add in the pattern of: + {'collection_name': { + 'identifier': { + 'index': [index_foo_goes_here], + 'unique': True}} + where 'index' is the index to add and all other options are + arguments for collection.create_index. + + Returns: + A list of indexes added in form ('collection', 'index_name') + """ + indexes_added = [] + + for collection_name, indexes in active_indexes.iteritems(): + collection = database[collection_name] + collection_indexes = collection.index_information().keys() + + for index_name, index_data in indexes.iteritems(): + if not index_name in collection_indexes: + # Get a copy actually so we don't modify the actual + # structure + index_data = copy.copy(index_data) + index = index_data.pop('index') + collection.create_index( + index, name=index_name, **index_data) + + indexes_added.append((collection_name, index_name)) + + return indexes_added + + +def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES): + """ + Remove any deprecated indexes from the database. + + Args: + - database: pymongo or mongokit database instance. + - deprecated_indexes: the indexes to deprecate in the pattern of: + {'collection_name': { + 'identifier': { + 'index': [index_foo_goes_here], + 'unique': True}} + + (... although we really only need the 'identifier' here, as the + rest of the information isn't used in this case. But it's kept + around so we can remember what it was) + + Returns: + A list of indexes removed in form ('collection', 'index_name') + """ + indexes_removed = [] + + for collection_name, indexes in deprecated_indexes.iteritems(): + collection = database[collection_name] + collection_indexes = collection.index_information().keys() + + for index_name, index_data in indexes.iteritems(): + if index_name in collection_indexes: + collection.drop_index(index_name) + + indexes_removed.append((collection_name, index_name)) + + return indexes_removed + + +################# +# Migration tools +################# + +# The default migration registry... +# +# Don't set this yourself! RegisterMigration will automatically fill +# this with stuff via decorating methods in migrations.py + +class MissingCurrentMigration(Exception): + pass + + +MIGRATIONS = {} + + +class RegisterMigration(object): + """ + Tool for registering migrations + + Call like: + + @RegisterMigration(33) + def update_dwarves(database): + [...] + + This will register your migration with the default migration + registry. Alternately, to specify a very specific + migration_registry, you can pass in that as the second argument. + + Note, the number of your migration should NEVER be 0 or less than + 0. 0 is the default "no migrations" state! + """ + def __init__(self, migration_number, migration_registry=MIGRATIONS): + assert migration_number > 0, "Migration number must be > 0!" + assert migration_number not in migration_registry, \ + "Duplicate migration numbers detected! That's not allowed!" + + self.migration_number = migration_number + self.migration_registry = migration_registry + + def __call__(self, migration): + self.migration_registry[self.migration_number] = migration + return migration + + +class MigrationManager(object): + """ + Migration handling tool. + + Takes information about a database, lets you update the database + to the latest migrations, etc. + """ + def __init__(self, database, migration_registry=MIGRATIONS): + """ + Args: + - database: database we're going to migrate + - migration_registry: where we should find all migrations to + run + """ + self.database = database + self.migration_registry = migration_registry + self._sorted_migrations = None + + def _ensure_current_migration_record(self): + """ + If there isn't a database[u'app_metadata'] mediagoblin entry + with the 'current_migration', throw an error. + """ + if self.database_current_migration() is None: + raise MissingCurrentMigration( + "Tried to call function which requires " + "'current_migration' set in database") + + @property + def sorted_migrations(self): + """ + Sort migrations if necessary and store in self._sorted_migrations + """ + if not self._sorted_migrations: + self._sorted_migrations = sorted( + self.migration_registry.items(), + # sort on the key... the migration number + key=lambda migration_tuple: migration_tuple[0]) + + return self._sorted_migrations + + def latest_migration(self): + """ + Return a migration number for the latest migration, or 0 if + there are no migrations. + """ + if self.sorted_migrations: + return self.sorted_migrations[-1][0] + else: + # If no migrations have been set, we start at 0. + return 0 + + def set_current_migration(self, migration_number): + """ + Set the migration in the database to migration_number + """ + # Add the mediagoblin migration if necessary + self.database[u'app_metadata'].update( + {u'_id': u'mediagoblin'}, + {u'$set': {u'current_migration': migration_number}}, + upsert=True) + + def install_migration_version_if_missing(self): + """ + Sets the migration to the latest version if no migration + version at all is set. + """ + mgoblin_metadata = self.database[u'app_metadata'].find_one( + {u'_id': u'mediagoblin'}) + if not mgoblin_metadata: + latest_migration = self.latest_migration() + self.set_current_migration(latest_migration) + + def database_current_migration(self): + """ + Return the current migration in the database. + """ + mgoblin_metadata = self.database[u'app_metadata'].find_one( + {u'_id': u'mediagoblin'}) + if not mgoblin_metadata: + return None + else: + return mgoblin_metadata[u'current_migration'] + + def database_at_latest_migration(self): + """ + See if the database is at the latest migration. + Returns a boolean. + """ + current_migration = self.database_current_migration() + return current_migration == self.latest_migration() + + def migrations_to_run(self): + """ + Get a list of migrations to run still, if any. + + Note that calling this will set your migration version to the + latest version if it isn't installed to anything yet! + """ + self._ensure_current_migration_record() + + db_current_migration = self.database_current_migration() + + return [ + (migration_number, migration_func) + for migration_number, migration_func in self.sorted_migrations + if migration_number > db_current_migration] + + def migrate_new(self, pre_callback=None, post_callback=None): + """ + Run all migrations. + + Includes two optional args: + - pre_callback: if called, this is a callback on something to + run pre-migration. Takes (migration_number, migration_func) + as arguments + - pre_callback: if called, this is a callback on something to + run post-migration. Takes (migration_number, migration_func) + as arguments + """ + # If we aren't set to any version number, presume we're at the + # latest (which means we'll do nothing here...) + self.install_migration_version_if_missing() + + for migration_number, migration_func in self.migrations_to_run(): + if pre_callback: + pre_callback(migration_number, migration_func) + migration_func(self.database) + self.set_current_migration(migration_number) + if post_callback: + post_callback(migration_number, migration_func) |