Diffstat (limited to 'mediagoblin/db/mongo')
-rw-r--r--  mediagoblin/db/mongo/__init__.py     15
-rw-r--r--  mediagoblin/db/mongo/indexes.py     146
-rw-r--r--  mediagoblin/db/mongo/migrations.py  110
-rw-r--r--  mediagoblin/db/mongo/models.py      363
-rw-r--r--  mediagoblin/db/mongo/open.py         78
-rw-r--r--  mediagoblin/db/mongo/util.py        292
6 files changed, 1004 insertions(+), 0 deletions(-)
diff --git a/mediagoblin/db/mongo/__init__.py b/mediagoblin/db/mongo/__init__.py
new file mode 100644
index 00000000..ba347c69
--- /dev/null
+++ b/mediagoblin/db/mongo/__init__.py
@@ -0,0 +1,15 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
diff --git a/mediagoblin/db/mongo/indexes.py b/mediagoblin/db/mongo/indexes.py
new file mode 100644
index 00000000..1dd73f2b
--- /dev/null
+++ b/mediagoblin/db/mongo/indexes.py
@@ -0,0 +1,146 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Indexes for the local database.
+
+To add new indexes
+------------------
+
+Indexes are recorded in the following format:
+
+ACTIVE_INDEXES = {
+    'collection_name': {
+        'identifier': {  # key identifier used for possibly deprecating later
+            'index': [index_foo_goes_here]}}}
+
+Any other keys in the identifier's mapping are passed through as
+keyword arguments to create_index (unique=True, etc).
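+
+For illustration, the 'uploader_slug_unique' entry registered below
+ends up being applied by add_new_indexes() in
+mediagoblin/db/mongo/util.py as roughly:
+
+    collection.create_index(
+        [('uploader', ASCENDING), ('slug', ASCENDING)],
+        name='uploader_slug_unique', unique=True)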
+
+Current indexes must be registered in ACTIVE_INDEXES... deprecated
+indexes should be marked in DEPRECATED_INDEXES.
+
+Remember, ordering of compound indexes MATTERS. Read below for more.
+
+REQUIRED READING:
+ - http://kylebanker.com/blog/2010/09/21/the-joy-of-mongodb-indexes/
+
+ - http://www.mongodb.org/display/DOCS/Indexes
+ - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ
+
+
+To remove deprecated indexes
+----------------------------
+
+Removing deprecated indexes works the same way: just move the index
+into the deprecated indexes mapping.
+
+DEPRECATED_INDEXES = {
+    'collection_name': {
+        'deprecated_index_identifier1': {
+            'index': [index_foo_goes_here]}}}
+
+... etc.
+
+If an index has been deprecated that identifier should NEVER BE USED
+AGAIN. Eg, if you previously had 'awesomepants_unique', you shouldn't
+use 'awesomepants_unique' again, you should create a totally new name
+or at worst use 'awesomepants_unique2'.
+"""
+
+from pymongo import ASCENDING, DESCENDING
+
+
+################
+# Active indexes
+################
+ACTIVE_INDEXES = {}
+
+# MediaEntry indexes
+# ------------------
+
+MEDIAENTRY_INDEXES = {
+ 'uploader_slug_unique': {
+ # Matching an object to an uploader + slug.
+ # MediaEntries are unique on these two combined, eg:
+ # /u/${myuser}/m/${myslugname}/
+ 'index': [('uploader', ASCENDING),
+ ('slug', ASCENDING)],
+ 'unique': True},
+
+ 'created': {
+ # A global index for all media entries created, in descending
+ # order. This is used for the site's frontpage.
+ 'index': [('created', DESCENDING)]},
+
+ 'uploader_created': {
+ # Indexing on uploaders and when media entries are created.
+ # Used for showing a user gallery, etc.
+ 'index': [('uploader', ASCENDING),
+ ('created', DESCENDING)]},
+
+ 'state_uploader_tags_created': {
+ # Indexing on processed?, media uploader, associated tags, and
+        # timestamp.  Used for showing media items matching a tag
+ # search, most recent first.
+ 'index': [('state', ASCENDING),
+ ('uploader', ASCENDING),
+ ('tags.slug', DESCENDING),
+ ('created', DESCENDING)]},
+
+ 'state_tags_created': {
+ # Indexing on processed?, media tags, and timestamp (across all users)
+ # This is used for a front page tag search.
+ 'index': [('state', ASCENDING),
+ ('tags.slug', DESCENDING),
+ ('created', DESCENDING)]}}
+
+
+ACTIVE_INDEXES['media_entries'] = MEDIAENTRY_INDEXES
+
+
+# User indexes
+# ------------
+
+USER_INDEXES = {
+ 'username_unique': {
+ # Index usernames, and make sure they're unique.
+ # ... I guess we might need to adjust this once we're federated :)
+ 'index': 'username',
+ 'unique': True},
+ 'created': {
+ # All most recently created users
+ 'index': 'created'}}
+
+
+ACTIVE_INDEXES['users'] = USER_INDEXES
+
+
+# MediaComment indexes
+
+MEDIA_COMMENT_INDEXES = {
+ 'mediaentry_created': {
+ 'index': [('media_entry', ASCENDING),
+ ('created', DESCENDING)]}}
+
+ACTIVE_INDEXES['media_comments'] = MEDIA_COMMENT_INDEXES
+
+
+####################
+# Deprecated indexes
+####################
+
+DEPRECATED_INDEXES = {}
diff --git a/mediagoblin/db/mongo/migrations.py b/mediagoblin/db/mongo/migrations.py
new file mode 100644
index 00000000..cf4e94ae
--- /dev/null
+++ b/mediagoblin/db/mongo/migrations.py
@@ -0,0 +1,110 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from mediagoblin.db.mongo.util import RegisterMigration
+from mediagoblin.tools.text import cleaned_markdown_conversion
+
+
+def add_table_field(db, table_name, field_name, default_value):
+ """
+ Add a new field to the table/collection named table_name.
+ The field will have the name field_name and the value default_value
+ """
+ db[table_name].update(
+ {field_name: {'$exists': False}},
+ {'$set': {field_name: default_value}},
+ multi=True)
+
+
+# Please see mediagoblin/tests/test_migrations.py for some examples of
+# basic migrations.
+
+
+@RegisterMigration(1)
+def user_add_bio_html(database):
+ """
+ Users now have richtext bios via Markdown, reflect appropriately.
+ """
+ collection = database['users']
+
+ target = collection.find(
+ {'bio_html': {'$exists': False}})
+
+ for document in target:
+ document['bio_html'] = cleaned_markdown_conversion(
+ document['bio'])
+ collection.save(document)
+
+
+@RegisterMigration(2)
+def mediaentry_mediafiles_main_to_original(database):
+ """
+ Rename "main" media file to "original".
+ """
+ collection = database['media_entries']
+ target = collection.find(
+ {'media_files.main': {'$exists': True}})
+
+ for document in target:
+ original = document['media_files'].pop('main')
+ document['media_files']['original'] = original
+
+ collection.save(document)
+
+
+@RegisterMigration(3)
+def mediaentry_remove_thumbnail_file(database):
+ """
+ Use media_files['thumb'] instead of media_entries['thumbnail_file']
+ """
+ database['media_entries'].update(
+ {'thumbnail_file': {'$exists': True}},
+ {'$unset': {'thumbnail_file': 1}},
+ multi=True)
+
+
+@RegisterMigration(4)
+def mediaentry_add_queued_task_id(database):
+ """
+ Add the 'queued_task_id' field for entries that don't have it.
+ """
+ add_table_field(database, 'media_entries', 'queued_task_id', None)
+
+
+@RegisterMigration(5)
+def mediaentry_add_fail_error_and_metadata(database):
+ """
+ Add 'fail_error' and 'fail_metadata' fields to media entries
+ """
+ add_table_field(database, 'media_entries', 'fail_error', None)
+ add_table_field(database, 'media_entries', 'fail_metadata', {})
+
+
+@RegisterMigration(6)
+def user_add_forgot_password_token_and_expires(database):
+ """
+ Add token and expiration fields to help recover forgotten passwords
+ """
+ add_table_field(database, 'users', 'fp_verification_key', None)
+ add_table_field(database, 'users', 'fp_token_expire', None)
+
+
+@RegisterMigration(7)
+def media_type_image_to_multimedia_type_image(database):
+ database['media_entries'].update(
+ {'media_type': 'image'},
+ {'$set': {'media_type': 'mediagoblin.media_types.image'}},
+ multi=True)
diff --git a/mediagoblin/db/mongo/models.py b/mediagoblin/db/mongo/models.py
new file mode 100644
index 00000000..e085840e
--- /dev/null
+++ b/mediagoblin/db/mongo/models.py
@@ -0,0 +1,363 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import uuid
+
+from mongokit import Document
+
+from mediagoblin.auth import lib as auth_lib
+from mediagoblin import mg_globals
+from mediagoblin.db import migrations
+from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
+from mediagoblin.tools.pagination import Pagination
+from mediagoblin.tools import url, common
+from mediagoblin.tools import licenses
+
+###################
+# Custom validators
+###################
+
+########
+# Models
+########
+
+
+class User(Document):
+ """
+ A user of MediaGoblin.
+
+ Structure:
+ - username: The username of this user, should be unique to this instance.
+ - email: Email address of this user
+ - created: When the user was created
+ - plugin_data: a mapping of extra plugin information for this User.
+ Nothing uses this yet as we don't have plugins, but someday we
+ might... :)
+ - pw_hash: Hashed version of user's password.
+    - email_verified: Whether or not the user has verified their email.
+ Most parts of the site are disabled for users who haven't yet.
+ - status: whether or not the user is active, etc. Currently only has two
+ values, 'needs_email_verification' or 'active'. (In the future, maybe
+ we'll change this to a boolean with a key of 'active' and have a
+ separate field for a reason the user's been disabled if that's
+ appropriate... email_verified is already separate, after all.)
+ - verification_key: If the user is awaiting email verification, the user
+ will have to provide this key (which will be encoded in the presented
+ URL) in order to confirm their email as active.
+    - is_admin: Whether or not this user is an administrator.
+ - url: this user's personal webpage/website, if appropriate.
+ - bio: biography of this user (plaintext, in markdown)
+ - bio_html: biography of the user converted to proper HTML.
+ """
+ __collection__ = 'users'
+
+ structure = {
+ 'username': unicode,
+ 'email': unicode,
+ 'created': datetime.datetime,
+ 'plugin_data': dict, # plugins can dump stuff here.
+ 'pw_hash': unicode,
+ 'email_verified': bool,
+ 'status': unicode,
+ 'verification_key': unicode,
+ 'is_admin': bool,
+        'url': unicode,
+        'bio': unicode,  # May contain markdown
+ 'bio_html': unicode, # May contain plaintext, or HTML
+ 'fp_verification_key': unicode, # forgotten password verification key
+ 'fp_token_expire': datetime.datetime
+ }
+
+ required_fields = ['username', 'created', 'pw_hash', 'email']
+
+ default_values = {
+ 'created': datetime.datetime.utcnow,
+ 'email_verified': False,
+ 'status': u'needs_email_verification',
+ 'verification_key': lambda: unicode(uuid.uuid4()),
+ 'is_admin': False}
+
+ def check_login(self, password):
+ """
+ See if a user can login with this password
+ """
+ return auth_lib.bcrypt_check_password(
+ password, self['pw_hash'])
+
+
+class MediaEntry(Document):
+ """
+ Record of a piece of media.
+
+ Structure:
+ - uploader: A reference to a User who uploaded this.
+
+ - title: Title of this work
+
+    - slug: A normalized "slug" which can be used as part of a URL to retrieve
+        this work.  For example, a slug of 'my-works-name-in-slug-form' may be
+        viewable at 'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'.
+ Note that since URLs are constructed this way, slugs must be unique
+ per-uploader. (An index is provided to enforce that but code should be
+ written on the python side to ensure this as well.)
+
+ - created: Date and time of when this piece of work was uploaded.
+
+ - description: Uploader-set description of this work. This can be marked
+        up with Markdown for slight fanciness (links, boldness, italics,
+ paragraphs...)
+
+ - description_html: Rendered version of the description, run through
+ Markdown and cleaned with our cleaning tool.
+
+ - media_type: What type of media is this? Currently we only support
+ 'image' ;)
+
+ - media_data: Extra information that's media-format-dependent.
+ For example, images might contain some EXIF data that's not appropriate
+ to other formats. You might store it like:
+
+ mediaentry['media_data']['exif'] = {
+ 'manufacturer': 'CASIO',
+ 'model': 'QV-4000',
+ 'exposure_time': .659}
+
+ Alternately for video you might store:
+
+ # play length in seconds
+ mediaentry['media_data']['play_length'] = 340
+
+ ... so what's appropriate here really depends on the media type.
+
+    - plugin_data: a mapping of extra plugin information for this MediaEntry.
+ Nothing uses this yet as we don't have plugins, but someday we
+ might... :)
+
+ - tags: A list of tags. Each tag is stored as a dictionary that has a key
+ for the actual name and the normalized name-as-slug, so ultimately this
+ looks like:
+ [{'name': 'Gully Gardens',
+ 'slug': 'gully-gardens'},
+         {'name': 'Castle Adventure Time?!',
+ 'slug': 'castle-adventure-time'}]
+
+ - state: What's the state of this file? Active, inactive, disabled, etc...
+ But really for now there are only two states:
+ "unprocessed": uploaded but needs to go through processing for display
+ "processed": processed and able to be displayed
+
+ - license: URI for entry's license
+
+ - queued_media_file: storage interface style filepath describing a file
+ queued for processing. This is stored in the mg_globals.queue_store
+ storage system.
+
+ - queued_task_id: celery task id. Use this to fetch the task state.
+
+ - media_files: Files relevant to this that have actually been processed
+ and are available for various types of display. Stored like:
+        {'thumb': ['dir1', 'dir2', 'pic.png']}
+
+ - attachment_files: A list of "attachment" files, ones that aren't
+ critical to this piece of media but may be usefully relevant to people
+ viewing the work. (currently unused.)
+
+    - fail_error: importable path of the exception raised during processing
+    - fail_metadata: a dict of extra information recorded about that failure,
+        useful for debugging
+
+ """
+ __collection__ = 'media_entries'
+
+ structure = {
+ 'uploader': ObjectId,
+ 'title': unicode,
+ 'slug': unicode,
+ 'created': datetime.datetime,
+ 'description': unicode, # May contain markdown/up
+ 'description_html': unicode, # May contain plaintext, or HTML
+ 'media_type': unicode,
+ 'media_data': dict, # extra data relevant to this media_type
+ 'plugin_data': dict, # plugins can dump stuff here.
+ 'tags': [dict],
+ 'state': unicode,
+ 'license': unicode, # License URI
+
+ # For now let's assume there can only be one main file queued
+ # at a time
+ 'queued_media_file': [unicode],
+ 'queued_task_id': unicode,
+
+ # A dictionary of logical names to filepaths
+ 'media_files': dict,
+
+ # The following should be lists of lists, in appropriate file
+ # record form
+ 'attachment_files': list,
+
+ # If things go badly in processing things, we'll store that
+ # data here
+ 'fail_error': unicode,
+ 'fail_metadata': dict}
+
+ required_fields = [
+ 'uploader', 'created', 'media_type', 'slug']
+
+ default_values = {
+ 'created': datetime.datetime.utcnow,
+ 'state': u'unprocessed'}
+
+ def get_comments(self):
+ return self.db.MediaComment.find({
+ 'media_entry': self['_id']}).sort('created', DESCENDING)
+
+    def get_display_media(self, media_map,
+                          fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER):
+        """
+        Find the best media for display.
+
+        Args:
+        - media_map: a dict like
+          {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
+        - fetch_order: the order we should try fetching images in
+
+        Returns:
+          The media_map entry (a file path list) for the first size found
+          in fetch_order, or None if no listed size is available.
+        """
+        media_sizes = media_map.keys()
+
+        for media_size in fetch_order:
+            if media_size in media_sizes:
+                return media_map[media_size]
+
+ def main_mediafile(self):
+ pass
+
+ def generate_slug(self):
+ self['slug'] = url.slugify(self['title'])
+
+ duplicate = mg_globals.database.media_entries.find_one(
+ {'slug': self['slug']})
+
+ if duplicate:
+ self['slug'] = "%s-%s" % (self['_id'], self['slug'])
+
+ def url_for_self(self, urlgen):
+ """
+ Generate an appropriate url for ourselves
+
+ Use a slug if we have one, else use our '_id'.
+ """
+ uploader = self.uploader()
+
+ if self.get('slug'):
+ return urlgen(
+ 'mediagoblin.user_pages.media_home',
+ user=uploader['username'],
+ media=self['slug'])
+ else:
+ return urlgen(
+ 'mediagoblin.user_pages.media_home',
+ user=uploader['username'],
+ media=unicode(self['_id']))
+
+ def url_to_prev(self, urlgen):
+ """
+ Provide a url to the previous entry from this user, if there is one
+ """
+ cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']},
+ 'uploader': self['uploader'],
+ 'state': 'processed'}).sort(
+ '_id', ASCENDING).limit(1)
+ if cursor.count():
+ return urlgen('mediagoblin.user_pages.media_home',
+ user=self.uploader()['username'],
+ media=unicode(cursor[0]['slug']))
+
+ def url_to_next(self, urlgen):
+ """
+ Provide a url to the next entry from this user, if there is one
+ """
+ cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']},
+ 'uploader': self['uploader'],
+ 'state': 'processed'}).sort(
+ '_id', DESCENDING).limit(1)
+
+ if cursor.count():
+ return urlgen('mediagoblin.user_pages.media_home',
+ user=self.uploader()['username'],
+ media=unicode(cursor[0]['slug']))
+
+ def uploader(self):
+ return self.db.User.find_one({'_id': self['uploader']})
+
+ def get_fail_exception(self):
+ """
+ Get the exception that's appropriate for this error
+ """
+ if self['fail_error']:
+ return common.import_component(self['fail_error'])
+
+ def get_license_data(self):
+ """Return license dict for requested license"""
+ return licenses.SUPPORTED_LICENSES[self['license']]
+
+
+class MediaComment(Document):
+ """
+ A comment on a MediaEntry.
+
+ Structure:
+ - media_entry: The media entry this comment is attached to
+ - author: user who posted this comment
+ - created: when the comment was created
+ - content: plaintext (but markdown'able) version of the comment's content.
+ - content_html: the actual html-rendered version of the comment displayed.
+ Run through Markdown and the HTML cleaner.
+ """
+
+ __collection__ = 'media_comments'
+
+ structure = {
+ 'media_entry': ObjectId,
+ 'author': ObjectId,
+ 'created': datetime.datetime,
+ 'content': unicode,
+ 'content_html': unicode}
+
+ required_fields = [
+ 'media_entry', 'author', 'created', 'content']
+
+ default_values = {
+ 'created': datetime.datetime.utcnow}
+
+ def media_entry(self):
+ return self.db.MediaEntry.find_one({'_id': self['media_entry']})
+
+ def author(self):
+ return self.db.User.find_one({'_id': self['author']})
+
+
+REGISTER_MODELS = [
+ MediaEntry,
+ User,
+ MediaComment]
+
+
+def register_models(connection):
+ """
+ Register all models in REGISTER_MODELS with this connection.
+ """
+ connection.register(REGISTER_MODELS)
+
diff --git a/mediagoblin/db/mongo/open.py b/mediagoblin/db/mongo/open.py
new file mode 100644
index 00000000..48c909d9
--- /dev/null
+++ b/mediagoblin/db/mongo/open.py
@@ -0,0 +1,78 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import pymongo
+import mongokit
+from paste.deploy.converters import asint
+from mediagoblin.db.mongo import models
+from mediagoblin.db.mongo.util import MigrationManager
+
+
+def connect_database_from_config(app_config, use_pymongo=False):
+ """
+    Connect to the main database, taking connection settings from app_config.
+
+ Optionally use pymongo instead of mongokit for the connection.
+ """
+ port = app_config.get('db_port')
+ if port:
+ port = asint(port)
+
+ if use_pymongo:
+ connection = pymongo.Connection(
+ app_config.get('db_host'), port)
+ else:
+ connection = mongokit.Connection(
+ app_config.get('db_host'), port)
+ return connection
+
+
+def setup_connection_and_db_from_config(app_config, use_pymongo=False):
+ """
+    Set up the connection and database from config.
+
+ Optionally use pymongo instead of mongokit.
+ """
+ connection = connect_database_from_config(app_config, use_pymongo)
+ database_path = app_config['db_name']
+ db = connection[database_path]
+
+ if not use_pymongo:
+ models.register_models(connection)
+
+ return (connection, db)
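+
+# Illustrative usage (a sketch; the config keys shown are the ones this
+# module reads, the values are examples only):
+#
+#     connection, db = setup_connection_and_db_from_config(
+#         {'db_host': 'localhost', 'db_port': '27017', 'db_name': 'mediagoblin'})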
+
+
+def check_db_migrations_current(db):
+ # This MUST be imported so as to set up the appropriate migrations!
+ from mediagoblin.db.mongo import migrations
+
+ # Init the migration number if necessary
+ migration_manager = MigrationManager(db)
+ migration_manager.install_migration_version_if_missing()
+
+ # Tiny hack to warn user if our migration is out of date
+ if not migration_manager.database_at_latest_migration():
+ db_migration_num = migration_manager.database_current_migration()
+ latest_migration_num = migration_manager.latest_migration()
+ if db_migration_num < latest_migration_num:
+ print (
+ "*WARNING:* Your migrations are out of date, "
+ "maybe run ./bin/gmg migrate?")
+ elif db_migration_num > latest_migration_num:
+ print (
+ "*WARNING:* Your migrations are out of date... "
+ "in fact they appear to be from the future?!")
diff --git a/mediagoblin/db/mongo/util.py b/mediagoblin/db/mongo/util.py
new file mode 100644
index 00000000..e2065693
--- /dev/null
+++ b/mediagoblin/db/mongo/util.py
@@ -0,0 +1,292 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Utilities for database operations.
+
+Some notes on the migration and indexing tools:
+
+We store information about what the state of the database is in the
+'mediagoblin' document of the 'app_metadata' collection.  Keys in that
+document relevant here:
+
+ - 'current_migration': The integer representing the current state of
+   the migrations
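+
+So the metadata document, once set, looks roughly like:
+
+    {'_id': 'mediagoblin', 'current_migration': 7}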
+"""
+
+import copy
+
+# Imports that other modules might use
+from pymongo import ASCENDING, DESCENDING
+from pymongo.errors import InvalidId
+from mongokit import ObjectId
+
+from mediagoblin.db.mongo.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES
+
+
+################
+# Indexing tools
+################
+
+
+def add_new_indexes(database, active_indexes=ACTIVE_INDEXES):
+ """
+ Add any new indexes to the database.
+
+ Args:
+ - database: pymongo or mongokit database instance.
+ - active_indexes: indexes to possibly add in the pattern of:
+ {'collection_name': {
+ 'identifier': {
+ 'index': [index_foo_goes_here],
+ 'unique': True}}
+ where 'index' is the index to add and all other options are
+ arguments for collection.create_index.
+
+ Returns:
+ A list of indexes added in form ('collection', 'index_name')
+ """
+ indexes_added = []
+
+ for collection_name, indexes in active_indexes.iteritems():
+ collection = database[collection_name]
+ collection_indexes = collection.index_information().keys()
+
+ for index_name, index_data in indexes.iteritems():
+            if index_name not in collection_indexes:
+ # Get a copy actually so we don't modify the actual
+ # structure
+ index_data = copy.copy(index_data)
+ index = index_data.pop('index')
+ collection.create_index(
+ index, name=index_name, **index_data)
+
+ indexes_added.append((collection_name, index_name))
+
+ return indexes_added
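+
+# Illustrative usage (a sketch; `db` stands for any pymongo/mongokit
+# database instance):
+#
+#     added = add_new_indexes(db)
+#     # e.g. [('media_entries', 'uploader_slug_unique'), ...]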
+
+
+def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES):
+ """
+ Remove any deprecated indexes from the database.
+
+ Args:
+ - database: pymongo or mongokit database instance.
+ - deprecated_indexes: the indexes to deprecate in the pattern of:
+ {'collection_name': {
+ 'identifier': {
+ 'index': [index_foo_goes_here],
+ 'unique': True}}
+
+ (... although we really only need the 'identifier' here, as the
+ rest of the information isn't used in this case. But it's kept
+ around so we can remember what it was)
+
+ Returns:
+ A list of indexes removed in form ('collection', 'index_name')
+ """
+ indexes_removed = []
+
+ for collection_name, indexes in deprecated_indexes.iteritems():
+ collection = database[collection_name]
+ collection_indexes = collection.index_information().keys()
+
+ for index_name, index_data in indexes.iteritems():
+ if index_name in collection_indexes:
+ collection.drop_index(index_name)
+
+ indexes_removed.append((collection_name, index_name))
+
+ return indexes_removed
+
+
+#################
+# Migration tools
+#################
+
+class MissingCurrentMigration(Exception):
+    pass
+
+
+# The default migration registry...
+#
+# Don't set this yourself!  RegisterMigration will automatically fill
+# this with stuff via decorating functions in migrations.py
+MIGRATIONS = {}
+
+
+class RegisterMigration(object):
+ """
+ Tool for registering migrations
+
+ Call like:
+
+ @RegisterMigration(33)
+ def update_dwarves(database):
+ [...]
+
+ This will register your migration with the default migration
+ registry. Alternately, to specify a very specific
+ migration_registry, you can pass in that as the second argument.
+
+    Note: your migration number should NEVER be 0 or negative;
+    0 is the default "no migrations" state!
+ """
+ def __init__(self, migration_number, migration_registry=MIGRATIONS):
+ assert migration_number > 0, "Migration number must be > 0!"
+ assert migration_number not in migration_registry, \
+ "Duplicate migration numbers detected! That's not allowed!"
+
+ self.migration_number = migration_number
+ self.migration_registry = migration_registry
+
+ def __call__(self, migration):
+ self.migration_registry[self.migration_number] = migration
+ return migration
+
+
+class MigrationManager(object):
+ """
+ Migration handling tool.
+
+ Takes information about a database, lets you update the database
+ to the latest migrations, etc.
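+
+    A typical sequence (an illustrative sketch using only methods defined
+    on this class):
+
+        manager = MigrationManager(db)
+        manager.install_migration_version_if_missing()
+        if not manager.database_at_latest_migration():
+            manager.migrate_new()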
+ """
+ def __init__(self, database, migration_registry=MIGRATIONS):
+ """
+ Args:
+ - database: database we're going to migrate
+ - migration_registry: where we should find all migrations to
+ run
+ """
+ self.database = database
+ self.migration_registry = migration_registry
+ self._sorted_migrations = None
+
+ def _ensure_current_migration_record(self):
+ """
+ If there isn't a database[u'app_metadata'] mediagoblin entry
+ with the 'current_migration', throw an error.
+ """
+ if self.database_current_migration() is None:
+ raise MissingCurrentMigration(
+ "Tried to call function which requires "
+ "'current_migration' set in database")
+
+ @property
+ def sorted_migrations(self):
+ """
+ Sort migrations if necessary and store in self._sorted_migrations
+ """
+ if not self._sorted_migrations:
+ self._sorted_migrations = sorted(
+ self.migration_registry.items(),
+ # sort on the key... the migration number
+ key=lambda migration_tuple: migration_tuple[0])
+
+ return self._sorted_migrations
+
+ def latest_migration(self):
+ """
+ Return a migration number for the latest migration, or 0 if
+ there are no migrations.
+ """
+ if self.sorted_migrations:
+ return self.sorted_migrations[-1][0]
+ else:
+ # If no migrations have been set, we start at 0.
+ return 0
+
+ def set_current_migration(self, migration_number):
+ """
+ Set the migration in the database to migration_number
+ """
+ # Add the mediagoblin migration if necessary
+ self.database[u'app_metadata'].update(
+ {u'_id': u'mediagoblin'},
+ {u'$set': {u'current_migration': migration_number}},
+ upsert=True)
+
+ def install_migration_version_if_missing(self):
+ """
+ Sets the migration to the latest version if no migration
+ version at all is set.
+ """
+ mgoblin_metadata = self.database[u'app_metadata'].find_one(
+ {u'_id': u'mediagoblin'})
+ if not mgoblin_metadata:
+ latest_migration = self.latest_migration()
+ self.set_current_migration(latest_migration)
+
+ def database_current_migration(self):
+ """
+ Return the current migration in the database.
+ """
+ mgoblin_metadata = self.database[u'app_metadata'].find_one(
+ {u'_id': u'mediagoblin'})
+ if not mgoblin_metadata:
+ return None
+ else:
+ return mgoblin_metadata[u'current_migration']
+
+ def database_at_latest_migration(self):
+ """
+ See if the database is at the latest migration.
+ Returns a boolean.
+ """
+ current_migration = self.database_current_migration()
+ return current_migration == self.latest_migration()
+
+ def migrations_to_run(self):
+ """
+ Get a list of migrations to run still, if any.
+
+        Note that this raises MissingCurrentMigration if the database
+        doesn't have a 'current_migration' recorded yet; run
+        install_migration_version_if_missing() first in that case.
+ """
+ self._ensure_current_migration_record()
+
+ db_current_migration = self.database_current_migration()
+
+ return [
+ (migration_number, migration_func)
+ for migration_number, migration_func in self.sorted_migrations
+ if migration_number > db_current_migration]
+
+ def migrate_new(self, pre_callback=None, post_callback=None):
+ """
+ Run all migrations.
+
+        Includes two optional args:
+        - pre_callback: if provided, called before each migration runs.
+          Takes (migration_number, migration_func) as arguments.
+        - post_callback: if provided, called after each migration runs.
+          Takes (migration_number, migration_func) as arguments.
+ """
+ # If we aren't set to any version number, presume we're at the
+ # latest (which means we'll do nothing here...)
+ self.install_migration_version_if_missing()
+
+ for migration_number, migration_func in self.migrations_to_run():
+ if pre_callback:
+ pre_callback(migration_number, migration_func)
+ migration_func(self.database)
+ self.set_current_migration(migration_number)
+ if post_callback:
+ post_callback(migration_number, migration_func)
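+
+
+# An illustrative sketch of driving a migration run with a callback
+# (the reporting callback below is hypothetical, not part of this module):
+#
+#     def report(migration_number, migration_func):
+#         print "Running migration %d ..." % migration_number
+#
+#     manager = MigrationManager(db)
+#     manager.migrate_new(pre_callback=report)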