Diffstat (limited to 'mediagoblin/db')
-rw-r--r-- | mediagoblin/db/indexes.py    |  10
-rw-r--r-- | mediagoblin/db/migrations.py |  79
-rw-r--r-- | mediagoblin/db/models.py     | 153
-rw-r--r-- | mediagoblin/db/open.py       |  36
-rw-r--r-- | mediagoblin/db/util.py       | 196
5 files changed, 400 insertions, 74 deletions
diff --git a/mediagoblin/db/indexes.py b/mediagoblin/db/indexes.py index d0e11311..30d43c98 100644 --- a/mediagoblin/db/indexes.py +++ b/mediagoblin/db/indexes.py @@ -45,11 +45,13 @@ REQUIRED READING: To remove deprecated indexes ---------------------------- -Removing deprecated indexes is easier, just do: +Removing deprecated indexes is the same, just move the index into the +deprecated indexes mapping. -INACTIVE_INDEXES = { - 'collection_name': [ - 'deprecated_index_identifier1', 'deprecated_index_identifier2']} +DEPRECATED_INDEXES = { + 'collection_name': { + 'deprecated_index_identifier1': { + 'index': [index_foo_goes_here]}} ... etc. diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py index 712f8ab4..6a8ebcf9 100644 --- a/mediagoblin/db/migrations.py +++ b/mediagoblin/db/migrations.py @@ -14,56 +14,41 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +from mediagoblin.db.util import RegisterMigration from mediagoblin.util import cleaned_markdown_conversion -from mongokit import DocumentMigration +# Please see mediagoblin/tests/test_migrations.py for some examples of +# basic migrations. -class MediaEntryMigration(DocumentMigration): - def allmigration01_uploader_to_reference(self): - """ - Old MediaEntry['uploader'] accidentally embedded the User instead - of referencing it. Fix that! - """ - # uploader is an associative array - self.target = {'uploader': {'$type': 3}} - if not self.status: - for doc in self.collection.find(self.target): - self.update = { - '$set': { - 'uploader': doc['uploader']['_id']}} - self.collection.update( - self.target, self.update, multi=True, safe=True) - def allmigration02_add_description_html(self): - """ - Now that we can have rich descriptions via Markdown, we should - update all existing entries to record the rich description versions. - """ - self.target = {'description_html': {'$exists': False}, - 'description': {'$exists': True}} +@RegisterMigration(1) +def user_add_bio_html(database): + """ + Users now have richtext bios via Markdown, reflect appropriately. + """ + collection = database['users'] - if not self.status: - for doc in self.collection.find(self.target): - self.update = { - '$set': { - 'description_html': cleaned_markdown_conversion( - doc['description'])}} - -class UserMigration(DocumentMigration): - def allmigration01_add_bio_and_url_profile(self): - """ - User can elaborate profile with home page and biography - """ - self.target = {'url': {'$exists': False}, - 'bio': {'$exists': False}} - if not self.status: - for doc in self.collection.find(self.target): - self.update = { - '$set': {'url': '', - 'bio': ''}} - self.collection.update( - self.target, self.update, multi=True, safe=True) - - -MIGRATE_CLASSES = ['MediaEntry', 'User'] + target = collection.find( + {'bio_html': {'$exists': False}}) + + for document in target: + document['bio_html'] = cleaned_markdown_conversion( + document['bio']) + collection.save(document) + + +@RegisterMigration(2) +def mediaentry_mediafiles_main_to_original(database): + """ + Rename "main" media file to "original". 
+ """ + collection = database['media_entries'] + target = collection.find( + {'media_files.main': {'$exists': True}}) + + for document in target: + original = document['media_files'].pop('main') + document['media_files']['original'] = original + + collection.save(document) diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py index 8fcbb208..4ef2d928 100644 --- a/mediagoblin/db/models.py +++ b/mediagoblin/db/models.py @@ -16,13 +16,16 @@ import datetime, uuid -from mongokit import Document, Set +from mongokit import Document from mediagoblin import util from mediagoblin.auth import lib as auth_lib from mediagoblin import mg_globals from mediagoblin.db import migrations from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId +from mediagoblin.util import Pagination +from mediagoblin.util import DISPLAY_IMAGE_FETCHING_ORDER + ################### # Custom validators @@ -34,6 +37,32 @@ from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId class User(Document): + """ + A user of MediaGoblin. + + Structure: + - username: The username of this user, should be unique to this instance. + - email: Email address of this user + - created: When the user was created + - plugin_data: a mapping of extra plugin information for this User. + Nothing uses this yet as we don't have plugins, but someday we + might... :) + - pw_hash: Hashed version of user's password. + - email_verified: Whether or not the user has verified their email or not. + Most parts of the site are disabled for users who haven't yet. + - status: whether or not the user is active, etc. Currently only has two + values, 'needs_email_verification' or 'active'. (In the future, maybe + we'll change this to a boolean with a key of 'active' and have a + separate field for a reason the user's been disabled if that's + appropriate... email_verified is already separate, after all.) + - verification_key: If the user is awaiting email verification, the user + will have to provide this key (which will be encoded in the presented + URL) in order to confirm their email as active. + - is_admin: Whether or not this user is an administrator or not. + - url: this user's personal webpage/website, if appropriate. + - bio: biography of this user (plaintext, in markdown) + - bio_html: biography of the user converted to proper HTML. + """ __collection__ = 'users' structure = { @@ -47,7 +76,8 @@ class User(Document): 'verification_key': unicode, 'is_admin': bool, 'url' : unicode, - 'bio' : unicode + 'bio' : unicode, # May contain markdown + 'bio_html': unicode, # May contain plaintext, or HTML } required_fields = ['username', 'created', 'pw_hash', 'email'] @@ -58,8 +88,6 @@ class User(Document): 'status': u'needs_email_verification', 'verification_key': lambda: unicode(uuid.uuid4()), 'is_admin': False} - - migration_handler = migrations.UserMigration def check_login(self, password): """ @@ -70,6 +98,80 @@ class User(Document): class MediaEntry(Document): + """ + Record of a piece of media. + + Structure: + - uploader: A reference to a User who uploaded this. + + - title: Title of this work + + - slug: A normalized "slug" which can be used as part of a URL to retrieve + this work, such as 'my-works-name-in-slug-form' may be viewable by + 'http://mg.example.org/u/username/m/my-works-name-in-slug-form/' + Note that since URLs are constructed this way, slugs must be unique + per-uploader. (An index is provided to enforce that but code should be + written on the python side to ensure this as well.) 
+ + - created: Date and time of when this piece of work was uploaded. + + - description: Uploader-set description of this work. This can be marked + up with MarkDown for slight fanciness (links, boldness, italics, + paragraphs...) + + - description_html: Rendered version of the description, run through + Markdown and cleaned with our cleaning tool. + + - media_type: What type of media is this? Currently we only support + 'image' ;) + + - media_data: Extra information that's media-format-dependent. + For example, images might contain some EXIF data that's not appropriate + to other formats. You might store it like: + + mediaentry['media_data']['exif'] = { + 'manufacturer': 'CASIO', + 'model': 'QV-4000', + 'exposure_time': .659} + + Alternately for video you might store: + + # play length in seconds + mediaentry['media_data']['play_length'] = 340 + + ... so what's appropriate here really depends on the media type. + + - plugin_data: a mapping of extra plugin information for this User. + Nothing uses this yet as we don't have plugins, but someday we + might... :) + + - tags: A list of tags. Each tag is stored as a dictionary that has a key + for the actual name and the normalized name-as-slug, so ultimately this + looks like: + [{'name': 'Gully Gardens', + 'slug': 'gully-gardens'}, + {'name': 'Castle Adventure Time?!", + 'slug': 'castle-adventure-time'}] + + - state: What's the state of this file? Active, inactive, disabled, etc... + But really for now there are only two states: + "unprocessed": uploaded but needs to go through processing for display + "processed": processed and able to be displayed + + - queued_media_file: storage interface style filepath describing a file + queued for processing. This is stored in the mg_globals.queue_store + storage system. + + - media_files: Files relevant to this that have actually been processed + and are available for various types of display. Stored like: + {'thumb': ['dir1', 'dir2', 'pic.png'} + + - attachment_files: A list of "attachment" files, ones that aren't + critical to this piece of media but may be usefully relevant to people + viewing the work. (currently unused.) + + - thumbnail_file: Deprecated... we should remove this ;) + """ __collection__ = 'media_entries' structure = { @@ -106,12 +208,28 @@ class MediaEntry(Document): 'created': datetime.datetime.utcnow, 'state': u'unprocessed'} - migration_handler = migrations.MediaEntryMigration - def get_comments(self): return self.db.MediaComment.find({ 'media_entry': self['_id']}).sort('created', DESCENDING) + def get_display_media(self, media_map, fetch_order=DISPLAY_IMAGE_FETCHING_ORDER): + """ + Find the best media for display. 
+ + Args: + - media_map: a dict like + {u'image_size': [u'dir1', u'dir2', u'image.jpg']} + - fetch_order: the order we should try fetching images in + + Returns: + (media_size, media_path) + """ + media_sizes = media_map.keys() + + for media_size in DISPLAY_IMAGE_FETCHING_ORDER: + if media_size in media_sizes: + return media_map[media_size] + def main_mediafile(self): pass @@ -120,7 +238,7 @@ class MediaEntry(Document): duplicate = mg_globals.database.media_entries.find_one( {'slug': self['slug']}) - + if duplicate: self['slug'] = "%s-%s" % (self['_id'], self['slug']) @@ -142,12 +260,12 @@ class MediaEntry(Document): 'mediagoblin.user_pages.media_home', user=uploader['username'], media=unicode(self['_id'])) - + def url_to_prev(self, urlgen): """ Provide a url to the previous entry from this user, if there is one """ - cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']}, + cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']}, 'uploader': self['uploader'], 'state': 'processed'}).sort( '_id', ASCENDING).limit(1) @@ -155,12 +273,12 @@ class MediaEntry(Document): return urlgen('mediagoblin.user_pages.media_home', user=self.uploader()['username'], media=unicode(cursor[0]['slug'])) - + def url_to_next(self, urlgen): """ Provide a url to the next entry from this user, if there is one """ - cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']}, + cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']}, 'uploader': self['uploader'], 'state': 'processed'}).sort( '_id', DESCENDING).limit(1) @@ -175,6 +293,18 @@ class MediaEntry(Document): class MediaComment(Document): + """ + A comment on a MediaEntry. + + Structure: + - media_entry: The media entry this comment is attached to + - author: user who posted this comment + - created: when the comment was created + - content: plaintext (but markdown'able) version of the comment's content. + - content_html: the actual html-rendered version of the comment displayed. + Run through Markdown and the HTML cleaner. + """ + __collection__ = 'media_comments' structure = { @@ -196,6 +326,7 @@ class MediaComment(Document): def author(self): return self.db.User.find_one({'_id': self['author']}) + REGISTER_MODELS = [ MediaEntry, User, diff --git a/mediagoblin/db/open.py b/mediagoblin/db/open.py index cae33394..e5fde6f9 100644 --- a/mediagoblin/db/open.py +++ b/mediagoblin/db/open.py @@ -14,24 +14,42 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +import pymongo import mongokit from paste.deploy.converters import asint from mediagoblin.db import models -def connect_database_from_config(app_config): - """Connect to the main database, take config from app_config""" +def connect_database_from_config(app_config, use_pymongo=False): + """ + Connect to the main database, take config from app_config + + Optionally use pymongo instead of mongokit for the connection. 
+ """ port = app_config.get('db_port') if port: port = asint(port) - connection = mongokit.Connection( - app_config.get('db_host'), port) + + if use_pymongo: + connection = pymongo.Connection( + app_config.get('db_host'), port) + else: + connection = mongokit.Connection( + app_config.get('db_host'), port) return connection -def setup_connection_and_db_from_config(app_config): - connection = connect_database_from_config(app_config) - database_path = app_config.get('db_name', 'mediagoblin') + +def setup_connection_and_db_from_config(app_config, use_pymongo=False): + """ + Setup connection and database from config. + + Optionally use pymongo instead of mongokit. + """ + connection = connect_database_from_config(app_config, use_pymongo) + database_path = app_config['db_name'] db = connection[database_path] - models.register_models(connection) - # Could configure indexes here on db + + if not use_pymongo: + models.register_models(connection) + return (connection, db) diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py index 70c37945..0f3220d2 100644 --- a/mediagoblin/db/util.py +++ b/mediagoblin/db/util.py @@ -37,6 +37,11 @@ from mongokit import ObjectId from mediagoblin.db.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES +################ +# Indexing tools +################ + + def add_new_indexes(database, active_indexes=ACTIVE_INDEXES): """ Add any new indexes to the database. @@ -81,21 +86,206 @@ def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES): Args: - database: pymongo or mongokit database instance. - deprecated_indexes: the indexes to deprecate in the pattern of: - {'collection': ['index_identifier1', 'index_identifier2']} + {'collection_name': { + 'identifier': { + 'index': [index_foo_goes_here], + 'unique': True}} + + (... although we really only need the 'identifier' here, as the + rest of the information isn't used in this case. But it's kept + around so we can remember what it was) Returns: A list of indexes removed in form ('collection', 'index_name') """ indexes_removed = [] - for collection_name, index_names in deprecated_indexes.iteritems(): + for collection_name, indexes in deprecated_indexes.iteritems(): collection = database[collection_name] collection_indexes = collection.index_information().keys() - for index_name in index_names: + for index_name, index_data in indexes.iteritems(): if index_name in collection_indexes: collection.drop_index(index_name) indexes_removed.append((collection_name, index_name)) return indexes_removed + + +################# +# Migration tools +################# + +# The default migration registry... +# +# Don't set this yourself! RegisterMigration will automatically fill +# this with stuff via decorating methods in migrations.py + +class MissingCurrentMigration(Exception): pass + + +MIGRATIONS = {} + + +class RegisterMigration(object): + """ + Tool for registering migrations + + Call like: + + @RegisterMigration(33) + def update_dwarves(database): + [...] + + This will register your migration with the default migration + registry. Alternately, to specify a very specific + migration_registry, you can pass in that as the second argument. + + Note, the number of your migration should NEVER be 0 or less than + 0. 0 is the default "no migrations" state! + """ + def __init__(self, migration_number, migration_registry=MIGRATIONS): + assert migration_number > 0, "Migration number must be > 0!" + assert not migration_registry.has_key(migration_number), \ + "Duplicate migration numbers detected! That's not allowed!" 
+ + self.migration_number = migration_number + self.migration_registry = migration_registry + + def __call__(self, migration): + self.migration_registry[self.migration_number] = migration + return migration + + +class MigrationManager(object): + """ + Migration handling tool. + + Takes information about a database, lets you update the database + to the latest migrations, etc. + """ + def __init__(self, database, migration_registry=MIGRATIONS): + """ + Args: + - database: database we're going to migrate + - migration_registry: where we should find all migrations to + run + """ + self.database = database + self.migration_registry = migration_registry + self._sorted_migrations = None + + def _ensure_current_migration_record(self): + """ + If there isn't a database[u'app_metadata'] mediagoblin entry + with the 'current_migration', throw an error. + """ + if self.database_current_migration() is None: + raise MissingCurrentMigration( + "Tried to call function which requires " + "'current_migration' set in database") + + @property + def sorted_migrations(self): + """ + Sort migrations if necessary and store in self._sorted_migrations + """ + if not self._sorted_migrations: + self._sorted_migrations = sorted( + self.migration_registry.items(), + # sort on the key... the migration number + key=lambda migration_tuple: migration_tuple[0]) + + return self._sorted_migrations + + def latest_migration(self): + """ + Return a migration number for the latest migration, or 0 if + there are no migrations. + """ + if self.sorted_migrations: + return self.sorted_migrations[-1][0] + else: + # If no migrations have been set, we start at 0. + return 0 + + def set_current_migration(self, migration_number): + """ + Set the migration in the database to migration_number + """ + # Add the mediagoblin migration if necessary + self.database[u'app_metadata'].update( + {u'_id': u'mediagoblin'}, + {u'$set': {u'current_migration': migration_number}}, + upsert=True) + + def install_migration_version_if_missing(self): + """ + Sets the migration to the latest version if no migration + version at all is set. + """ + mgoblin_metadata = self.database[u'app_metadata'].find_one( + {u'_id': u'mediagoblin'}) + if not mgoblin_metadata: + latest_migration = self.latest_migration() + self.set_current_migration(latest_migration) + + def database_current_migration(self): + """ + Return the current migration in the database. + """ + mgoblin_metadata = self.database[u'app_metadata'].find_one( + {u'_id': u'mediagoblin'}) + if not mgoblin_metadata: + return None + else: + return mgoblin_metadata[u'current_migration'] + + def database_at_latest_migration(self): + """ + See if the database is at the latest migration. + Returns a boolean. + """ + current_migration = self.database_current_migration() + return current_migration == self.latest_migration() + + def migrations_to_run(self): + """ + Get a list of migrations to run still, if any. + + Note that calling this will set your migration version to the + latest version if it isn't installed to anything yet! + """ + self._ensure_current_migration_record() + + db_current_migration = self.database_current_migration() + + return [ + (migration_number, migration_func) + for migration_number, migration_func in self.sorted_migrations + if migration_number > db_current_migration] + + def migrate_new(self, pre_callback=None, post_callback=None): + """ + Run all migrations. + + Includes two optional args: + - pre_callback: if called, this is a callback on something to + run pre-migration. 
Takes (migration_number, migration_func) + as arguments + - post_callback: if called, this is a callback on something to + run post-migration. Takes (migration_number, migration_func) + as arguments + """ + # If we aren't set to any version number, presume we're at the + # latest (which means we'll do nothing here...) + self.install_migration_version_if_missing() + + for migration_number, migration_func in self.migrations_to_run(): + if pre_callback: + pre_callback(migration_number, migration_func) + migration_func(self.database) + self.set_current_migration(migration_number) + if post_callback: + post_callback(migration_number, migration_func)
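
The migrations.py rewrite above replaces mongokit's DocumentMigration classes with plain functions keyed by a monotonically increasing number. Following that pattern, a later migration would look roughly like the sketch below; the migration number, field name and default value are hypothetical, not part of this change:

    from mediagoblin.db.util import RegisterMigration


    @RegisterMigration(3)
    def user_add_license_preference(database):
        """
        Hypothetical migration: give existing users an empty
        'license_preference' field.
        """
        collection = database['users']

        # Only touch documents missing the field, so re-running the
        # migration is harmless.
        target = collection.find(
            {'license_preference': {'$exists': False}})

        for document in target:
            document['license_preference'] = u''
            collection.save(document)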
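
Applying registered migrations is the job of MigrationManager in util.py, and the new use_pymongo flag in open.py exists so migrations can operate on raw pymongo documents instead of going through mongokit models. A minimal driver might look like this; the config values and the logging callback are illustrative only:

    from mediagoblin.db.open import setup_connection_and_db_from_config
    from mediagoblin.db.util import MigrationManager
    # Importing migrations fills the MIGRATIONS registry via the
    # RegisterMigration decorators.
    from mediagoblin.db import migrations

    # Normally this mapping comes from the parsed mediagoblin config.
    app_config = {
        'db_host': 'localhost',
        'db_port': 27017,
        'db_name': 'mediagoblin'}

    connection, db = setup_connection_and_db_from_config(
        app_config, use_pymongo=True)

    manager = MigrationManager(db)


    def print_migration(migration_number, migration_func):
        # Illustrative pre-migration callback.
        print 'Running migration %d (%s)' % (
            migration_number, migration_func.__name__)


    # A fresh database simply gets stamped at the latest migration
    # number; an older one runs everything newer than its recorded
    # 'current_migration', updating that record after each step.
    manager.migrate_new(pre_callback=print_migration)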
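
The indexes.py change means a retired index keeps its full definition and simply moves from the active mapping to the deprecated one; add_new_indexes() and remove_deprecated_indexes() in util.py then bring the database in line with whatever the mappings say. Sketched with made-up identifiers and index specs, reusing the pymongo db handle from the previous example:

    from pymongo import ASCENDING, DESCENDING

    from mediagoblin.db.util import add_new_indexes, remove_deprecated_indexes

    # Indexes still in service, in the {'collection': {'identifier': spec}}
    # form described in indexes.py.
    ACTIVE_INDEXES = {
        'media_entries': {
            'uploader_slug_unique': {
                'index': [('uploader', ASCENDING),
                          ('slug', ASCENDING)],
                'unique': True}}}

    # A retired index moves here unchanged; only its identifier is needed
    # to drop it, the rest is kept for reference.
    DEPRECATED_INDEXES = {
        'media_entries': {
            'created_plain': {
                'index': [('created', DESCENDING)]}}}

    # Typically run at application startup.
    indexes_added = add_new_indexes(db, ACTIVE_INDEXES)
    indexes_removed = remove_deprecated_indexes(db, DEPRECATED_INDEXES)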