about summary refs log tree commit diff stats
path: root/mediagoblin/db
diff options
context:
space:
mode:
Diffstat (limited to 'mediagoblin/db')
-rw-r--r-- mediagoblin/db/indexes.py 10
-rw-r--r-- mediagoblin/db/migrations.py 79
-rw-r--r-- mediagoblin/db/models.py 153
-rw-r--r-- mediagoblin/db/open.py 36
-rw-r--r-- mediagoblin/db/util.py 196
5 files changed, 400 insertions, 74 deletions
diff --git a/mediagoblin/db/indexes.py b/mediagoblin/db/indexes.py
index d0e11311..30d43c98 100644
--- a/mediagoblin/db/indexes.py
+++ b/mediagoblin/db/indexes.py
@@ -45,11 +45,13 @@ REQUIRED READING:
To remove deprecated indexes
----------------------------
-Removing deprecated indexes is easier, just do:
+Removing deprecated indexes is the same, just move the index into the
+deprecated indexes mapping.
-INACTIVE_INDEXES = {
- 'collection_name': [
- 'deprecated_index_identifier1', 'deprecated_index_identifier2']}
+DEPRECATED_INDEXES = {
+ 'collection_name': {
+ 'deprecated_index_identifier1': {
+ 'index': [index_foo_goes_here]}}}
... etc.
diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py
index 712f8ab4..6a8ebcf9 100644
--- a/mediagoblin/db/migrations.py
+++ b/mediagoblin/db/migrations.py
@@ -14,56 +14,41 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+from mediagoblin.db.util import RegisterMigration
from mediagoblin.util import cleaned_markdown_conversion
-from mongokit import DocumentMigration
+# Please see mediagoblin/tests/test_migrations.py for some examples of
+# basic migrations.
-class MediaEntryMigration(DocumentMigration):
- def allmigration01_uploader_to_reference(self):
- """
- Old MediaEntry['uploader'] accidentally embedded the User instead
- of referencing it. Fix that!
- """
- # uploader is an associative array
- self.target = {'uploader': {'$type': 3}}
- if not self.status:
- for doc in self.collection.find(self.target):
- self.update = {
- '$set': {
- 'uploader': doc['uploader']['_id']}}
- self.collection.update(
- self.target, self.update, multi=True, safe=True)
- def allmigration02_add_description_html(self):
- """
- Now that we can have rich descriptions via Markdown, we should
- update all existing entries to record the rich description versions.
- """
- self.target = {'description_html': {'$exists': False},
- 'description': {'$exists': True}}
+@RegisterMigration(1)
+def user_add_bio_html(database):
+ """
+ Users now have richtext bios via Markdown, reflect appropriately.
+ """
+ collection = database['users']
- if not self.status:
- for doc in self.collection.find(self.target):
- self.update = {
- '$set': {
- 'description_html': cleaned_markdown_conversion(
- doc['description'])}}
-
-class UserMigration(DocumentMigration):
- def allmigration01_add_bio_and_url_profile(self):
- """
- User can elaborate profile with home page and biography
- """
- self.target = {'url': {'$exists': False},
- 'bio': {'$exists': False}}
- if not self.status:
- for doc in self.collection.find(self.target):
- self.update = {
- '$set': {'url': '',
- 'bio': ''}}
- self.collection.update(
- self.target, self.update, multi=True, safe=True)
-
-
-MIGRATE_CLASSES = ['MediaEntry', 'User']
+ target = collection.find(
+ {'bio_html': {'$exists': False}})
+
+ for document in target:
+ document['bio_html'] = cleaned_markdown_conversion(
+ document['bio'])
+ collection.save(document)
+
+
+@RegisterMigration(2)
+def mediaentry_mediafiles_main_to_original(database):
+ """
+ Rename "main" media file to "original".
+ """
+ collection = database['media_entries']
+ target = collection.find(
+ {'media_files.main': {'$exists': True}})
+
+ for document in target:
+ original = document['media_files'].pop('main')
+ document['media_files']['original'] = original
+
+ collection.save(document)
diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py
index 8fcbb208..4ef2d928 100644
--- a/mediagoblin/db/models.py
+++ b/mediagoblin/db/models.py
@@ -16,13 +16,16 @@
import datetime, uuid
-from mongokit import Document, Set
+from mongokit import Document
from mediagoblin import util
from mediagoblin.auth import lib as auth_lib
from mediagoblin import mg_globals
from mediagoblin.db import migrations
from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
+from mediagoblin.util import Pagination
+from mediagoblin.util import DISPLAY_IMAGE_FETCHING_ORDER
+
###################
# Custom validators
@@ -34,6 +37,32 @@ from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
class User(Document):
+ """
+ A user of MediaGoblin.
+
+ Structure:
+ - username: The username of this user, should be unique to this instance.
+ - email: Email address of this user
+ - created: When the user was created
+ - plugin_data: a mapping of extra plugin information for this User.
+ Nothing uses this yet as we don't have plugins, but someday we
+ might... :)
+ - pw_hash: Hashed version of user's password.
+ - email_verified: Whether or not the user has verified their email.
+ Most parts of the site are disabled for users who haven't yet.
+ - status: whether or not the user is active, etc. Currently only has two
+ values, 'needs_email_verification' or 'active'. (In the future, maybe
+ we'll change this to a boolean with a key of 'active' and have a
+ separate field for a reason the user's been disabled if that's
+ appropriate... email_verified is already separate, after all.)
+ - verification_key: If the user is awaiting email verification, the user
+ will have to provide this key (which will be encoded in the presented
+ URL) in order to confirm their email as active.
+ - is_admin: Whether or not this user is an administrator.
+ - url: this user's personal webpage/website, if appropriate.
+ - bio: biography of this user (plaintext, in markdown)
+ - bio_html: biography of the user converted to proper HTML.
+ """
__collection__ = 'users'
structure = {
@@ -47,7 +76,8 @@ class User(Document):
'verification_key': unicode,
'is_admin': bool,
'url' : unicode,
- 'bio' : unicode
+ 'bio' : unicode, # May contain markdown
+ 'bio_html': unicode, # May contain plaintext, or HTML
}
required_fields = ['username', 'created', 'pw_hash', 'email']
@@ -58,8 +88,6 @@ class User(Document):
'status': u'needs_email_verification',
'verification_key': lambda: unicode(uuid.uuid4()),
'is_admin': False}
-
- migration_handler = migrations.UserMigration
def check_login(self, password):
"""
@@ -70,6 +98,80 @@ class User(Document):
class MediaEntry(Document):
+ """
+ Record of a piece of media.
+
+ Structure:
+ - uploader: A reference to a User who uploaded this.
+
+ - title: Title of this work
+
+ - slug: A normalized "slug" which can be used as part of a URL to retrieve
+ this work; e.g. 'my-works-name-in-slug-form' may be viewable at
+ 'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
+ Note that since URLs are constructed this way, slugs must be unique
+ per-uploader. (An index is provided to enforce that but code should be
+ written on the python side to ensure this as well.)
+
+ - created: Date and time of when this piece of work was uploaded.
+
+ - description: Uploader-set description of this work. This can be marked
+ up with MarkDown for slight fanciness (links, boldness, italics,
+ paragraphs...)
+
+ - description_html: Rendered version of the description, run through
+ Markdown and cleaned with our cleaning tool.
+
+ - media_type: What type of media is this? Currently we only support
+ 'image' ;)
+
+ - media_data: Extra information that's media-format-dependent.
+ For example, images might contain some EXIF data that's not appropriate
+ to other formats. You might store it like:
+
+ mediaentry['media_data']['exif'] = {
+ 'manufacturer': 'CASIO',
+ 'model': 'QV-4000',
+ 'exposure_time': .659}
+
+ Alternately for video you might store:
+
+ # play length in seconds
+ mediaentry['media_data']['play_length'] = 340
+
+ ... so what's appropriate here really depends on the media type.
+
+ - plugin_data: a mapping of extra plugin information for this MediaEntry.
+ Nothing uses this yet as we don't have plugins, but someday we
+ might... :)
+
+ - tags: A list of tags. Each tag is stored as a dictionary that has a key
+ for the actual name and the normalized name-as-slug, so ultimately this
+ looks like:
+ [{'name': 'Gully Gardens',
+ 'slug': 'gully-gardens'},
+ {'name': 'Castle Adventure Time?!',
+ 'slug': 'castle-adventure-time'}]
+
+ - state: What's the state of this file? Active, inactive, disabled, etc...
+ But really for now there are only two states:
+ "unprocessed": uploaded but needs to go through processing for display
+ "processed": processed and able to be displayed
+
+ - queued_media_file: storage interface style filepath describing a file
+ queued for processing. This is stored in the mg_globals.queue_store
+ storage system.
+
+ - media_files: Files relevant to this that have actually been processed
+ and are available for various types of display. Stored like:
+ {'thumb': ['dir1', 'dir2', 'pic.png']}
+
+ - attachment_files: A list of "attachment" files, ones that aren't
+ critical to this piece of media but may be usefully relevant to people
+ viewing the work. (currently unused.)
+
+ - thumbnail_file: Deprecated... we should remove this ;)
+ """
__collection__ = 'media_entries'
structure = {
@@ -106,12 +208,28 @@ class MediaEntry(Document):
'created': datetime.datetime.utcnow,
'state': u'unprocessed'}
- migration_handler = migrations.MediaEntryMigration
-
def get_comments(self):
return self.db.MediaComment.find({
'media_entry': self['_id']}).sort('created', DESCENDING)
+ def get_display_media(self, media_map, fetch_order=DISPLAY_IMAGE_FETCHING_ORDER):
+ """
+ Find the best media for display.
+
+ Args:
+ - media_map: a dict like
+ {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
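+ - fetch_order: the order we should try fetching images in
+ (NOTE: currently ignored -- the loop below always iterates
+ DISPLAY_IMAGE_FETCHING_ORDER)
+
+ Returns:
+ The media path stored in media_map for the first size in the
+ fetching order that is present, or None if no size matches.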
+ """
+ media_sizes = media_map.keys()
+
+ for media_size in DISPLAY_IMAGE_FETCHING_ORDER:
+ if media_size in media_sizes:
+ return media_map[media_size]
+
def main_mediafile(self):
pass
@@ -120,7 +238,7 @@ class MediaEntry(Document):
duplicate = mg_globals.database.media_entries.find_one(
{'slug': self['slug']})
-
+
if duplicate:
self['slug'] = "%s-%s" % (self['_id'], self['slug'])
@@ -142,12 +260,12 @@ class MediaEntry(Document):
'mediagoblin.user_pages.media_home',
user=uploader['username'],
media=unicode(self['_id']))
-
+
def url_to_prev(self, urlgen):
"""
Provide a url to the previous entry from this user, if there is one
"""
- cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']},
+ cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']},
'uploader': self['uploader'],
'state': 'processed'}).sort(
'_id', ASCENDING).limit(1)
@@ -155,12 +273,12 @@ class MediaEntry(Document):
return urlgen('mediagoblin.user_pages.media_home',
user=self.uploader()['username'],
media=unicode(cursor[0]['slug']))
-
+
def url_to_next(self, urlgen):
"""
Provide a url to the next entry from this user, if there is one
"""
- cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']},
+ cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']},
'uploader': self['uploader'],
'state': 'processed'}).sort(
'_id', DESCENDING).limit(1)
@@ -175,6 +293,18 @@ class MediaEntry(Document):
class MediaComment(Document):
+ """
+ A comment on a MediaEntry.
+
+ Structure:
+ - media_entry: The media entry this comment is attached to
+ - author: user who posted this comment
+ - created: when the comment was created
+ - content: plaintext (but markdown'able) version of the comment's content.
+ - content_html: the actual html-rendered version of the comment displayed.
+ Run through Markdown and the HTML cleaner.
+ """
+
__collection__ = 'media_comments'
structure = {
@@ -196,6 +326,7 @@ class MediaComment(Document):
def author(self):
return self.db.User.find_one({'_id': self['author']})
+
REGISTER_MODELS = [
MediaEntry,
User,
diff --git a/mediagoblin/db/open.py b/mediagoblin/db/open.py
index cae33394..e5fde6f9 100644
--- a/mediagoblin/db/open.py
+++ b/mediagoblin/db/open.py
@@ -14,24 +14,42 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import pymongo
import mongokit
from paste.deploy.converters import asint
from mediagoblin.db import models
-def connect_database_from_config(app_config):
- """Connect to the main database, take config from app_config"""
+def connect_database_from_config(app_config, use_pymongo=False):
+ """
+ Connect to the main database, take config from app_config
+
+ Optionally use pymongo instead of mongokit for the connection.
+ """
port = app_config.get('db_port')
if port:
port = asint(port)
- connection = mongokit.Connection(
- app_config.get('db_host'), port)
+
+ if use_pymongo:
+ connection = pymongo.Connection(
+ app_config.get('db_host'), port)
+ else:
+ connection = mongokit.Connection(
+ app_config.get('db_host'), port)
return connection
-def setup_connection_and_db_from_config(app_config):
- connection = connect_database_from_config(app_config)
- database_path = app_config.get('db_name', 'mediagoblin')
+
+def setup_connection_and_db_from_config(app_config, use_pymongo=False):
+ """
+ Setup connection and database from config.
+
+ Optionally use pymongo instead of mongokit.
+ """
+ connection = connect_database_from_config(app_config, use_pymongo)
+ database_path = app_config['db_name']
db = connection[database_path]
- models.register_models(connection)
- # Could configure indexes here on db
+
+ if not use_pymongo:
+ models.register_models(connection)
+
return (connection, db)
diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py
index 70c37945..0f3220d2 100644
--- a/mediagoblin/db/util.py
+++ b/mediagoblin/db/util.py
@@ -37,6 +37,11 @@ from mongokit import ObjectId
from mediagoblin.db.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES
+################
+# Indexing tools
+################
+
+
def add_new_indexes(database, active_indexes=ACTIVE_INDEXES):
"""
Add any new indexes to the database.
@@ -81,21 +86,206 @@ def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES):
Args:
- database: pymongo or mongokit database instance.
- deprecated_indexes: the indexes to deprecate in the pattern of:
- {'collection': ['index_identifier1', 'index_identifier2']}
+ {'collection_name': {
+ 'identifier': {
+ 'index': [index_foo_goes_here],
+ 'unique': True}}}
+
+ (... although we really only need the 'identifier' here, as the
+ rest of the information isn't used in this case. But it's kept
+ around so we can remember what it was)
Returns:
A list of indexes removed in form ('collection', 'index_name')
"""
indexes_removed = []
- for collection_name, index_names in deprecated_indexes.iteritems():
+ for collection_name, indexes in deprecated_indexes.iteritems():
collection = database[collection_name]
collection_indexes = collection.index_information().keys()
- for index_name in index_names:
+ for index_name, index_data in indexes.iteritems():
if index_name in collection_indexes:
collection.drop_index(index_name)
indexes_removed.append((collection_name, index_name))
return indexes_removed
+
+
+#################
+# Migration tools
+#################
+
+# The default migration registry (MIGRATIONS, defined below)...
+#
+# Don't set this yourself! RegisterMigration will automatically fill
+# this with stuff via decorating functions in migrations.py
+
+class MissingCurrentMigration(Exception): pass
+
+
+MIGRATIONS = {}
+
+
+class RegisterMigration(object):
+ """
+ Tool for registering migrations
+
+ Call like:
+
+ @RegisterMigration(33)
+ def update_dwarves(database):
+ [...]
+
+ This will register your migration with the default migration
+ registry. Alternatively, to use a different migration_registry,
+ you can pass that in as the second argument.
+
+ Note, the number of your migration should NEVER be 0 or less than
+ 0. 0 is the default "no migrations" state!
+ """
+ def __init__(self, migration_number, migration_registry=MIGRATIONS):
+ assert migration_number > 0, "Migration number must be > 0!"
+ assert not migration_registry.has_key(migration_number), \
+ "Duplicate migration numbers detected! That's not allowed!"
+
+ self.migration_number = migration_number
+ self.migration_registry = migration_registry
+
+ def __call__(self, migration):
+ self.migration_registry[self.migration_number] = migration
+ return migration
+
+
+class MigrationManager(object):
+ """
+ Migration handling tool.
+
+ Takes information about a database, lets you update the database
+ to the latest migrations, etc.
+ """
+ def __init__(self, database, migration_registry=MIGRATIONS):
+ """
+ Args:
+ - database: database we're going to migrate
+ - migration_registry: where we should find all migrations to
+ run
+ """
+ self.database = database
+ self.migration_registry = migration_registry
+ self._sorted_migrations = None
+
+ def _ensure_current_migration_record(self):
+ """
+ If there isn't a database[u'app_metadata'] mediagoblin entry
+ with the 'current_migration', raise MissingCurrentMigration.
+ """
+ if self.database_current_migration() is None:
+ raise MissingCurrentMigration(
+ "Tried to call function which requires "
+ "'current_migration' set in database")
+
+ @property
+ def sorted_migrations(self):
+ """
+ Sort migrations if necessary and store in self._sorted_migrations
+ """
+ if not self._sorted_migrations:
+ self._sorted_migrations = sorted(
+ self.migration_registry.items(),
+ # sort on the key... the migration number
+ key=lambda migration_tuple: migration_tuple[0])
+
+ return self._sorted_migrations
+
+ def latest_migration(self):
+ """
+ Return a migration number for the latest migration, or 0 if
+ there are no migrations.
+ """
+ if self.sorted_migrations:
+ return self.sorted_migrations[-1][0]
+ else:
+ # If no migrations have been set, we start at 0.
+ return 0
+
+ def set_current_migration(self, migration_number):
+ """
+ Set the migration in the database to migration_number
+ """
+ # Add the mediagoblin migration if necessary
+ self.database[u'app_metadata'].update(
+ {u'_id': u'mediagoblin'},
+ {u'$set': {u'current_migration': migration_number}},
+ upsert=True)
+
+ def install_migration_version_if_missing(self):
+ """
+ Sets the migration to the latest version if no migration
+ version at all is set.
+ """
+ mgoblin_metadata = self.database[u'app_metadata'].find_one(
+ {u'_id': u'mediagoblin'})
+ if not mgoblin_metadata:
+ latest_migration = self.latest_migration()
+ self.set_current_migration(latest_migration)
+
+ def database_current_migration(self):
+ """
+ Return the current migration in the database.
+ """
+ mgoblin_metadata = self.database[u'app_metadata'].find_one(
+ {u'_id': u'mediagoblin'})
+ if not mgoblin_metadata:
+ return None
+ else:
+ return mgoblin_metadata[u'current_migration']
+
+ def database_at_latest_migration(self):
+ """
+ See if the database is at the latest migration.
+ Returns a boolean.
+ """
+ current_migration = self.database_current_migration()
+ return current_migration == self.latest_migration()
+
+ def migrations_to_run(self):
+ """
+ Get a list of migrations to run still, if any.
+
+ Note that this raises MissingCurrentMigration if no current
+ migration version has been recorded in the database yet!
+ """
+ self._ensure_current_migration_record()
+
+ db_current_migration = self.database_current_migration()
+
+ return [
+ (migration_number, migration_func)
+ for migration_number, migration_func in self.sorted_migrations
+ if migration_number > db_current_migration]
+
+ def migrate_new(self, pre_callback=None, post_callback=None):
+ """
+ Run all migrations that have not been applied yet.
+
+ Includes two optional args:
+ - pre_callback: if provided, this is a callback on something to
+ run pre-migration. Takes (migration_number, migration_func)
+ as arguments
+ - post_callback: if provided, this is a callback on something to
+ run post-migration. Takes (migration_number, migration_func)
+ as arguments
+ """
+ # If we aren't set to any version number, presume we're at the
+ # latest (which means we'll do nothing here...)
+ self.install_migration_version_if_missing()
+
+ for migration_number, migration_func in self.migrations_to_run():
+ if pre_callback:
+ pre_callback(migration_number, migration_func)
+ migration_func(self.database)
+ self.set_current_migration(migration_number)
+ if post_callback:
+ post_callback(migration_number, migration_func)