10 files changed, 2132 insertions, 0 deletions
diff --git a/mediagoblin/db/__init__.py b/mediagoblin/db/__init__.py
new file mode 100644
index 00000000..27ca4b06
--- /dev/null
+++ b/mediagoblin/db/__init__.py
@@ -0,0 +1,49 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Database Abstraction/Wrapper Layer
+==================================
+
+This submodule is for most of the db specific stuff.
+
+There are two main ideas here:
+
+1. Open up a small possibility to replace mongo with another
+   db.  This means that all direct mongo accesses should
+   happen in the db submodule, while all the rest uses an
+   API defined by this submodule.
+
+   Currently this API happens to be basically mongo,
+   which means that the abstraction/wrapper layer is
+   extremely thin.
+
+2. Give the rest of the app a simple and easy way to get most of
+   their db needs. Which often means some simple import
+   from db.util.
+
+What does that mean?
+
+* Never import mongo directly outside of this submodule.
+
+* Inside this submodule you can do whatever is needed. The
+  API border is exactly at the submodule layer. Nowhere
+  else.
+
+* helper functions can be moved in here. They become part
+  of the db.* API
+
+"""
diff --git a/mediagoblin/db/base.py b/mediagoblin/db/base.py
new file mode 100644
index 00000000..699a503a
--- /dev/null
+++ b/mediagoblin/db/base.py
@@ -0,0 +1,78 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import scoped_session, sessionmaker, object_session
+
+Session = scoped_session(sessionmaker())
+
+
+class GMGTableBase(object):
+    query = Session.query_property()
+
+    @classmethod
+    def find(cls, query_dict):
+        return cls.query.filter_by(**query_dict)
+
+    @classmethod
+    def find_one(cls, query_dict):
+        return cls.query.filter_by(**query_dict).first()
+
+    @classmethod
+    def one(cls, query_dict):
+        return cls.find(query_dict).one()
+
+    def get(self, key):
+        return getattr(self, key)
+
+    def setdefault(self, key, defaultvalue):
+        # The key *has* to exist on sql.
+        return getattr(self, key)
+
+    def save(self):
+        sess = object_session(self)
+        if sess is None:
+            sess = Session()
+        sess.add(self)
+        sess.commit()
+
+    def delete(self, commit=True):
+        """Delete the object and commit the change immediately by default"""
+        sess = object_session(self)
+        assert sess is not None, "Not going to delete detached %r" % self
+        sess.delete(self)
+        if commit:
+            sess.commit()
+
+
+Base = declarative_base(cls=GMGTableBase)
+
+
+class DictReadAttrProxy(object):
+    """
+    Maps read accesses to obj['key'] to obj.key
+    and hides all the rest of the obj
+    """
+    def __init__(self, proxied_obj):
+        self.proxied_obj = proxied_obj
+
+    def __getitem__(self, key):
+        try:
+            return getattr(self.proxied_obj, key)
+        except AttributeError:
+            raise KeyError("%r is not an attribute on %r"
+                % (key, self.proxied_obj))
diff --git a/mediagoblin/db/extratypes.py b/mediagoblin/db/extratypes.py
new file mode 100644
index 00000000..f2304af0
--- /dev/null
+++ b/mediagoblin/db/extratypes.py
@@ -0,0 +1,63 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+from sqlalchemy.types import TypeDecorator, Unicode, TEXT
+import json
+
+
+class PathTupleWithSlashes(TypeDecorator):
+    "Represents a Tuple of strings as a slash separated string."
+
+    impl = Unicode
+
+    def process_bind_param(self, value, dialect):
+        if value is not None:
+            if len(value) == 0:
+                value = None
+            else:
+                value = '/'.join(value)
+        return value
+
+    def process_result_value(self, value, dialect):
+        if value is not None:
+            value = tuple(value.split('/'))
+        return value
+
+
+# The following class and only this one class is in very
+# large parts based on example code from sqlalchemy.
+#
+# The original copyright notice and license follows:
+#     Copyright (C) 2005-2011 the SQLAlchemy authors and contributors <see AUTHORS file>
+#
+#     This module is part of SQLAlchemy and is released under
+#     the MIT License: http://www.opensource.org/licenses/mit-license.php
+#
+class JSONEncoded(TypeDecorator):
+    "Represents an immutable structure as a json-encoded string."
+
+    impl = TEXT
+
+    def process_bind_param(self, value, dialect):
+        if value is not None:
+            value = json.dumps(value)
+        return value
+
+    def process_result_value(self, value, dialect):
+        if value is not None:
+            value = json.loads(value)
+        return value
diff --git a/mediagoblin/db/migration_tools.py b/mediagoblin/db/migration_tools.py
new file mode 100644
index 00000000..c0c7e998
--- /dev/null
+++ b/mediagoblin/db/migration_tools.py
@@ -0,0 +1,276 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from mediagoblin.tools.common import simple_printer
+from sqlalchemy import Table
+
+class TableAlreadyExists(Exception):
+    pass
+
+
+class MigrationManager(object):
+    """
+    Migration handling tool.
+
+    Takes information about a database, lets you update the database
+    to the latest migrations, etc.
+    """
+
+    def __init__(self, name, models, migration_registry, session,
+                 printer=simple_printer):
+        """
+        Args:
+         - name: identifier of this section of the database
+         - session: session we're going to migrate
+         - migration_registry: where we should find all migrations to
+           run
+        """
+        self.name = unicode(name)
+        self.models = models
+        self.session = session
+        self.migration_registry = migration_registry
+        self._sorted_migrations = None
+        self.printer = printer
+
+        # For convenience
+        from mediagoblin.db.models import MigrationData
+
+        self.migration_model = MigrationData
+        self.migration_table = MigrationData.__table__
+
+    @property
+    def sorted_migrations(self):
+        """
+        Sort migrations if necessary and store in self._sorted_migrations
+        """
+        if not self._sorted_migrations:
+            self._sorted_migrations = sorted(
+                self.migration_registry.items(),
+                # sort on the key... the migration number
+                key=lambda migration_tuple: migration_tuple[0])
+
+        return self._sorted_migrations
+
+    @property
+    def migration_data(self):
+        """
+        Get the migration row associated with this object, if any.
+        """
+        return self.session.query(
+            self.migration_model).filter_by(name=self.name).first()
+
+    @property
+    def latest_migration(self):
+        """
+        Return a migration number for the latest migration, or 0 if
+        there are no migrations.
+        """
+        if self.sorted_migrations:
+            return self.sorted_migrations[-1][0]
+        else:
+            # If no migrations have been set, we start at 0.
+            return 0
+
+    @property
+    def database_current_migration(self):
+        """
+        Return the current migration version stored in the database.
+
+        Returns None when the migration table does not exist or when it
+        holds no row for self.name.
+        """
+        if not self.migration_table.exists(self.session.bind):
+            return None
+
+        # Bind once: every access to self.migration_data runs a query.
+        record = self.migration_data
+        return None if record is None else record.version
+
+    def set_current_migration(self, migration_number=None):
+        """
+        Set the migration in the database to migration_number
+        (or, the latest available)
+        """
+        self.migration_data.version = migration_number or self.latest_migration
+        self.session.commit()
+
+    def migrations_to_run(self):
+        """
+        Get a list of (number, func) migrations still to run, if any.
+
+        Raises AssertionError if there's no migration record for
+        this class!
+        """
+        # Read the property once; each access queries the database.
+        db_current_migration = self.database_current_migration
+        assert db_current_migration is not None
+
+        return [
+            (migration_number, migration_func)
+            for migration_number, migration_func in self.sorted_migrations
+            if migration_number > db_current_migration]
+
+
+    def init_tables(self):
+        """
+        Create all tables relative to this package
+        """
+        # sanity check before we proceed, none of these should be created
+        for model in self.models:
+            # Maybe in the future just print out a "Yikes!" or something?
+            if model.__table__.exists(self.session.bind):
+                raise TableAlreadyExists(
+                    u"Intended to create table '%s' but it already exists" %
+                    model.__table__.name)
+
+        self.migration_model.metadata.create_all(
+            self.session.bind,
+            tables=[model.__table__ for model in self.models])
+
+    def create_new_migration_record(self):
+        """
+        Create a new migration record for this migration set
+        """
+        migration_record = self.migration_model(
+            name=self.name,
+            version=self.latest_migration)
+        self.session.add(migration_record)
+        self.session.commit()
+
+    def dry_run(self):
+        """
+        Print what an upgrade would do; return 'inited', 'migrated' or None.
+        """
+        if self.database_current_migration is None:
+            self.printer(
+                u'~> Woulda initialized: %s\n' % self.name_for_printing())
+            return u'inited'
+
+        migrations_to_run = self.migrations_to_run()
+        if migrations_to_run:
+            self.printer(
+                u'~> Woulda updated %s:\n' % self.name_for_printing())
+            # migrations_to_run is a list of (number, func) pairs; iterate it.
+            for migration_number, migration_func in migrations_to_run:
+                self.printer(
+                    u'   + Would update %s, "%s"\n' % (
+                        migration_number, migration_func.func_name))
+
+            return u'migrated'
+        
+    def name_for_printing(self):
+        if self.name == u'__main__':
+            return u"main mediagoblin tables"
+        else:
+            # TODO: Use the friendlier media manager "human readable" name
+            return u'media type "%s"' % self.name
+
+    def init_or_migrate(self):
+        """
+        Initialize the database or migrate if appropriate.
+
+        Returns information about whether or not we initialized
+        ('inited'), migrated ('migrated'), or did nothing (None)
+        """
+        assure_migrations_table_setup(self.session)
+
+        # Find out what migration number, if any, this database data is at,
+        # and what the latest is.
+        migration_number = self.database_current_migration
+
+        # Is this our first time?  Is there even a table entry for
+        # this identifier?
+        # If so:
+        #  - create all tables
+        #  - create record in migrations registry
+        #  - print / inform the user
+        #  - return 'inited'
+        if migration_number is None:
+            self.printer(u"-> Initializing %s... " % self.name_for_printing())
+
+            self.init_tables()
+            # auto-set at latest migration number
+            self.create_new_migration_record()  
+            
+            self.printer(u"done.\n")
+            self.set_current_migration()
+            return u'inited'
+
+        # Run migrations, if appropriate.
+        migrations_to_run = self.migrations_to_run()
+        if migrations_to_run:
+            self.printer(
+                u'-> Updating %s:\n' % self.name_for_printing())
+            for migration_number, migration_func in migrations_to_run:
+                self.printer(
+                    u'   + Running migration %s, "%s"... ' % (
+                        migration_number, migration_func.func_name))
+                migration_func(self.session)
+                self.set_current_migration(migration_number)
+                self.printer('done.\n')
+
+            return u'migrated'
+
+        # Otherwise return None.  Well it would do this anyway, but
+        # for clarity... ;)
+        return None
+
+
+class RegisterMigration(object):
+    """
+    Tool for registering migrations
+
+    Call like:
+
+    @RegisterMigration(33, MIGRATIONS)
+    def update_dwarves(database):
+        [...]
+
+    This will store your migration function in the given
+    migration_registry (a required second argument), keyed by its
+    migration number, and return the function unchanged.
+
+    Note, the number of your migration should NEVER be 0 or less than
+    0.  0 is the default "no migrations" state!
+    """
+    def __init__(self, migration_number, migration_registry):
+        assert migration_number > 0, "Migration number must be > 0!"
+        assert migration_number not in migration_registry, \
+            "Duplicate migration numbers detected!  That's not allowed!"
+
+        self.migration_number = migration_number
+        self.migration_registry = migration_registry
+
+    def __call__(self, migration):
+        self.migration_registry[self.migration_number] = migration
+        return migration
+
+
+def assure_migrations_table_setup(db):
+    """
+    Make sure the migrations table is set up in the database.
+    """
+    from mediagoblin.db.models import MigrationData
+
+    if not MigrationData.__table__.exists(db.bind):
+        MigrationData.metadata.create_all(
+            db.bind, tables=[MigrationData.__table__])
+
+
+def inspect_table(metadata, table_name):
+    """Simple helper to get a ref to an already existing table"""
+    return Table(table_name, metadata, autoload=True,
+                 autoload_with=metadata.bind)
diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py
new file mode 100644
index 00000000..2c553396
--- /dev/null
+++ b/mediagoblin/db/migrations.py
@@ -0,0 +1,289 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import uuid
+
+from sqlalchemy import (MetaData, Table, Column, Boolean, SmallInteger,
+                        Integer, Unicode, UnicodeText, DateTime,
+                        ForeignKey)
+from sqlalchemy.exc import ProgrammingError
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.sql import and_
+from migrate.changeset.constraint import UniqueConstraint
+
+from mediagoblin.db.migration_tools import RegisterMigration, inspect_table
+from mediagoblin.db.models import MediaEntry, Collection, User
+
+MIGRATIONS = {}
+
+
+@RegisterMigration(1, MIGRATIONS)
+def ogg_to_webm_audio(db_conn):
+    metadata = MetaData(bind=db_conn.bind)
+
+    file_keynames = Table('core__file_keynames', metadata, autoload=True,
+                          autoload_with=db_conn.bind)
+
+    db_conn.execute(
+        file_keynames.update().where(file_keynames.c.name == 'ogg').
+            values(name='webm_audio')
+    )
+    db_conn.commit()
+
+
+@RegisterMigration(2, MIGRATIONS)
+def add_wants_notification_column(db_conn):
+    metadata = MetaData(bind=db_conn.bind)
+
+    users = Table('core__users', metadata, autoload=True,
+            autoload_with=db_conn.bind)
+
+    col = Column('wants_comment_notification', Boolean,
+            default=True, nullable=True)
+    col.create(users, populate_defaults=True)
+    db_conn.commit()
+
+
+@RegisterMigration(3, MIGRATIONS)
+def add_transcoding_progress(db_conn):
+    metadata = MetaData(bind=db_conn.bind)
+
+    media_entry = inspect_table(metadata, 'core__media_entries')
+
+    col = Column('transcoding_progress', SmallInteger)
+    col.create(media_entry)
+    db_conn.commit()
+
+
+class Collection_v0(declarative_base()):
+    __tablename__ = "core__collections"
+
+    id = Column(Integer, primary_key=True)
+    title = Column(Unicode, nullable=False)
+    slug = Column(Unicode)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now,
+        index=True)
+    description = Column(UnicodeText)
+    creator = Column(Integer, ForeignKey(User.id), nullable=False)
+    items = Column(Integer, default=0)
+
+class CollectionItem_v0(declarative_base()):
+    __tablename__ = "core__collection_items"
+
+    id = Column(Integer, primary_key=True)
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id), nullable=False, index=True)
+    collection = Column(Integer, ForeignKey(Collection.id), nullable=False)
+    note = Column(UnicodeText, nullable=True)
+    added = Column(DateTime, nullable=False, default=datetime.datetime.now)
+    position = Column(Integer)
+
+    ## This constraint is active, so tables created by migration #4
+    ## already carry it.  Migration #7 only adds it for databases
+    ## whose tables were created by an older #4 lacking the constraint.
+    __table_args__ = (
+        UniqueConstraint('collection', 'media_entry'),
+        {})
+
+collectionitem_unique_constraint_done = False
+
+@RegisterMigration(4, MIGRATIONS)
+def add_collection_tables(db_conn):
+    Collection_v0.__table__.create(db_conn.bind)
+    CollectionItem_v0.__table__.create(db_conn.bind)
+
+    global collectionitem_unique_constraint_done
+    collectionitem_unique_constraint_done = True
+
+    db_conn.commit()
+
+
+@RegisterMigration(5, MIGRATIONS)
+def add_mediaentry_collected(db_conn):
+    metadata = MetaData(bind=db_conn.bind)
+
+    media_entry = inspect_table(metadata, 'core__media_entries')
+
+    col = Column('collected', Integer, default=0)
+    col.create(media_entry)
+    db_conn.commit()
+
+
+class ProcessingMetaData_v0(declarative_base()):
+    __tablename__ = 'core__processing_metadata'
+
+    id = Column(Integer, primary_key=True)
+    media_entry_id = Column(Integer, ForeignKey(MediaEntry.id), nullable=False,
+            index=True)
+    callback_url = Column(Unicode)
+
+@RegisterMigration(6, MIGRATIONS)
+def create_processing_metadata_table(db):
+    ProcessingMetaData_v0.__table__.create(db.bind)
+    db.commit()
+
+
+# Okay, problem being:
+#  Migration #4 forgot to add the uniqueconstraint for the
+#  new tables. While creating the tables from scratch had
+#  the constraint enabled.
+#
+# So we have four situations that should end up at the same
+# db layout:
+#
+# 1. Fresh install.
+#    Well, easy. Just uses the tables in models.py
+# 2. Fresh install using a git version just before this migration
+#    The tables are all there, the unique constraint is also there.
+#    This migration should do nothing.
+#    But as we can't detect the uniqueconstraint easily,
+#    this migration just adds the constraint again.
+#    And possibly fails very loud. But ignores the failure.
+# 3. old install, not using git, just releases.
+#    This one will get the new tables in #4 (now with constraint!)
+#    And this migration is just skipped silently.
+# 4. old install, always on latest git.
+#    This one has the tables, but lacks the constraint.
+#    So this migration adds the constraint.
+@RegisterMigration(7, MIGRATIONS)
+def fix_CollectionItem_v0_constraint(db_conn):
+    """Add the forgotten Constraint on CollectionItem"""
+
+    global collectionitem_unique_constraint_done
+    if collectionitem_unique_constraint_done:
+        # Reset it. Maybe the whole thing gets run again
+        # For a different db?
+        collectionitem_unique_constraint_done = False
+        return
+
+    metadata = MetaData(bind=db_conn.bind)
+
+    CollectionItem_table = inspect_table(metadata, 'core__collection_items')
+
+    constraint = UniqueConstraint('collection', 'media_entry',
+        name='core__collection_items_collection_media_entry_key',
+        table=CollectionItem_table)
+
+    try:
+        constraint.create()
+    except ProgrammingError:
+        # User probably has an install that was run since the
+        # collection tables were added, so we don't need to run this migration.
+        pass
+
+    db_conn.commit()
+
+
+@RegisterMigration(8, MIGRATIONS)
+def add_license_preference(db):
+    metadata = MetaData(bind=db.bind)
+
+    user_table = inspect_table(metadata, 'core__users')
+
+    col = Column('license_preference', Unicode)
+    col.create(user_table)
+    db.commit()
+
+
+@RegisterMigration(9, MIGRATIONS)
+def mediaentry_new_slug_era(db):
+    """
+    Update for the new era for media type slugs.
+
+    Entries without slugs now display differently in the url like:
+      /u/cwebber/m/id=251/
+
+    ... because of this, we should back-convert:
+     - entries without slugs should be converted to use the id, if possible, to
+       make old urls still work
+     - slugs with = (or also : which is now also not allowed) to have those
+       stripped out (small possibility of breakage here sadly)
+    """
+
+    def slug_and_user_combo_exists(slug, uploader):
+        return db.execute(
+            media_table.select(
+                and_(media_table.c.uploader==uploader,
+                     media_table.c.slug==slug))).first() is not None
+
+    def append_garbage_till_unique(row, new_slug):
+        """
+        Attach junk to this row until it's unique, then save it
+        """
+        if slug_and_user_combo_exists(new_slug, row.uploader):
+            # okay, still no success;
+            # let's whack junk on there till it's unique.
+            new_slug += '-' + uuid.uuid4().hex[:4]
+            # keep going if necessary!
+            while slug_and_user_combo_exists(new_slug, row.uploader):
+                new_slug += uuid.uuid4().hex[:4]
+
+        db.execute(
+            media_table.update(). \
+            where(media_table.c.id==row.id). \
+            values(slug=new_slug))
+
+    metadata = MetaData(bind=db.bind)
+
+    media_table = inspect_table(metadata, 'core__media_entries')
+
+    for row in db.execute(media_table.select()):
+        # no slug, try setting to an id
+        if not row.slug:
+            append_garbage_till_unique(row, unicode(row.id))
+        # has "=" or ":" in it... we're getting rid of those
+        elif u"=" in row.slug or u":" in row.slug:
+            append_garbage_till_unique(
+                row, row.slug.replace(u"=", u"-").replace(u":", u"-"))
+
+    db.commit()
+
+
+@RegisterMigration(10, MIGRATIONS)
+def unique_collections_slug(db):
+    """Add unique constraint to collection slug"""
+    metadata = MetaData(bind=db.bind)
+    collection_table = inspect_table(metadata, "core__collections")
+    existing_slugs = {}
+    slugs_to_change = []
+
+    for row in db.execute(collection_table.select()):
+        # if duplicate slug, generate a unique slug
+        if row.creator in existing_slugs and row.slug in \
+           existing_slugs[row.creator]:
+            slugs_to_change.append(row.id)
+        else:
+            if not row.creator in existing_slugs:
+                existing_slugs[row.creator] = [row.slug]
+            else:
+                existing_slugs[row.creator].append(row.slug)
+
+    for row_id in slugs_to_change:
+        new_slug = unicode(uuid.uuid4())
+        db.execute(collection_table.update().
+                   where(collection_table.c.id == row_id).
+                   values(slug=new_slug))
+    # sqlite does not like to change the schema when a transaction(update) is
+    # not yet completed
+    db.commit()
+
+    constraint = UniqueConstraint('creator', 'slug',
+                                  name='core__collection_creator_slug_key',
+                                  table=collection_table)
+    constraint.create()
+
+    db.commit()
diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py
new file mode 100644
index 00000000..9f566e36
--- /dev/null
+++ b/mediagoblin/db/mixin.py
@@ -0,0 +1,334 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+This module contains some Mixin classes for the db objects.
+
+A bunch of functions on the db objects are really more like
+"utility functions": They could live outside the classes
+and be called "by hand" passing the appropriate reference.
+They usually only use the public API of the object and
+rarely use database related stuff.
+
+These functions now live here and get "mixed in" into the
+real objects.
+"""
+
+import uuid
+import re
+import datetime
+
+from werkzeug.utils import cached_property
+
+from mediagoblin import mg_globals
+from mediagoblin.media_types import get_media_managers, FileTypeNotSupported
+from mediagoblin.tools import common, licenses
+from mediagoblin.tools.text import cleaned_markdown_conversion
+from mediagoblin.tools.url import slugify
+
+
+class UserMixin(object):
+    """Helpers that get mixed into the user db object."""
+
+    @property
+    def bio_html(self):
+        """The user's bio, run through cleaned_markdown_conversion."""
+        rendered_bio = cleaned_markdown_conversion(self.bio)
+        return rendered_bio
+
+
+class GenerateSlugMixin(object):
+    """
+    Mixin providing generate_slug() for objects that carry a slug.
+
+    The object it is mixed into has to offer:
+     - self.slug
+     - self.title
+     - self.id
+     - self.check_slug_used(new_slug)
+    """
+    def generate_slug(self):
+        """
+        Settle on a unique slug for this object, if there is a basis for one.
+
+        Nothing is *forced* here; the steps are:
+         - An existing slug is sanitized with slugify(); without a slug the
+           slugified title is taken instead.
+         - If neither leaves us with something usable, the slug stays
+           unset and we stop.  No slug is invented out of thin air.
+         - A basis that is not in use yet is kept as it is.
+         - A basis that is already in use first gets the object's id
+           appended (when the object has one).
+         - If that is taken as well (or there is no id), random hex digits
+           are tacked on until the slug is unique.  A bit of junk, but the
+           nice basis stays recognizable.
+        """
+        # Step 1: find a basis, the existing slug wins over the title
+        if self.slug:
+            self.slug = slugify(self.slug)
+        elif self.title:
+            self.slug = slugify(self.title)
+
+        # Empty string slugs are not wanted, store None instead
+        if self.slug == u"":
+            self.slug = None
+
+        # Step 2: nothing to build on?  Then there will be no slug.
+        if not self.slug:
+            return
+
+        # Step 3: the basis is free, so we are done
+        if not self.check_slug_used(self.slug):
+            return
+
+        # Step 4: taken...  maybe appending our id is enough
+        if self.id:
+            candidate = u"%s-%s" % (self.slug, self.id)
+            if not self.check_slug_used(candidate):
+                self.slug = candidate
+                return
+
+        # Step 5: last resort, append random junk until nothing collides
+        self.slug += '-' + uuid.uuid4().hex[:4]
+        while self.check_slug_used(self.slug):
+            self.slug += uuid.uuid4().hex[:4]
+
+
+class MediaEntryMixin(GenerateSlugMixin):
+    """
+    Utility methods that get mixed into the media entry db object.
+    """
+    def check_slug_used(self, slug):
+        """
+        Slug check used by generate_slug(); delegates to
+        db.util.check_media_slug_used with our uploader and our id.
+        """
+        # import this here due to a cyclic import issue
+        # (db.models -> db.mixin -> db.util -> db.models)
+        from mediagoblin.db.util import check_media_slug_used
+
+        return check_media_slug_used(self.uploader, slug, self.id)
+
+    @property
+    def description_html(self):
+        """
+        Rendered version of the description, run through
+        Markdown and cleaned with our cleaning tool.
+        """
+        return cleaned_markdown_conversion(self.description)
+
+    def get_display_media(self):
+        """Find the best media for display.
+
+        We check self.media_manager.media_fetch_order to get the order
+        in which the available media sizes are tried.
+
+        Returns:
+          (media_size, media_path)
+          or, if not found, None.
+
+        """
+        fetch_order = self.media_manager.media_fetch_order
+
+        # No fetching order found?  well, give up!
+        if not fetch_order:
+            return None
+
+        media_files = self.media_files
+
+        for media_size in fetch_order:
+            if media_size in media_files:
+                return media_size, media_files[media_size]
+
+        # none of the preferred sizes is available
+        return None
+
+    def main_mediafile(self):
+        """Placeholder: not implemented, always returns None."""
+        pass
+
+    @property
+    def slug_or_id(self):
+        """The slug if there is one, else the id in the form u'id:<id>'."""
+        if self.slug:
+            return self.slug
+        else:
+            return u'id:%s' % self.id
+
+    def url_for_self(self, urlgen, **extra_args):
+        """
+        Generate an appropriate url for ourselves
+
+        Use a slug if we have one, else use our 'id'.
+        Any extra_args are handed through to urlgen.
+        """
+        uploader = self.get_uploader
+
+        return urlgen(
+            'mediagoblin.user_pages.media_home',
+            user=uploader.username,
+            media=self.slug_or_id,
+            **extra_args)
+
+    @property
+    def thumb_url(self):
+        """Return the thumbnail URL (for usage in templates)
+        Will return either the real thumbnail or a default fallback icon."""
+        # TODO: implement generic fallback in case MEDIA_MANAGER does
+        # not specify one?
+        if u'thumb' in self.media_files:
+            thumb_url = mg_globals.app.public_store.file_url(
+                            self.media_files[u'thumb'])
+        else:
+            # No thumbnail in media available. Get the media's
+            # MEDIA_MANAGER for the fallback icon and return static URL
+            # Raises FileTypeNotSupported in case no such manager is enabled
+            manager = self.media_manager
+            thumb_url = mg_globals.app.staticdirector(manager[u'default_thumb'])
+        return thumb_url
+
+    @cached_property
+    def media_manager(self):
+        """Returns the MEDIA_MANAGER of the media's media_type
+
+        Raises FileTypeNotSupported in case no such manager is enabled
+        """
+        # TODO, we should be able to make this a simple lookup rather
+        # than iterating through all media managers.
+        for media_type, manager in get_media_managers():
+            if media_type == self.media_type:
+                return manager(self)
+        # Not found?  Then raise an error
+        raise FileTypeNotSupported(
+            "MediaManager not in enabled types.  Check media_types in config?")
+
+    def get_fail_exception(self):
+        """
+        Get the exception that's appropriate for this error
+
+        Imports and returns the component named by self.fail_error;
+        returns None when no fail_error is set.
+        """
+        if self.fail_error:
+            return common.import_component(self.fail_error)
+
+    def get_license_data(self):
+        """Return license dict for requested license"""
+        return licenses.get_license_by_url(self.license or "")
+
+    def exif_display_iter(self):
+        """
+        Yield (label, value) pairs for all entries of the "exif_all" data.
+
+        The label is the key with a space put in front of capitalized
+        words and with 'EXIF' and 'Image' removed.  Nothing is yielded
+        when there is no media_data or no "exif_all" data.
+        """
+        if not self.media_data:
+            return
+        exif_all = self.media_data.get("exif_all")
+
+        # No exif stored for this entry: iterating None would raise
+        if exif_all is None:
+            return
+
+        for key in exif_all:
+            label = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', key)
+            yield label.replace('EXIF', '').replace('Image', ''), exif_all[key]
+
+    def exif_display_data_short(self):
+        """Display a very short practical version of exif info
+
+        Returns a dict that may contain the keys 'Date Taken', 'Aperture',
+        'Camera', 'Exposure', 'ISO Speed' and 'Focal Length' (only those
+        present in the exif data), or None when there is no media_data
+        or no "exif_all" data.
+        """
+        if not self.media_data:
+            return
+
+        exif_all = self.media_data.get("exif_all")
+
+        # No exif stored for this entry: the lookups below would raise
+        if exif_all is None:
+            return
+
+        exif_short = {}
+
+        if 'Image DateTimeOriginal' in exif_all:
+            # format date taken
+            takendate = datetime.datetime.strptime(
+                exif_all['Image DateTimeOriginal']['printable'],
+                '%Y:%m:%d %H:%M:%S').date()
+            taken = takendate.strftime('%B %d %Y')
+
+            exif_short.update({'Date Taken': taken})
+
+        aperture = None
+        if 'EXIF FNumber' in exif_all:
+            # the printable form is either "num/den" or a single value
+            fnum = str(exif_all['EXIF FNumber']['printable']).split('/')
+
+            # calculate aperture
+            if len(fnum) == 2:
+                aperture = "f/%.1f" % (float(fnum[0])/float(fnum[1]))
+            elif fnum[0] != 'None':
+                aperture = "f/%s" % (fnum[0])
+
+        if aperture:
+            exif_short.update({'Aperture': aperture})
+
+        # (display label, exif key, optional formatter for the value)
+        short_keys = [
+            ('Camera', 'Image Model', None),
+            ('Exposure', 'EXIF ExposureTime', lambda x: '%s sec' % x),
+            ('ISO Speed', 'EXIF ISOSpeedRatings', None),
+            ('Focal Length', 'EXIF FocalLength', lambda x: '%s mm' % x)]
+
+        for label, key, fmt_func in short_keys:
+            try:
+                val = fmt_func(exif_all[key]['printable']) if fmt_func \
+                        else exif_all[key]['printable']
+                exif_short.update({label: val})
+            except KeyError:
+                # this tag is simply not part of the exif data
+                pass
+
+        return exif_short
+
+
+class MediaCommentMixin(object):
+    """Helpers that get mixed into the media comment db object."""
+
+    @property
+    def content_html(self):
+        """
+        The comment as it gets displayed: its content run through
+        Markdown and the HTML cleaner.
+        """
+        rendered_content = cleaned_markdown_conversion(self.content)
+        return rendered_content
+
+
+class CollectionMixin(GenerateSlugMixin):
+    """Utility methods that get mixed into the collection db object."""
+
+    def check_slug_used(self, slug):
+        """
+        Slug check used by generate_slug(); delegates to
+        db.util.check_collection_slug_used with our creator and our id.
+        """
+        # Has to be imported locally, at module level it would be cyclic
+        # (db.models -> db.mixin -> db.util -> db.models)
+        from mediagoblin.db.util import check_collection_slug_used
+
+        slug_in_use = check_collection_slug_used(self.creator, slug, self.id)
+        return slug_in_use
+
+    @property
+    def description_html(self):
+        """
+        The description rendered through Markdown and cleaned
+        with our cleaning tool.
+        """
+        rendered_description = cleaned_markdown_conversion(self.description)
+        return rendered_description
+
+    @property
+    def slug_or_id(self):
+        """The slug if it is set (and not empty), else the plain id."""
+        if self.slug:
+            return self.slug
+        return self.id
+
+    def url_for_self(self, urlgen, **extra_args):
+        """
+        Build the url of this collection's page with urlgen.
+
+        The collection is addressed by its slug if it has one,
+        else by its 'id'.  extra_args are handed through to urlgen.
+        """
+        return urlgen(
+            'mediagoblin.user_pages.user_collection',
+            user=self.get_creator.username,
+            collection=self.slug_or_id,
+            **extra_args)
+
+
+class CollectionItemMixin(object):
+    """Helpers that get mixed into the collection item db object."""
+
+    @property
+    def note_html(self):
+        """
+        The note as it gets displayed: run through Markdown
+        and the HTML cleaner.
+        """
+        rendered_note = cleaned_markdown_conversion(self.note)
+        return rendered_note
diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py
new file mode 100644
index 00000000..2b925983
--- /dev/null
+++ b/mediagoblin/db/models.py
@@ -0,0 +1,524 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+TODO: indexes on foreignkeys, where useful.
+"""
+
+import logging
+import datetime
+
+from sqlalchemy import Column, Integer, Unicode, UnicodeText, DateTime, \
+        Boolean, ForeignKey, UniqueConstraint, PrimaryKeyConstraint, \
+        SmallInteger
+from sqlalchemy.orm import relationship, backref
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from sqlalchemy.sql.expression import desc
+from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy.util import memoized_property
+
+from mediagoblin.db.extratypes import PathTupleWithSlashes, JSONEncoded
+from mediagoblin.db.base import Base, DictReadAttrProxy
+from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, MediaCommentMixin, CollectionMixin, CollectionItemMixin
+from mediagoblin.tools.files import delete_media_files
+from mediagoblin.tools.common import import_component
+
+# It's actually kind of annoying how sqlalchemy-migrate does this, if
+# I understand it right, but whatever.  Anyway, don't remove this :P
+#
+# We could do migration calls more manually instead of relying on
+# this import-based meddling...
+from migrate import changeset
+
+_log = logging.getLogger(__name__)
+
+
+class User(Base, UserMixin):
+    """
+    A user account, stored in the "core__users" table.
+
+    TODO: We should consider moving some rarely used fields
+    into some sort of "shadow" table.
+    """
+    __tablename__ = "core__users"
+
+    id = Column(Integer, primary_key=True)
+    username = Column(Unicode, nullable=False, unique=True)
+    # Note: no db uniqueness constraint on email because it's not
+    # reliable (many email systems are case insensitive, despite the
+    # RFC) and because it would be a mess to implement at this
+    # point.
+    email = Column(Unicode, nullable=False)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+    pw_hash = Column(Unicode, nullable=False)
+    email_verified = Column(Boolean, default=False)
+    status = Column(Unicode, default=u"needs_email_verification", nullable=False)
+    # Intended to be nullable=False, but migrations would not work for it,
+    # so it is implicitly left at nullable=True.
+    wants_comment_notification = Column(Boolean, default=True)
+    license_preference = Column(Unicode)
+    verification_key = Column(Unicode)
+    is_admin = Column(Boolean, default=False, nullable=False)
+    url = Column(Unicode)
+    bio = Column(UnicodeText)  # ??
+    fp_verification_key = Column(Unicode)
+    fp_token_expire = Column(DateTime)
+
+    ## TODO
+    # plugin data would be in a separate model
+
+    def __repr__(self):
+        """Debug representation: class name, id, verified/admin flags and username."""
+        return '<{0} #{1} {2} {3} "{4}">'.format(
+                self.__class__.__name__,
+                self.id,
+                'verified' if self.email_verified else 'non-verified',
+                'admin' if self.is_admin else 'user',
+                self.username)
+
+    def delete(self, **kwargs):
+        """Deletes a User and all related entries/comments/files/...
+
+        Keyword arguments (e.g. commit=True/False) are passed on to the
+        base class delete().
+        """
+        # Collections get deleted by relationships.
+
+        media_entries = MediaEntry.query.filter(MediaEntry.uploader == self.id)
+        for media in media_entries:
+            # TODO: Make sure that "MediaEntry.delete()" also deletes
+            # all related files/Comments
+            # Orphan tags are cleaned once below instead of once per entry,
+            # and nothing is committed before the user itself is deleted.
+            media.delete(del_orphan_tags=False, commit=False)
+
+        # Delete now unused tags
+        # TODO: import here due to cyclic imports!!! This cries for refactoring
+        from mediagoblin.db.util import clean_orphan_tags
+        clean_orphan_tags(commit=False)
+
+        # Delete user, pass through commit=False/True in kwargs
+        super(User, self).delete(**kwargs)
+        _log.info('Deleted user "{0}" account'.format(self.username))
+
+
+class MediaEntry(Base, MediaEntryMixin):
+    """
+    One media entry of a user, stored in the "core__media_entries" table.
+
+    TODO: Consider fetching the media_files using join
+    """
+    __tablename__ = "core__media_entries"
+
+    id = Column(Integer, primary_key=True)
+    uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True)
+    title = Column(Unicode, nullable=False)
+    slug = Column(Unicode)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now,
+        index=True)
+    description = Column(UnicodeText) # ??
+    media_type = Column(Unicode, nullable=False)
+    state = Column(Unicode, default=u'unprocessed', nullable=False)
+        # or use sqlalchemy.types.Enum?
+    license = Column(Unicode)
+    collected = Column(Integer, default=0)
+
+    fail_error = Column(Unicode)
+    fail_metadata = Column(JSONEncoded)
+
+    transcoding_progress = Column(SmallInteger)
+
+    queued_media_file = Column(PathTupleWithSlashes)
+
+    queued_task_id = Column(Unicode)
+
+    # A slug only has to be unique among the entries of one uploader.
+    __table_args__ = (
+        UniqueConstraint('uploader', 'slug'),
+        {})
+
+    get_uploader = relationship(User)
+
+    # MediaFile rows of this entry, as a dict keyed by the file's name ...
+    media_files_helper = relationship("MediaFile",
+        collection_class=attribute_mapped_collection("name"),
+        cascade="all, delete-orphan"
+        )
+    # ... and the view on it that maps name -> file_path directly.
+    media_files = association_proxy('media_files_helper', 'file_path',
+        creator=lambda k, v: MediaFile(name=k, file_path=v)
+        )
+
+    # Attachments ordered by creation time; the proxy exposes each one
+    # through its dict_view and creates new ones from a dict with the
+    # keys "name" and "filepath".
+    attachment_files_helper = relationship("MediaAttachmentFile",
+        cascade="all, delete-orphan",
+        order_by="MediaAttachmentFile.created"
+        )
+    attachment_files = association_proxy("attachment_files_helper", "dict_view",
+        creator=lambda v: MediaAttachmentFile(
+            name=v["name"], filepath=v["filepath"])
+        )
+
+    # Tags: the proxy exposes each MediaTag through its dict_view and
+    # creates new ones from a dict with the keys "name" and "slug".
+    tags_helper = relationship("MediaTag",
+        cascade="all, delete-orphan" # should be automatically deleted
+        )
+    tags = association_proxy("tags_helper", "dict_view",
+        creator=lambda v: MediaTag(name=v["name"], slug=v["slug"])
+        )
+
+    # CollectionItems pointing at this entry; the proxy resolves each
+    # item to the collection it is in.
+    collections_helper = relationship("CollectionItem",
+        cascade="all, delete-orphan"
+        )
+    collections = association_proxy("collections_helper", "in_collection")
+
+    ## TODO
+    # fail_error
+
+    def get_comments(self, ascending=False):
+        """Return the query for this entry's comments ordered by creation
+        time; newest first unless ascending is true."""
+        order_col = MediaComment.created
+        if not ascending:
+            order_col = desc(order_col)
+        return self.all_comments.order_by(order_col)
+
+    def url_to_prev(self, urlgen):
+        """get the next 'newer' entry by this user
+
+        That is the processed entry of the same uploader with the
+        smallest id above ours.  Returns its url, or None if there
+        is no such entry.
+        """
+        media = MediaEntry.query.filter(
+            (MediaEntry.uploader == self.uploader)
+            & (MediaEntry.state == u'processed')
+            & (MediaEntry.id > self.id)).order_by(MediaEntry.id).first()
+
+        if media is not None:
+            return media.url_for_self(urlgen)
+
+    def url_to_next(self, urlgen):
+        """get the next 'older' entry by this user
+
+        That is the processed entry of the same uploader with the
+        largest id below ours.  Returns its url, or None if there
+        is no such entry.
+        """
+        media = MediaEntry.query.filter(
+            (MediaEntry.uploader == self.uploader)
+            & (MediaEntry.state == u'processed')
+            & (MediaEntry.id < self.id)).order_by(desc(MediaEntry.id)).first()
+
+        if media is not None:
+            return media.url_for_self(urlgen)
+
+    @property
+    def media_data(self):
+        """The media type specific data, read from the attribute whose
+        name is given by media_data_ref."""
+        return getattr(self, self.media_data_ref)
+
+    def media_data_init(self, **kwargs):
+        """
+        Initialize or update the contents of a media entry's media_data row
+
+        The keyword arguments are the fields to set on the media_data.
+        """
+        media_data = self.media_data
+
+        if media_data is None:
+            # Get the correct table:
+            table = import_component(self.media_type + '.models:DATA_MODEL')
+            # No media data, so actually add a new one
+            media_data = table(**kwargs)
+            # Get the relationship set up.
+            media_data.get_media_entry = self
+        else:
+            # Update old media data
+            for field, value in kwargs.iteritems():
+                setattr(media_data, field, value)
+
+    @memoized_property
+    def media_data_ref(self):
+        """BACKREF_NAME as imported from the models module of our
+        media_type (memoized)."""
+        return import_component(self.media_type + '.models:BACKREF_NAME')
+
+    def __repr__(self):
+        """Debug representation: class name, id and the title."""
+        # non-ascii characters of the title get replaced
+        safe_title = self.title.encode('ascii', 'replace')
+
+        return '<{classname} {id}: {title}>'.format(
+                classname=self.__class__.__name__,
+                id=self.id,
+                title=safe_title)
+
+    def delete(self, del_orphan_tags=True, **kwargs):
+        """Delete MediaEntry and all related files/attachments/comments
+
+        This will *not* automatically delete unused collections, which
+        can remain empty...
+
+        :param del_orphan_tags: True/false if we delete unused Tags too
+        :param commit: True/False if this should end the db transaction"""
+        # User's CollectionItems are automatically deleted via "cascade".
+        # Comments on this Media are deleted by cascade, hopefully.
+
+        # Delete all related files/attachments
+        try:
+            delete_media_files(self)
+        except OSError, error:
+            # Failing to remove files is only logged; the db entry still
+            # gets deleted below.
+            # NOTE(review): presumably the error carries the files that
+            # could not be deleted -- confirm in delete_media_files.
+            _log.error('No such files from the user "{1}" to delete: '
+                       '{0}'.format(str(error), self.get_uploader))
+        # NOTE: this gets logged before the row is actually deleted below.
+        _log.info('Deleted Media entry id "{0}"'.format(self.id))
+        # Related MediaTag's are automatically cleaned, but we might
+        # want to clean out unused Tag's too.
+        if del_orphan_tags:
+            # TODO: Import here due to cyclic imports!!!
+            #       This cries for refactoring
+            from mediagoblin.db.util import clean_orphan_tags
+            clean_orphan_tags(commit=False)
+        # pass through commit=False/True in kwargs
+        super(MediaEntry, self).delete(**kwargs)
+
+
+class FileKeynames(Base):
+    """
+    keywords for various places.
+    currently the MediaFile keys
+
+    Stored in the "core__file_keynames" table, each name only once.
+    """
+    __tablename__ = "core__file_keynames"
+    id = Column(Integer, primary_key=True)
+    name = Column(Unicode, unique=True)
+
+    def __repr__(self):
+        """Debug representation with id and name."""
+        return "<FileKeyname %r: %r>" % (self.id, self.name)
+
+    @classmethod
+    def find_or_new(cls, name):
+        """Return the stored keyname called `name`, or a new instance
+        for it if the query finds none."""
+        t = cls.query.filter_by(name=name).first()
+        if t is not None:
+            return t
+        return cls(name=name)
+
+
+class MediaFile(Base):
+    """
+    One file of a media entry, stored in the "core__mediafiles" table.
+
+    TODO: Highly consider moving "name" into a new table.
+    TODO: Consider preloading said table in software
+    """
+    __tablename__ = "core__mediafiles"
+
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id),
+        nullable=False)
+    name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False)
+    file_path = Column(PathTupleWithSlashes)
+
+    # Composite primary key: one file per keyname for each media entry.
+    __table_args__ = (
+        PrimaryKeyConstraint('media_entry', 'name_id'),
+        {})
+
+    def __repr__(self):
+        """Debug representation with the file's name and path."""
+        return "<MediaFile %s: %r>" % (self.name, self.file_path)
+
+    # The keyname row is always loaded together with the file (inner join);
+    # "name" reads its name and, on assignment, looks the keyname up or
+    # creates it through FileKeynames.find_or_new.
+    name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True)
+    name = association_proxy('name_helper', 'name',
+        creator=FileKeynames.find_or_new
+        )
+
+
+class MediaAttachmentFile(Base):
+    """An attachment file of a media entry ("core__attachment_files" table)."""
+    __tablename__ = "core__attachment_files"
+
+    id = Column(Integer, primary_key=True)
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id),
+        nullable=False)
+    name = Column(Unicode, nullable=False)
+    filepath = Column(PathTupleWithSlashes)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
+    @property
+    def dict_view(self):
+        """A dict like view on this object"""
+        return DictReadAttrProxy(self)
+
+
+class Tag(Base):
+    """A tag, identified by its unique slug ("core__tags" table)."""
+    __tablename__ = "core__tags"
+
+    id = Column(Integer, primary_key=True)
+    slug = Column(Unicode, nullable=False, unique=True)
+
+    def __repr__(self):
+        """Debug representation with id and slug."""
+        return "<Tag %r: %r>" % (self.id, self.slug)
+
+    @classmethod
+    def find_or_new(cls, slug):
+        """Return the stored tag with this slug, or a new instance
+        for it if the query finds none."""
+        t = cls.query.filter_by(slug=slug).first()
+        if t is not None:
+            return t
+        return cls(slug=slug)
+
+
+class MediaTag(Base):
+    """Connects a media entry with a Tag ("core__media_tags" table)."""
+    __tablename__ = "core__media_tags"
+
+    id = Column(Integer, primary_key=True)
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id),
+        nullable=False, index=True)
+    tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True)
+    name = Column(Unicode)
+    # created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
+    # A media entry can carry each tag only once.
+    __table_args__ = (
+        UniqueConstraint('tag', 'media_entry'),
+        {})
+
+    # "slug" reads the slug of the connected Tag; on assignment the Tag
+    # is looked up or created through Tag.find_or_new.
+    tag_helper = relationship(Tag)
+    slug = association_proxy('tag_helper', 'slug',
+        creator=Tag.find_or_new
+        )
+
+    def __init__(self, name=None, slug=None):
+        """Set the name and connect the Tag found (or created) for slug;
+        arguments left at None are not set."""
+        Base.__init__(self)
+        if name is not None:
+            self.name = name
+        if slug is not None:
+            self.tag_helper = Tag.find_or_new(slug)
+
+    @property
+    def dict_view(self):
+        """A dict like view on this object"""
+        return DictReadAttrProxy(self)
+
+
+class MediaComment(Base, MediaCommentMixin):
+    """A comment of a user on a media entry ("core__media_comments" table)."""
+    __tablename__ = "core__media_comments"
+
+    id = Column(Integer, primary_key=True)
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id), nullable=False, index=True)
+    author = Column(Integer, ForeignKey(User.id), nullable=False)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+    content = Column(UnicodeText, nullable=False)
+
+    # Cascade: Comments are owned by their creator. So do the full thing.
+    # lazy=dynamic: People might post a *lot* of comments,
+    #     so make the "posted_comments" a query-like thing.
+    # (this also puts the "posted_comments" backref on User)
+    get_author = relationship(User,
+                              backref=backref("posted_comments",
+                                              lazy="dynamic",
+                                              cascade="all, delete-orphan"))
+
+    # Cascade: Comments are somewhat owned by their MediaEntry.
+    #     So do the full thing.
+    # lazy=dynamic: MediaEntries might have many comments,
+    #     so make the "all_comments" a query-like thing.
+    # (this also puts the "all_comments" backref on MediaEntry)
+    get_media_entry = relationship(MediaEntry,
+                                   backref=backref("all_comments",
+                                                   lazy="dynamic",
+                                                   cascade="all, delete-orphan"))
+
+
+class Collection(Base, CollectionMixin):
+    """An 'album' or 'set' of media by a user.
+
+    On deletion, contained CollectionItems get automatically reaped via
+    SQL cascade"""
+    __tablename__ = "core__collections"
+
+    id = Column(Integer, primary_key=True)
+    title = Column(Unicode, nullable=False)
+    slug = Column(Unicode)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now,
+                     index=True)
+    description = Column(UnicodeText)
+    creator = Column(Integer, ForeignKey(User.id), nullable=False)
+    # TODO: No of items in Collection. Badly named, can we migrate to num_items?
+    items = Column(Integer, default=0)
+
+    # Cascade: Collections are owned by their creator. So do the full thing.
+    # (this also puts the "collections" backref on User)
+    get_creator = relationship(User,
+                               backref=backref("collections",
+                                               cascade="all, delete-orphan"))
+
+    # A slug only has to be unique among the collections of one creator.
+    __table_args__ = (
+        UniqueConstraint('creator', 'slug'),
+        {})
+
+    def get_collection_items(self, ascending=False):
+        """Return the query for this collection's items ordered by
+        position; highest position first unless ascending is true."""
+        #TODO, is this still needed with self.collection_items being available?
+        order_col = CollectionItem.position
+        if not ascending:
+            order_col = desc(order_col)
+        return CollectionItem.query.filter_by(
+            collection=self.id).order_by(order_col)
+
+
+class CollectionItem(Base, CollectionItemMixin):
+    """A media entry placed in a collection ("core__collection_items" table)."""
+    __tablename__ = "core__collection_items"
+
+    id = Column(Integer, primary_key=True)
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id), nullable=False, index=True)
+    collection = Column(Integer, ForeignKey(Collection.id), nullable=False)
+    note = Column(UnicodeText, nullable=True)
+    added = Column(DateTime, nullable=False, default=datetime.datetime.now)
+    position = Column(Integer)
+
+    # Cascade: CollectionItems are owned by their Collection. So do the full thing.
+    # (this also puts the "collection_items" backref on Collection)
+    in_collection = relationship(Collection,
+                                 backref=backref(
+                                     "collection_items",
+                                     cascade="all, delete-orphan"))
+
+    get_media_entry = relationship(MediaEntry)
+
+    # A media entry can be in each collection only once.
+    __table_args__ = (
+        UniqueConstraint('collection', 'media_entry'),
+        {})
+
+    @property
+    def dict_view(self):
+        """A dict like view on this object"""
+        return DictReadAttrProxy(self)
+
+
+class ProcessingMetaData(Base):
+    """Processing related metadata of a media entry, currently a callback
+    url ("core__processing_metadata" table)."""
+    __tablename__ = 'core__processing_metadata'
+
+    id = Column(Integer, primary_key=True)
+    media_entry_id = Column(Integer, ForeignKey(MediaEntry.id), nullable=False,
+            index=True)
+    # Also puts the "processing_metadata" backref on MediaEntry; the
+    # metadata gets deleted together with its media entry.
+    media_entry = relationship(MediaEntry,
+            backref=backref('processing_metadata',
+                cascade='all, delete-orphan'))
+    callback_url = Column(Unicode)
+
+    @property
+    def dict_view(self):
+        """A dict like view on this object"""
+        return DictReadAttrProxy(self)
+
+
+# All regular models of this module (MigrationData is deliberately not
+# listed, see below).
+MODELS = [
+    User,
+    MediaEntry,
+    Tag,
+    MediaTag,
+    MediaComment,
+    Collection,
+    CollectionItem,
+    MediaFile,
+    FileKeynames,
+    MediaAttachmentFile,
+    ProcessingMetaData,
+]
+
+
+######################################################
+# Special, migrations-tracking table
+#
+# Not listed in MODELS because this is special and not
+# really migrated, but used for migrations (for now)
+######################################################
+
+class MigrationData(Base):
+    """Migration tracking: one row per name with its version number
+    ("core__migrations" table)."""
+    __tablename__ = "core__migrations"
+
+    name = Column(Unicode, primary_key=True)
+    version = Column(Integer, nullable=False, default=0)
+
+######################################################
+
+
+def show_table_init(engine_uri):
+    """Create all tables known to Base on the given database, with
+    statement echoing switched on.
+
+    An engine_uri of None means an in-memory sqlite database.
+    """
+    from sqlalchemy import create_engine
+
+    uri = 'sqlite:///:memory:' if engine_uri is None else engine_uri
+    Base.metadata.create_all(create_engine(uri, echo=True))
+
+
+if __name__ == '__main__':
+    # Run as a script: show the table creation, optionally on the
+    # database given as the one and only command line argument.
+    from sys import argv
+    print repr(argv)
+    uri = argv[1] if len(argv) == 2 else None
+    show_table_init(uri)
diff --git a/mediagoblin/db/models_v0.py b/mediagoblin/db/models_v0.py
new file mode 100644
index 00000000..bdedec2e
--- /dev/null
+++ b/mediagoblin/db/models_v0.py
@@ -0,0 +1,342 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+TODO: add indexes on foreign keys, where useful.
+"""
+
+###########################################################################
+# WHAT IS THIS FILE?
+# ------------------
+#
+# Upon occasion, someone runs into this file and wonders why we have
+# both a models.py and a models_v0.py.
+#
+# The short of it is: you can ignore this file.
+#
+# The long version is, in two parts:
+#
+#  - We used to use MongoDB, then we switched to SQL and SQLAlchemy.
+#    We needed to convert people's databases; the script we had would
+#    switch them to the first version right after Mongo, convert over
+#    all their tables, then run any migrations that were added after.
+#
+#  - That script is now removed, but there is some discussion of
+#    writing a test that would set us at the first SQL migration and
+#    run everything after.  If we wrote that, this file would still be
+#    useful.  But for now, it's legacy!
+#
+###########################################################################
+
+
+import datetime
+import sys
+
+from sqlalchemy import (
+    Column, Integer, Unicode, UnicodeText, DateTime, Boolean, ForeignKey,
+    UniqueConstraint, PrimaryKeyConstraint, SmallInteger, Float)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, backref
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy.util import memoized_property
+
+from mediagoblin.db.extratypes import PathTupleWithSlashes, JSONEncoded
+from mediagoblin.db.base import GMGTableBase, Session
+
+
+Base_v0 = declarative_base(cls=GMGTableBase)
+
+
+class User(Base_v0):
+    """
+    Legacy (v0) definition of the ``core__users`` table.
+
+    TODO: We should consider moving some rarely used fields
+    into some sort of "shadow" table.
+    """
+    __tablename__ = "core__users"
+
+    id = Column(Integer, primary_key=True)
+    # Required and unique across all users.
+    username = Column(Unicode, nullable=False, unique=True)
+    email = Column(Unicode, nullable=False)
+    # Filled with the current local time when no value is supplied.
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+    pw_hash = Column(Unicode, nullable=False)
+    email_verified = Column(Boolean, default=False)
+    # New rows start out as u"needs_email_verification".
+    status = Column(Unicode, default=u"needs_email_verification", nullable=False)
+    verification_key = Column(Unicode)
+    is_admin = Column(Boolean, default=False, nullable=False)
+    url = Column(Unicode)
+    bio = Column(UnicodeText)  # ??
+    # NOTE(review): the "fp_" prefix presumably stands for "forgot
+    # password" -- confirm against the code that uses these columns.
+    fp_verification_key = Column(Unicode)
+    fp_token_expire = Column(DateTime)
+
+    ## TODO
+    # plugin data would be in a separate model
+
+class MediaEntry(Base_v0):
+    """
+    Legacy (v0) definition of the ``core__media_entries`` table.
+
+    TODO: Consider fetching the media_files using join
+    """
+    __tablename__ = "core__media_entries"
+
+    id = Column(Integer, primary_key=True)
+    # Required, indexed reference to the User who uploaded the entry.
+    uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True)
+    title = Column(Unicode, nullable=False)
+    slug = Column(Unicode)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now,
+        index=True)
+    description = Column(UnicodeText) # ??
+    # Also used by media_data_table as the package name to import
+    # ``<media_type>.models`` from.
+    media_type = Column(Unicode, nullable=False)
+    state = Column(Unicode, default=u'unprocessed', nullable=False)
+        # or use sqlalchemy.types.Enum?
+    license = Column(Unicode)
+
+    fail_error = Column(Unicode)
+    fail_metadata = Column(JSONEncoded)
+
+    queued_media_file = Column(PathTupleWithSlashes)
+
+    queued_task_id = Column(Unicode)
+
+    # A slug may be used only once per uploader.
+    __table_args__ = (
+        UniqueConstraint('uploader', 'slug'),
+        {})
+
+    get_uploader = relationship(User)
+
+    # MediaFile rows collected in a mapping keyed by each file's ``name``.
+    media_files_helper = relationship("MediaFile",
+        collection_class=attribute_mapped_collection("name"),
+        cascade="all, delete-orphan"
+        )
+
+    # MediaAttachmentFile rows, ordered by their ``created`` column.
+    attachment_files_helper = relationship("MediaAttachmentFile",
+        cascade="all, delete-orphan",
+        order_by="MediaAttachmentFile.created"
+        )
+
+    tags_helper = relationship("MediaTag",
+        cascade="all, delete-orphan"
+        )
+
+    def media_data_init(self, **kwargs):
+        """
+        Initialize or update the contents of a media entry's media_data row
+
+        The keyword arguments are the field values to store.  When no row
+        exists for this entry a new one is added to the session; otherwise
+        the fields are set on the existing row.  Nothing is flushed or
+        committed here.
+        """
+        session = Session()
+
+        # Look the row up by this entry's id.
+        media_data = session.query(self.media_data_table).filter_by(
+            media_entry=self.id).first()
+
+        # No media data, so actually add a new one
+        if media_data is None:
+            media_data = self.media_data_table(
+                media_entry=self.id,
+                **kwargs)
+            session.add(media_data)
+        # Update old media data
+        else:
+            for field, value in kwargs.iteritems():
+                setattr(media_data, field, value)
+
+    @memoized_property
+    def media_data_table(self):
+        # Imports ``<media_type>.models`` and returns that module's
+        # DATA_MODEL.  memoized_property evaluates this only once per
+        # instance, so the import lookup is not repeated on later accesses.
+        models_module = self.media_type + '.models'
+        __import__(models_module)
+        return sys.modules[models_module].DATA_MODEL
+
+
+class FileKeynames(Base_v0):
+    """
+    Lookup table of key names used in various places; at the moment
+    these are the MediaFile keys.
+    """
+    __tablename__ = "core__file_keynames"
+    id = Column(Integer, primary_key=True)
+    name = Column(Unicode, unique=True)
+
+    def __repr__(self):
+        details = (self.id, self.name)
+        return "<FileKeyname %r: %r>" % details
+
+    @classmethod
+    def find_or_new(cls, name):
+        """
+        Return the first stored row called ``name``; when there is none,
+        return a new instance (this method does not add it to a session).
+        """
+        existing = cls.query.filter_by(name=name).first()
+        return cls(name=name) if existing is None else existing
+
+
+class MediaFile(Base_v0):
+    """
+    One file belonging to a media entry, identified by the pair
+    (media_entry, name_id).
+
+    TODO: Highly consider moving "name" into a new table.
+    TODO: Consider preloading said table in software
+    """
+    __tablename__ = "core__mediafiles"
+
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id),
+        nullable=False)
+    # Reference to the FileKeynames row holding this file's key name.
+    name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False)
+    file_path = Column(PathTupleWithSlashes)
+
+    # Composite primary key instead of a surrogate ``id`` column.
+    __table_args__ = (
+        PrimaryKeyConstraint('media_entry', 'name_id'),
+        {})
+
+    def __repr__(self):
+        return "<MediaFile %s: %r>" % (self.name, self.file_path)
+
+    # The key name row is loaded along with the file via an inner join.
+    name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True)
+    # ``name`` proxies FileKeynames.name through ``name_helper``; a plain
+    # value assigned to it is turned into a row by FileKeynames.find_or_new.
+    name = association_proxy('name_helper', 'name',
+        creator=FileKeynames.find_or_new
+        )
+
+
+class MediaAttachmentFile(Base_v0):
+    """
+    A named attachment file belonging to a media entry, stored in
+    ``core__attachment_files``.
+    """
+    __tablename__ = "core__attachment_files"
+
+    id = Column(Integer, primary_key=True)
+    # Required reference to the owning MediaEntry row.
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id),
+        nullable=False)
+    name = Column(Unicode, nullable=False)
+    filepath = Column(PathTupleWithSlashes)
+    # Filled with the current local time when no value is supplied.
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
+
+class Tag(Base_v0):
+    """A tag, identified by its unique ``slug``."""
+    __tablename__ = "core__tags"
+
+    id = Column(Integer, primary_key=True)
+    slug = Column(Unicode, nullable=False, unique=True)
+
+    def __repr__(self):
+        details = (self.id, self.slug)
+        return "<Tag %r: %r>" % details
+
+    @classmethod
+    def find_or_new(cls, slug):
+        """
+        Return the first stored tag with this ``slug``; when there is
+        none, return a new instance (this method does not add it to a
+        session).
+        """
+        existing = cls.query.filter_by(slug=slug).first()
+        return cls(slug=slug) if existing is None else existing
+
+
+class MediaTag(Base_v0):
+    """
+    Association between a media entry and a Tag, with an additional
+    ``name`` column stored on the association row itself.
+    """
+    __tablename__ = "core__media_tags"
+
+    id = Column(Integer, primary_key=True)
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id),
+        nullable=False, index=True)
+    tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True)
+    name = Column(Unicode)
+    # created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
+    # A given tag can be attached to a given media entry only once.
+    __table_args__ = (
+        UniqueConstraint('tag', 'media_entry'),
+        {})
+
+    tag_helper = relationship(Tag)
+    # ``slug`` proxies Tag.slug through ``tag_helper``; a plain value
+    # assigned to it is turned into a Tag by Tag.find_or_new.
+    slug = association_proxy('tag_helper', 'slug',
+        creator=Tag.find_or_new
+        )
+
+    def __init__(self, name=None, slug=None):
+        """
+        :param name: value for the ``name`` column; left unset when None.
+        :param slug: when not None, this row is linked to the Tag
+            returned by ``Tag.find_or_new(slug)``.
+        """
+        Base_v0.__init__(self)
+        if name is not None:
+            self.name = name
+        if slug is not None:
+            self.tag_helper = Tag.find_or_new(slug)
+
+
+class MediaComment(Base_v0):
+    """
+    A comment written by a user on a media entry, stored in
+    ``core__media_comments``.
+    """
+    __tablename__ = "core__media_comments"
+
+    id = Column(Integer, primary_key=True)
+    # Required, indexed reference to the commented MediaEntry row.
+    media_entry = Column(
+        Integer, ForeignKey(MediaEntry.id), nullable=False, index=True)
+    # Required reference to the User who wrote the comment.
+    author = Column(Integer, ForeignKey(User.id), nullable=False)
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+    content = Column(UnicodeText, nullable=False)
+
+    # The User object behind the ``author`` foreign key.
+    get_author = relationship(User)
+
+
+class ImageData(Base_v0):
+    """
+    Image-specific data for a media entry; at most one row per entry,
+    because ``media_entry`` is the primary key.
+    """
+    __tablename__ = "image__mediadata"
+
+    # The primary key *and* reference to the main media_entry
+    media_entry = Column(Integer, ForeignKey('core__media_entries.id'),
+        primary_key=True)
+    # The backref adds an ``image__media_data`` attribute to MediaEntry.
+    get_media_entry = relationship("MediaEntry",
+        backref=backref("image__media_data", cascade="all, delete-orphan"))
+
+    width = Column(Integer)
+    height = Column(Integer)
+    exif_all = Column(JSONEncoded)
+    # NOTE(review): units and reference system of the gps_* values are not
+    # visible here -- confirm against the code that fills them.
+    gps_longitude = Column(Float)
+    gps_latitude = Column(Float)
+    gps_altitude = Column(Float)
+    gps_direction = Column(Float)
+
+
+class VideoData(Base_v0):
+    """
+    Video-specific data for a media entry; at most one row per entry,
+    because ``media_entry`` is the primary key.
+    """
+    __tablename__ = "video__mediadata"
+
+    # The primary key *and* reference to the main media_entry
+    media_entry = Column(Integer, ForeignKey('core__media_entries.id'),
+        primary_key=True)
+    # The backref adds a ``video__media_data`` attribute to MediaEntry.
+    get_media_entry = relationship("MediaEntry",
+        backref=backref("video__media_data", cascade="all, delete-orphan"))
+
+    # Stored as SmallInteger, unlike the Integer columns of ImageData.
+    width = Column(SmallInteger)
+    height = Column(SmallInteger)
+
+
+class AsciiData(Base_v0):
+    """
+    Per-entry data row for the ``ascii__mediadata`` table; it has no
+    columns besides the reference to its media entry.
+    """
+    __tablename__ = "ascii__mediadata"
+
+    # The primary key *and* reference to the main media_entry
+    media_entry = Column(Integer, ForeignKey('core__media_entries.id'),
+        primary_key=True)
+    # The backref adds an ``ascii__media_data`` attribute to MediaEntry.
+    get_media_entry = relationship("MediaEntry",
+        backref=backref("ascii__media_data", cascade="all, delete-orphan"))
+
+
+class AudioData(Base_v0):
+    """
+    Per-entry data row for the ``audio__mediadata`` table; it has no
+    columns besides the reference to its media entry.
+    """
+    __tablename__ = "audio__mediadata"
+
+    # The primary key *and* reference to the main media_entry
+    media_entry = Column(Integer, ForeignKey('core__media_entries.id'),
+        primary_key=True)
+    # The backref adds an ``audio__media_data`` attribute to MediaEntry.
+    get_media_entry = relationship("MediaEntry",
+        backref=backref("audio__media_data", cascade="all, delete-orphan"))
+
+
+######################################################
+# Special, migrations-tracking table
+#
+# Not listed in MODELS because this is special and not
+# really migrated, but used for migrations (for now)
+######################################################
+
+class MigrationData(Base_v0):
+    """
+    Legacy (v0) definition of the migrations bookkeeping table: maps a
+    ``name`` to an integer ``version``.
+    """
+    __tablename__ = "core__migrations"
+
+    # NOTE(review): presumably the name of the migration set being tracked
+    # -- confirm against the migration tooling.
+    name = Column(Unicode, primary_key=True)
+    # Required; rows created without an explicit value start at 0.
+    version = Column(Integer, nullable=False, default=0)
+
+######################################################
diff --git a/mediagoblin/db/open.py b/mediagoblin/db/open.py
new file mode 100644
index 00000000..0b1679fb
--- /dev/null
+++ b/mediagoblin/db/open.py
@@ -0,0 +1,101 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+from sqlalchemy import create_engine, event
+import logging
+
+from mediagoblin.db.base import Base, Session
+from mediagoblin import mg_globals
+
+_log = logging.getLogger(__name__)
+
+
+class DatabaseMaster(object):
+    """
+    Thin facade over the SQLAlchemy engine and the shared ``Session``.
+
+    Every entry of the declarative class registry of ``Base`` is also set
+    as an attribute of the instance, so a model can be reached as
+    ``db.<registry name>``.
+    """
+    def __init__(self, engine):
+        self.engine = engine
+
+        # Mirror the declarative class registry onto this object.
+        for k, v in Base._decl_class_registry.iteritems():
+            setattr(self, k, v)
+
+    def commit(self):
+        """Commit the shared session."""
+        Session.commit()
+
+    def save(self, obj):
+        """Add ``obj`` to the shared session and flush; no commit."""
+        Session.add(obj)
+        Session.flush()
+
+    def check_session_clean(self):
+        """Log a warning when iterating the session yields any element."""
+        # A single element is enough to know the session is not empty,
+        # hence the break after the first warning.
+        for dummy in Session():
+            _log.warning("STRANGE: There are elements in the sql session. "
+                         "Please report this and help us track this down.")
+            break
+
+    def reset_after_request(self):
+        """Roll the shared session back, then call ``Session.remove()``."""
+        Session.rollback()
+        Session.remove()
+
+
+def load_models(app_config):
+    """
+    Import the model modules of the core, the media types and the plugins.
+
+    Imports ``mediagoblin.db.models``, then ``<media_type>.models`` for
+    every entry of ``app_config['media_types']`` and ``<plugin>.models``
+    for every key of the ``plugins`` section of the global config.
+
+    A plugin whose models module raises ImportError is skipped with a
+    debug message; an ImportError from a media type propagates.
+    """
+    # Imported for its side effects only; the name is not used below.
+    import mediagoblin.db.models
+
+    for media_type in app_config['media_types']:
+        _log.debug("Loading %s.models", media_type)
+        __import__(media_type + ".models")
+
+    for plugin in mg_globals.global_config.get('plugins', {}).keys():
+        _log.debug("Loading %s.models", plugin)
+        try:
+            __import__(plugin + ".models")
+        except ImportError as exc:
+            # Best effort: the failure is only logged.  Note that this
+            # also hides an ImportError raised inside an existing models
+            # module.
+            _log.debug("Could not load %s.models: %s", plugin, exc)
+
+
+def _sqlite_fk_pragma_on_connect(dbapi_con, con_record):
+    """
+    Turn foreign key checking on for a new sqlite connection.
+
+    Meant to be registered as a 'connect' listener; ``con_record`` is
+    accepted for that signature and is not used.
+    """
+    pragma = 'pragma foreign_keys=on'
+    dbapi_con.execute(pragma)
+
+
+def _sqlite_disable_fk_pragma_on_connect(dbapi_con, con_record):
+    """
+    Turn foreign key checking off for a new sqlite connection
+    (Good for migrations!)
+
+    Meant to be registered as a 'connect' listener; ``con_record`` is
+    accepted for that signature and is not used.
+    """
+    pragma = 'pragma foreign_keys=off'
+    dbapi_con.execute(pragma)
+
+
+def setup_connection_and_db_from_config(app_config, migrations=False):
+    """
+    Create the engine named by ``app_config['sql_engine']``, bind the
+    shared ``Session`` to it and return a ``DatabaseMaster`` around it.
+
+    For an engine URI starting with ``sqlite://`` a 'connect' listener is
+    registered that sets the foreign key pragma on each new connection:
+    off when ``migrations`` is true, on otherwise.
+    """
+    engine_uri = app_config['sql_engine']
+    engine = create_engine(engine_uri)
+
+    if engine_uri.startswith('sqlite://'):
+        if migrations:
+            fk_listener = _sqlite_disable_fk_pragma_on_connect
+        else:
+            fk_listener = _sqlite_fk_pragma_on_connect
+        event.listen(engine, 'connect', fk_listener)
+
+    # logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
+
+    Session.configure(bind=engine)
+
+    return DatabaseMaster(engine)
+
+
+def check_db_migrations_current(db):
+    """Placeholder: does nothing with ``db`` and returns None."""
+    pass
diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py
new file mode 100644
index 00000000..6ffec44d
--- /dev/null
+++ b/mediagoblin/db/util.py
@@ -0,0 +1,76 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from mediagoblin.db.base import Session
+from mediagoblin.db.models import MediaEntry, Tag, MediaTag, Collection
+
+
+##########################
+# Random utility functions
+##########################
+
+
+def atomic_update(table, query_dict, update_values):
+    """
+    Apply ``update_values`` to whatever ``table.find(query_dict)``
+    selects, then commit the session.
+
+    The update is issued with ``synchronize_session=False``.
+    NOTE(review): ``table.find`` is defined outside this module and is
+    presumably expected to return a query object -- confirm.
+    """
+    table.find(query_dict).update(update_values,
+        synchronize_session=False)
+    Session.commit()
+
+
+def check_media_slug_used(uploader_id, slug, ignore_m_id):
+    """
+    Tell whether ``uploader_id`` already has a media entry with ``slug``.
+
+    When ``ignore_m_id`` is not None, the entry with that id is left out
+    of the check.  Returns True or False.
+    """
+    candidates = MediaEntry.query.filter_by(uploader=uploader_id, slug=slug)
+    if ignore_m_id is not None:
+        candidates = candidates.filter(MediaEntry.id != ignore_m_id)
+    return candidates.first() is not None
+
+
+def media_entries_for_tag_slug(dummy_db, tag_slug):
+    """
+    Build the query for media entries in state u'processed' that carry
+    a tag whose slug is ``tag_slug``.
+
+    ``dummy_db`` is accepted but not used.  The query is returned
+    without being executed.
+    """
+    is_processed = (MediaEntry.state == u'processed')
+    has_slug = (Tag.slug == tag_slug)
+    tagged = MediaEntry.query.join(MediaEntry.tags_helper)
+    tagged = tagged.join(MediaTag.tag_helper)
+    return tagged.filter(is_processed & has_slug)
+
+
+def clean_orphan_tags(commit=True):
+    """
+    Delete every Tag that no MediaTag refers to.
+
+    :param commit: when true (the default) the session is committed after
+        the deletions; otherwise committing is left to the caller.
+    """
+    # The outer join leaves the MediaTag side empty for tags that are not
+    # used by any media entry.  ``== None`` is intentional: it builds the
+    # SQL "IS NULL" test, which a Python ``is None`` would not.
+    q1 = Session.query(Tag).outerjoin(MediaTag).filter(MediaTag.id==None)
+    for t in q1:
+        Session.delete(t)
+    # The "let the db do all the work" version:
+    # q1 = Session.query(Tag.id).outerjoin(MediaTag).filter(MediaTag.id==None)
+    # q2 = Session.query(Tag).filter(Tag.id.in_(q1))
+    # q2.delete(synchronize_session = False)
+    if commit:
+        Session.commit()
+
+
+def check_collection_slug_used(creator_id, slug, ignore_c_id):
+    """
+    Tell whether ``creator_id`` already has a collection with ``slug``.
+
+    When ``ignore_c_id`` is not None, the collection with that id is left
+    out of the check.  Returns True or False.
+    """
+    criteria = (Collection.creator == creator_id) & (Collection.slug == slug)
+    if ignore_c_id is not None:
+        criteria = criteria & (Collection.id != ignore_c_id)
+    match = Session.query(Collection.id).filter(criteria).first()
+    return match is not None
+
+
+if __name__ == '__main__':
+    # Maintenance entry point: connect to the sqlite database named by
+    # the URI below and remove the orphaned tags from it.
+    from mediagoblin.db.open import setup_connection_and_db_from_config
+
+    config = {'sql_engine': 'sqlite:///mediagoblin.db'}
+    db = setup_connection_and_db_from_config(config)
+
+    clean_orphan_tags()