Diffstat (limited to 'mediagoblin/db')
-rw-r--r-- | mediagoblin/db/mixin.py            |  52
-rw-r--r-- | mediagoblin/db/mongo/migrations.py |  83
-rw-r--r-- | mediagoblin/db/mongo/models.py     |  43
-rw-r--r-- | mediagoblin/db/mongo/open.py       |   4
-rw-r--r-- | mediagoblin/db/mongo/util.py       |  26
-rw-r--r-- | mediagoblin/db/open.py             |   6
-rw-r--r-- | mediagoblin/db/sql/base.py         |  10
-rw-r--r-- | mediagoblin/db/sql/convert.py      |  90
-rw-r--r-- | mediagoblin/db/sql/extratypes.py   |  28
-rw-r--r-- | mediagoblin/db/sql/migrations.py   |  17
-rw-r--r-- | mediagoblin/db/sql/models.py       | 201
-rw-r--r-- | mediagoblin/db/sql/open.py         |  19
-rw-r--r-- | mediagoblin/db/sql/util.py         | 324
-rw-r--r-- | mediagoblin/db/util.py             |   6
14 files changed, 836 insertions, 73 deletions
diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index beaff9b0..a5aded02 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -27,8 +27,11 @@ These functions now live here and get "mixed in" into the real objects. """ +from mediagoblin import mg_globals from mediagoblin.auth import lib as auth_lib from mediagoblin.tools import common, licenses +from mediagoblin.tools.text import cleaned_markdown_conversion +from mediagoblin.tools.url import slugify class UserMixin(object): @@ -39,8 +42,36 @@ class UserMixin(object): return auth_lib.bcrypt_check_password( password, self.pw_hash) + @property + def bio_html(self): + return cleaned_markdown_conversion(self.bio) + class MediaEntryMixin(object): + def generate_slug(self): + # import this here due to a cyclic import issue + # (db.models -> db.mixin -> db.util -> db.models) + from mediagoblin.db.util import check_media_slug_used + + self.slug = slugify(self.title) + + duplicate = check_media_slug_used(mg_globals.database, + self.uploader, self.slug, self.id) + + if duplicate: + if self.id is not None: + self.slug = "%s-%s" % (self.id, self.slug) + else: + self.slug = None + + @property + def description_html(self): + """ + Rendered version of the description, run through + Markdown and cleaned with our cleaning tool. + """ + return cleaned_markdown_conversion(self.description) + def get_display_media(self, media_map, fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER): """ @@ -91,3 +122,24 @@ class MediaEntryMixin(object): def get_license_data(self): """Return license dict for requested license""" return licenses.SUPPORTED_LICENSES[self.license or ""] + + def exif_display_iter(self): + from mediagoblin.tools.exif import USEFUL_TAGS + + if not self.media_data: + return + exif_all = self.media_data.get("exif_all") + + for key in USEFUL_TAGS: + if key in exif_all: + yield key, exif_all[key] + + +class MediaCommentMixin(object): + @property + def content_html(self): + """ + the actual html-rendered version of the comment displayed. + Run through Markdown and the HTML cleaner. + """ + return cleaned_markdown_conversion(self.content) diff --git a/mediagoblin/db/mongo/migrations.py b/mediagoblin/db/mongo/migrations.py index 261e21a5..732f5846 100644 --- a/mediagoblin/db/mongo/migrations.py +++ b/mediagoblin/db/mongo/migrations.py @@ -29,6 +29,16 @@ def add_table_field(db, table_name, field_name, default_value): multi=True) +def drop_table_field(db, table_name, field_name): + """ + Drop an old field from a table/collection + """ + db[table_name].update( + {field_name: {'$exists': True}}, + {'$unset': {field_name: 1}}, + multi=True) + + # Please see mediagoblin/tests/test_migrations.py for some examples of # basic migrations. @@ -109,9 +119,82 @@ def media_type_image_to_multimedia_type_image(database): {'$set': {'media_type': 'mediagoblin.media_types.image'}}, multi=True) + @RegisterMigration(8) def mediaentry_add_license(database): """ Add the 'license' field for entries that don't have it. 
""" add_table_field(database, 'media_entries', 'license', None) + + +@RegisterMigration(9) +def remove_calculated_html(database): + """ + Drop pre-rendered html again and calculate things + on the fly (and cache): + - User.bio_html + - MediaEntry.description_html + - MediaComment.content_html + """ + drop_table_field(database, 'users', 'bio_html') + drop_table_field(database, 'media_entries', 'description_html') + drop_table_field(database, 'media_comments', 'content_html') + + +@RegisterMigration(10) +def convert_video_media_data(database): + """ + Move media_data["video"] directly into media_data + """ + collection = database['media_entries'] + target = collection.find( + {'media_data.video': {'$exists': True}}) + + for document in target: + assert len(document['media_data']) == 1 + document['media_data'] = document['media_data']['video'] + collection.save(document) + + +@RegisterMigration(11) +def convert_gps_media_data(database): + """ + Move media_data["gps"]["*"] to media_data["gps_*"]. + In preparation for media_data.gps_* + """ + collection = database['media_entries'] + target = collection.find( + {'media_data.gps': {'$exists': True}}) + + for document in target: + for key, value in document['media_data']['gps'].iteritems(): + document['media_data']['gps_' + key] = value + del document['media_data']['gps'] + collection.save(document) + + +@RegisterMigration(12) +def convert_exif_media_data(database): + """ + Move media_data["exif"]["clean"] to media_data["exif_all"]. + Drop media_data["exif"]["useful"] + In preparation for media_data.exif_all + """ + collection = database['media_entries'] + target = collection.find( + {'media_data.exif.clean': {'$exists': True}}) + + for document in target: + media_data = document['media_data'] + + exif_all = media_data['exif'].pop('clean') + if len(exif_all): + media_data['exif_all'] = exif_all + + del media_data['exif']['useful'] + + assert len(media_data['exif']) == 0 + del media_data['exif'] + + collection.save(document) diff --git a/mediagoblin/db/mongo/models.py b/mediagoblin/db/mongo/models.py index 541086bc..2e35a2b8 100644 --- a/mediagoblin/db/mongo/models.py +++ b/mediagoblin/db/mongo/models.py @@ -18,12 +18,21 @@ import datetime from mongokit import Document -from mediagoblin import mg_globals from mediagoblin.db.mongo import migrations from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId from mediagoblin.tools.pagination import Pagination -from mediagoblin.tools import url -from mediagoblin.db.mixin import UserMixin, MediaEntryMixin +from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, MediaCommentMixin + + +class MongoPK(object): + """An alias for the _id primary key""" + def __get__(self, instance, cls): + return instance['_id'] + def __set__(self, instance, val): + instance['_id'] = val + def __delete__(self, instance): + del instance['_id'] + ################### # Custom validators @@ -59,7 +68,6 @@ class User(Document, UserMixin): - is_admin: Whether or not this user is an administrator or not. - url: this user's personal webpage/website, if appropriate. - bio: biography of this user (plaintext, in markdown) - - bio_html: biography of the user converted to proper HTML. 
""" __collection__ = 'users' use_dot_notation = True @@ -76,7 +84,6 @@ class User(Document, UserMixin): 'is_admin': bool, 'url': unicode, 'bio': unicode, # May contain markdown - 'bio_html': unicode, # May contain plaintext, or HTML 'fp_verification_key': unicode, # forgotten password verification key 'fp_token_expire': datetime.datetime, } @@ -89,6 +96,8 @@ class User(Document, UserMixin): 'status': u'needs_email_verification', 'is_admin': False} + id = MongoPK() + class MediaEntry(Document, MediaEntryMixin): """ @@ -112,9 +121,6 @@ class MediaEntry(Document, MediaEntryMixin): up with MarkDown for slight fanciness (links, boldness, italics, paragraphs...) - - description_html: Rendered version of the description, run through - Markdown and cleaned with our cleaning tool. - - media_type: What type of media is this? Currently we only support 'image' ;) @@ -179,7 +185,6 @@ class MediaEntry(Document, MediaEntryMixin): 'slug': unicode, 'created': datetime.datetime, 'description': unicode, # May contain markdown/up - 'description_html': unicode, # May contain plaintext, or HTML 'media_type': unicode, 'media_data': dict, # extra data relevant to this media_type 'plugin_data': dict, # plugins can dump stuff here. @@ -211,6 +216,11 @@ class MediaEntry(Document, MediaEntryMixin): 'created': datetime.datetime.utcnow, 'state': u'unprocessed'} + id = MongoPK() + + def media_data_init(self, **kwargs): + self.media_data.update(kwargs) + def get_comments(self, ascending=False): if ascending: order = ASCENDING @@ -220,15 +230,6 @@ class MediaEntry(Document, MediaEntryMixin): return self.db.MediaComment.find({ 'media_entry': self._id}).sort('created', order) - def generate_slug(self): - self.slug = url.slugify(self.title) - - duplicate = mg_globals.database.media_entries.find_one( - {'slug': self.slug}) - - if duplicate: - self.slug = "%s-%s" % (self._id, self.slug) - def url_to_prev(self, urlgen): """ Provide a url to the previous entry from this user, if there is one @@ -257,7 +258,7 @@ class MediaEntry(Document, MediaEntryMixin): return self.db.User.find_one({'_id': self.uploader}) -class MediaComment(Document): +class MediaComment(Document, MediaCommentMixin): """ A comment on a MediaEntry. @@ -266,8 +267,6 @@ class MediaComment(Document): - author: user who posted this comment - created: when the comment was created - content: plaintext (but markdown'able) version of the comment's content. - - content_html: the actual html-rendered version of the comment displayed. - Run through Markdown and the HTML cleaner. 
""" __collection__ = 'media_comments' @@ -278,7 +277,7 @@ class MediaComment(Document): 'author': ObjectId, 'created': datetime.datetime, 'content': unicode, - 'content_html': unicode} + } required_fields = [ 'media_entry', 'author', 'created', 'content'] diff --git a/mediagoblin/db/mongo/open.py b/mediagoblin/db/mongo/open.py index bedc497b..c4f37b42 100644 --- a/mediagoblin/db/mongo/open.py +++ b/mediagoblin/db/mongo/open.py @@ -21,6 +21,10 @@ from mediagoblin.db.mongo import models from mediagoblin.db.mongo.util import MigrationManager +def load_models(app_config): + pass + + def connect_database_from_config(app_config, use_pymongo=False): """ Connect to the main database, take config from app_config diff --git a/mediagoblin/db/mongo/util.py b/mediagoblin/db/mongo/util.py index 4daf616a..f61ae6be 100644 --- a/mediagoblin/db/mongo/util.py +++ b/mediagoblin/db/mongo/util.py @@ -290,3 +290,29 @@ class MigrationManager(object): self.set_current_migration(migration_number) if post_callback: post_callback(migration_number, migration_func) + + +########################## +# Random utility functions +########################## + + +def atomic_update(table, query_dict, update_values): + table.collection.update( + query_dict, + {"$set": update_values}) + + +def check_media_slug_used(db, uploader_id, slug, ignore_m_id): + query_dict = {'uploader': uploader_id, 'slug': slug} + if ignore_m_id is not None: + query_dict['_id'] = {'$ne': ignore_m_id} + existing_user_slug_entries = db.MediaEntry.find( + query_dict).count() + return existing_user_slug_entries + + +def media_entries_for_tag_slug(db, tag_slug): + return db.MediaEntry.find( + {u'state': u'processed', + u'tags.slug': tag_slug}) diff --git a/mediagoblin/db/open.py b/mediagoblin/db/open.py index 0163469f..f4c38511 100644 --- a/mediagoblin/db/open.py +++ b/mediagoblin/db/open.py @@ -21,7 +21,9 @@ except ImportError: if use_sql: from mediagoblin.db.sql.open import \ - setup_connection_and_db_from_config, check_db_migrations_current + setup_connection_and_db_from_config, check_db_migrations_current, \ + load_models else: from mediagoblin.db.mongo.open import \ - setup_connection_and_db_from_config, check_db_migrations_current + setup_connection_and_db_from_config, check_db_migrations_current, \ + load_models diff --git a/mediagoblin/db/sql/base.py b/mediagoblin/db/sql/base.py index 6ed24a03..838080b0 100644 --- a/mediagoblin/db/sql/base.py +++ b/mediagoblin/db/sql/base.py @@ -67,6 +67,10 @@ class GMGTableBase(object): def get(self, key): return getattr(self, key) + def setdefault(self, key, defaultvalue): + # The key *has* to exist on sql. + return getattr(self, key) + def save(self, validate=True): assert validate sess = object_session(self) @@ -75,6 +79,12 @@ class GMGTableBase(object): sess.add(self) sess.commit() + def delete(self): + sess = object_session(self) + assert sess is not None, "Not going to delete detached %r" % self + sess.delete(self) + sess.commit() + Base = declarative_base(cls=GMGTableBase) diff --git a/mediagoblin/db/sql/convert.py b/mediagoblin/db/sql/convert.py index f6575be9..ebf3037c 100644 --- a/mediagoblin/db/sql/convert.py +++ b/mediagoblin/db/sql/convert.py @@ -14,17 +14,20 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+from copy import copy from mediagoblin.init import setup_global_and_app_config, setup_database from mediagoblin.db.mongo.util import ObjectId -from mediagoblin.db.sql.models import (Base, User, MediaEntry, MediaComment, - Tag, MediaTag, MediaFile) +from mediagoblin.db.sql.base import Base, Session +from mediagoblin.db.sql.models import (User, MediaEntry, MediaComment, + Tag, MediaTag, MediaFile, MediaAttachmentFile, MigrationData) +from mediagoblin.media_types.image.models import ImageData +from mediagoblin.media_types.video.models import VideoData from mediagoblin.db.sql.open import setup_connection_and_db_from_config as \ sql_connect from mediagoblin.db.mongo.open import setup_connection_and_db_from_config as \ mongo_connect -from mediagoblin.db.sql.base import Session obj_id_table = dict() @@ -49,14 +52,14 @@ def copy_reference_attr(entry, new_entry, ref_attr): def convert_users(mk_db): session = Session() - for entry in mk_db.User.find(): + for entry in mk_db.User.find().sort('created'): print entry.username new_entry = User() copy_attrs(entry, new_entry, ('username', 'email', 'created', 'pw_hash', 'email_verified', 'status', 'verification_key', 'is_admin', 'url', - 'bio', 'bio_html', + 'bio', 'fp_verification_key', 'fp_token_expire',)) # new_entry.fp_verification_expire = entry.fp_token_expire @@ -71,15 +74,15 @@ def convert_users(mk_db): def convert_media_entries(mk_db): session = Session() - for entry in mk_db.MediaEntry.find(): + for entry in mk_db.MediaEntry.find().sort('created'): print repr(entry.title) new_entry = MediaEntry() copy_attrs(entry, new_entry, ('title', 'slug', 'created', - 'description', 'description_html', + 'description', 'media_type', 'state', 'license', - 'fail_error', + 'fail_error', 'fail_metadata', 'queued_task_id',)) copy_reference_attr(entry, new_entry, "uploader") @@ -92,6 +95,44 @@ def convert_media_entries(mk_db): new_file.media_entry = new_entry.id Session.add(new_file) + for attachment in entry.attachment_files: + new_attach = MediaAttachmentFile( + name=attachment["name"], + filepath=attachment["filepath"], + created=attachment["created"] + ) + new_attach.media_entry = new_entry.id + Session.add(new_attach) + + session.commit() + session.close() + + +def convert_image(mk_db): + session = Session() + + for media in mk_db.MediaEntry.find( + {'media_type': 'mediagoblin.media_types.image'}).sort('created'): + media_data = copy(media.media_data) + + if len(media_data): + media_data_row = ImageData(**media_data) + media_data_row.media_entry = obj_id_table[media['_id']] + session.add(media_data_row) + + session.commit() + session.close() + + +def convert_video(mk_db): + session = Session() + + for media in mk_db.MediaEntry.find( + {'media_type': 'mediagoblin.media_types.video'}).sort('created'): + media_data_row = VideoData(**media.media_data) + media_data_row.media_entry = obj_id_table[media['_id']] + session.add(media_data_row) + session.commit() session.close() @@ -100,7 +141,7 @@ def convert_media_tags(mk_db): session = Session() session.autoflush = False - for media in mk_db.MediaEntry.find(): + for media in mk_db.MediaEntry.find().sort('created'): print repr(media.title) for otag in media.tags: @@ -127,13 +168,13 @@ def convert_media_tags(mk_db): def convert_media_comments(mk_db): session = Session() - for entry in mk_db.MediaComment.find(): + for entry in mk_db.MediaComment.find().sort('created'): print repr(entry.content) new_entry = MediaComment() copy_attrs(entry, new_entry, ('created', - 'content', 'content_html',)) + 'content',)) 
copy_reference_attr(entry, new_entry, "media_entry") copy_reference_attr(entry, new_entry, "author") @@ -145,11 +186,24 @@ def convert_media_comments(mk_db): session.close() -def main(): - global_config, app_config = setup_global_and_app_config("mediagoblin.ini") +def convert_add_migration_versions(): + session = Session() + + for name in ("__main__", + "mediagoblin.media_types.image", + "mediagoblin.media_types.video", + ): + m = MigrationData(name=name, version=0) + session.add(m) + + session.commit() + session.close() + - sql_conn, sql_db = sql_connect({'sql_engine': 'sqlite:///mediagoblin.db'}) +def run_conversion(config_name): + global_config, app_config = setup_global_and_app_config(config_name) + sql_conn, sql_db = sql_connect(app_config) mk_conn, mk_db = mongo_connect(app_config) Base.metadata.create_all(sql_db.engine) @@ -158,11 +212,17 @@ def main(): Session.remove() convert_media_entries(mk_db) Session.remove() + convert_image(mk_db) + Session.remove() + convert_video(mk_db) + Session.remove() convert_media_tags(mk_db) Session.remove() convert_media_comments(mk_db) Session.remove() + convert_add_migration_versions() + Session.remove() if __name__ == '__main__': - main() + run_conversion("mediagoblin.ini") diff --git a/mediagoblin/db/sql/extratypes.py b/mediagoblin/db/sql/extratypes.py index 3a594728..8e078f14 100644 --- a/mediagoblin/db/sql/extratypes.py +++ b/mediagoblin/db/sql/extratypes.py @@ -15,7 +15,8 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -from sqlalchemy.types import TypeDecorator, Unicode +from sqlalchemy.types import TypeDecorator, Unicode, VARCHAR +import json class PathTupleWithSlashes(TypeDecorator): @@ -35,3 +36,28 @@ class PathTupleWithSlashes(TypeDecorator): if value is not None: value = tuple(value.split('/')) return value + + +# The following class and only this one class is in very +# large parts based on example code from sqlalchemy. +# +# The original copyright notice and license follows: +# Copyright (C) 2005-2011 the SQLAlchemy authors and contributors <see AUTHORS file> +# +# This module is part of SQLAlchemy and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php +# +class JSONEncoded(TypeDecorator): + "Represents an immutable structure as a json-encoded string." + + impl = VARCHAR + + def process_bind_param(self, value, dialect): + if value is not None: + value = json.dumps(value) + return value + + def process_result_value(self, value, dialect): + if value is not None: + value = json.loads(value) + return value diff --git a/mediagoblin/db/sql/migrations.py b/mediagoblin/db/sql/migrations.py new file mode 100644 index 00000000..98d0d0aa --- /dev/null +++ b/mediagoblin/db/sql/migrations.py @@ -0,0 +1,17 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +MIGRATIONS = {} diff --git a/mediagoblin/db/sql/models.py b/mediagoblin/db/sql/models.py index 36f94b25..e87aaddb 100644 --- a/mediagoblin/db/sql/models.py +++ b/mediagoblin/db/sql/models.py @@ -14,20 +14,34 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +TODO: indexes on foreignkeys, where useful. +""" + import datetime +import sys from sqlalchemy import ( Column, Integer, Unicode, UnicodeText, DateTime, Boolean, ForeignKey, - UniqueConstraint) + UniqueConstraint, PrimaryKeyConstraint, SmallInteger) from sqlalchemy.orm import relationship from sqlalchemy.orm.collections import attribute_mapped_collection from sqlalchemy.sql.expression import desc from sqlalchemy.ext.associationproxy import association_proxy +from sqlalchemy.util import memoized_property -from mediagoblin.db.sql.extratypes import PathTupleWithSlashes +from mediagoblin.db.sql.extratypes import PathTupleWithSlashes, JSONEncoded from mediagoblin.db.sql.base import Base, DictReadAttrProxy -from mediagoblin.db.mixin import UserMixin, MediaEntryMixin +from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, MediaCommentMixin +from mediagoblin.db.sql.base import Session + +# It's actually kind of annoying how sqlalchemy-migrate does this, if +# I understand it right, but whatever. Anyway, don't remove this :P +# +# We could do migration calls more manually instead of relying on +# this import-based meddling... +from migrate import changeset class SimpleFieldAlias(object): @@ -43,7 +57,11 @@ class SimpleFieldAlias(object): class User(Base, UserMixin): - __tablename__ = "users" + """ + TODO: We should consider moving some rarely used fields + into some sort of "shadow" table. + """ + __tablename__ = "core__users" id = Column(Integer, primary_key=True) username = Column(Unicode, nullable=False, unique=True) @@ -56,7 +74,6 @@ class User(Base, UserMixin): is_admin = Column(Boolean, default=False, nullable=False) url = Column(Unicode) bio = Column(UnicodeText) # ?? - bio_html = Column(UnicodeText) # ?? fp_verification_key = Column(Unicode) fp_token_expire = Column(DateTime) @@ -67,22 +84,25 @@ class User(Base, UserMixin): class MediaEntry(Base, MediaEntryMixin): - __tablename__ = "media_entries" + """ + TODO: Consider fetching the media_files using join + """ + __tablename__ = "core__media_entries" id = Column(Integer, primary_key=True) - uploader = Column(Integer, ForeignKey('users.id'), nullable=False) + uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True) title = Column(Unicode, nullable=False) slug = Column(Unicode) - created = Column(DateTime, nullable=False, default=datetime.datetime.now) + created = Column(DateTime, nullable=False, default=datetime.datetime.now, + index=True) description = Column(UnicodeText) # ?? - description_html = Column(UnicodeText) # ?? media_type = Column(Unicode, nullable=False) state = Column(Unicode, default=u'unprocessed', nullable=False) # or use sqlalchemy.types.Enum? 
license = Column(Unicode) fail_error = Column(Unicode) - fail_metadata = Column(UnicodeText) + fail_metadata = Column(JSONEncoded) queued_media_file = Column(PathTupleWithSlashes) @@ -102,6 +122,15 @@ class MediaEntry(Base, MediaEntryMixin): creator=lambda k, v: MediaFile(name=k, file_path=v) ) + attachment_files_helper = relationship("MediaAttachmentFile", + cascade="all, delete-orphan", + order_by="MediaAttachmentFile.created" + ) + attachment_files = association_proxy("attachment_files_helper", "dict_view", + creator=lambda v: MediaAttachmentFile( + name=v["name"], filepath=v["filepath"]) + ) + tags_helper = relationship("MediaTag", cascade="all, delete-orphan" ) @@ -111,7 +140,6 @@ class MediaEntry(Base, MediaEntryMixin): ## TODO # media_data - # attachment_files # fail_error _id = SimpleFieldAlias("id") @@ -143,22 +171,107 @@ class MediaEntry(Base, MediaEntryMixin): if media is not None: return media.url_for_self(urlgen) + #@memoized_property + @property + def media_data(self): + session = Session() + + return session.query(self.media_data_table).filter_by( + media_entry=self.id).first() + + def media_data_init(self, **kwargs): + """ + Initialize or update the contents of a media entry's media_data row + """ + session = Session() + + media_data = session.query(self.media_data_table).filter_by( + media_entry=self.id).first() + + # No media data, so actually add a new one + if media_data is None: + media_data = self.media_data_table( + media_entry=self.id, + **kwargs) + session.add(media_data) + # Update old media data + else: + for field, value in kwargs.iteritems(): + setattr(media_data, field, value) + + @memoized_property + def media_data_table(self): + # TODO: memoize this + models_module = self.media_type + '.models' + __import__(models_module) + return sys.modules[models_module].DATA_MODEL + + +class FileKeynames(Base): + """ + keywords for various places. + currently the MediaFile keys + """ + __tablename__ = "core__file_keynames" + id = Column(Integer, primary_key=True) + name = Column(Unicode, unique=True) + + def __repr__(self): + return "<FileKeyname %r: %r>" % (self.id, self.name) + + @classmethod + def find_or_new(cls, name): + t = cls.query.filter_by(name=name).first() + if t is not None: + return t + return cls(name=name) + class MediaFile(Base): - __tablename__ = "mediafiles" + """ + TODO: Highly consider moving "name" into a new table. 
+ TODO: Consider preloading said table in software + """ + __tablename__ = "core__mediafiles" media_entry = Column( Integer, ForeignKey(MediaEntry.id), - nullable=False, primary_key=True) - name = Column(Unicode, primary_key=True) + nullable=False) + name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False) file_path = Column(PathTupleWithSlashes) + __table_args__ = ( + PrimaryKeyConstraint('media_entry', 'name_id'), + {}) + def __repr__(self): return "<MediaFile %s: %r>" % (self.name, self.file_path) + name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True) + name = association_proxy('name_helper', 'name', + creator=FileKeynames.find_or_new + ) + + +class MediaAttachmentFile(Base): + __tablename__ = "core__attachment_files" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False) + name = Column(Unicode, nullable=False) + filepath = Column(PathTupleWithSlashes) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + + @property + def dict_view(self): + """A dict like view on this object""" + return DictReadAttrProxy(self) + class Tag(Base): - __tablename__ = "tags" + __tablename__ = "core__tags" id = Column(Integer, primary_key=True) slug = Column(Unicode, nullable=False, unique=True) @@ -175,13 +288,13 @@ class Tag(Base): class MediaTag(Base): - __tablename__ = "media_tags" + __tablename__ = "core__media_tags" id = Column(Integer, primary_key=True) media_entry = Column( Integer, ForeignKey(MediaEntry.id), - nullable=False) - tag = Column(Integer, ForeignKey('tags.id'), nullable=False) + nullable=False, index=True) + tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True) name = Column(Unicode) # created = Column(DateTime, nullable=False, default=datetime.datetime.now) @@ -194,10 +307,12 @@ class MediaTag(Base): creator=Tag.find_or_new ) - def __init__(self, name, slug): + def __init__(self, name=None, slug=None): Base.__init__(self) - self.name = name - self.tag_helper = Tag.find_or_new(slug) + if name is not None: + self.name = name + if slug is not None: + self.tag_helper = Tag.find_or_new(slug) @property def dict_view(self): @@ -205,28 +320,56 @@ class MediaTag(Base): return DictReadAttrProxy(self) -class MediaComment(Base): - __tablename__ = "media_comments" +class MediaComment(Base, MediaCommentMixin): + __tablename__ = "core__media_comments" id = Column(Integer, primary_key=True) media_entry = Column( - Integer, ForeignKey('media_entries.id'), nullable=False) - author = Column(Integer, ForeignKey('users.id'), nullable=False) + Integer, ForeignKey(MediaEntry.id), nullable=False, index=True) + author = Column(Integer, ForeignKey(User.id), nullable=False) created = Column(DateTime, nullable=False, default=datetime.datetime.now) content = Column(UnicodeText, nullable=False) - content_html = Column(UnicodeText) get_author = relationship(User) _id = SimpleFieldAlias("id") -def show_table_init(): +MODELS = [ + User, MediaEntry, Tag, MediaTag, MediaComment, MediaFile, FileKeynames, + MediaAttachmentFile] + + +###################################################### +# Special, migrations-tracking table +# +# Not listed in MODELS because this is special and not +# really migrated, but used for migrations (for now) +###################################################### + +class MigrationData(Base): + __tablename__ = "core__migrations" + + name = Column(Unicode, primary_key=True) + version = Column(Integer, nullable=False, default=0) + 
+###################################################### + + +def show_table_init(engine_uri): + if engine_uri is None: + engine_uri = 'sqlite:///:memory:' from sqlalchemy import create_engine - engine = create_engine('sqlite:///:memory:', echo=True) + engine = create_engine(engine_uri, echo=True) Base.metadata.create_all(engine) if __name__ == '__main__': - show_table_init() + from sys import argv + print repr(argv) + if len(argv) == 2: + uri = argv[1] + else: + uri = None + show_table_init(uri) diff --git a/mediagoblin/db/sql/open.py b/mediagoblin/db/sql/open.py index 1bfc5538..edbf0785 100644 --- a/mediagoblin/db/sql/open.py +++ b/mediagoblin/db/sql/open.py @@ -16,9 +16,11 @@ from sqlalchemy import create_engine +import logging -from mediagoblin.db.sql.base import Session -from mediagoblin.db.sql.models import Base +from mediagoblin.db.sql.base import Base, Session + +_log = logging.getLogger(__name__) class DatabaseMaster(object): @@ -36,11 +38,22 @@ class DatabaseMaster(object): Session.flush() def reset_after_request(self): + Session.rollback() Session.remove() +def load_models(app_config): + import mediagoblin.db.sql.models + + if True: + for media_type in app_config['media_types']: + _log.debug("Loading %s.models", media_type) + __import__(media_type + ".models") + + def setup_connection_and_db_from_config(app_config): - engine = create_engine(app_config['sql_engine'], echo=True) + engine = create_engine(app_config['sql_engine']) + # logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) Session.configure(bind=engine) return "dummy conn", DatabaseMaster(engine) diff --git a/mediagoblin/db/sql/util.py b/mediagoblin/db/sql/util.py new file mode 100644 index 00000000..60024b28 --- /dev/null +++ b/mediagoblin/db/sql/util.py @@ -0,0 +1,324 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +import sys +from mediagoblin.db.sql.base import Session +from mediagoblin.db.sql.models import MediaEntry, Tag, MediaTag + + +def _simple_printer(string): + """ + Prints a string, but without an auto \n at the end. + """ + sys.stdout.write(string) + sys.stdout.flush() + + +class MigrationManager(object): + """ + Migration handling tool. + + Takes information about a database, lets you update the database + to the latest migrations, etc. 
+ """ + + def __init__(self, name, models, migration_registry, session, + printer=_simple_printer): + """ + Args: + - name: identifier of this section of the database + - session: session we're going to migrate + - migration_registry: where we should find all migrations to + run + """ + self.name = name + self.models = models + self.session = session + self.migration_registry = migration_registry + self._sorted_migrations = None + self.printer = printer + + # For convenience + from mediagoblin.db.sql.models import MigrationData + + self.migration_model = MigrationData + self.migration_table = MigrationData.__table__ + + @property + def sorted_migrations(self): + """ + Sort migrations if necessary and store in self._sorted_migrations + """ + if not self._sorted_migrations: + self._sorted_migrations = sorted( + self.migration_registry.items(), + # sort on the key... the migration number + key=lambda migration_tuple: migration_tuple[0]) + + return self._sorted_migrations + + @property + def migration_data(self): + """ + Get the migration row associated with this object, if any. + """ + return self.session.query( + self.migration_model).filter_by(name=self.name).first() + + @property + def latest_migration(self): + """ + Return a migration number for the latest migration, or 0 if + there are no migrations. + """ + if self.sorted_migrations: + return self.sorted_migrations[-1][0] + else: + # If no migrations have been set, we start at 0. + return 0 + + @property + def database_current_migration(self): + """ + Return the current migration in the database. + """ + # If the table doesn't even exist, return None. + if not self.migration_table.exists(self.session.bind): + return None + + # Also return None if self.migration_data is None. + if self.migration_data is None: + return None + + return self.migration_data.version + + def set_current_migration(self, migration_number=None): + """ + Set the migration in the database to migration_number + (or, the latest available) + """ + self.migration_data.version = migration_number or self.latest_migration + self.session.commit() + + def migrations_to_run(self): + """ + Get a list of migrations to run still, if any. + + Note that this will fail if there's no migration record for + this class! + """ + assert self.database_current_migration is not None + + db_current_migration = self.database_current_migration + + return [ + (migration_number, migration_func) + for migration_number, migration_func in self.sorted_migrations + if migration_number > db_current_migration] + + + def init_tables(self): + """ + Create all tables relative to this package + """ + # sanity check before we proceed, none of these should be created + for model in self.models: + # Maybe in the future just print out a "Yikes!" or something? + assert not model.__table__.exists(self.session.bind) + + self.migration_model.metadata.create_all( + self.session.bind, + tables=[model.__table__ for model in self.models]) + + def create_new_migration_record(self): + """ + Create a new migration record for this migration set + """ + migration_record = self.migration_model( + name=self.name, + version=self.latest_migration) + self.session.add(migration_record) + self.session.commit() + + def dry_run(self): + """ + Print out a dry run of what we would have upgraded. 
+ """ + if self.database_current_migration is None: + self.printer( + u'~> Woulda initialized: %s\n' % self.name_for_printing()) + return u'inited' + + migrations_to_run = self.migrations_to_run() + if migrations_to_run: + self.printer( + u'~> Woulda updated %s:\n' % self.name_for_printing()) + + for migration_number, migration_func in migrations_to_run(): + self.printer( + u' + Would update %s, "%s"\n' % ( + migration_number, migration_func.func_name)) + + return u'migrated' + + def name_for_printing(self): + if self.name == u'__main__': + return u"main mediagoblin tables" + else: + # TODO: Use the friendlier media manager "human readable" name + return u'media type "%s"' % self.name + + def init_or_migrate(self): + """ + Initialize the database or migrate if appropriate. + + Returns information about whether or not we initialized + ('inited'), migrated ('migrated'), or did nothing (None) + """ + assure_migrations_table_setup(self.session) + + # Find out what migration number, if any, this database data is at, + # and what the latest is. + migration_number = self.database_current_migration + + # Is this our first time? Is there even a table entry for + # this identifier? + # If so: + # - create all tables + # - create record in migrations registry + # - print / inform the user + # - return 'inited' + if migration_number is None: + self.printer(u"-> Initializing %s... " % self.name_for_printing()) + + self.init_tables() + # auto-set at latest migration number + self.create_new_migration_record() + + self.printer(u"done.\n") + self.set_current_migration() + return u'inited' + + # Run migrations, if appropriate. + migrations_to_run = self.migrations_to_run() + if migrations_to_run: + self.printer( + u'-> Updating %s:\n' % self.name_for_printing()) + for migration_number, migration_func in migrations_to_run: + self.printer( + u' + Running migration %s, "%s"... ' % ( + migration_number, migration_func.func_name)) + migration_func(self.session) + self.printer('done.\n') + + self.set_current_migration() + return u'migrated' + + # Otherwise return None. Well it would do this anyway, but + # for clarity... ;) + return None + + +class RegisterMigration(object): + """ + Tool for registering migrations + + Call like: + + @RegisterMigration(33) + def update_dwarves(database): + [...] + + This will register your migration with the default migration + registry. Alternately, to specify a very specific + migration_registry, you can pass in that as the second argument. + + Note, the number of your migration should NEVER be 0 or less than + 0. 0 is the default "no migrations" state! + """ + def __init__(self, migration_number, migration_registry): + assert migration_number > 0, "Migration number must be > 0!" + assert migration_number not in migration_registry, \ + "Duplicate migration numbers detected! That's not allowed!" + + self.migration_number = migration_number + self.migration_registry = migration_registry + + def __call__(self, migration): + self.migration_registry[self.migration_number] = migration + return migration + + +def assure_migrations_table_setup(db): + """ + Make sure the migrations table is set up in the database. 
+ """ + from mediagoblin.db.sql.models import MigrationData + + if not MigrationData.__table__.exists(db.bind): + MigrationData.metadata.create_all( + db.bind, tables=[MigrationData.__table__]) + + +########################## +# Random utility functions +########################## + + +def atomic_update(table, query_dict, update_values): + table.find(query_dict).update(update_values, + synchronize_session=False) + Session.commit() + + +def check_media_slug_used(dummy_db, uploader_id, slug, ignore_m_id): + filt = (MediaEntry.uploader == uploader_id) \ + & (MediaEntry.slug == slug) + if ignore_m_id is not None: + filt = filt & (MediaEntry.id != ignore_m_id) + does_exist = Session.query(MediaEntry.id).filter(filt).first() is not None + return does_exist + + +def media_entries_for_tag_slug(dummy_db, tag_slug): + return MediaEntry.query \ + .join(MediaEntry.tags_helper) \ + .join(MediaTag.tag_helper) \ + .filter( + (MediaEntry.state == u'processed') + & (Tag.slug == tag_slug)) + + +def clean_orphan_tags(): + q1 = Session.query(Tag).outerjoin(MediaTag).filter(MediaTag.id==None) + for t in q1: + Session.delete(t) + + # The "let the db do all the work" version: + # q1 = Session.query(Tag.id).outerjoin(MediaTag).filter(MediaTag.id==None) + # q2 = Session.query(Tag).filter(Tag.id.in_(q1)) + # q2.delete(synchronize_session = False) + + Session.commit() + + +if __name__ == '__main__': + from mediagoblin.db.sql.open import setup_connection_and_db_from_config + + conn,db = setup_connection_and_db_from_config({'sql_engine':'sqlite:///mediagoblin.db'}) + + clean_orphan_tags() diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py index 1fc949a6..540a9244 100644 --- a/mediagoblin/db/util.py +++ b/mediagoblin/db/util.py @@ -21,5 +21,9 @@ except ImportError: if use_sql: from mediagoblin.db.sql.fake import ObjectId, InvalidId, DESCENDING + from mediagoblin.db.sql.util import atomic_update, check_media_slug_used, \ + media_entries_for_tag_slug else: - from mediagoblin.db.mongo.util import ObjectId, InvalidId, DESCENDING + from mediagoblin.db.mongo.util import \ + ObjectId, InvalidId, DESCENDING, atomic_update, \ + check_media_slug_used, media_entries_for_tag_slug |