Diffstat (limited to 'mediagoblin/db')
-rw-r--r-- | mediagoblin/db/mixin.py            |  52
-rw-r--r-- | mediagoblin/db/mongo/migrations.py |  83
-rw-r--r-- | mediagoblin/db/mongo/models.py     |  43
-rw-r--r-- | mediagoblin/db/mongo/open.py       |   4
-rw-r--r-- | mediagoblin/db/mongo/util.py       |  26
-rw-r--r-- | mediagoblin/db/open.py             |   6
-rw-r--r-- | mediagoblin/db/sql/base.py         |  10
-rw-r--r-- | mediagoblin/db/sql/convert.py      |  90
-rw-r--r-- | mediagoblin/db/sql/extratypes.py   |  28
-rw-r--r-- | mediagoblin/db/sql/migrations.py   |  17
-rw-r--r-- | mediagoblin/db/sql/models.py       | 201
-rw-r--r-- | mediagoblin/db/sql/open.py         |  19
-rw-r--r-- | mediagoblin/db/sql/util.py         | 324
-rw-r--r-- | mediagoblin/db/util.py             |   6
14 files changed, 836 insertions, 73 deletions
diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index beaff9b0..a5aded02 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -27,8 +27,11 @@ These functions now live here and get "mixed in" into the real objects. """ +from mediagoblin import mg_globals from mediagoblin.auth import lib as auth_lib from mediagoblin.tools import common, licenses +from mediagoblin.tools.text import cleaned_markdown_conversion +from mediagoblin.tools.url import slugify class UserMixin(object): @@ -39,8 +42,36 @@ class UserMixin(object): return auth_lib.bcrypt_check_password( password, self.pw_hash) + @property + def bio_html(self): + return cleaned_markdown_conversion(self.bio) + class MediaEntryMixin(object): + def generate_slug(self): + # import this here due to a cyclic import issue + # (db.models -> db.mixin -> db.util -> db.models) + from mediagoblin.db.util import check_media_slug_used + + self.slug = slugify(self.title) + + duplicate = check_media_slug_used(mg_globals.database, + self.uploader, self.slug, self.id) + + if duplicate: + if self.id is not None: + self.slug = "%s-%s" % (self.id, self.slug) + else: + self.slug = None + + @property + def description_html(self): + """ + Rendered version of the description, run through + Markdown and cleaned with our cleaning tool. + """ + return cleaned_markdown_conversion(self.description) + def get_display_media(self, media_map, fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER): """ @@ -91,3 +122,24 @@ class MediaEntryMixin(object): def get_license_data(self): """Return license dict for requested license""" return licenses.SUPPORTED_LICENSES[self.license or ""] + + def exif_display_iter(self): + from mediagoblin.tools.exif import USEFUL_TAGS + + if not self.media_data: + return + exif_all = self.media_data.get("exif_all") + + for key in USEFUL_TAGS: + if key in exif_all: + yield key, exif_all[key] + + +class MediaCommentMixin(object): + @property + def content_html(self): + """ + the actual html-rendered version of the comment displayed. + Run through Markdown and the HTML cleaner. + """ + return cleaned_markdown_conversion(self.content) diff --git a/mediagoblin/db/mongo/migrations.py b/mediagoblin/db/mongo/migrations.py index 261e21a5..732f5846 100644 --- a/mediagoblin/db/mongo/migrations.py +++ b/mediagoblin/db/mongo/migrations.py @@ -29,6 +29,16 @@ def add_table_field(db, table_name, field_name, default_value): multi=True) +def drop_table_field(db, table_name, field_name): + """ + Drop an old field from a table/collection + """ + db[table_name].update( + {field_name: {'$exists': True}}, + {'$unset': {field_name: 1}}, + multi=True) + + # Please see mediagoblin/tests/test_migrations.py for some examples of # basic migrations. @@ -109,9 +119,82 @@ def media_type_image_to_multimedia_type_image(database): {'$set': {'media_type': 'mediagoblin.media_types.image'}}, multi=True) + @RegisterMigration(8) def mediaentry_add_license(database): """ Add the 'license' field for entries that don't have it. 
""" add_table_field(database, 'media_entries', 'license', None) + + +@RegisterMigration(9) +def remove_calculated_html(database): + """ + Drop pre-rendered html again and calculate things + on the fly (and cache): + - User.bio_html + - MediaEntry.description_html + - MediaComment.content_html + """ + drop_table_field(database, 'users', 'bio_html') + drop_table_field(database, 'media_entries', 'description_html') + drop_table_field(database, 'media_comments', 'content_html') + + +@RegisterMigration(10) +def convert_video_media_data(database): + """ + Move media_data["video"] directly into media_data + """ + collection = database['media_entries'] + target = collection.find( + {'media_data.video': {'$exists': True}}) + + for document in target: + assert len(document['media_data']) == 1 + document['media_data'] = document['media_data']['video'] + collection.save(document) + + +@RegisterMigration(11) +def convert_gps_media_data(database): + """ + Move media_data["gps"]["*"] to media_data["gps_*"]. + In preparation for media_data.gps_* + """ + collection = database['media_entries'] + target = collection.find( + {'media_data.gps': {'$exists': True}}) + + for document in target: + for key, value in document['media_data']['gps'].iteritems(): + document['media_data']['gps_' + key] = value + del document['media_data']['gps'] + collection.save(document) + + +@RegisterMigration(12) +def convert_exif_media_data(database): + """ + Move media_data["exif"]["clean"] to media_data["exif_all"]. + Drop media_data["exif"]["useful"] + In preparation for media_data.exif_all + """ + collection = database['media_entries'] + target = collection.find( + {'media_data.exif.clean': {'$exists': True}}) + + for document in target: + media_data = document['media_data'] + + exif_all = media_data['exif'].pop('clean') + if len(exif_all): + media_data['exif_all'] = exif_all + + del media_data['exif']['useful'] + + assert len(media_data['exif']) == 0 + del media_data['exif'] + + collection.save(document) diff --git a/mediagoblin/db/mongo/models.py b/mediagoblin/db/mongo/models.py index 541086bc..2e35a2b8 100644 --- a/mediagoblin/db/mongo/models.py +++ b/mediagoblin/db/mongo/models.py @@ -18,12 +18,21 @@ import datetime from mongokit import Document -from mediagoblin import mg_globals from mediagoblin.db.mongo import migrations from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId from mediagoblin.tools.pagination import Pagination -from mediagoblin.tools import url -from mediagoblin.db.mixin import UserMixin, MediaEntryMixin +from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, MediaCommentMixin + + +class MongoPK(object): + """An alias for the _id primary key""" + def __get__(self, instance, cls): + return instance['_id'] + def __set__(self, instance, val): + instance['_id'] = val + def __delete__(self, instance): + del instance['_id'] + ################### # Custom validators @@ -59,7 +68,6 @@ class User(Document, UserMixin): - is_admin: Whether or not this user is an administrator or not. - url: this user's personal webpage/website, if appropriate. - bio: biography of this user (plaintext, in markdown) - - bio_html: biography of the user converted to proper HTML. 
""" __collection__ = 'users' use_dot_notation = True @@ -76,7 +84,6 @@ class User(Document, UserMixin): 'is_admin': bool, 'url': unicode, 'bio': unicode, # May contain markdown - 'bio_html': unicode, # May contain plaintext, or HTML 'fp_verification_key': unicode, # forgotten password verification key 'fp_token_expire': datetime.datetime, } @@ -89,6 +96,8 @@ class User(Document, UserMixin): 'status': u'needs_email_verification', 'is_admin': False} + id = MongoPK() + class MediaEntry(Document, MediaEntryMixin): """ @@ -112,9 +121,6 @@ class MediaEntry(Document, MediaEntryMixin): up with MarkDown for slight fanciness (links, boldness, italics, paragraphs...) - - description_html: Rendered version of the description, run through - Markdown and cleaned with our cleaning tool. - - media_type: What type of media is this? Currently we only support 'image' ;) @@ -179,7 +185,6 @@ class MediaEntry(Document, MediaEntryMixin): 'slug': unicode, 'created': datetime.datetime, 'description': unicode, # May contain markdown/up - 'description_html': unicode, # May contain plaintext, or HTML 'media_type': unicode, 'media_data': dict, # extra data relevant to this media_type 'plugin_data': dict, # plugins can dump stuff here. @@ -211,6 +216,11 @@ class MediaEntry(Document, MediaEntryMixin): 'created': datetime.datetime.utcnow, 'state': u'unprocessed'} + id = MongoPK() + + def media_data_init(self, **kwargs): + self.media_data.update(kwargs) + def get_comments(self, ascending=False): if ascending: order = ASCENDING @@ -220,15 +230,6 @@ class MediaEntry(Document, MediaEntryMixin): return self.db.MediaComment.find({ 'media_entry': self._id}).sort('created', order) - def generate_slug(self): - self.slug = url.slugify(self.title) - - duplicate = mg_globals.database.media_entries.find_one( - {'slug': self.slug}) - - if duplicate: - self.slug = "%s-%s" % (self._id, self.slug) - def url_to_prev(self, urlgen): """ Provide a url to the previous entry from this user, if there is one @@ -257,7 +258,7 @@ class MediaEntry(Document, MediaEntryMixin): return self.db.User.find_one({'_id': self.uploader}) -class MediaComment(Document): +class MediaComment(Document, MediaCommentMixin): """ A comment on a MediaEntry. @@ -266,8 +267,6 @@ class MediaComment(Document): - author: user who posted this comment - created: when the comment was created - content: plaintext (but markdown'able) version of the comment's content. - - content_html: the actual html-rendered version of the comment displayed. - Run through Markdown and the HTML cleaner. 
""" __collection__ = 'media_comments' @@ -278,7 +277,7 @@ class MediaComment(Document): 'author': ObjectId, 'created': datetime.datetime, 'content': unicode, - 'content_html': unicode} + } required_fields = [ 'media_entry', 'author', 'created', 'content'] diff --git a/mediagoblin/db/mongo/open.py b/mediagoblin/db/mongo/open.py index bedc497b..c4f37b42 100644 --- a/mediagoblin/db/mongo/open.py +++ b/mediagoblin/db/mongo/open.py @@ -21,6 +21,10 @@ from mediagoblin.db.mongo import models from mediagoblin.db.mongo.util import MigrationManager +def load_models(app_config): + pass + + def connect_database_from_config(app_config, use_pymongo=False): """ Connect to the main database, take config from app_config diff --git a/mediagoblin/db/mongo/util.py b/mediagoblin/db/mongo/util.py index 4daf616a..f61ae6be 100644 --- a/mediagoblin/db/mongo/util.py +++ b/mediagoblin/db/mongo/util.py @@ -290,3 +290,29 @@ class MigrationManager(object): self.set_current_migration(migration_number) if post_callback: post_callback(migration_number, migration_func) + + +########################## +# Random utility functions +########################## + + +def atomic_update(table, query_dict, update_values): + table.collection.update( + query_dict, + {"$set": update_values}) + + +def check_media_slug_used(db, uploader_id, slug, ignore_m_id): + query_dict = {'uploader': uploader_id, 'slug': slug} + if ignore_m_id is not None: + query_dict['_id'] = {'$ne': ignore_m_id} + existing_user_slug_entries = db.MediaEntry.find( + query_dict).count() + return existing_user_slug_entries + + +def media_entries_for_tag_slug(db, tag_slug): + return db.MediaEntry.find( + {u'state': u'processed', + u'tags.slug': tag_slug}) diff --git a/mediagoblin/db/open.py b/mediagoblin/db/open.py index 0163469f..f4c38511 100644 --- a/mediagoblin/db/open.py +++ b/mediagoblin/db/open.py @@ -21,7 +21,9 @@ except ImportError: if use_sql: from mediagoblin.db.sql.open import \ - setup_connection_and_db_from_config, check_db_migrations_current + setup_connection_and_db_from_config, check_db_migrations_current, \ + load_models else: from mediagoblin.db.mongo.open import \ - setup_connection_and_db_from_config, check_db_migrations_current + setup_connection_and_db_from_config, check_db_migrations_current, \ + load_models diff --git a/mediagoblin/db/sql/base.py b/mediagoblin/db/sql/base.py index 6ed24a03..838080b0 100644 --- a/mediagoblin/db/sql/base.py +++ b/mediagoblin/db/sql/base.py @@ -67,6 +67,10 @@ class GMGTableBase(object): def get(self, key): return getattr(self, key) + def setdefault(self, key, defaultvalue): + # The key *has* to exist on sql. + return getattr(self, key) + def save(self, validate=True): assert validate sess = object_session(self) @@ -75,6 +79,12 @@ class GMGTableBase(object): sess.add(self) sess.commit() + def delete(self): + sess = object_session(self) + assert sess is not None, "Not going to delete detached %r" % self + sess.delete(self) + sess.commit() + Base = declarative_base(cls=GMGTableBase) diff --git a/mediagoblin/db/sql/convert.py b/mediagoblin/db/sql/convert.py index f6575be9..ebf3037c 100644 --- a/mediagoblin/db/sql/convert.py +++ b/mediagoblin/db/sql/convert.py @@ -14,17 +14,20 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+from copy import copy from mediagoblin.init import setup_global_and_app_config, setup_database from mediagoblin.db.mongo.util import ObjectId -from mediagoblin.db.sql.models import (Base, User, MediaEntry, MediaComment, - Tag, MediaTag, MediaFile) +from mediagoblin.db.sql.base import Base, Session +from mediagoblin.db.sql.models import (User, MediaEntry, MediaComment, + Tag, MediaTag, MediaFile, MediaAttachmentFile, MigrationData) +from mediagoblin.media_types.image.models import ImageData +from mediagoblin.media_types.video.models import VideoData from mediagoblin.db.sql.open import setup_connection_and_db_from_config as \ sql_connect from mediagoblin.db.mongo.open import setup_connection_and_db_from_config as \ mongo_connect -from mediagoblin.db.sql.base import Session obj_id_table = dict() @@ -49,14 +52,14 @@ def copy_reference_attr(entry, new_entry, ref_attr): def convert_users(mk_db): session = Session() - for entry in mk_db.User.find(): + for entry in mk_db.User.find().sort('created'): print entry.username new_entry = User() copy_attrs(entry, new_entry, ('username', 'email', 'created', 'pw_hash', 'email_verified', 'status', 'verification_key', 'is_admin', 'url', - 'bio', 'bio_html', + 'bio', 'fp_verification_key', 'fp_token_expire',)) # new_entry.fp_verification_expire = entry.fp_token_expire @@ -71,15 +74,15 @@ def convert_users(mk_db): def convert_media_entries(mk_db): session = Session() - for entry in mk_db.MediaEntry.find(): + for entry in mk_db.MediaEntry.find().sort('created'): print repr(entry.title) new_entry = MediaEntry() copy_attrs(entry, new_entry, ('title', 'slug', 'created', - 'description', 'description_html', + 'description', 'media_type', 'state', 'license', - 'fail_error', + 'fail_error', 'fail_metadata', 'queued_task_id',)) copy_reference_attr(entry, new_entry, "uploader") @@ -92,6 +95,44 @@ def convert_media_entries(mk_db): new_file.media_entry = new_entry.id Session.add(new_file) + for attachment in entry.attachment_files: + new_attach = MediaAttachmentFile( + name=attachment["name"], + filepath=attachment["filepath"], + created=attachment["created"] + ) + new_attach.media_entry = new_entry.id + Session.add(new_attach) + + session.commit() + session.close() + + +def convert_image(mk_db): + session = Session() + + for media in mk_db.MediaEntry.find( + {'media_type': 'mediagoblin.media_types.image'}).sort('created'): + media_data = copy(media.media_data) + + if len(media_data): + media_data_row = ImageData(**media_data) + media_data_row.media_entry = obj_id_table[media['_id']] + session.add(media_data_row) + + session.commit() + session.close() + + +def convert_video(mk_db): + session = Session() + + for media in mk_db.MediaEntry.find( + {'media_type': 'mediagoblin.media_types.video'}).sort('created'): + media_data_row = VideoData(**media.media_data) + media_data_row.media_entry = obj_id_table[media['_id']] + session.add(media_data_row) + session.commit() session.close() @@ -100,7 +141,7 @@ def convert_media_tags(mk_db): session = Session() session.autoflush = False - for media in mk_db.MediaEntry.find(): + for media in mk_db.MediaEntry.find().sort('created'): print repr(media.title) for otag in media.tags: @@ -127,13 +168,13 @@ def convert_media_tags(mk_db): def convert_media_comments(mk_db): session = Session() - for entry in mk_db.MediaComment.find(): + for entry in mk_db.MediaComment.find().sort('created'): print repr(entry.content) new_entry = MediaComment() copy_attrs(entry, new_entry, ('created', - 'content', 'content_html',)) + 'content',)) 
copy_reference_attr(entry, new_entry, "media_entry") copy_reference_attr(entry, new_entry, "author") @@ -145,11 +186,24 @@ def convert_media_comments(mk_db): session.close() -def main(): - global_config, app_config = setup_global_and_app_config("mediagoblin.ini") +def convert_add_migration_versions(): + session = Session() + + for name in ("__main__", + "mediagoblin.media_types.image", + "mediagoblin.media_types.video", + ): + m = MigrationData(name=name, version=0) + session.add(m) + + session.commit() + session.close() + - sql_conn, sql_db = sql_connect({'sql_engine': 'sqlite:///mediagoblin.db'}) +def run_conversion(config_name): + global_config, app_config = setup_global_and_app_config(config_name) + sql_conn, sql_db = sql_connect(app_config) mk_conn, mk_db = mongo_connect(app_config) Base.metadata.create_all(sql_db.engine) @@ -158,11 +212,17 @@ def main(): Session.remove() convert_media_entries(mk_db) Session.remove() + convert_image(mk_db) + Session.remove() + convert_video(mk_db) + Session.remove() convert_media_tags(mk_db) Session.remove() convert_media_comments(mk_db) Session.remove() + convert_add_migration_versions() + Session.remove() if __name__ == '__main__': - main() + run_conversion("mediagoblin.ini") diff --git a/mediagoblin/db/sql/extratypes.py b/mediagoblin/db/sql/extratypes.py index 3a594728..8e078f14 100644 --- a/mediagoblin/db/sql/extratypes.py +++ b/mediagoblin/db/sql/extratypes.py @@ -15,7 +15,8 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -from sqlalchemy.types import TypeDecorator, Unicode +from sqlalchemy.types import TypeDecorator, Unicode, VARCHAR +import json class PathTupleWithSlashes(TypeDecorator): @@ -35,3 +36,28 @@ class PathTupleWithSlashes(TypeDecorator): if value is not None: value = tuple(value.split('/')) return value + + +# The following class and only this one class is in very +# large parts based on example code from sqlalchemy. +# +# The original copyright notice and license follows: +# Copyright (C) 2005-2011 the SQLAlchemy authors and contributors <see AUTHORS file> +# +# This module is part of SQLAlchemy and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php +# +class JSONEncoded(TypeDecorator): + "Represents an immutable structure as a json-encoded string." + + impl = VARCHAR + + def process_bind_param(self, value, dialect): + if value is not None: + value = json.dumps(value) + return value + + def process_result_value(self, value, dialect): + if value is not None: + value = json.loads(value) + return value diff --git a/mediagoblin/db/sql/migrations.py b/mediagoblin/db/sql/migrations.py new file mode 100644 index 00000000..98d0d0aa --- /dev/null +++ b/mediagoblin/db/sql/migrations.py @@ -0,0 +1,17 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +MIGRATIONS = {} diff --git a/mediagoblin/db/sql/models.py b/mediagoblin/db/sql/models.py index 36f94b25..e87aaddb 100644 --- a/mediagoblin/db/sql/models.py +++ b/mediagoblin/db/sql/models.py @@ -14,20 +14,34 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +TODO: indexes on foreignkeys, where useful. +""" + import datetime +import sys from sqlalchemy import ( Column, Integer, Unicode, UnicodeText, DateTime, Boolean, ForeignKey, - UniqueConstraint) + UniqueConstraint, PrimaryKeyConstraint, SmallInteger) from sqlalchemy.orm import relationship from sqlalchemy.orm.collections import attribute_mapped_collection from sqlalchemy.sql.expression import desc from sqlalchemy.ext.associationproxy import association_proxy +from sqlalchemy.util import memoized_property -from mediagoblin.db.sql.extratypes import PathTupleWithSlashes +from mediagoblin.db.sql.extratypes import PathTupleWithSlashes, JSONEncoded from mediagoblin.db.sql.base import Base, DictReadAttrProxy -from mediagoblin.db.mixin import UserMixin, MediaEntryMixin +from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, MediaCommentMixin +from mediagoblin.db.sql.base import Session + +# It's actually kind of annoying how sqlalchemy-migrate does this, if +# I understand it right, but whatever. Anyway, don't remove this :P +# +# We could do migration calls more manually instead of relying on +# this import-based meddling... +from migrate import changeset class SimpleFieldAlias(object): @@ -43,7 +57,11 @@ class SimpleFieldAlias(object): class User(Base, UserMixin): - __tablename__ = "users" + """ + TODO: We should consider moving some rarely used fields + into some sort of "shadow" table. + """ + __tablename__ = "core__users" id = Column(Integer, primary_key=True) username = Column(Unicode, nullable=False, unique=True) @@ -56,7 +74,6 @@ class User(Base, UserMixin): is_admin = Column(Boolean, default=False, nullable=False) url = Column(Unicode) bio = Column(UnicodeText) # ?? - bio_html = Column(UnicodeText) # ?? fp_verification_key = Column(Unicode) fp_token_expire = Column(DateTime) @@ -67,22 +84,25 @@ class User(Base, UserMixin): class MediaEntry(Base, MediaEntryMixin): - __tablename__ = "media_entries" + """ + TODO: Consider fetching the media_files using join + """ + __tablename__ = "core__media_entries" id = Column(Integer, primary_key=True) - uploader = Column(Integer, ForeignKey('users.id'), nullable=False) + uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True) title = Column(Unicode, nullable=False) slug = Column(Unicode) - created = Column(DateTime, nullable=False, default=datetime.datetime.now) + created = Column(DateTime, nullable=False, default=datetime.datetime.now, + index=True) description = Column(UnicodeText) # ?? - description_html = Column(UnicodeText) # ?? media_type = Column(Unicode, nullable=False) state = Column(Unicode, default=u'unprocessed', nullable=False) # or use sqlalchemy.types.Enum? 
license = Column(Unicode) fail_error = Column(Unicode) - fail_metadata = Column(UnicodeText) + fail_metadata = Column(JSONEncoded) queued_media_file = Column(PathTupleWithSlashes) @@ -102,6 +122,15 @@ class MediaEntry(Base, MediaEntryMixin): creator=lambda k, v: MediaFile(name=k, file_path=v) ) + attachment_files_helper = relationship("MediaAttachmentFile", + cascade="all, delete-orphan", + order_by="MediaAttachmentFile.created" + ) + attachment_files = association_proxy("attachment_files_helper", "dict_view", + creator=lambda v: MediaAttachmentFile( + name=v["name"], filepath=v["filepath"]) + ) + tags_helper = relationship("MediaTag", cascade="all, delete-orphan" ) @@ -111,7 +140,6 @@ class MediaEntry(Base, MediaEntryMixin): ## TODO # media_data - # attachment_files # fail_error _id = SimpleFieldAlias("id") @@ -143,22 +171,107 @@ class MediaEntry(Base, MediaEntryMixin): if media is not None: return media.url_for_self(urlgen) + #@memoized_property + @property + def media_data(self): + session = Session() + + return session.query(self.media_data_table).filter_by( + media_entry=self.id).first() + + def media_data_init(self, **kwargs): + """ + Initialize or update the contents of a media entry's media_data row + """ + session = Session() + + media_data = session.query(self.media_data_table).filter_by( + media_entry=self.id).first() + + # No media data, so actually add a new one + if media_data is None: + media_data = self.media_data_table( + media_entry=self.id, + **kwargs) + session.add(media_data) + # Update old media data + else: + for field, value in kwargs.iteritems(): + setattr(media_data, field, value) + + @memoized_property + def media_data_table(self): + # TODO: memoize this + models_module = self.media_type + '.models' + __import__(models_module) + return sys.modules[models_module].DATA_MODEL + + +class FileKeynames(Base): + """ + keywords for various places. + currently the MediaFile keys + """ + __tablename__ = "core__file_keynames" + id = Column(Integer, primary_key=True) + name = Column(Unicode, unique=True) + + def __repr__(self): + return "<FileKeyname %r: %r>" % (self.id, self.name) + + @classmethod + def find_or_new(cls, name): + t = cls.query.filter_by(name=name).first() + if t is not None: + return t + return cls(name=name) + class MediaFile(Base): - __tablename__ = "mediafiles" + """ + TODO: Highly consider moving "name" into a new table. 
+ TODO: Consider preloading said table in software + """ + __tablename__ = "core__mediafiles" media_entry = Column( Integer, ForeignKey(MediaEntry.id), - nullable=False, primary_key=True) - name = Column(Unicode, primary_key=True) + nullable=False) + name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False) file_path = Column(PathTupleWithSlashes) + __table_args__ = ( + PrimaryKeyConstraint('media_entry', 'name_id'), + {}) + def __repr__(self): return "<MediaFile %s: %r>" % (self.name, self.file_path) + name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True) + name = association_proxy('name_helper', 'name', + creator=FileKeynames.find_or_new + ) + + +class MediaAttachmentFile(Base): + __tablename__ = "core__attachment_files" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False) + name = Column(Unicode, nullable=False) + filepath = Column(PathTupleWithSlashes) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + + @property + def dict_view(self): + """A dict like view on this object""" + return DictReadAttrProxy(self) + class Tag(Base): - __tablename__ = "tags" + __tablename__ = "core__tags" id = Column(Integer, primary_key=True) slug = Column(Unicode, nullable=False, unique=True) @@ -175,13 +288,13 @@ class Tag(Base): class MediaTag(Base): - __tablename__ = "media_tags" + __tablename__ = "core__media_tags" id = Column(Integer, primary_key=True) media_entry = Column( Integer, ForeignKey(MediaEntry.id), - nullable=False) - tag = Column(Integer, ForeignKey('tags.id'), nullable=False) + nullable=False, index=True) + tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True) name = Column(Unicode) # created = Column(DateTime, nullable=False, default=datetime.datetime.now) @@ -194,10 +307,12 @@ class MediaTag(Base): creator=Tag.find_or_new ) - def __init__(self, name, slug): + def __init__(self, name=None, slug=None): Base.__init__(self) - self.name = name - self.tag_helper = Tag.find_or_new(slug) + if name is not None: + self.name = name + if slug is not None: + self.tag_helper = Tag.find_or_new(slug) @property def dict_view(self): @@ -205,28 +320,56 @@ class MediaTag(Base): return DictReadAttrProxy(self) -class MediaComment(Base): - __tablename__ = "media_comments" +class MediaComment(Base, MediaCommentMixin): + __tablename__ = "core__media_comments" id = Column(Integer, primary_key=True) media_entry = Column( - Integer, ForeignKey('media_entries.id'), nullable=False) - author = Column(Integer, ForeignKey('users.id'), nullable=False) + Integer, ForeignKey(MediaEntry.id), nullable=False, index=True) + author = Column(Integer, ForeignKey(User.id), nullable=False) created = Column(DateTime, nullable=False, default=datetime.datetime.now) content = Column(UnicodeText, nullable=False) - content_html = Column(UnicodeText) get_author = relationship(User) _id = SimpleFieldAlias("id") -def show_table_init(): +MODELS = [ + User, MediaEntry, Tag, MediaTag, MediaComment, MediaFile, FileKeynames, + MediaAttachmentFile] + + +###################################################### +# Special, migrations-tracking table +# +# Not listed in MODELS because this is special and not +# really migrated, but used for migrations (for now) +###################################################### + +class MigrationData(Base): + __tablename__ = "core__migrations" + + name = Column(Unicode, primary_key=True) + version = Column(Integer, nullable=False, default=0) + 
+###################################################### + + +def show_table_init(engine_uri): + if engine_uri is None: + engine_uri = 'sqlite:///:memory:' from sqlalchemy import create_engine - engine = create_engine('sqlite:///:memory:', echo=True) + engine = create_engine(engine_uri, echo=True) Base.metadata.create_all(engine) if __name__ == '__main__': - show_table_init() + from sys import argv + print repr(argv) + if len(argv) == 2: + uri = argv[1] + else: + uri = None + show_table_init(uri) diff --git a/mediagoblin/db/sql/open.py b/mediagoblin/db/sql/open.py index 1bfc5538..edbf0785 100644 --- a/mediagoblin/db/sql/open.py +++ b/mediagoblin/db/sql/open.py @@ -16,9 +16,11 @@ from sqlalchemy import create_engine +import logging -from mediagoblin.db.sql.base import Session -from mediagoblin.db.sql.models import Base +from mediagoblin.db.sql.base import Base, Session + +_log = logging.getLogger(__name__) class DatabaseMaster(object): @@ -36,11 +38,22 @@ class DatabaseMaster(object): Session.flush() def reset_after_request(self): + Session.rollback() Session.remove() +def load_models(app_config): + import mediagoblin.db.sql.models + + if True: + for media_type in app_config['media_types']: + _log.debug("Loading %s.models", media_type) + __import__(media_type + ".models") + + def setup_connection_and_db_from_config(app_config): - engine = create_engine(app_config['sql_engine'], echo=True) + engine = create_engine(app_config['sql_engine']) + # logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) Session.configure(bind=engine) return "dummy conn", DatabaseMaster(engine) diff --git a/mediagoblin/db/sql/util.py b/mediagoblin/db/sql/util.py new file mode 100644 index 00000000..60024b28 --- /dev/null +++ b/mediagoblin/db/sql/util.py @@ -0,0 +1,324 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +import sys +from mediagoblin.db.sql.base import Session +from mediagoblin.db.sql.models import MediaEntry, Tag, MediaTag + + +def _simple_printer(string): + """ + Prints a string, but without an auto \n at the end. + """ + sys.stdout.write(string) + sys.stdout.flush() + + +class MigrationManager(object): + """ + Migration handling tool. + + Takes information about a database, lets you update the database + to the latest migrations, etc. 
+ """ + + def __init__(self, name, models, migration_registry, session, + printer=_simple_printer): + """ + Args: + - name: identifier of this section of the database + - session: session we're going to migrate + - migration_registry: where we should find all migrations to + run + """ + self.name = name + self.models = models + self.session = session + self.migration_registry = migration_registry + self._sorted_migrations = None + self.printer = printer + + # For convenience + from mediagoblin.db.sql.models import MigrationData + + self.migration_model = MigrationData + self.migration_table = MigrationData.__table__ + + @property + def sorted_migrations(self): + """ + Sort migrations if necessary and store in self._sorted_migrations + """ + if not self._sorted_migrations: + self._sorted_migrations = sorted( + self.migration_registry.items(), + # sort on the key... the migration number + key=lambda migration_tuple: migration_tuple[0]) + + return self._sorted_migrations + + @property + def migration_data(self): + """ + Get the migration row associated with this object, if any. + """ + return self.session.query( + self.migration_model).filter_by(name=self.name).first() + + @property + def latest_migration(self): + """ + Return a migration number for the latest migration, or 0 if + there are no migrations. + """ + if self.sorted_migrations: + return self.sorted_migrations[-1][0] + else: + # If no migrations have been set, we start at 0. + return 0 + + @property + def database_current_migration(self): + """ + Return the current migration in the database. + """ + # If the table doesn't even exist, return None. + if not self.migration_table.exists(self.session.bind): + return None + + # Also return None if self.migration_data is None. + if self.migration_data is None: + return None + + return self.migration_data.version + + def set_current_migration(self, migration_number=None): + """ + Set the migration in the database to migration_number + (or, the latest available) + """ + self.migration_data.version = migration_number or self.latest_migration + self.session.commit() + + def migrations_to_run(self): + """ + Get a list of migrations to run still, if any. + + Note that this will fail if there's no migration record for + this class! + """ + assert self.database_current_migration is not None + + db_current_migration = self.database_current_migration + + return [ + (migration_number, migration_func) + for migration_number, migration_func in self.sorted_migrations + if migration_number > db_current_migration] + + + def init_tables(self): + """ + Create all tables relative to this package + """ + # sanity check before we proceed, none of these should be created + for model in self.models: + # Maybe in the future just print out a "Yikes!" or something? + assert not model.__table__.exists(self.session.bind) + + self.migration_model.metadata.create_all( + self.session.bind, + tables=[model.__table__ for model in self.models]) + + def create_new_migration_record(self): + """ + Create a new migration record for this migration set + """ + migration_record = self.migration_model( + name=self.name, + version=self.latest_migration) + self.session.add(migration_record) + self.session.commit() + + def dry_run(self): + """ + Print out a dry run of what we would have upgraded. 
+ """ + if self.database_current_migration is None: + self.printer( + u'~> Woulda initialized: %s\n' % self.name_for_printing()) + return u'inited' + + migrations_to_run = self.migrations_to_run() + if migrations_to_run: + self.printer( + u'~> Woulda updated %s:\n' % self.name_for_printing()) + + for migration_number, migration_func in migrations_to_run(): + self.printer( + u' + Would update %s, "%s"\n' % ( + migration_number, migration_func.func_name)) + + return u'migrated' + + def name_for_printing(self): + if self.name == u'__main__': + return u"main mediagoblin tables" + else: + # TODO: Use the friendlier media manager "human readable" name + return u'media type "%s"' % self.name + + def init_or_migrate(self): + """ + Initialize the database or migrate if appropriate. + + Returns information about whether or not we initialized + ('inited'), migrated ('migrated'), or did nothing (None) + """ + assure_migrations_table_setup(self.session) + + # Find out what migration number, if any, this database data is at, + # and what the latest is. + migration_number = self.database_current_migration + + # Is this our first time? Is there even a table entry for + # this identifier? + # If so: + # - create all tables + # - create record in migrations registry + # - print / inform the user + # - return 'inited' + if migration_number is None: + self.printer(u"-> Initializing %s... " % self.name_for_printing()) + + self.init_tables() + # auto-set at latest migration number + self.create_new_migration_record() + + self.printer(u"done.\n") + self.set_current_migration() + return u'inited' + + # Run migrations, if appropriate. + migrations_to_run = self.migrations_to_run() + if migrations_to_run: + self.printer( + u'-> Updating %s:\n' % self.name_for_printing()) + for migration_number, migration_func in migrations_to_run: + self.printer( + u' + Running migration %s, "%s"... ' % ( + migration_number, migration_func.func_name)) + migration_func(self.session) + self.printer('done.\n') + + self.set_current_migration() + return u'migrated' + + # Otherwise return None. Well it would do this anyway, but + # for clarity... ;) + return None + + +class RegisterMigration(object): + """ + Tool for registering migrations + + Call like: + + @RegisterMigration(33) + def update_dwarves(database): + [...] + + This will register your migration with the default migration + registry. Alternately, to specify a very specific + migration_registry, you can pass in that as the second argument. + + Note, the number of your migration should NEVER be 0 or less than + 0. 0 is the default "no migrations" state! + """ + def __init__(self, migration_number, migration_registry): + assert migration_number > 0, "Migration number must be > 0!" + assert migration_number not in migration_registry, \ + "Duplicate migration numbers detected! That's not allowed!" + + self.migration_number = migration_number + self.migration_registry = migration_registry + + def __call__(self, migration): + self.migration_registry[self.migration_number] = migration + return migration + + +def assure_migrations_table_setup(db): + """ + Make sure the migrations table is set up in the database. 
+ """ + from mediagoblin.db.sql.models import MigrationData + + if not MigrationData.__table__.exists(db.bind): + MigrationData.metadata.create_all( + db.bind, tables=[MigrationData.__table__]) + + +########################## +# Random utility functions +########################## + + +def atomic_update(table, query_dict, update_values): + table.find(query_dict).update(update_values, + synchronize_session=False) + Session.commit() + + +def check_media_slug_used(dummy_db, uploader_id, slug, ignore_m_id): + filt = (MediaEntry.uploader == uploader_id) \ + & (MediaEntry.slug == slug) + if ignore_m_id is not None: + filt = filt & (MediaEntry.id != ignore_m_id) + does_exist = Session.query(MediaEntry.id).filter(filt).first() is not None + return does_exist + + +def media_entries_for_tag_slug(dummy_db, tag_slug): + return MediaEntry.query \ + .join(MediaEntry.tags_helper) \ + .join(MediaTag.tag_helper) \ + .filter( + (MediaEntry.state == u'processed') + & (Tag.slug == tag_slug)) + + +def clean_orphan_tags(): + q1 = Session.query(Tag).outerjoin(MediaTag).filter(MediaTag.id==None) + for t in q1: + Session.delete(t) + + # The "let the db do all the work" version: + # q1 = Session.query(Tag.id).outerjoin(MediaTag).filter(MediaTag.id==None) + # q2 = Session.query(Tag).filter(Tag.id.in_(q1)) + # q2.delete(synchronize_session = False) + + Session.commit() + + +if __name__ == '__main__': + from mediagoblin.db.sql.open import setup_connection_and_db_from_config + + conn,db = setup_connection_and_db_from_config({'sql_engine':'sqlite:///mediagoblin.db'}) + + clean_orphan_tags() diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py index 1fc949a6..540a9244 100644 --- a/mediagoblin/db/util.py +++ b/mediagoblin/db/util.py @@ -21,5 +21,9 @@ except ImportError: if use_sql: from mediagoblin.db.sql.fake import ObjectId, InvalidId, DESCENDING + from mediagoblin.db.sql.util import atomic_update, check_media_slug_used, \ + media_entries_for_tag_slug else: - from mediagoblin.db.mongo.util import ObjectId, InvalidId, DESCENDING + from mediagoblin.db.mongo.util import \ + ObjectId, InvalidId, DESCENDING, atomic_update, \ + check_media_slug_used, media_entries_for_tag_slug |