diff options
Diffstat (limited to 'mediagoblin/db')
-rw-r--r-- | mediagoblin/db/__init__.py | 49 | ||||
-rw-r--r-- | mediagoblin/db/base.py | 78 | ||||
-rw-r--r-- | mediagoblin/db/extratypes.py | 63 | ||||
-rw-r--r-- | mediagoblin/db/migration_tools.py | 276 | ||||
-rw-r--r-- | mediagoblin/db/migrations.py | 289 | ||||
-rw-r--r-- | mediagoblin/db/mixin.py | 334 | ||||
-rw-r--r-- | mediagoblin/db/models.py | 524 | ||||
-rw-r--r-- | mediagoblin/db/models_v0.py | 342 | ||||
-rw-r--r-- | mediagoblin/db/open.py | 101 | ||||
-rw-r--r-- | mediagoblin/db/util.py | 76 |
10 files changed, 2132 insertions, 0 deletions
diff --git a/mediagoblin/db/__init__.py b/mediagoblin/db/__init__.py new file mode 100644 index 00000000..27ca4b06 --- /dev/null +++ b/mediagoblin/db/__init__.py @@ -0,0 +1,49 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Database Abstraction/Wrapper Layer +================================== + +This submodule is for most of the db specific stuff. + +There are two main ideas here: + +1. Open up a small possibility to replace mongo by another + db. This means, that all direct mongo accesses should + happen in the db submodule. While all the rest uses an + API defined by this submodule. + + Currently this API happens to be basicly mongo. + Which means, that the abstraction/wrapper layer is + extremely thin. + +2. Give the rest of the app a simple and easy way to get most of + their db needs. Which often means some simple import + from db.util. + +What does that mean? + +* Never import mongo directly outside of this submodule. + +* Inside this submodule you can do whatever is needed. The + API border is exactly at the submodule layer. Nowhere + else. + +* helper functions can be moved in here. They become part + of the db.* API + +""" diff --git a/mediagoblin/db/base.py b/mediagoblin/db/base.py new file mode 100644 index 00000000..699a503a --- /dev/null +++ b/mediagoblin/db/base.py @@ -0,0 +1,78 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import scoped_session, sessionmaker, object_session + +Session = scoped_session(sessionmaker()) + + +class GMGTableBase(object): + query = Session.query_property() + + @classmethod + def find(cls, query_dict): + return cls.query.filter_by(**query_dict) + + @classmethod + def find_one(cls, query_dict): + return cls.query.filter_by(**query_dict).first() + + @classmethod + def one(cls, query_dict): + return cls.find(query_dict).one() + + def get(self, key): + return getattr(self, key) + + def setdefault(self, key, defaultvalue): + # The key *has* to exist on sql. + return getattr(self, key) + + def save(self): + sess = object_session(self) + if sess is None: + sess = Session() + sess.add(self) + sess.commit() + + def delete(self, commit=True): + """Delete the object and commit the change immediately by default""" + sess = object_session(self) + assert sess is not None, "Not going to delete detached %r" % self + sess.delete(self) + if commit: + sess.commit() + + +Base = declarative_base(cls=GMGTableBase) + + +class DictReadAttrProxy(object): + """ + Maps read accesses to obj['key'] to obj.key + and hides all the rest of the obj + """ + def __init__(self, proxied_obj): + self.proxied_obj = proxied_obj + + def __getitem__(self, key): + try: + return getattr(self.proxied_obj, key) + except AttributeError: + raise KeyError("%r is not an attribute on %r" + % (key, self.proxied_obj)) diff --git a/mediagoblin/db/extratypes.py b/mediagoblin/db/extratypes.py new file mode 100644 index 00000000..f2304af0 --- /dev/null +++ b/mediagoblin/db/extratypes.py @@ -0,0 +1,63 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +from sqlalchemy.types import TypeDecorator, Unicode, TEXT +import json + + +class PathTupleWithSlashes(TypeDecorator): + "Represents a Tuple of strings as a slash separated string." + + impl = Unicode + + def process_bind_param(self, value, dialect): + if value is not None: + if len(value) == 0: + value = None + else: + value = '/'.join(value) + return value + + def process_result_value(self, value, dialect): + if value is not None: + value = tuple(value.split('/')) + return value + + +# The following class and only this one class is in very +# large parts based on example code from sqlalchemy. +# +# The original copyright notice and license follows: +# Copyright (C) 2005-2011 the SQLAlchemy authors and contributors <see AUTHORS file> +# +# This module is part of SQLAlchemy and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php +# +class JSONEncoded(TypeDecorator): + "Represents an immutable structure as a json-encoded string." + + impl = TEXT + + def process_bind_param(self, value, dialect): + if value is not None: + value = json.dumps(value) + return value + + def process_result_value(self, value, dialect): + if value is not None: + value = json.loads(value) + return value diff --git a/mediagoblin/db/migration_tools.py b/mediagoblin/db/migration_tools.py new file mode 100644 index 00000000..c0c7e998 --- /dev/null +++ b/mediagoblin/db/migration_tools.py @@ -0,0 +1,276 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from mediagoblin.tools.common import simple_printer +from sqlalchemy import Table + +class TableAlreadyExists(Exception): + pass + + +class MigrationManager(object): + """ + Migration handling tool. + + Takes information about a database, lets you update the database + to the latest migrations, etc. + """ + + def __init__(self, name, models, migration_registry, session, + printer=simple_printer): + """ + Args: + - name: identifier of this section of the database + - session: session we're going to migrate + - migration_registry: where we should find all migrations to + run + """ + self.name = unicode(name) + self.models = models + self.session = session + self.migration_registry = migration_registry + self._sorted_migrations = None + self.printer = printer + + # For convenience + from mediagoblin.db.models import MigrationData + + self.migration_model = MigrationData + self.migration_table = MigrationData.__table__ + + @property + def sorted_migrations(self): + """ + Sort migrations if necessary and store in self._sorted_migrations + """ + if not self._sorted_migrations: + self._sorted_migrations = sorted( + self.migration_registry.items(), + # sort on the key... the migration number + key=lambda migration_tuple: migration_tuple[0]) + + return self._sorted_migrations + + @property + def migration_data(self): + """ + Get the migration row associated with this object, if any. + """ + return self.session.query( + self.migration_model).filter_by(name=self.name).first() + + @property + def latest_migration(self): + """ + Return a migration number for the latest migration, or 0 if + there are no migrations. + """ + if self.sorted_migrations: + return self.sorted_migrations[-1][0] + else: + # If no migrations have been set, we start at 0. + return 0 + + @property + def database_current_migration(self): + """ + Return the current migration in the database. + """ + # If the table doesn't even exist, return None. + if not self.migration_table.exists(self.session.bind): + return None + + # Also return None if self.migration_data is None. + if self.migration_data is None: + return None + + return self.migration_data.version + + def set_current_migration(self, migration_number=None): + """ + Set the migration in the database to migration_number + (or, the latest available) + """ + self.migration_data.version = migration_number or self.latest_migration + self.session.commit() + + def migrations_to_run(self): + """ + Get a list of migrations to run still, if any. + + Note that this will fail if there's no migration record for + this class! + """ + assert self.database_current_migration is not None + + db_current_migration = self.database_current_migration + + return [ + (migration_number, migration_func) + for migration_number, migration_func in self.sorted_migrations + if migration_number > db_current_migration] + + + def init_tables(self): + """ + Create all tables relative to this package + """ + # sanity check before we proceed, none of these should be created + for model in self.models: + # Maybe in the future just print out a "Yikes!" or something? + if model.__table__.exists(self.session.bind): + raise TableAlreadyExists( + u"Intended to create table '%s' but it already exists" % + model.__table__.name) + + self.migration_model.metadata.create_all( + self.session.bind, + tables=[model.__table__ for model in self.models]) + + def create_new_migration_record(self): + """ + Create a new migration record for this migration set + """ + migration_record = self.migration_model( + name=self.name, + version=self.latest_migration) + self.session.add(migration_record) + self.session.commit() + + def dry_run(self): + """ + Print out a dry run of what we would have upgraded. + """ + if self.database_current_migration is None: + self.printer( + u'~> Woulda initialized: %s\n' % self.name_for_printing()) + return u'inited' + + migrations_to_run = self.migrations_to_run() + if migrations_to_run: + self.printer( + u'~> Woulda updated %s:\n' % self.name_for_printing()) + + for migration_number, migration_func in migrations_to_run(): + self.printer( + u' + Would update %s, "%s"\n' % ( + migration_number, migration_func.func_name)) + + return u'migrated' + + def name_for_printing(self): + if self.name == u'__main__': + return u"main mediagoblin tables" + else: + # TODO: Use the friendlier media manager "human readable" name + return u'media type "%s"' % self.name + + def init_or_migrate(self): + """ + Initialize the database or migrate if appropriate. + + Returns information about whether or not we initialized + ('inited'), migrated ('migrated'), or did nothing (None) + """ + assure_migrations_table_setup(self.session) + + # Find out what migration number, if any, this database data is at, + # and what the latest is. + migration_number = self.database_current_migration + + # Is this our first time? Is there even a table entry for + # this identifier? + # If so: + # - create all tables + # - create record in migrations registry + # - print / inform the user + # - return 'inited' + if migration_number is None: + self.printer(u"-> Initializing %s... " % self.name_for_printing()) + + self.init_tables() + # auto-set at latest migration number + self.create_new_migration_record() + + self.printer(u"done.\n") + self.set_current_migration() + return u'inited' + + # Run migrations, if appropriate. + migrations_to_run = self.migrations_to_run() + if migrations_to_run: + self.printer( + u'-> Updating %s:\n' % self.name_for_printing()) + for migration_number, migration_func in migrations_to_run: + self.printer( + u' + Running migration %s, "%s"... ' % ( + migration_number, migration_func.func_name)) + migration_func(self.session) + self.set_current_migration(migration_number) + self.printer('done.\n') + + return u'migrated' + + # Otherwise return None. Well it would do this anyway, but + # for clarity... ;) + return None + + +class RegisterMigration(object): + """ + Tool for registering migrations + + Call like: + + @RegisterMigration(33) + def update_dwarves(database): + [...] + + This will register your migration with the default migration + registry. Alternately, to specify a very specific + migration_registry, you can pass in that as the second argument. + + Note, the number of your migration should NEVER be 0 or less than + 0. 0 is the default "no migrations" state! + """ + def __init__(self, migration_number, migration_registry): + assert migration_number > 0, "Migration number must be > 0!" + assert migration_number not in migration_registry, \ + "Duplicate migration numbers detected! That's not allowed!" + + self.migration_number = migration_number + self.migration_registry = migration_registry + + def __call__(self, migration): + self.migration_registry[self.migration_number] = migration + return migration + + +def assure_migrations_table_setup(db): + """ + Make sure the migrations table is set up in the database. + """ + from mediagoblin.db.models import MigrationData + + if not MigrationData.__table__.exists(db.bind): + MigrationData.metadata.create_all( + db.bind, tables=[MigrationData.__table__]) + + +def inspect_table(metadata, table_name): + """Simple helper to get a ref to an already existing table""" + return Table(table_name, metadata, autoload=True, + autoload_with=metadata.bind) diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py new file mode 100644 index 00000000..2c553396 --- /dev/null +++ b/mediagoblin/db/migrations.py @@ -0,0 +1,289 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import datetime +import uuid + +from sqlalchemy import (MetaData, Table, Column, Boolean, SmallInteger, + Integer, Unicode, UnicodeText, DateTime, + ForeignKey) +from sqlalchemy.exc import ProgrammingError +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.sql import and_ +from migrate.changeset.constraint import UniqueConstraint + +from mediagoblin.db.migration_tools import RegisterMigration, inspect_table +from mediagoblin.db.models import MediaEntry, Collection, User + +MIGRATIONS = {} + + +@RegisterMigration(1, MIGRATIONS) +def ogg_to_webm_audio(db_conn): + metadata = MetaData(bind=db_conn.bind) + + file_keynames = Table('core__file_keynames', metadata, autoload=True, + autoload_with=db_conn.bind) + + db_conn.execute( + file_keynames.update().where(file_keynames.c.name == 'ogg'). + values(name='webm_audio') + ) + db_conn.commit() + + +@RegisterMigration(2, MIGRATIONS) +def add_wants_notification_column(db_conn): + metadata = MetaData(bind=db_conn.bind) + + users = Table('core__users', metadata, autoload=True, + autoload_with=db_conn.bind) + + col = Column('wants_comment_notification', Boolean, + default=True, nullable=True) + col.create(users, populate_defaults=True) + db_conn.commit() + + +@RegisterMigration(3, MIGRATIONS) +def add_transcoding_progress(db_conn): + metadata = MetaData(bind=db_conn.bind) + + media_entry = inspect_table(metadata, 'core__media_entries') + + col = Column('transcoding_progress', SmallInteger) + col.create(media_entry) + db_conn.commit() + + +class Collection_v0(declarative_base()): + __tablename__ = "core__collections" + + id = Column(Integer, primary_key=True) + title = Column(Unicode, nullable=False) + slug = Column(Unicode) + created = Column(DateTime, nullable=False, default=datetime.datetime.now, + index=True) + description = Column(UnicodeText) + creator = Column(Integer, ForeignKey(User.id), nullable=False) + items = Column(Integer, default=0) + +class CollectionItem_v0(declarative_base()): + __tablename__ = "core__collection_items" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), nullable=False, index=True) + collection = Column(Integer, ForeignKey(Collection.id), nullable=False) + note = Column(UnicodeText, nullable=True) + added = Column(DateTime, nullable=False, default=datetime.datetime.now) + position = Column(Integer) + + ## This should be activated, normally. + ## But this would change the way the next migration used to work. + ## So it's commented for now. + __table_args__ = ( + UniqueConstraint('collection', 'media_entry'), + {}) + +collectionitem_unique_constraint_done = False + +@RegisterMigration(4, MIGRATIONS) +def add_collection_tables(db_conn): + Collection_v0.__table__.create(db_conn.bind) + CollectionItem_v0.__table__.create(db_conn.bind) + + global collectionitem_unique_constraint_done + collectionitem_unique_constraint_done = True + + db_conn.commit() + + +@RegisterMigration(5, MIGRATIONS) +def add_mediaentry_collected(db_conn): + metadata = MetaData(bind=db_conn.bind) + + media_entry = inspect_table(metadata, 'core__media_entries') + + col = Column('collected', Integer, default=0) + col.create(media_entry) + db_conn.commit() + + +class ProcessingMetaData_v0(declarative_base()): + __tablename__ = 'core__processing_metadata' + + id = Column(Integer, primary_key=True) + media_entry_id = Column(Integer, ForeignKey(MediaEntry.id), nullable=False, + index=True) + callback_url = Column(Unicode) + +@RegisterMigration(6, MIGRATIONS) +def create_processing_metadata_table(db): + ProcessingMetaData_v0.__table__.create(db.bind) + db.commit() + + +# Okay, problem being: +# Migration #4 forgot to add the uniqueconstraint for the +# new tables. While creating the tables from scratch had +# the constraint enabled. +# +# So we have four situations that should end up at the same +# db layout: +# +# 1. Fresh install. +# Well, easy. Just uses the tables in models.py +# 2. Fresh install using a git version just before this migration +# The tables are all there, the unique constraint is also there. +# This migration should do nothing. +# But as we can't detect the uniqueconstraint easily, +# this migration just adds the constraint again. +# And possibly fails very loud. But ignores the failure. +# 3. old install, not using git, just releases. +# This one will get the new tables in #4 (now with constraint!) +# And this migration is just skipped silently. +# 4. old install, always on latest git. +# This one has the tables, but lacks the constraint. +# So this migration adds the constraint. +@RegisterMigration(7, MIGRATIONS) +def fix_CollectionItem_v0_constraint(db_conn): + """Add the forgotten Constraint on CollectionItem""" + + global collectionitem_unique_constraint_done + if collectionitem_unique_constraint_done: + # Reset it. Maybe the whole thing gets run again + # For a different db? + collectionitem_unique_constraint_done = False + return + + metadata = MetaData(bind=db_conn.bind) + + CollectionItem_table = inspect_table(metadata, 'core__collection_items') + + constraint = UniqueConstraint('collection', 'media_entry', + name='core__collection_items_collection_media_entry_key', + table=CollectionItem_table) + + try: + constraint.create() + except ProgrammingError: + # User probably has an install that was run since the + # collection tables were added, so we don't need to run this migration. + pass + + db_conn.commit() + + +@RegisterMigration(8, MIGRATIONS) +def add_license_preference(db): + metadata = MetaData(bind=db.bind) + + user_table = inspect_table(metadata, 'core__users') + + col = Column('license_preference', Unicode) + col.create(user_table) + db.commit() + + +@RegisterMigration(9, MIGRATIONS) +def mediaentry_new_slug_era(db): + """ + Update for the new era for media type slugs. + + Entries without slugs now display differently in the url like: + /u/cwebber/m/id=251/ + + ... because of this, we should back-convert: + - entries without slugs should be converted to use the id, if possible, to + make old urls still work + - slugs with = (or also : which is now also not allowed) to have those + stripped out (small possibility of breakage here sadly) + """ + + def slug_and_user_combo_exists(slug, uploader): + return db.execute( + media_table.select( + and_(media_table.c.uploader==uploader, + media_table.c.slug==slug))).first() is not None + + def append_garbage_till_unique(row, new_slug): + """ + Attach junk to this row until it's unique, then save it + """ + if slug_and_user_combo_exists(new_slug, row.uploader): + # okay, still no success; + # let's whack junk on there till it's unique. + new_slug += '-' + uuid.uuid4().hex[:4] + # keep going if necessary! + while slug_and_user_combo_exists(new_slug, row.uploader): + new_slug += uuid.uuid4().hex[:4] + + db.execute( + media_table.update(). \ + where(media_table.c.id==row.id). \ + values(slug=new_slug)) + + metadata = MetaData(bind=db.bind) + + media_table = inspect_table(metadata, 'core__media_entries') + + for row in db.execute(media_table.select()): + # no slug, try setting to an id + if not row.slug: + append_garbage_till_unique(row, unicode(row.id)) + # has "=" or ":" in it... we're getting rid of those + elif u"=" in row.slug or u":" in row.slug: + append_garbage_till_unique( + row, row.slug.replace(u"=", u"-").replace(u":", u"-")) + + db.commit() + + +@RegisterMigration(10, MIGRATIONS) +def unique_collections_slug(db): + """Add unique constraint to collection slug""" + metadata = MetaData(bind=db.bind) + collection_table = inspect_table(metadata, "core__collections") + existing_slugs = {} + slugs_to_change = [] + + for row in db.execute(collection_table.select()): + # if duplicate slug, generate a unique slug + if row.creator in existing_slugs and row.slug in \ + existing_slugs[row.creator]: + slugs_to_change.append(row.id) + else: + if not row.creator in existing_slugs: + existing_slugs[row.creator] = [row.slug] + else: + existing_slugs[row.creator].append(row.slug) + + for row_id in slugs_to_change: + new_slug = unicode(uuid.uuid4()) + db.execute(collection_table.update(). + where(collection_table.c.id == row_id). + values(slug=new_slug)) + # sqlite does not like to change the schema when a transaction(update) is + # not yet completed + db.commit() + + constraint = UniqueConstraint('creator', 'slug', + name='core__collection_creator_slug_key', + table=collection_table) + constraint.create() + + db.commit() diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py new file mode 100644 index 00000000..9f566e36 --- /dev/null +++ b/mediagoblin/db/mixin.py @@ -0,0 +1,334 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +This module contains some Mixin classes for the db objects. + +A bunch of functions on the db objects are really more like +"utility functions": They could live outside the classes +and be called "by hand" passing the appropiate reference. +They usually only use the public API of the object and +rarely use database related stuff. + +These functions now live here and get "mixed in" into the +real objects. +""" + +import uuid +import re +import datetime + +from werkzeug.utils import cached_property + +from mediagoblin import mg_globals +from mediagoblin.media_types import get_media_managers, FileTypeNotSupported +from mediagoblin.tools import common, licenses +from mediagoblin.tools.text import cleaned_markdown_conversion +from mediagoblin.tools.url import slugify + + +class UserMixin(object): + @property + def bio_html(self): + return cleaned_markdown_conversion(self.bio) + + +class GenerateSlugMixin(object): + """ + Mixin to add a generate_slug method to objects. + + Depends on: + - self.slug + - self.title + - self.check_slug_used(new_slug) + """ + def generate_slug(self): + """ + Generate a unique slug for this object. + + This one does not *force* slugs, but usually it will probably result + in a niceish one. + + The end *result* of the algorithm will result in these resolutions for + these situations: + - If we have a slug, make sure it's clean and sanitized, and if it's + unique, we'll use that. + - If we have a title, slugify it, and if it's unique, we'll use that. + - If we can't get any sort of thing that looks like it'll be a useful + slug out of a title or an existing slug, bail, and don't set the + slug at all. Don't try to create something just because. Make + sure we have a reasonable basis for a slug first. + - If we have a reasonable basis for a slug (either based on existing + slug or slugified title) but it's not unique, first try appending + the entry's id, if that exists + - If that doesn't result in something unique, tack on some randomly + generated bits until it's unique. That'll be a little bit of junk, + but at least it has the basis of a nice slug. + """ + #Is already a slug assigned? Check if it is valid + if self.slug: + self.slug = slugify(self.slug) + + # otherwise, try to use the title. + elif self.title: + # assign slug based on title + self.slug = slugify(self.title) + + # We don't want any empty string slugs + if self.slug == u"": + self.slug = None + + # Do we have anything at this point? + # If not, we're not going to get a slug + # so just return... we're not going to force one. + if not self.slug: + return # giving up! + + # Otherwise, let's see if this is unique. + if self.check_slug_used(self.slug): + # It looks like it's being used... lame. + + # Can we just append the object's id to the end? + if self.id: + slug_with_id = u"%s-%s" % (self.slug, self.id) + if not self.check_slug_used(slug_with_id): + self.slug = slug_with_id + return # success! + + # okay, still no success; + # let's whack junk on there till it's unique. + self.slug += '-' + uuid.uuid4().hex[:4] + # keep going if necessary! + while self.check_slug_used(self.slug): + self.slug += uuid.uuid4().hex[:4] + + +class MediaEntryMixin(GenerateSlugMixin): + def check_slug_used(self, slug): + # import this here due to a cyclic import issue + # (db.models -> db.mixin -> db.util -> db.models) + from mediagoblin.db.util import check_media_slug_used + + return check_media_slug_used(self.uploader, slug, self.id) + + @property + def description_html(self): + """ + Rendered version of the description, run through + Markdown and cleaned with our cleaning tool. + """ + return cleaned_markdown_conversion(self.description) + + def get_display_media(self): + """Find the best media for display. + + We try checking self.media_manager.fetching_order if it exists to + pull down the order. + + Returns: + (media_size, media_path) + or, if not found, None. + + """ + fetch_order = self.media_manager.media_fetch_order + + # No fetching order found? well, give up! + if not fetch_order: + return None + + media_sizes = self.media_files.keys() + + for media_size in fetch_order: + if media_size in media_sizes: + return media_size, self.media_files[media_size] + + def main_mediafile(self): + pass + + @property + def slug_or_id(self): + if self.slug: + return self.slug + else: + return u'id:%s' % self.id + + def url_for_self(self, urlgen, **extra_args): + """ + Generate an appropriate url for ourselves + + Use a slug if we have one, else use our 'id'. + """ + uploader = self.get_uploader + + return urlgen( + 'mediagoblin.user_pages.media_home', + user=uploader.username, + media=self.slug_or_id, + **extra_args) + + @property + def thumb_url(self): + """Return the thumbnail URL (for usage in templates) + Will return either the real thumbnail or a default fallback icon.""" + # TODO: implement generic fallback in case MEDIA_MANAGER does + # not specify one? + if u'thumb' in self.media_files: + thumb_url = mg_globals.app.public_store.file_url( + self.media_files[u'thumb']) + else: + # No thumbnail in media available. Get the media's + # MEDIA_MANAGER for the fallback icon and return static URL + # Raises FileTypeNotSupported in case no such manager is enabled + manager = self.media_manager + thumb_url = mg_globals.app.staticdirector(manager[u'default_thumb']) + return thumb_url + + @cached_property + def media_manager(self): + """Returns the MEDIA_MANAGER of the media's media_type + + Raises FileTypeNotSupported in case no such manager is enabled + """ + # TODO, we should be able to make this a simple lookup rather + # than iterating through all media managers. + for media_type, manager in get_media_managers(): + if media_type == self.media_type: + return manager(self) + # Not found? Then raise an error + raise FileTypeNotSupported( + "MediaManager not in enabled types. Check media_types in config?") + + def get_fail_exception(self): + """ + Get the exception that's appropriate for this error + """ + if self.fail_error: + return common.import_component(self.fail_error) + + def get_license_data(self): + """Return license dict for requested license""" + return licenses.get_license_by_url(self.license or "") + + def exif_display_iter(self): + if not self.media_data: + return + exif_all = self.media_data.get("exif_all") + + for key in exif_all: + label = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', key) + yield label.replace('EXIF', '').replace('Image', ''), exif_all[key] + + def exif_display_data_short(self): + """Display a very short practical version of exif info""" + if not self.media_data: + return + + exif_all = self.media_data.get("exif_all") + + exif_short = {} + + if 'Image DateTimeOriginal' in exif_all: + # format date taken + takendate = datetime.datetime.strptime( + exif_all['Image DateTimeOriginal']['printable'], + '%Y:%m:%d %H:%M:%S').date() + taken = takendate.strftime('%B %d %Y') + + exif_short.update({'Date Taken': taken}) + + aperture = None + if 'EXIF FNumber' in exif_all: + fnum = str(exif_all['EXIF FNumber']['printable']).split('/') + + # calculate aperture + if len(fnum) == 2: + aperture = "f/%.1f" % (float(fnum[0])/float(fnum[1])) + elif fnum[0] != 'None': + aperture = "f/%s" % (fnum[0]) + + if aperture: + exif_short.update({'Aperture': aperture}) + + short_keys = [ + ('Camera', 'Image Model', None), + ('Exposure', 'EXIF ExposureTime', lambda x: '%s sec' % x), + ('ISO Speed', 'EXIF ISOSpeedRatings', None), + ('Focal Length', 'EXIF FocalLength', lambda x: '%s mm' % x)] + + for label, key, fmt_func in short_keys: + try: + val = fmt_func(exif_all[key]['printable']) if fmt_func \ + else exif_all[key]['printable'] + exif_short.update({label: val}) + except KeyError: + pass + + return exif_short + + +class MediaCommentMixin(object): + @property + def content_html(self): + """ + the actual html-rendered version of the comment displayed. + Run through Markdown and the HTML cleaner. + """ + return cleaned_markdown_conversion(self.content) + + +class CollectionMixin(GenerateSlugMixin): + def check_slug_used(self, slug): + # import this here due to a cyclic import issue + # (db.models -> db.mixin -> db.util -> db.models) + from mediagoblin.db.util import check_collection_slug_used + + return check_collection_slug_used(self.creator, slug, self.id) + + @property + def description_html(self): + """ + Rendered version of the description, run through + Markdown and cleaned with our cleaning tool. + """ + return cleaned_markdown_conversion(self.description) + + @property + def slug_or_id(self): + return (self.slug or self.id) + + def url_for_self(self, urlgen, **extra_args): + """ + Generate an appropriate url for ourselves + + Use a slug if we have one, else use our 'id'. + """ + creator = self.get_creator + + return urlgen( + 'mediagoblin.user_pages.user_collection', + user=creator.username, + collection=self.slug_or_id, + **extra_args) + + +class CollectionItemMixin(object): + @property + def note_html(self): + """ + the actual html-rendered version of the note displayed. + Run through Markdown and the HTML cleaner. + """ + return cleaned_markdown_conversion(self.note) diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py new file mode 100644 index 00000000..2b925983 --- /dev/null +++ b/mediagoblin/db/models.py @@ -0,0 +1,524 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +TODO: indexes on foreignkeys, where useful. +""" + +import logging +import datetime + +from sqlalchemy import Column, Integer, Unicode, UnicodeText, DateTime, \ + Boolean, ForeignKey, UniqueConstraint, PrimaryKeyConstraint, \ + SmallInteger +from sqlalchemy.orm import relationship, backref +from sqlalchemy.orm.collections import attribute_mapped_collection +from sqlalchemy.sql.expression import desc +from sqlalchemy.ext.associationproxy import association_proxy +from sqlalchemy.util import memoized_property + +from mediagoblin.db.extratypes import PathTupleWithSlashes, JSONEncoded +from mediagoblin.db.base import Base, DictReadAttrProxy +from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, MediaCommentMixin, CollectionMixin, CollectionItemMixin +from mediagoblin.tools.files import delete_media_files +from mediagoblin.tools.common import import_component + +# It's actually kind of annoying how sqlalchemy-migrate does this, if +# I understand it right, but whatever. Anyway, don't remove this :P +# +# We could do migration calls more manually instead of relying on +# this import-based meddling... +from migrate import changeset + +_log = logging.getLogger(__name__) + + +class User(Base, UserMixin): + """ + TODO: We should consider moving some rarely used fields + into some sort of "shadow" table. + """ + __tablename__ = "core__users" + + id = Column(Integer, primary_key=True) + username = Column(Unicode, nullable=False, unique=True) + # Note: no db uniqueness constraint on email because it's not + # reliable (many email systems case insensitive despite against + # the RFC) and because it would be a mess to implement at this + # point. + email = Column(Unicode, nullable=False) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + pw_hash = Column(Unicode, nullable=False) + email_verified = Column(Boolean, default=False) + status = Column(Unicode, default=u"needs_email_verification", nullable=False) + # Intented to be nullable=False, but migrations would not work for it + # set to nullable=True implicitly. + wants_comment_notification = Column(Boolean, default=True) + license_preference = Column(Unicode) + verification_key = Column(Unicode) + is_admin = Column(Boolean, default=False, nullable=False) + url = Column(Unicode) + bio = Column(UnicodeText) # ?? + fp_verification_key = Column(Unicode) + fp_token_expire = Column(DateTime) + + ## TODO + # plugin data would be in a separate model + + def __repr__(self): + return '<{0} #{1} {2} {3} "{4}">'.format( + self.__class__.__name__, + self.id, + 'verified' if self.email_verified else 'non-verified', + 'admin' if self.is_admin else 'user', + self.username) + + def delete(self, **kwargs): + """Deletes a User and all related entries/comments/files/...""" + # Collections get deleted by relationships. + + media_entries = MediaEntry.query.filter(MediaEntry.uploader == self.id) + for media in media_entries: + # TODO: Make sure that "MediaEntry.delete()" also deletes + # all related files/Comments + media.delete(del_orphan_tags=False, commit=False) + + # Delete now unused tags + # TODO: import here due to cyclic imports!!! This cries for refactoring + from mediagoblin.db.util import clean_orphan_tags + clean_orphan_tags(commit=False) + + # Delete user, pass through commit=False/True in kwargs + super(User, self).delete(**kwargs) + _log.info('Deleted user "{0}" account'.format(self.username)) + + +class MediaEntry(Base, MediaEntryMixin): + """ + TODO: Consider fetching the media_files using join + """ + __tablename__ = "core__media_entries" + + id = Column(Integer, primary_key=True) + uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True) + title = Column(Unicode, nullable=False) + slug = Column(Unicode) + created = Column(DateTime, nullable=False, default=datetime.datetime.now, + index=True) + description = Column(UnicodeText) # ?? + media_type = Column(Unicode, nullable=False) + state = Column(Unicode, default=u'unprocessed', nullable=False) + # or use sqlalchemy.types.Enum? + license = Column(Unicode) + collected = Column(Integer, default=0) + + fail_error = Column(Unicode) + fail_metadata = Column(JSONEncoded) + + transcoding_progress = Column(SmallInteger) + + queued_media_file = Column(PathTupleWithSlashes) + + queued_task_id = Column(Unicode) + + __table_args__ = ( + UniqueConstraint('uploader', 'slug'), + {}) + + get_uploader = relationship(User) + + media_files_helper = relationship("MediaFile", + collection_class=attribute_mapped_collection("name"), + cascade="all, delete-orphan" + ) + media_files = association_proxy('media_files_helper', 'file_path', + creator=lambda k, v: MediaFile(name=k, file_path=v) + ) + + attachment_files_helper = relationship("MediaAttachmentFile", + cascade="all, delete-orphan", + order_by="MediaAttachmentFile.created" + ) + attachment_files = association_proxy("attachment_files_helper", "dict_view", + creator=lambda v: MediaAttachmentFile( + name=v["name"], filepath=v["filepath"]) + ) + + tags_helper = relationship("MediaTag", + cascade="all, delete-orphan" # should be automatically deleted + ) + tags = association_proxy("tags_helper", "dict_view", + creator=lambda v: MediaTag(name=v["name"], slug=v["slug"]) + ) + + collections_helper = relationship("CollectionItem", + cascade="all, delete-orphan" + ) + collections = association_proxy("collections_helper", "in_collection") + + ## TODO + # fail_error + + def get_comments(self, ascending=False): + order_col = MediaComment.created + if not ascending: + order_col = desc(order_col) + return self.all_comments.order_by(order_col) + + def url_to_prev(self, urlgen): + """get the next 'newer' entry by this user""" + media = MediaEntry.query.filter( + (MediaEntry.uploader == self.uploader) + & (MediaEntry.state == u'processed') + & (MediaEntry.id > self.id)).order_by(MediaEntry.id).first() + + if media is not None: + return media.url_for_self(urlgen) + + def url_to_next(self, urlgen): + """get the next 'older' entry by this user""" + media = MediaEntry.query.filter( + (MediaEntry.uploader == self.uploader) + & (MediaEntry.state == u'processed') + & (MediaEntry.id < self.id)).order_by(desc(MediaEntry.id)).first() + + if media is not None: + return media.url_for_self(urlgen) + + @property + def media_data(self): + return getattr(self, self.media_data_ref) + + def media_data_init(self, **kwargs): + """ + Initialize or update the contents of a media entry's media_data row + """ + media_data = self.media_data + + if media_data is None: + # Get the correct table: + table = import_component(self.media_type + '.models:DATA_MODEL') + # No media data, so actually add a new one + media_data = table(**kwargs) + # Get the relationship set up. + media_data.get_media_entry = self + else: + # Update old media data + for field, value in kwargs.iteritems(): + setattr(media_data, field, value) + + @memoized_property + def media_data_ref(self): + return import_component(self.media_type + '.models:BACKREF_NAME') + + def __repr__(self): + safe_title = self.title.encode('ascii', 'replace') + + return '<{classname} {id}: {title}>'.format( + classname=self.__class__.__name__, + id=self.id, + title=safe_title) + + def delete(self, del_orphan_tags=True, **kwargs): + """Delete MediaEntry and all related files/attachments/comments + + This will *not* automatically delete unused collections, which + can remain empty... + + :param del_orphan_tags: True/false if we delete unused Tags too + :param commit: True/False if this should end the db transaction""" + # User's CollectionItems are automatically deleted via "cascade". + # Comments on this Media are deleted by cascade, hopefully. + + # Delete all related files/attachments + try: + delete_media_files(self) + except OSError, error: + # Returns list of files we failed to delete + _log.error('No such files from the user "{1}" to delete: ' + '{0}'.format(str(error), self.get_uploader)) + _log.info('Deleted Media entry id "{0}"'.format(self.id)) + # Related MediaTag's are automatically cleaned, but we might + # want to clean out unused Tag's too. + if del_orphan_tags: + # TODO: Import here due to cyclic imports!!! + # This cries for refactoring + from mediagoblin.db.util import clean_orphan_tags + clean_orphan_tags(commit=False) + # pass through commit=False/True in kwargs + super(MediaEntry, self).delete(**kwargs) + + +class FileKeynames(Base): + """ + keywords for various places. + currently the MediaFile keys + """ + __tablename__ = "core__file_keynames" + id = Column(Integer, primary_key=True) + name = Column(Unicode, unique=True) + + def __repr__(self): + return "<FileKeyname %r: %r>" % (self.id, self.name) + + @classmethod + def find_or_new(cls, name): + t = cls.query.filter_by(name=name).first() + if t is not None: + return t + return cls(name=name) + + +class MediaFile(Base): + """ + TODO: Highly consider moving "name" into a new table. + TODO: Consider preloading said table in software + """ + __tablename__ = "core__mediafiles" + + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False) + name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False) + file_path = Column(PathTupleWithSlashes) + + __table_args__ = ( + PrimaryKeyConstraint('media_entry', 'name_id'), + {}) + + def __repr__(self): + return "<MediaFile %s: %r>" % (self.name, self.file_path) + + name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True) + name = association_proxy('name_helper', 'name', + creator=FileKeynames.find_or_new + ) + + +class MediaAttachmentFile(Base): + __tablename__ = "core__attachment_files" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False) + name = Column(Unicode, nullable=False) + filepath = Column(PathTupleWithSlashes) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + + @property + def dict_view(self): + """A dict like view on this object""" + return DictReadAttrProxy(self) + + +class Tag(Base): + __tablename__ = "core__tags" + + id = Column(Integer, primary_key=True) + slug = Column(Unicode, nullable=False, unique=True) + + def __repr__(self): + return "<Tag %r: %r>" % (self.id, self.slug) + + @classmethod + def find_or_new(cls, slug): + t = cls.query.filter_by(slug=slug).first() + if t is not None: + return t + return cls(slug=slug) + + +class MediaTag(Base): + __tablename__ = "core__media_tags" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False, index=True) + tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True) + name = Column(Unicode) + # created = Column(DateTime, nullable=False, default=datetime.datetime.now) + + __table_args__ = ( + UniqueConstraint('tag', 'media_entry'), + {}) + + tag_helper = relationship(Tag) + slug = association_proxy('tag_helper', 'slug', + creator=Tag.find_or_new + ) + + def __init__(self, name=None, slug=None): + Base.__init__(self) + if name is not None: + self.name = name + if slug is not None: + self.tag_helper = Tag.find_or_new(slug) + + @property + def dict_view(self): + """A dict like view on this object""" + return DictReadAttrProxy(self) + + +class MediaComment(Base, MediaCommentMixin): + __tablename__ = "core__media_comments" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), nullable=False, index=True) + author = Column(Integer, ForeignKey(User.id), nullable=False) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + content = Column(UnicodeText, nullable=False) + + # Cascade: Comments are owned by their creator. So do the full thing. + # lazy=dynamic: People might post a *lot* of comments, + # so make the "posted_comments" a query-like thing. + get_author = relationship(User, + backref=backref("posted_comments", + lazy="dynamic", + cascade="all, delete-orphan")) + + # Cascade: Comments are somewhat owned by their MediaEntry. + # So do the full thing. + # lazy=dynamic: MediaEntries might have many comments, + # so make the "all_comments" a query-like thing. + get_media_entry = relationship(MediaEntry, + backref=backref("all_comments", + lazy="dynamic", + cascade="all, delete-orphan")) + + +class Collection(Base, CollectionMixin): + """An 'album' or 'set' of media by a user. + + On deletion, contained CollectionItems get automatically reaped via + SQL cascade""" + __tablename__ = "core__collections" + + id = Column(Integer, primary_key=True) + title = Column(Unicode, nullable=False) + slug = Column(Unicode) + created = Column(DateTime, nullable=False, default=datetime.datetime.now, + index=True) + description = Column(UnicodeText) + creator = Column(Integer, ForeignKey(User.id), nullable=False) + # TODO: No of items in Collection. Badly named, can we migrate to num_items? + items = Column(Integer, default=0) + + # Cascade: Collections are owned by their creator. So do the full thing. + get_creator = relationship(User, + backref=backref("collections", + cascade="all, delete-orphan")) + + __table_args__ = ( + UniqueConstraint('creator', 'slug'), + {}) + + def get_collection_items(self, ascending=False): + #TODO, is this still needed with self.collection_items being available? + order_col = CollectionItem.position + if not ascending: + order_col = desc(order_col) + return CollectionItem.query.filter_by( + collection=self.id).order_by(order_col) + + +class CollectionItem(Base, CollectionItemMixin): + __tablename__ = "core__collection_items" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), nullable=False, index=True) + collection = Column(Integer, ForeignKey(Collection.id), nullable=False) + note = Column(UnicodeText, nullable=True) + added = Column(DateTime, nullable=False, default=datetime.datetime.now) + position = Column(Integer) + + # Cascade: CollectionItems are owned by their Collection. So do the full thing. + in_collection = relationship(Collection, + backref=backref( + "collection_items", + cascade="all, delete-orphan")) + + get_media_entry = relationship(MediaEntry) + + __table_args__ = ( + UniqueConstraint('collection', 'media_entry'), + {}) + + @property + def dict_view(self): + """A dict like view on this object""" + return DictReadAttrProxy(self) + + +class ProcessingMetaData(Base): + __tablename__ = 'core__processing_metadata' + + id = Column(Integer, primary_key=True) + media_entry_id = Column(Integer, ForeignKey(MediaEntry.id), nullable=False, + index=True) + media_entry = relationship(MediaEntry, + backref=backref('processing_metadata', + cascade='all, delete-orphan')) + callback_url = Column(Unicode) + + @property + def dict_view(self): + """A dict like view on this object""" + return DictReadAttrProxy(self) + + +MODELS = [ + User, MediaEntry, Tag, MediaTag, MediaComment, Collection, CollectionItem, MediaFile, FileKeynames, + MediaAttachmentFile, ProcessingMetaData] + + +###################################################### +# Special, migrations-tracking table +# +# Not listed in MODELS because this is special and not +# really migrated, but used for migrations (for now) +###################################################### + +class MigrationData(Base): + __tablename__ = "core__migrations" + + name = Column(Unicode, primary_key=True) + version = Column(Integer, nullable=False, default=0) + +###################################################### + + +def show_table_init(engine_uri): + if engine_uri is None: + engine_uri = 'sqlite:///:memory:' + from sqlalchemy import create_engine + engine = create_engine(engine_uri, echo=True) + + Base.metadata.create_all(engine) + + +if __name__ == '__main__': + from sys import argv + print repr(argv) + if len(argv) == 2: + uri = argv[1] + else: + uri = None + show_table_init(uri) diff --git a/mediagoblin/db/models_v0.py b/mediagoblin/db/models_v0.py new file mode 100644 index 00000000..bdedec2e --- /dev/null +++ b/mediagoblin/db/models_v0.py @@ -0,0 +1,342 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +TODO: indexes on foreignkeys, where useful. +""" + +########################################################################### +# WHAT IS THIS FILE? +# ------------------ +# +# Upon occasion, someone runs into this file and wonders why we have +# both a models.py and a models_v0.py. +# +# The short of it is: you can ignore this file. +# +# The long version is, in two parts: +# +# - We used to use MongoDB, then we switched to SQL and SQLAlchemy. +# We needed to convert peoples' databases; the script we had would +# switch them to the first version right after Mongo, convert over +# all their tables, then run any migrations that were added after. +# +# - That script is now removed, but there is some discussion of +# writing a test that would set us at the first SQL migration and +# run everything after. If we wrote that, this file would still be +# useful. But for now, it's legacy! +# +########################################################################### + + +import datetime +import sys + +from sqlalchemy import ( + Column, Integer, Unicode, UnicodeText, DateTime, Boolean, ForeignKey, + UniqueConstraint, PrimaryKeyConstraint, SmallInteger, Float) +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship, backref +from sqlalchemy.orm.collections import attribute_mapped_collection +from sqlalchemy.ext.associationproxy import association_proxy +from sqlalchemy.util import memoized_property + +from mediagoblin.db.extratypes import PathTupleWithSlashes, JSONEncoded +from mediagoblin.db.base import GMGTableBase, Session + + +Base_v0 = declarative_base(cls=GMGTableBase) + + +class User(Base_v0): + """ + TODO: We should consider moving some rarely used fields + into some sort of "shadow" table. + """ + __tablename__ = "core__users" + + id = Column(Integer, primary_key=True) + username = Column(Unicode, nullable=False, unique=True) + email = Column(Unicode, nullable=False) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + pw_hash = Column(Unicode, nullable=False) + email_verified = Column(Boolean, default=False) + status = Column(Unicode, default=u"needs_email_verification", nullable=False) + verification_key = Column(Unicode) + is_admin = Column(Boolean, default=False, nullable=False) + url = Column(Unicode) + bio = Column(UnicodeText) # ?? + fp_verification_key = Column(Unicode) + fp_token_expire = Column(DateTime) + + ## TODO + # plugin data would be in a separate model + + +class MediaEntry(Base_v0): + """ + TODO: Consider fetching the media_files using join + """ + __tablename__ = "core__media_entries" + + id = Column(Integer, primary_key=True) + uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True) + title = Column(Unicode, nullable=False) + slug = Column(Unicode) + created = Column(DateTime, nullable=False, default=datetime.datetime.now, + index=True) + description = Column(UnicodeText) # ?? + media_type = Column(Unicode, nullable=False) + state = Column(Unicode, default=u'unprocessed', nullable=False) + # or use sqlalchemy.types.Enum? + license = Column(Unicode) + + fail_error = Column(Unicode) + fail_metadata = Column(JSONEncoded) + + queued_media_file = Column(PathTupleWithSlashes) + + queued_task_id = Column(Unicode) + + __table_args__ = ( + UniqueConstraint('uploader', 'slug'), + {}) + + get_uploader = relationship(User) + + media_files_helper = relationship("MediaFile", + collection_class=attribute_mapped_collection("name"), + cascade="all, delete-orphan" + ) + + attachment_files_helper = relationship("MediaAttachmentFile", + cascade="all, delete-orphan", + order_by="MediaAttachmentFile.created" + ) + + tags_helper = relationship("MediaTag", + cascade="all, delete-orphan" + ) + + def media_data_init(self, **kwargs): + """ + Initialize or update the contents of a media entry's media_data row + """ + session = Session() + + media_data = session.query(self.media_data_table).filter_by( + media_entry=self.id).first() + + # No media data, so actually add a new one + if media_data is None: + media_data = self.media_data_table( + media_entry=self.id, + **kwargs) + session.add(media_data) + # Update old media data + else: + for field, value in kwargs.iteritems(): + setattr(media_data, field, value) + + @memoized_property + def media_data_table(self): + # TODO: memoize this + models_module = self.media_type + '.models' + __import__(models_module) + return sys.modules[models_module].DATA_MODEL + + +class FileKeynames(Base_v0): + """ + keywords for various places. + currently the MediaFile keys + """ + __tablename__ = "core__file_keynames" + id = Column(Integer, primary_key=True) + name = Column(Unicode, unique=True) + + def __repr__(self): + return "<FileKeyname %r: %r>" % (self.id, self.name) + + @classmethod + def find_or_new(cls, name): + t = cls.query.filter_by(name=name).first() + if t is not None: + return t + return cls(name=name) + + +class MediaFile(Base_v0): + """ + TODO: Highly consider moving "name" into a new table. + TODO: Consider preloading said table in software + """ + __tablename__ = "core__mediafiles" + + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False) + name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False) + file_path = Column(PathTupleWithSlashes) + + __table_args__ = ( + PrimaryKeyConstraint('media_entry', 'name_id'), + {}) + + def __repr__(self): + return "<MediaFile %s: %r>" % (self.name, self.file_path) + + name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True) + name = association_proxy('name_helper', 'name', + creator=FileKeynames.find_or_new + ) + + +class MediaAttachmentFile(Base_v0): + __tablename__ = "core__attachment_files" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False) + name = Column(Unicode, nullable=False) + filepath = Column(PathTupleWithSlashes) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + + +class Tag(Base_v0): + __tablename__ = "core__tags" + + id = Column(Integer, primary_key=True) + slug = Column(Unicode, nullable=False, unique=True) + + def __repr__(self): + return "<Tag %r: %r>" % (self.id, self.slug) + + @classmethod + def find_or_new(cls, slug): + t = cls.query.filter_by(slug=slug).first() + if t is not None: + return t + return cls(slug=slug) + + +class MediaTag(Base_v0): + __tablename__ = "core__media_tags" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), + nullable=False, index=True) + tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True) + name = Column(Unicode) + # created = Column(DateTime, nullable=False, default=datetime.datetime.now) + + __table_args__ = ( + UniqueConstraint('tag', 'media_entry'), + {}) + + tag_helper = relationship(Tag) + slug = association_proxy('tag_helper', 'slug', + creator=Tag.find_or_new + ) + + def __init__(self, name=None, slug=None): + Base_v0.__init__(self) + if name is not None: + self.name = name + if slug is not None: + self.tag_helper = Tag.find_or_new(slug) + + +class MediaComment(Base_v0): + __tablename__ = "core__media_comments" + + id = Column(Integer, primary_key=True) + media_entry = Column( + Integer, ForeignKey(MediaEntry.id), nullable=False, index=True) + author = Column(Integer, ForeignKey(User.id), nullable=False) + created = Column(DateTime, nullable=False, default=datetime.datetime.now) + content = Column(UnicodeText, nullable=False) + + get_author = relationship(User) + + +class ImageData(Base_v0): + __tablename__ = "image__mediadata" + + # The primary key *and* reference to the main media_entry + media_entry = Column(Integer, ForeignKey('core__media_entries.id'), + primary_key=True) + get_media_entry = relationship("MediaEntry", + backref=backref("image__media_data", cascade="all, delete-orphan")) + + width = Column(Integer) + height = Column(Integer) + exif_all = Column(JSONEncoded) + gps_longitude = Column(Float) + gps_latitude = Column(Float) + gps_altitude = Column(Float) + gps_direction = Column(Float) + + +class VideoData(Base_v0): + __tablename__ = "video__mediadata" + + # The primary key *and* reference to the main media_entry + media_entry = Column(Integer, ForeignKey('core__media_entries.id'), + primary_key=True) + get_media_entry = relationship("MediaEntry", + backref=backref("video__media_data", cascade="all, delete-orphan")) + + width = Column(SmallInteger) + height = Column(SmallInteger) + + +class AsciiData(Base_v0): + __tablename__ = "ascii__mediadata" + + # The primary key *and* reference to the main media_entry + media_entry = Column(Integer, ForeignKey('core__media_entries.id'), + primary_key=True) + get_media_entry = relationship("MediaEntry", + backref=backref("ascii__media_data", cascade="all, delete-orphan")) + + +class AudioData(Base_v0): + __tablename__ = "audio__mediadata" + + # The primary key *and* reference to the main media_entry + media_entry = Column(Integer, ForeignKey('core__media_entries.id'), + primary_key=True) + get_media_entry = relationship("MediaEntry", + backref=backref("audio__media_data", cascade="all, delete-orphan")) + + +###################################################### +# Special, migrations-tracking table +# +# Not listed in MODELS because this is special and not +# really migrated, but used for migrations (for now) +###################################################### + +class MigrationData(Base_v0): + __tablename__ = "core__migrations" + + name = Column(Unicode, primary_key=True) + version = Column(Integer, nullable=False, default=0) + +###################################################### diff --git a/mediagoblin/db/open.py b/mediagoblin/db/open.py new file mode 100644 index 00000000..0b1679fb --- /dev/null +++ b/mediagoblin/db/open.py @@ -0,0 +1,101 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +from sqlalchemy import create_engine, event +import logging + +from mediagoblin.db.base import Base, Session +from mediagoblin import mg_globals + +_log = logging.getLogger(__name__) + + +class DatabaseMaster(object): + def __init__(self, engine): + self.engine = engine + + for k, v in Base._decl_class_registry.iteritems(): + setattr(self, k, v) + + def commit(self): + Session.commit() + + def save(self, obj): + Session.add(obj) + Session.flush() + + def check_session_clean(self): + for dummy in Session(): + _log.warn("STRANGE: There are elements in the sql session. " + "Please report this and help us track this down.") + break + + def reset_after_request(self): + Session.rollback() + Session.remove() + + +def load_models(app_config): + import mediagoblin.db.models + + for media_type in app_config['media_types']: + _log.debug("Loading %s.models", media_type) + __import__(media_type + ".models") + + for plugin in mg_globals.global_config.get('plugins', {}).keys(): + _log.debug("Loading %s.models", plugin) + try: + __import__(plugin + ".models") + except ImportError as exc: + _log.debug("Could not load {0}.models: {1}".format( + plugin, + exc)) + + +def _sqlite_fk_pragma_on_connect(dbapi_con, con_record): + """Enable foreign key checking on each new sqlite connection""" + dbapi_con.execute('pragma foreign_keys=on') + + +def _sqlite_disable_fk_pragma_on_connect(dbapi_con, con_record): + """ + Disable foreign key checking on each new sqlite connection + (Good for migrations!) + """ + dbapi_con.execute('pragma foreign_keys=off') + + +def setup_connection_and_db_from_config(app_config, migrations=False): + engine = create_engine(app_config['sql_engine']) + + # Enable foreign key checking for sqlite + if app_config['sql_engine'].startswith('sqlite://'): + if migrations: + event.listen(engine, 'connect', + _sqlite_disable_fk_pragma_on_connect) + else: + event.listen(engine, 'connect', _sqlite_fk_pragma_on_connect) + + # logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + + Session.configure(bind=engine) + + return DatabaseMaster(engine) + + +def check_db_migrations_current(db): + pass diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py new file mode 100644 index 00000000..6ffec44d --- /dev/null +++ b/mediagoblin/db/util.py @@ -0,0 +1,76 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from mediagoblin.db.base import Session +from mediagoblin.db.models import MediaEntry, Tag, MediaTag, Collection + + +########################## +# Random utility functions +########################## + + +def atomic_update(table, query_dict, update_values): + table.find(query_dict).update(update_values, + synchronize_session=False) + Session.commit() + + +def check_media_slug_used(uploader_id, slug, ignore_m_id): + query = MediaEntry.query.filter_by(uploader=uploader_id, slug=slug) + if ignore_m_id is not None: + query = query.filter(MediaEntry.id != ignore_m_id) + does_exist = query.first() is not None + return does_exist + + +def media_entries_for_tag_slug(dummy_db, tag_slug): + return MediaEntry.query \ + .join(MediaEntry.tags_helper) \ + .join(MediaTag.tag_helper) \ + .filter( + (MediaEntry.state == u'processed') + & (Tag.slug == tag_slug)) + + +def clean_orphan_tags(commit=True): + """Search for unused MediaTags and delete them""" + q1 = Session.query(Tag).outerjoin(MediaTag).filter(MediaTag.id==None) + for t in q1: + Session.delete(t) + # The "let the db do all the work" version: + # q1 = Session.query(Tag.id).outerjoin(MediaTag).filter(MediaTag.id==None) + # q2 = Session.query(Tag).filter(Tag.id.in_(q1)) + # q2.delete(synchronize_session = False) + if commit: + Session.commit() + + +def check_collection_slug_used(creator_id, slug, ignore_c_id): + filt = (Collection.creator == creator_id) \ + & (Collection.slug == slug) + if ignore_c_id is not None: + filt = filt & (Collection.id != ignore_c_id) + does_exist = Session.query(Collection.id).filter(filt).first() is not None + return does_exist + + +if __name__ == '__main__': + from mediagoblin.db.open import setup_connection_and_db_from_config + + db = setup_connection_and_db_from_config({'sql_engine':'sqlite:///mediagoblin.db'}) + + clean_orphan_tags() |