# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import logging
import uuid
from os.path import splitext

from celery import chord
from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.response import json_response
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.tools.federation import create_activity, create_generator
from mediagoblin.db.models import Collection, MediaEntry, ProcessingMetaData
from mediagoblin.processing import (
    mark_entry_failed, get_entry_and_processing_manager)
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media
from mediagoblin.user_pages.lib import add_media_to_collection

_log = logging.getLogger(__name__)


def check_file_field(request, field_name):
    """Check if a file field meets minimal criteria"""
    retval = (field_name in request.files
              and isinstance(request.files[field_name], FileStorage)
              and request.files[field_name].stream)

    if not retval:
        _log.debug("Form did not contain proper file field %s", field_name)

    return retval


def new_upload_entry(user):
    """
    Create a new MediaEntry for uploading
    """
    entry = MediaEntry()
    entry.actor = user.id
    entry.license = user.license_preference
    return entry


def get_upload_file_limits(user):
    """
    Get the upload_limit and max_file_size for this user
    """
    if user.upload_limit is not None and user.upload_limit >= 0:  # TODO: debug this
        upload_limit = user.upload_limit
    else:
        upload_limit = mg_globals.app_config.get('upload_limit', None)

    max_file_size = mg_globals.app_config.get('max_file_size', None)

    return upload_limit, max_file_size


class UploadLimitError(Exception):
    """
    General exception for when an upload will be over some upload limit
    """
    pass


class FileUploadLimit(UploadLimitError):
    """
    This file is over the site upload limit
    """
    pass


class UserUploadLimit(UploadLimitError):
    """
    This file is over the user's particular upload limit
    """
    pass


class UserPastUploadLimit(UploadLimitError):
    """
    The user is *already* past their upload limit!
    """
    pass
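
# The sketch below is illustrative only (not part of this module's API): it
# shows how check_file_field() and get_upload_file_limits() are typically
# combined with the exception classes above before accepting an upload.  The
# `request` object is assumed to be a werkzeug-style request with `.files`
# and a `.user` attribute attached; the 'file' field name is also an
# assumption made up for the example.
def _example_upload_precheck(request):
    # Reject requests without a usable file field named 'file'
    if not check_file_field(request, 'file'):
        return False

    # Refuse early if the user has already exhausted their quota
    upload_limit, _max_file_size = get_upload_file_limits(request.user)
    if upload_limit and request.user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    return True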
def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None, collection_slug=None,
                 license=None, metadata=None, tags_string="",
                 callback_url=None, urlgen=None,):
    """
    Args:
     - mg_app: the MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this file.  Not necessarily
       the one on disk being referenced by submitted_file.
     - title: title for this media entry
     - description: description for this media entry
     - collection_slug: collection for this media entry
     - license: license for this media entry
     - tags_string: comma-separated string of tags to be associated
       with this entry
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update and assign
       a public ID used in the API (very important).
    """
    upload_limit, max_file_size = get_upload_file_limits(user)
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non-ascii characters, generate a unique name
    if not all(ord(c) < 128 for c in filename):
        filename = str(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, media_manager = sniff_media(submitted_file, filename)

    # Create the entry and save it in the database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    entry.title = (title or str(splitext(filename)[0]))
    entry.description = description or ""
    entry.license = license or None
    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    queue_file = prepare_queue_task(mg_app, entry, filename)
    with queue_file:
        # Copy the submitted file-like object's contents into the queue file
        queue_file.write(submitted_file.read())

    # Get the file size in MiB and round it to 2 decimal places
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float(f'{file_size:.2f}')

    # Check if the file size is over the limit
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Check if the user is over their upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()

    # Various "submit to stuff" things: the callback_url post-hook and
    # the urlgen-based feed URL / public ID
    if callback_url:
        metadata = ProcessingMetaData()
        metadata.media_entry = entry
        metadata.callback_url = callback_url
        metadata.save()

    if urlgen:
        # Generate the public_id; this is very important, especially
        # relating to deletion, as it allows the shell to stay
        # accessible post-delete!
        entry.get_public_id(urlgen)

        # Generate the feed URL
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    add_comment_subscription(user, entry)

    # Create the activity
    create_activity("post", entry, entry.actor)
    entry.save()

    # Add to the collection, if one was given
    if collection_slug:
        collection = Collection.query.filter_by(slug=collection_slug,
                                                actor=user.id).first()
        if collection:
            add_media_to_collection(collection, entry)

    # Pass off to processing
    #
    # (... don't change the entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    return entry
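
# The sketch below is illustrative only (not part of this module's API): a
# minimal call to submit_media() from, say, a maintenance script.  The
# 'kitten.png' path, title, description, and tags are assumptions made up
# for the example; mg_app and user are assumed to be a real MediaGoblinApp
# and User instance.
def _example_submit(mg_app, user):
    with open('kitten.png', 'rb') as media_file:
        return submit_media(
            mg_app=mg_app, user=user,
            submitted_file=media_file,
            filename='kitten.png',
            title='A kitten',
            description='A very cute kitten',
            tags_string='cats, cute')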
def prepare_queue_task(app, entry, filename):
    """
    Prepare a MediaEntry for the processing queue and get a queue file
    """
    # We generate this ourselves so we know what the task id is for
    # retrieval later.
    #
    # (If we got it off the task's auto-generation, there'd be
    # a risk of a race condition when we'd save after sending
    # off the task)
    task_id = str(uuid.uuid4())
    entry.queued_task_id = task_id

    # Now generate the queueing-related filename
    queue_filepath = app.queue_store.get_unique_filepath(
        ['media_entries', task_id, secure_filename(filename)])

    # Queue appropriately
    queue_file = app.queue_store.get_file(
        queue_filepath, 'wb')

    # Add the queued filepath to the entry
    entry.queued_media_file = queue_filepath

    return queue_file


def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Process the media asynchronously

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of.  This will be something like:
        `request.urlgen(
             'mediagoblin.user_pages.atom_feed',
             qualified=True, user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type"""
    entry, manager = get_entry_and_processing_manager(entry.id)

    try:
        wf = manager.workflow(entry, feed_url, reprocess_action,
                              reprocess_info)
        if wf is None:
            ProcessMedia().apply_async(
                [entry.id, feed_url, reprocess_action, reprocess_info], {},
                task_id=entry.queued_task_id)
        else:
            chord(wf[0])(wf[1])
    except BaseException as exc:
        # This section exists because, when running in "lazy" or
        # always-eager-with-exceptions-propagated celery mode, the
        # failure handling won't happen on the Celery end.  Since we
        # expect a lot of users to run things in this way, we have to
        # capture stuff here.
        #
        # ... not completely the diaper pattern because the
        # exception is re-raised :)
        mark_entry_failed(entry.id, exc)
        # Re-raise the exception
        raise


def api_upload_request(request, file_data, entry):
    """ This handles an image upload request """
    # Use the same kind of method as in mediagoblin/submit/views:submit_start
    entry.title = file_data.filename

    # This will be set later; currently we just don't have enough information
    entry.slug = None

    # This is a MUST.
    entry.get_public_id(request.urlgen)

    queue_file = prepare_queue_task(request.app, entry, file_data.filename)
    with queue_file:
        queue_file.write(request.data)

    entry.save()
    return json_response(entry.serialize(request))


def api_add_to_feed(request, entry):
    """ Add media to the user's feed """
    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True, user=request.user.username
    )

    add_comment_subscription(request.user, entry)

    # Create the activity
    activity = create_activity(
        verb="post",
        obj=entry,
        actor=entry.actor,
        generator=create_generator(request)
    )

    entry.save()
    run_process_media(entry, feed_url)

    return activity
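
# The sketch below is illustrative only (not part of this module's API):
# re-running processing for an existing entry via run_process_media(), e.g.
# after a failed initial run.  The 'resize' action name and the reprocess_info
# contents are assumptions made up for the example; valid action names and
# reprocess_info keys depend on the media type's processing manager.
def _example_reprocess(entry):
    run_process_media(
        entry,
        reprocess_action='resize',
        reprocess_info={'file': 'thumb'})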