Diffstat (limited to 'mediagoblin/processing')
-rw-r--r-- | mediagoblin/processing/__init__.py | 61
-rw-r--r-- | mediagoblin/processing/task.py     | 69
2 files changed, 115 insertions, 15 deletions
diff --git a/mediagoblin/processing/__init__.py b/mediagoblin/processing/__init__.py
index 6b2d50e2..f3a85940 100644
--- a/mediagoblin/processing/__init__.py
+++ b/mediagoblin/processing/__init__.py
@@ -38,7 +38,7 @@ class ProgressCallback(object):
 def create_pub_filepath(entry, filename):
     return mgg.public_store.get_unique_filepath(
             ['media_entries',
-             unicode(entry._id),
+             unicode(entry.id),
              filename])
 
 
@@ -74,6 +74,61 @@ class FilenameBuilder(object):
                              ext=self.ext)
 
 
+class ProcessingState(object):
+    """
+    The first and only argument to the "processor" of a media type.
+
+    This could be thought of as a "request" to the processor
+    function. It carries the main info for the request (the media
+    entry) and a bunch of tools for working on it. It can grow more
+    capable without impacting old media types.
+    """
+    def __init__(self, entry):
+        self.entry = entry
+        self.workbench = None
+        self.queued_filename = None
+
+    def set_workbench(self, wb):
+        self.workbench = wb
+
+    def get_queued_filename(self):
+        """
+        Get a filename for the original, on local storage
+        """
+        if self.queued_filename is not None:
+            return self.queued_filename
+        queued_filepath = self.entry.queued_media_file
+        queued_filename = self.workbench.localized_file(
+            mgg.queue_store, queued_filepath,
+            'source')
+        self.queued_filename = queued_filename
+        return queued_filename
+
+    def copy_original(self, target_name, keyname=u"original"):
+        self.store_public(keyname, self.get_queued_filename(), target_name)
+
+    def store_public(self, keyname, local_file, target_name=None):
+        if target_name is None:
+            target_name = os.path.basename(local_file)
+        target_filepath = create_pub_filepath(self.entry, target_name)
+        if keyname in self.entry.media_files:
+            _log.warn("store_public: keyname %r already used for file %r, "
+                      "replacing with %r", keyname,
+                      self.entry.media_files[keyname], target_filepath)
+        mgg.public_store.copy_local_to_storage(local_file, target_filepath)
+        self.entry.media_files[keyname] = target_filepath
+
+    def delete_queue_file(self):
+        # Remove the queued media file from storage and database.
+        # queued_filepath is in the task_id directory, which should be
+        # removed too, but fail if the directory is not empty, to be on
+        # the super-safe side.
+        queued_filepath = self.entry.queued_media_file
+        mgg.queue_store.delete_file(queued_filepath)      # rm file
+        mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
+        self.entry.queued_media_file = []
+
+
 def mark_entry_failed(entry_id, exc):
     """
     Mark a media entry as having failed in its conversion.
@@ -93,7 +148,7 @@ def mark_entry_failed(entry_id, exc):
         # Looks like yes, so record information about that failure and any
         # metadata the user might have supplied.
         atomic_update(mgg.database.MediaEntry,
-            {'_id': entry_id},
+            {'id': entry_id},
             {u'state': u'failed',
              u'fail_error': unicode(exc.exception_path),
              u'fail_metadata': exc.metadata})
@@ -104,7 +159,7 @@ def mark_entry_failed(entry_id, exc):
         # metadata (in fact overwrite it if somehow it had previous info
         # here)
         atomic_update(mgg.database.MediaEntry,
-            {'id': entry_id},
+            {'id': entry_id},
             {u'state': u'failed',
              u'fail_error': None,
              u'fail_metadata': {}})
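The __init__.py side of this change introduces the ProcessingState API that media-type processors receive. Below is a minimal sketch, not part of this patch, of what a processor might look like against it; process_image and make_thumbnail are hypothetical stand-ins, and only the proc_state methods come from the code above:

    # Hypothetical media-type processor built on the ProcessingState API
    # added above. Only the proc_state methods are real; make_thumbnail()
    # stands in for whatever conversion work a media type actually does.
    def process_image(proc_state):
        # Local filename of the queued original (fetched once, then cached).
        queued_filename = proc_state.get_queued_filename()

        # make_thumbnail() is an assumed helper, not part of this patch.
        thumb_filename = make_thumbnail(queued_filename)

        # Publish the derivative under its keyname and keep the original.
        proc_state.store_public(u'thumb', thumb_filename, u'thumbnail.jpg')
        proc_state.copy_original(u'original.jpg')

        # The queued copy is no longer needed once everything is public.
        proc_state.delete_queue_file()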
diff --git a/mediagoblin/processing/task.py b/mediagoblin/processing/task.py
index a8bc0f2f..9af192ed 100644
--- a/mediagoblin/processing/task.py
+++ b/mediagoblin/processing/task.py
@@ -15,12 +15,14 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import logging
+import urllib
+import urllib2
 
-from celery.task import Task
+from celery import registry, task
 
 from mediagoblin import mg_globals as mgg
-from mediagoblin.db.util import ObjectId
-from mediagoblin.processing import mark_entry_failed, BaseProcessingFail
+from mediagoblin.db.models import MediaEntry
+from . import mark_entry_failed, BaseProcessingFail, ProcessingState
 from mediagoblin.tools.processing import json_processing_callback
 
 _log = logging.getLogger(__name__)
@@ -28,21 +30,53 @@
 logging.basicConfig()
 _log.setLevel(logging.DEBUG)
 
 
+@task.task(default_retry_delay=2 * 60)
+def handle_push_urls(feed_url):
+    """Subtask, notifying the PuSH servers of new content.
+
+    Retries 3 times, every 2 minutes, before failing, if run in a
+    separate process."""
+    if not mgg.app_config["push_urls"]:
+        return  # Nothing to do
+    _log.debug('Notifying PuSH servers for feed {0}'.format(feed_url))
+    hubparameters = {
+        'hub.mode': 'publish',
+        'hub.url': feed_url}
+    hubdata = urllib.urlencode(hubparameters)
+    hubheaders = {
+        "Content-type": "application/x-www-form-urlencoded",
+        "Connection": "close"}
+    for huburl in mgg.app_config["push_urls"]:
+        hubrequest = urllib2.Request(huburl, hubdata, hubheaders)
+        try:
+            hubresponse = urllib2.urlopen(hubrequest)
+        except (urllib2.HTTPError, urllib2.URLError) as exc:
+            # We retry by default 3 times before failing
+            _log.info("PuSH url %r gave error %r", huburl, exc)
+            try:
+                return handle_push_urls.retry(exc=exc, throw=False)
+            except Exception as e:
+                # All retries failed; failure is probably no tragedy here.
+                _log.warn('Failed to notify PuSH server for feed {0}. '
+                          'Giving up.'.format(feed_url))
+                return False
+
+
 ################################
 # Media processing initial steps
 ################################
 
-class ProcessMedia(Task):
+class ProcessMedia(task.Task):
     """
     Pass this entry off for processing.
     """
-    def run(self, media_id):
+    def run(self, media_id, feed_url):
         """
         Pass the media entry off to the appropriate processing function
         (for now just process_image...)
+
+        :param feed_url: The feed URL that the PuSH server needs to be
+            updated for.
         """
-        entry = mgg.database.MediaEntry.one(
-            {'_id': ObjectId(media_id)})
+        entry = MediaEntry.query.get(media_id)
 
         # Try to process, and handle expected errors.
         try:
@@ -51,17 +85,24 @@ class ProcessMedia(Task):
 
             _log.debug('Processing {0}'.format(entry))
 
-            # run the processing code
-            entry.media_manager['processor'](entry)
+            proc_state = ProcessingState(entry)
+            with mgg.workbench_manager.create() as workbench:
+                proc_state.set_workbench(workbench)
+                # run the processing code
+                entry.media_manager.processor(proc_state)
 
             # We set the state to processed and save the entry here so there's
             # no need to save at the end of the processing stage, probably ;)
             entry.state = u'processed'
             entry.save()
 
+            # Notify the PuSH servers as an async task
+            if mgg.app_config["push_urls"] and feed_url:
+                handle_push_urls.subtask().delay(feed_url)
+
             json_processing_callback(entry)
         except BaseProcessingFail as exc:
-            mark_entry_failed(entry._id, exc)
+            mark_entry_failed(entry.id, exc)
             json_processing_callback(entry)
             return
 
@@ -72,7 +113,7 @@ class ProcessMedia(Task):
                 entry.title,
                 exc))
 
-            mark_entry_failed(entry._id, exc)
+            mark_entry_failed(entry.id, exc)
             json_processing_callback(entry)
 
         except Exception as exc:
@@ -80,7 +121,7 @@ class ProcessMedia(Task):
                 + ' processing {0}'.format(
                     entry))
 
-            mark_entry_failed(entry._id, exc)
+            mark_entry_failed(entry.id, exc)
             json_processing_callback(entry)
             raise
 
@@ -98,3 +139,7 @@ class ProcessMedia(Task):
         entry = mgg.database.MediaEntry.query.filter_by(id=entry_id).first()
 
         json_processing_callback(entry)
+
+
+# Register the task
+process_media = registry.tasks[ProcessMedia.name]
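With run() now taking (media_id, feed_url) and the registered process_media task exported at the bottom, enqueueing looks roughly like the sketch below. This is a hedged illustration, not part of the diff: the request and entry variables, the atom_feed route name, and the queued_task_id field are assumptions about the surrounding submit code.

    # Illustrative enqueue under the new signature; 'request' and 'entry'
    # are assumed to come from a submit view, and the feed route name is
    # an assumption about the surrounding application.
    from mediagoblin.processing.task import process_media

    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True, user=request.user.username)

    process_media.apply_async(
        [unicode(entry.id), feed_url], {},
        task_id=entry.queued_task_id)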