diff options
author | Sebastian Spaeth <Sebastian@SSpaeth.de> | 2013-01-15 14:41:30 +0100 |
---|---|---|
committer | Sebastian Spaeth <Sebastian@SSpaeth.de> | 2013-01-15 14:53:08 +0100 |
commit | 2cfffd5ed8c054bb60c27ede4e69667f97d12b09 (patch) | |
tree | e5d699c123410a578feebd6f314405530a88da96 /mediagoblin/processing | |
parent | 65969d3fb799765854ac5f57f85842c6ab523b21 (diff) | |
download | mediagoblin-2cfffd5ed8c054bb60c27ede4e69667f97d12b09.tar.lz mediagoblin-2cfffd5ed8c054bb60c27ede4e69667f97d12b09.tar.xz mediagoblin-2cfffd5ed8c054bb60c27ede4e69667f97d12b09.zip |
Make PuSHing the Pubhubsubbub server an async task (#436, #585)
Notifying the PuSH servers had 3 problems.
1) it was done immediately after sending of the processing task to celery. So if celery was run in a separate
process we would notify the PuSH servers before the new media was processed/
visible. (#436)
2) Notification code was called in submit/views.py, so submitting via the
API never resulted in notifications. (#585)
3) If Notifying the PuSH server failed, we would never retry.
The solution was to make the PuSH notification an asynchronous subtask. This
way: 1) it will only be called once async processing has finished, 2) it
is in the main processing code path, so even API calls will result in
notifications, and 3) We retry 3 times in case of failure before giving up.
If the server is in a separate process, we will wait 3x 2 minutes before
retrying the notification.
The only downside is that the celery server needs to have access to the internet
to ping the PuSH server. If that is a problem, we need to make the task belong
to a special group of celery servers that has access to the internet.
As a side effect, I believe I removed the limitation that prevented us from
upgrading celery.
Signed-off-by: Sebastian Spaeth <Sebastian@SSpaeth.de>
Diffstat (limited to 'mediagoblin/processing')
-rw-r--r-- | mediagoblin/processing/task.py | 48 |
1 files changed, 45 insertions, 3 deletions
diff --git a/mediagoblin/processing/task.py b/mediagoblin/processing/task.py index b29de9bd..2cdd5f1a 100644 --- a/mediagoblin/processing/task.py +++ b/mediagoblin/processing/task.py @@ -15,8 +15,10 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import logging +import urllib +import urllib2 -from celery.task import Task +from celery import registry, task from mediagoblin import mg_globals as mgg from mediagoblin.db.models import MediaEntry @@ -28,18 +30,51 @@ logging.basicConfig() _log.setLevel(logging.DEBUG) +@task.task(default_retry_delay=2 * 60) +def handle_push_urls(feed_url): + """Subtask, notifying the PuSH servers of new content + + Retry 3 times every 2 minutes if run in separate process before failing.""" + if not mgg.app_config["push_urls"]: + return # Nothing to do + _log.debug('Notifying Push servers for feed {0}'.format(feed_url)) + hubparameters = { + 'hub.mode': 'publish', + 'hub.url': feed_url} + hubdata = urllib.urlencode(hubparameters) + hubheaders = { + "Content-type": "application/x-www-form-urlencoded", + "Connection": "close"} + for huburl in mgg.app_config["push_urls"]: + hubrequest = urllib2.Request(huburl, hubdata, hubheaders) + try: + hubresponse = urllib2.urlopen(hubrequest) + except (urllib2.HTTPError, urllib2.URLError) as exc: + # We retry by default 3 times before failing + _log.info("PuSH url %r gave error %r", huburl, exc) + try: + return handle_push_urls.retry(exc=exc, throw=False) + except Exception as e: + # All retries failed, Failure is no tragedy here, probably. + _log.warn('Failed to notify PuSH server for feed {0}. ' + 'Giving up.'.format(feed_url)) + return False + ################################ # Media processing initial steps ################################ -class ProcessMedia(Task): +class ProcessMedia(task.Task): """ Pass this entry off for processing. """ - def run(self, media_id): + def run(self, media_id, feed_url): """ Pass the media entry off to the appropriate processing function (for now just process_image...) + + :param feed_url: The feed URL that the PuSH server needs to be + updated for. """ entry = MediaEntry.query.get(media_id) @@ -58,6 +93,9 @@ class ProcessMedia(Task): entry.state = u'processed' entry.save() + # Notify the PuSH servers as async task + handle_push_urls.subtask().delay(feed_url) + json_processing_callback(entry) except BaseProcessingFail as exc: mark_entry_failed(entry.id, exc) @@ -97,3 +135,7 @@ class ProcessMedia(Task): entry = mgg.database.MediaEntry.query.filter_by(id=entry_id).first() json_processing_callback(entry) + +# Register the task +process_media = registry.tasks[ProcessMedia.name] + |