diff options
| author | Sebastian Spaeth <Sebastian@SSpaeth.de> | 2013-01-15 14:41:30 +0100 | 
|---|---|---|
| committer | Sebastian Spaeth <Sebastian@SSpaeth.de> | 2013-01-15 14:53:08 +0100 | 
| commit | 2cfffd5ed8c054bb60c27ede4e69667f97d12b09 (patch) | |
| tree | e5d699c123410a578feebd6f314405530a88da96 | |
| parent | 65969d3fb799765854ac5f57f85842c6ab523b21 (diff) | |
| download | mediagoblin-2cfffd5ed8c054bb60c27ede4e69667f97d12b09.tar.lz mediagoblin-2cfffd5ed8c054bb60c27ede4e69667f97d12b09.tar.xz mediagoblin-2cfffd5ed8c054bb60c27ede4e69667f97d12b09.zip | |
Make PuSHing the Pubhubsubbub server an async task (#436, #585)
Notifying the PuSH servers had 3 problems. 
1) it was done immediately after sending of the processing task to celery. So if celery was run in a separate
process we would notify the PuSH servers before the new media was processed/
visible. (#436)
2) Notification code was called in submit/views.py, so submitting via the
   API never resulted in notifications. (#585)
3) If Notifying the PuSH server failed, we would never retry.
The solution was to make the PuSH notification an asynchronous subtask. This
way: 1) it will only be called once async processing has finished, 2) it
is in the main processing code path, so even API calls will result in
notifications, and 3) We retry 3 times in case of failure before giving up.
If the server is in a separate process, we will wait 3x 2 minutes before
retrying the notification.
The only downside is that the celery server needs to have access to the internet
to ping the PuSH server. If that is a problem, we need to make the task belong
to a special group of celery servers that has access to the internet.
As a side effect, I believe I removed the limitation that prevented us from
upgrading celery.
Signed-off-by: Sebastian Spaeth <Sebastian@SSpaeth.de>
| -rw-r--r-- | mediagoblin/plugins/api/views.py | 2 | ||||
| -rw-r--r-- | mediagoblin/processing/task.py | 48 | ||||
| -rw-r--r-- | mediagoblin/submit/lib.py | 41 | ||||
| -rw-r--r-- | mediagoblin/submit/views.py | 8 | 
4 files changed, 54 insertions, 45 deletions
| diff --git a/mediagoblin/plugins/api/views.py b/mediagoblin/plugins/api/views.py index 6aa4ef9f..7383e20d 100644 --- a/mediagoblin/plugins/api/views.py +++ b/mediagoblin/plugins/api/views.py @@ -86,7 +86,7 @@ def post_entry(request):      #      # (... don't change entry after this point to avoid race      # conditions with changes to the document via processing code) -    run_process_media(entry) +    run_process_media(entry, request)      return json_response(get_entry_serializable(entry, request.urlgen)) diff --git a/mediagoblin/processing/task.py b/mediagoblin/processing/task.py index b29de9bd..2cdd5f1a 100644 --- a/mediagoblin/processing/task.py +++ b/mediagoblin/processing/task.py @@ -15,8 +15,10 @@  # along with this program.  If not, see <http://www.gnu.org/licenses/>.  import logging +import urllib +import urllib2 -from celery.task import Task +from celery import registry, task  from mediagoblin import mg_globals as mgg  from mediagoblin.db.models import MediaEntry @@ -28,18 +30,51 @@ logging.basicConfig()  _log.setLevel(logging.DEBUG) +@task.task(default_retry_delay=2 * 60) +def handle_push_urls(feed_url): +    """Subtask, notifying the PuSH servers of new content + +    Retry 3 times every 2 minutes if run in separate process before failing.""" +    if not mgg.app_config["push_urls"]: +        return # Nothing to do +    _log.debug('Notifying Push servers for feed {0}'.format(feed_url)) +    hubparameters = { +        'hub.mode': 'publish', +        'hub.url': feed_url} +    hubdata = urllib.urlencode(hubparameters) +    hubheaders = { +        "Content-type": "application/x-www-form-urlencoded", +        "Connection": "close"} +    for huburl in mgg.app_config["push_urls"]: +        hubrequest = urllib2.Request(huburl, hubdata, hubheaders) +        try: +            hubresponse = urllib2.urlopen(hubrequest) +        except (urllib2.HTTPError, urllib2.URLError) as exc: +            # We retry by default 3 times before failing +            _log.info("PuSH url %r gave error %r", huburl, exc) +            try: +                return handle_push_urls.retry(exc=exc, throw=False) +            except Exception as e: +                # All retries failed, Failure is no tragedy here, probably. +                _log.warn('Failed to notify PuSH server for feed {0}. ' +                          'Giving up.'.format(feed_url)) +                return False +  ################################  # Media processing initial steps  ################################ -class ProcessMedia(Task): +class ProcessMedia(task.Task):      """      Pass this entry off for processing.      """ -    def run(self, media_id): +    def run(self, media_id, feed_url):          """          Pass the media entry off to the appropriate processing function          (for now just process_image...) + +        :param feed_url: The feed URL that the PuSH server needs to be +            updated for.          """          entry = MediaEntry.query.get(media_id) @@ -58,6 +93,9 @@ class ProcessMedia(Task):              entry.state = u'processed'              entry.save() +            # Notify the PuSH servers as async task +            handle_push_urls.subtask().delay(feed_url) +              json_processing_callback(entry)          except BaseProcessingFail as exc:              mark_entry_failed(entry.id, exc) @@ -97,3 +135,7 @@ class ProcessMedia(Task):          entry = mgg.database.MediaEntry.query.filter_by(id=entry_id).first()          json_processing_callback(entry) + +# Register the task +process_media = registry.tasks[ProcessMedia.name] + diff --git a/mediagoblin/submit/lib.py b/mediagoblin/submit/lib.py index db5dfe53..ba07c6fa 100644 --- a/mediagoblin/submit/lib.py +++ b/mediagoblin/submit/lib.py @@ -14,16 +14,12 @@  # You should have received a copy of the GNU Affero General Public License  # along with this program.  If not, see <http://www.gnu.org/licenses/>. -import urllib -import urllib2  import logging  import uuid -from celery import registry  from werkzeug.utils import secure_filename -from mediagoblin import mg_globals  from mediagoblin.processing import mark_entry_failed -from mediagoblin.processing.task import ProcessMedia +from mediagoblin.processing.task import process_media  _log = logging.getLogger(__name__) @@ -58,11 +54,13 @@ def prepare_queue_task(app, entry, filename):      return queue_file -def run_process_media(entry): -    process_media = registry.tasks[ProcessMedia.name] +def run_process_media(entry, request): +    feed_url = request.urlgen( +        'mediagoblin.user_pages.atom_feed', +        qualified=True, user=request.user.username)      try:          process_media.apply_async( -            [unicode(entry.id)], {}, +            [entry.id, feed_url], {},              task_id=entry.queued_task_id)      except BaseException as exc:          # The purpose of this section is because when running in "lazy" @@ -76,30 +74,3 @@ def run_process_media(entry):          mark_entry_failed(entry.id, exc)          # re-raise the exception          raise - - -def handle_push_urls(request): -    if mg_globals.app_config["push_urls"]: -        feed_url = request.urlgen( -                           'mediagoblin.user_pages.atom_feed', -                           qualified=True, -                           user=request.user.username) -        hubparameters = { -            'hub.mode': 'publish', -            'hub.url': feed_url} -        hubdata = urllib.urlencode(hubparameters) -        hubheaders = { -            "Content-type": "application/x-www-form-urlencoded", -            "Connection": "close"} -        for huburl in mg_globals.app_config["push_urls"]: -            hubrequest = urllib2.Request(huburl, hubdata, hubheaders) -            try: -                hubresponse = urllib2.urlopen(hubrequest) -            except urllib2.HTTPError as exc: -                # This is not a big issue, the item will be fetched -                # by the PuSH server next time we hit it -                _log.warning( -                    "push url %r gave error %r", huburl, exc.code) -            except urllib2.URLError as exc: -                _log.warning( -                    "push url %r is unreachable %r", huburl, exc.reason) diff --git a/mediagoblin/submit/views.py b/mediagoblin/submit/views.py index 2d609b31..145b9f5e 100644 --- a/mediagoblin/submit/views.py +++ b/mediagoblin/submit/views.py @@ -32,8 +32,7 @@ from mediagoblin.submit import forms as submit_forms  from mediagoblin.messages import add_message, SUCCESS  from mediagoblin.media_types import sniff_media, \      InvalidFileType, FileTypeNotSupported -from mediagoblin.submit.lib import handle_push_urls, run_process_media, \ -    prepare_queue_task +from mediagoblin.submit.lib import run_process_media, prepare_queue_task  @require_active_login @@ -90,10 +89,7 @@ def submit_start(request):                  #                  # (... don't change entry after this point to avoid race                  # conditions with changes to the document via processing code) -                run_process_media(entry) - -                handle_push_urls(request) - +                run_process_media(entry, request)                  add_message(request, SUCCESS, _('Woohoo! Submitted!'))                  return redirect(request, "mediagoblin.user_pages.user_home", | 
