-rw-r--r-- | docs/source/siteadmin/media-types.rst | 19
-rw-r--r-- | mediagoblin/media_types/__init__.py | 116
-rw-r--r-- | mediagoblin/media_types/audio/processing.py | 25
-rw-r--r-- | mediagoblin/media_types/audio/transcoders.py | 164
-rw-r--r-- | mediagoblin/media_types/tools.py | 15
-rw-r--r-- | mediagoblin/media_types/video/__init__.py | 10
-rw-r--r-- | mediagoblin/media_types/video/migrations.py | 61
-rw-r--r-- | mediagoblin/media_types/video/models.py | 15
-rw-r--r-- | mediagoblin/media_types/video/processing.py | 177
-rw-r--r-- | mediagoblin/media_types/video/transcoders.py | 705
-rw-r--r-- | mediagoblin/media_types/video/util.py | 36
-rw-r--r-- | mediagoblin/plugins/api/views.py | 16
-rw-r--r-- | mediagoblin/processing/__init__.py | 9
-rw-r--r-- | mediagoblin/processing/task.py | 3
-rw-r--r-- | mediagoblin/submit/views.py | 17
-rw-r--r-- | mediagoblin/tests/media_tools.py | 61
-rw-r--r-- | mediagoblin/tests/test_audio.py | 104
-rw-r--r-- | mediagoblin/tests/test_mgoblin_app.ini | 2
-rw-r--r-- | mediagoblin/tests/test_submission.py | 19
-rw-r--r-- | mediagoblin/tests/test_video.py | 132 |
20 files changed, 904 insertions, 802 deletions
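
This commit ports MediaGoblin's audio and video media types from GStreamer 0.10 (pygst/gobject) to GStreamer 1.0 through GObject Introspection. The recurring new pattern is synchronous media discovery via GstPbutils.Discoverer, added as discover() in mediagoblin/media_types/tools.py and used by both the audio and video sniffers in the diff below. A minimal sketch of that pattern, assuming GStreamer 1.0 and its GI bindings are installed; the example file path is a placeholder:

.. code-block:: python

    import gi
    gi.require_version('Gst', '1.0')
    from gi.repository import Gst, GstPbutils

    Gst.init(None)

    def discover(src):
        # mirrors tools.discover(): blocking discovery with a 60 s timeout
        discoverer = GstPbutils.Discoverer.new(60 * Gst.SECOND)
        return discoverer.discover_uri('file://{0}'.format(src))

    # placeholder path, for illustration only
    info = discover('/tmp/example.webm')
    print(info.get_duration() / Gst.SECOND)   # duration in seconds
    print(len(info.get_video_streams()))      # 0 means no video stream
    print(len(info.get_audio_streams()))
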
diff --git a/docs/source/siteadmin/media-types.rst b/docs/source/siteadmin/media-types.rst index f8030081..7d9f72b0 100644 --- a/docs/source/siteadmin/media-types.rst +++ b/docs/source/siteadmin/media-types.rst @@ -79,12 +79,15 @@ good/bad/ugly). On Debianoid systems .. code-block:: bash - sudo apt-get install python-gst0.10 \ - gstreamer0.10-plugins-base \ - gstreamer0.10-plugins-bad \ - gstreamer0.10-plugins-good \ - gstreamer0.10-plugins-ugly \ - gstreamer0.10-ffmpeg + sudo apt-get install python-gi python3-gi \ + gstreamer1.0-tools \ + gir1.2-gstreamer-1.0 \ + gir1.2-gst-plugins-base-1.0 \ + gstreamer1.0-plugins-good \ + gstreamer1.0-plugins-ugly \ + gstreamer1.0-plugins-bad \ + gstreamer1.0-libav \ + python-gst-1.0 Add ``[[mediagoblin.media_types.video]]`` under the ``[plugins]`` section in @@ -206,7 +209,7 @@ It may work on some earlier versions, but that is not guaranteed (and is surely not to work prior to Blender 2.5X). Add ``[[mediagoblin.media_types.stl]]`` under the ``[plugins]`` section in your -``mediagoblin_local.ini`` and restart MediaGoblin. +``mediagoblin_local.ini`` and restart MediaGoblin. Run @@ -255,7 +258,7 @@ This feature has been tested on Fedora with: It may work on some earlier versions, but that is not guaranteed. Add ``[[mediagoblin.media_types.pdf]]`` under the ``[plugins]`` section in your -``mediagoblin_local.ini`` and restart MediaGoblin. +``mediagoblin_local.ini`` and restart MediaGoblin. Run diff --git a/mediagoblin/media_types/__init__.py b/mediagoblin/media_types/__init__.py index 2e392317..ab39fa36 100644 --- a/mediagoblin/media_types/__init__.py +++ b/mediagoblin/media_types/__init__.py @@ -23,10 +23,18 @@ from mediagoblin.tools.translate import lazy_pass_to_ugettext as _ _log = logging.getLogger(__name__) + class FileTypeNotSupported(Exception): pass -class InvalidFileType(Exception): + +class TypeNotFound(FileTypeNotSupported): + '''Raised if no mediagoblin plugin supporting this file type was found''' + pass + + +class MissingComponents(FileTypeNotSupported): + '''Raised if plugin found, but it can't process the file for some reason''' pass @@ -50,40 +58,30 @@ class MediaManagerBase(object): return hasattr(self, i) -def sniff_media(media_file, filename): +def sniff_media_contents(media_file, filename): ''' - Iterate through the enabled media types and find those suited - for a certain file. + Check media contents using 'expensive' scanning. For example, for video it + is checking the contents using gstreamer + :param media_file: file-like object with 'name' attribute + :param filename: expected filename of the media ''' - - try: - return get_media_type_and_manager(filename) - except FileTypeNotSupported: - _log.info('No media handler found by file extension. 
Doing it the expensive way...') - # Create a temporary file for sniffers suchs as GStreamer-based - # Audio video - tmp_media_file = tempfile.NamedTemporaryFile() - tmp_media_file.write(media_file.read()) - tmp_media_file.seek(0) - media_file.seek(0) - - media_type = hook_handle('sniff_handler', tmp_media_file, filename) - if media_type: - _log.info('{0} accepts the file'.format(media_type)) - return media_type, hook_handle(('media_manager', media_type)) - else: - _log.debug('{0} did not accept the file'.format(media_type)) - - raise FileTypeNotSupported( - # TODO: Provide information on which file types are supported - _(u'Sorry, I don\'t support that file type :(')) - + media_type = hook_handle('sniff_handler', media_file, filename) + if media_type: + _log.info('{0} accepts the file'.format(media_type)) + return media_type, hook_handle(('media_manager', media_type)) + else: + _log.debug('{0} did not accept the file'.format(media_type)) + raise FileTypeNotSupported( + # TODO: Provide information on which file types are supported + _(u'Sorry, I don\'t support that file type :(')) def get_media_type_and_manager(filename): ''' Try to find the media type based on the file name, extension specifically. This is used as a speedup, the sniffing functionality then falls back on more in-depth bitsniffing of the source file. + + This hook is deprecated, 'type_match_handler' should be used instead ''' if filename.find('.') > 0: # Get the file extension @@ -97,5 +95,67 @@ def get_media_type_and_manager(filename): _log.info('File {0} has no file extension, let\'s hope the sniffers get it.'.format( filename)) - raise FileTypeNotSupported( + raise TypeNotFound( _(u'Sorry, I don\'t support that file type :(')) + +def type_match_handler(media_file, filename): + '''Check media file by name and then by content + + Try to find the media type based on the file name, extension + specifically. After that, if media type is one of supported ones, check the + contents of the file + ''' + if filename.find('.') > 0: + # Get the file extension + ext = os.path.splitext(filename)[1].lower() + + # Omit the dot from the extension and match it against + # the media manager + hook_result = hook_handle('type_match_handler', ext[1:]) + if hook_result: + _log.info('Info about file found, checking further') + MEDIA_TYPE, Manager, sniffer = hook_result + if not sniffer: + _log.debug('sniffer is None, plugin trusts the extension') + return MEDIA_TYPE, Manager + _log.info('checking the contents with sniffer') + try: + sniffer(media_file) + _log.info('checked, found') + return MEDIA_TYPE, Manager + except Exception as e: + _log.info('sniffer says it will not accept the file') + _log.debug(e) + raise + else: + _log.info('No plugins handled extension {0}'.format(ext)) + else: + _log.info('File {0} has no known file extension, let\'s hope ' + 'the sniffers get it.'.format(filename)) + raise TypeNotFound(_(u'Sorry, I don\'t support that file type :(')) + + +def sniff_media(media_file, filename): + ''' + Iterate through the enabled media types and find those suited + for a certain file. 
+ ''' + # copy the contents to a .name-enabled temporary file for further checks + # TODO: there are cases when copying is not required + tmp_media_file = tempfile.NamedTemporaryFile() + media_file.save(tmp_media_file.name) + media_file.seek(0) + try: + return type_match_handler(tmp_media_file, filename) + except TypeNotFound as e: + _log.info('No plugins using two-step checking found') + + # keep trying, using old `get_media_type_and_manager` + try: + return get_media_type_and_manager(filename) + except TypeNotFound as e: + # again, no luck. Do it expensive way + _log.info('No media handler found by file extension') + _log.info('Doing it the expensive way...') + return sniff_media_contents(tmp_media_file, filename) + diff --git a/mediagoblin/media_types/audio/processing.py b/mediagoblin/media_types/audio/processing.py index de6fa9ca..c1f6cb6b 100644 --- a/mediagoblin/media_types/audio/processing.py +++ b/mediagoblin/media_types/audio/processing.py @@ -27,6 +27,7 @@ from mediagoblin.processing import ( from mediagoblin.media_types.audio.transcoders import ( AudioTranscoder, AudioThumbnailer) +from mediagoblin.media_types.tools import discover _log = logging.getLogger(__name__) @@ -36,15 +37,12 @@ MEDIA_TYPE = 'mediagoblin.media_types.audio' def sniff_handler(media_file, filename): _log.info('Sniffing {0}'.format(MEDIA_TYPE)) try: - transcoder = AudioTranscoder() - data = transcoder.discover(media_file.name) - except BadMediaFail: - _log.debug('Audio discovery raised BadMediaFail') + data = discover(media_file.name) + except Exception as e: + _log.info(unicode(e)) return None - - if data.is_audio is True and data.is_video is False: + if data and data.get_audio_streams() and not data.get_video_streams(): return MEDIA_TYPE - return None @@ -126,8 +124,6 @@ class CommonAudioProcessor(MediaProcessor): quality=quality, progress_callback=progress_callback) - self.transcoder.discover(webm_audio_tmp) - self._keep_best() _log.debug('Saving medium...') @@ -145,21 +141,14 @@ class CommonAudioProcessor(MediaProcessor): if self._skip_processing('spectrogram', max_width=max_width, fft_size=fft_size): return - wav_tmp = os.path.join(self.workbench.dir, self.name_builder.fill( '{basename}.ogg')) - _log.info('Creating OGG source for spectrogram') - self.transcoder.transcode( - self.process_filename, - wav_tmp, - mux_string='vorbisenc quality={0} ! oggmux'.format( - self.audio_config['quality'])) - + self.transcoder.transcode(self.process_filename, wav_tmp, + mux_name='oggmux') spectrogram_tmp = os.path.join(self.workbench.dir, self.name_builder.fill( '{basename}-spectrogram.jpg')) - self.thumbnailer.spectrogram( wav_tmp, spectrogram_tmp, diff --git a/mediagoblin/media_types/audio/transcoders.py b/mediagoblin/media_types/audio/transcoders.py index 150dad8e..f86528de 100644 --- a/mediagoblin/media_types/audio/transcoders.py +++ b/mediagoblin/media_types/audio/transcoders.py @@ -20,10 +20,8 @@ try: except ImportError: import Image -from mediagoblin.processing import BadMediaFail from mediagoblin.media_types.audio import audioprocessing - _log = logging.getLogger(__name__) CPU_COUNT = 2 # Just assuming for now @@ -39,26 +37,13 @@ try: except ImportError: _log.warning('Could not import multiprocessing, assuming 2 CPU cores') -# IMPORT GOBJECT -try: - import gobject - gobject.threads_init() -except ImportError: - raise Exception('gobject could not be found') - -# IMPORT PYGST -try: - import pygst - - # We won't settle for less. 
For now, this is an arbitrary limit - # as we have not tested with > 0.10 - pygst.require('0.10') +# uncomment this to get a lot of logs from gst +# import os;os.environ['GST_DEBUG'] = '5,python:5' - import gst - - import gst.extend.discoverer -except ImportError: - raise Exception('gst/pygst >= 0.10 could not be imported') +import gi +gi.require_version('Gst', '1.0') +from gi.repository import GObject, Gst +Gst.init(None) import numpy @@ -72,7 +57,6 @@ class AudioThumbnailer(object): height = int(kw.get('height', float(width) * 0.3)) fft_size = kw.get('fft_size', 2048) callback = kw.get('progress_callback') - processor = audioprocessing.AudioProcessor( src, fft_size, @@ -132,95 +116,87 @@ class AudioTranscoder(object): _log.info('Initializing {0}'.format(self.__class__.__name__)) # Instantiate MainLoop - self._loop = gobject.MainLoop() + self._loop = GObject.MainLoop() self._failed = None - def discover(self, src): - self._src_path = src - _log.info('Discovering {0}'.format(src)) - self._discovery_path = src - - self._discoverer = gst.extend.discoverer.Discoverer( - self._discovery_path) - self._discoverer.connect('discovered', self.__on_discovered) - self._discoverer.discover() - - self._loop.run() # Run MainLoop - - if self._failed: - raise self._failed - - # Once MainLoop has returned, return discovery data - return getattr(self, '_discovery_data', False) - - def __on_discovered(self, data, is_media): - if not is_media: - self._failed = BadMediaFail() - _log.error('Could not discover {0}'.format(self._src_path)) - self.halt() - - _log.debug('Discovered: {0}'.format(data.__dict__)) - - self._discovery_data = data - - # Gracefully shut down MainLoop - self.halt() - - def transcode(self, src, dst, **kw): + def transcode(self, src, dst, mux_name='webmmux',quality=0.3, + progress_callback=None, **kw): + def _on_pad_added(element, pad, connect_to): + caps = pad.query_caps(None) + name = caps.to_string() + _log.debug('on_pad_added: {0}'.format(name)) + if name.startswith('audio') and not connect_to.is_linked(): + pad.link(connect_to) _log.info('Transcoding {0} into {1}'.format(src, dst)) - self._discovery_data = kw.get('data', self.discover(src)) - - self.__on_progress = kw.get('progress_callback') - - quality = kw.get('quality', 0.3) - - mux_string = kw.get( - 'mux_string', - 'vorbisenc quality={0} ! webmmux'.format(quality)) - + self.__on_progress = progress_callback # Set up pipeline - self.pipeline = gst.parse_launch( - 'filesrc location="{src}" ! ' - 'decodebin2 ! queue ! audiorate tolerance={tolerance} ! ' - 'audioconvert ! audio/x-raw-float,channels=2 ! ' - '{mux_string} ! ' - 'progressreport silent=true ! 
' - 'filesink location="{dst}"'.format( - src=src, - tolerance=80000000, - mux_string=mux_string, - dst=dst)) - + tolerance = 80000000 + self.pipeline = Gst.Pipeline() + filesrc = Gst.ElementFactory.make('filesrc', 'filesrc') + filesrc.set_property('location', src) + decodebin = Gst.ElementFactory.make('decodebin', 'decodebin') + queue = Gst.ElementFactory.make('queue', 'queue') + decodebin.connect('pad-added', _on_pad_added, + queue.get_static_pad('sink')) + audiorate = Gst.ElementFactory.make('audiorate', 'audiorate') + audiorate.set_property('tolerance', tolerance) + audioconvert = Gst.ElementFactory.make('audioconvert', 'audioconvert') + caps_struct = Gst.Structure.new_empty('audio/x-raw') + caps_struct.set_value('channels', 2) + caps = Gst.Caps.new_empty() + caps.append_structure(caps_struct) + capsfilter = Gst.ElementFactory.make('capsfilter', 'capsfilter') + capsfilter.set_property('caps', caps) + enc = Gst.ElementFactory.make('vorbisenc', 'enc') + enc.set_property('quality', quality) + mux = Gst.ElementFactory.make(mux_name, 'mux') + progressreport = Gst.ElementFactory.make('progressreport', 'progress') + progressreport.set_property('silent', True) + sink = Gst.ElementFactory.make('filesink', 'sink') + sink.set_property('location', dst) + # add to pipeline + for e in [filesrc, decodebin, queue, audiorate, audioconvert, + capsfilter, enc, mux, progressreport, sink]: + self.pipeline.add(e) + # link elements + filesrc.link(decodebin) + decodebin.link(queue) + queue.link(audiorate) + audiorate.link(audioconvert) + audioconvert.link(capsfilter) + capsfilter.link(enc) + enc.link(mux) + mux.link(progressreport) + progressreport.link(sink) self.bus = self.pipeline.get_bus() self.bus.add_signal_watch() self.bus.connect('message', self.__on_bus_message) - - self.pipeline.set_state(gst.STATE_PLAYING) - + # run + self.pipeline.set_state(Gst.State.PLAYING) self._loop.run() def __on_bus_message(self, bus, message): - _log.debug(message) - - if (message.type == gst.MESSAGE_ELEMENT - and message.structure.get_name() == 'progress'): - data = dict(message.structure) - - if self.__on_progress: - self.__on_progress(data.get('percent')) - - _log.info('{0}% done...'.format( - data.get('percent'))) - elif message.type == gst.MESSAGE_EOS: + _log.debug(message.type) + if (message.type == Gst.MessageType.ELEMENT + and message.has_name('progress')): + structure = message.get_structure() + (success, percent) = structure.get_int('percent') + if self.__on_progress and success: + self.__on_progress(percent) + _log.info('{0}% done...'.format(percent)) + elif message.type == Gst.MessageType.EOS: _log.info('Done') self.halt() + elif message.type == Gst.MessageType.ERROR: + _log.error(message.parse_error()) + self.halt() def halt(self): if getattr(self, 'pipeline', False): - self.pipeline.set_state(gst.STATE_NULL) + self.pipeline.set_state(Gst.State.NULL) del self.pipeline _log.info('Quitting MainLoop gracefully...') - gobject.idle_add(self._loop.quit) + GObject.idle_add(self._loop.quit) if __name__ == '__main__': import sys diff --git a/mediagoblin/media_types/tools.py b/mediagoblin/media_types/tools.py index fe7b3772..c3b3a3f0 100644 --- a/mediagoblin/media_types/tools.py +++ b/mediagoblin/media_types/tools.py @@ -17,6 +17,11 @@ import logging from mediagoblin import mg_globals +import gi +gi.require_version('Gst', '1.0') +from gi.repository import GObject, Gst, GstPbutils, GLib +Gst.init(None) + _log = logging.getLogger(__name__) @@ -25,3 +30,13 @@ def media_type_warning(): _log.warning('Media_types have been 
converted to plugins. Old' ' media_types will no longer work. Please convert them' ' to plugins to continue using them.') + + +def discover(src): + ''' + Discover properties about a media file + ''' + _log.info('Discovering {0}...'.format(src)) + uri = 'file://{0}'.format(src) + discoverer = GstPbutils.Discoverer.new(60 * Gst.SECOND) + return discoverer.discover_uri(uri) diff --git a/mediagoblin/media_types/video/__init__.py b/mediagoblin/media_types/video/__init__.py index c85cc0b2..f636f1ab 100644 --- a/mediagoblin/media_types/video/__init__.py +++ b/mediagoblin/media_types/video/__init__.py @@ -15,8 +15,8 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. from mediagoblin.media_types import MediaManagerBase -from mediagoblin.media_types.video.processing import VideoProcessingManager, \ - sniff_handler +from mediagoblin.media_types.video.processing import (VideoProcessingManager, + sniff_handler, sniffer) MEDIA_TYPE = 'mediagoblin.media_types.video' @@ -38,8 +38,12 @@ def get_media_type_and_manager(ext): if ext in ACCEPTED_EXTENSIONS: return MEDIA_TYPE, VideoMediaManager +def type_match_handler(ext): + if ext in ACCEPTED_EXTENSIONS: + return MEDIA_TYPE, VideoMediaManager, sniffer + hooks = { - 'get_media_type_and_manager': get_media_type_and_manager, + 'type_match_handler': type_match_handler, 'sniff_handler': sniff_handler, ('media_manager', MEDIA_TYPE): lambda: VideoMediaManager, ('reprocess_manager', MEDIA_TYPE): lambda: VideoProcessingManager, diff --git a/mediagoblin/media_types/video/migrations.py b/mediagoblin/media_types/video/migrations.py index d68e2933..8088220b 100644 --- a/mediagoblin/media_types/video/migrations.py +++ b/mediagoblin/media_types/video/migrations.py @@ -18,6 +18,8 @@ from mediagoblin.db.migration_tools import RegisterMigration, inspect_table from sqlalchemy import MetaData, Column, Unicode +import json + MIGRATIONS = {} @@ -47,3 +49,62 @@ def webm_640_to_webm_video(db): values(name='webm_video')) db.commit() + + +@RegisterMigration(3, MIGRATIONS) +def change_metadata_format(db): + """Change orig_metadata format for multi-stream a-v""" + db_metadata = MetaData(bind=db.bind) + + vid_data = inspect_table(db_metadata, "video__mediadata") + + for row in db.execute(vid_data.select()): + metadata = json.loads(row.orig_metadata) + + if not metadata: + continue + + # before this migration there was info about only one video or audio + # stream. 
So, we store existing info as the first item in the list + new_metadata = {'audio': [], 'video': [], 'common': {}} + video_key_map = { # old: new + 'videoheight': 'height', + 'videowidth': 'width', + 'videorate': 'rate', + } + audio_key_map = { # old: new + 'audiochannels': 'channels', + } + common_key_map = { + 'videolength': 'length', + } + + new_metadata['video'] = [dict((v, metadata.get(k)) + for k, v in video_key_map.items() if metadata.get(k))] + new_metadata['audio'] = [dict((v, metadata.get(k)) + for k, v in audio_key_map.items() if metadata.get(k))] + new_metadata['common'] = dict((v, metadata.get(k)) + for k, v in common_key_map.items() if metadata.get(k)) + + # 'mimetype' should be in tags + new_metadata['common']['tags'] = {'mimetype': metadata.get('mimetype')} + if 'tags' in metadata: + new_metadata['video'][0]['tags'] = {} + new_metadata['audio'][0]['tags'] = {} + + tags = metadata['tags'] + + video_keys = ['encoder', 'encoder-version', 'video-codec'] + audio_keys = ['audio-codec'] + + for t, v in tags.items(): + if t in video_keys: + new_metadata['video'][0]['tags'][t] = tags[t] + elif t in audio_keys: + new_metadata['audio'][0]['tags'][t] = tags[t] + else: + new_metadata['common']['tags'][t] = tags[t] + db.execute(vid_data.update() + .where(vid_data.c.media_entry==row.media_entry) + .values(orig_metadata=json.dumps(new_metadata))) + db.commit() diff --git a/mediagoblin/media_types/video/models.py b/mediagoblin/media_types/video/models.py index be9d258f..4d539e7b 100644 --- a/mediagoblin/media_types/video/models.py +++ b/mediagoblin/media_types/video/models.py @@ -68,19 +68,18 @@ class VideoData(Base): """ orig_metadata = self.orig_metadata or {} - if "webm_video" not in self.get_media_entry.media_files \ - and "mimetype" in orig_metadata \ - and "tags" in orig_metadata \ - and "audio-codec" in orig_metadata["tags"] \ - and "video-codec" in orig_metadata["tags"]: + if ("webm_video" not in self.get_media_entry.media_files + and "mimetype" in orig_metadata['common']['tags'] + and "codec" in orig_metadata['audio'] + and "codec" in orig_metadata['video']): if orig_metadata['mimetype'] == 'application/ogg': # stupid ambiguous .ogg extension mimetype = "video/ogg" else: - mimetype = orig_metadata['mimetype'] + mimetype = orig_metadata['common']['tags']['mimetype'] - video_codec = orig_metadata["tags"]["video-codec"].lower() - audio_codec = orig_metadata["tags"]["audio-codec"].lower() + video_codec = orig_metadata["video"]["codec"].lower() + audio_codec = orig_metadata["audio"]["codec"].lower() # We don't want the "video" at the end of vp8... # not sure of a nicer way to be cleaning this stuff diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py index bbed4f12..bf195222 100644 --- a/mediagoblin/media_types/video/processing.py +++ b/mediagoblin/media_types/video/processing.py @@ -27,6 +27,7 @@ from mediagoblin.processing import ( get_process_filename, store_public, copy_original) from mediagoblin.tools.translate import lazy_pass_to_ugettext as _ +from mediagoblin.media_types import MissingComponents from . 
import transcoders from .util import skip_transcode @@ -44,86 +45,117 @@ class VideoTranscodingFail(BaseProcessingFail): general_message = _(u'Video transcoding failed') -EXCLUDED_EXTS = ["nef", "cr2"] +def sniffer(media_file): + '''New style sniffer, used in two-steps check; requires to have .name''' + _log.info('Sniffing {0}'.format(MEDIA_TYPE)) + try: + data = transcoders.discover(media_file.name) + except Exception as e: + # this is usually GLib.GError, but we don't really care which one + _log.warning(u'GStreamer: {0}'.format(unicode(e))) + raise MissingComponents(u'GStreamer: {0}'.format(unicode(e))) + _log.debug('Discovered: {0}'.format(data)) -def sniff_handler(media_file, filename): - name, ext = os.path.splitext(filename) - clean_ext = ext.lower()[1:] + if not data.get_video_streams(): + raise MissingComponents('No video streams found in this video') - if clean_ext in EXCLUDED_EXTS: - # We don't handle this filetype, though gstreamer might think we can - return None + if data.get_result() != 0: # it's 0 if success + try: + missing = data.get_misc().get_string('name') + _log.warning('GStreamer: missing {0}'.format(missing)) + except AttributeError as e: + # AttributeError happens here on gstreamer >1.4, when get_misc + # returns None. There is a special function to get info about + # missing plugin. This info should be printed to logs for admin and + # showed to the user in a short and nice version + details = data.get_missing_elements_installer_details() + _log.warning('GStreamer: missing: {0}'.format(', '.join(details))) + missing = u', '.join([u'{0} ({1})'.format(*d.split('|')[3:]) + for d in details]) + raise MissingComponents(u'{0} is missing'.format(missing)) - transcoder = transcoders.VideoTranscoder() - data = transcoder.discover(media_file.name) + return MEDIA_TYPE - _log.info('Sniffing {0}'.format(MEDIA_TYPE)) - _log.debug('Discovered: {0}'.format(data)) - if not data: +def sniff_handler(media_file, filename): + try: + return sniffer(media_file) + except: _log.error('Could not discover {0}'.format(filename)) return None - if data['is_video'] is True: - return MEDIA_TYPE - - return None +def get_tags(stream_info): + 'gets all tags and their values from stream info' + taglist = stream_info.get_tags() + if not taglist: + return {} + tags = [] + taglist.foreach( + lambda list, tag: tags.append((tag, list.get_value_index(tag, 0)))) + tags = dict(tags) + + # date/datetime should be converted from GDate/GDateTime to strings + if 'date' in tags: + date = tags['date'] + tags['date'] = "%s-%s-%s" % ( + date.year, date.month, date.day) + if 'datetime' in tags: + # TODO: handle timezone info; gst.get_time_zone_offset + + # python's tzinfo should help + dt = tags['datetime'] + tags['datetime'] = datetime.datetime( + dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(), + dt.get_minute(), dt.get_second(), + dt.get_microsecond()).isoformat() + for k, v in tags.items(): + # types below are accepted by json; others must not present + if not isinstance(v, (dict, list, basestring, int, float, bool, + type(None))): + del tags[k] + return dict(tags) def store_metadata(media_entry, metadata): """ Store metadata from this video for this media entry. 
""" - # Let's pull out the easy, not having to be converted ones first - stored_metadata = dict( - [(key, metadata[key]) - for key in [ - "videoheight", "videolength", "videowidth", - "audiorate", "audiolength", "audiochannels", "audiowidth", - "mimetype"] - if key in metadata]) - - # We have to convert videorate into a sequence because it's a - # special type normally.. - - if "videorate" in metadata: - videorate = metadata["videorate"] - stored_metadata["videorate"] = [videorate.num, videorate.denom] - - # Also make a whitelist conversion of the tags. - if "tags" in metadata: - tags_metadata = metadata['tags'] - - # we don't use *all* of these, but we know these ones are - # safe... - tags = dict( - [(key, tags_metadata[key]) - for key in [ - "application-name", "artist", "audio-codec", "bitrate", - "container-format", "copyright", "encoder", - "encoder-version", "license", "nominal-bitrate", "title", - "video-codec"] - if key in tags_metadata]) - if 'date' in tags_metadata: - date = tags_metadata['date'] - tags['date'] = "%s-%s-%s" % ( - date.year, date.month, date.day) - - # TODO: handle timezone info; gst.get_time_zone_offset + - # python's tzinfo should help - if 'datetime' in tags_metadata: - dt = tags_metadata['datetime'] - tags['datetime'] = datetime.datetime( - dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(), - dt.get_minute(), dt.get_second(), - dt.get_microsecond()).isoformat() - - stored_metadata['tags'] = tags - + stored_metadata = dict() + audio_info_list = metadata.get_audio_streams() + if audio_info_list: + stored_metadata['audio'] = [] + for audio_info in audio_info_list: + stored_metadata['audio'].append( + { + 'channels': audio_info.get_channels(), + 'bitrate': audio_info.get_bitrate(), + 'depth': audio_info.get_depth(), + 'languange': audio_info.get_language(), + 'sample_rate': audio_info.get_sample_rate(), + 'tags': get_tags(audio_info) + }) + + video_info_list = metadata.get_video_streams() + if video_info_list: + stored_metadata['video'] = [] + for video_info in video_info_list: + stored_metadata['video'].append( + { + 'width': video_info.get_width(), + 'height': video_info.get_height(), + 'bitrate': video_info.get_bitrate(), + 'depth': video_info.get_depth(), + 'videorate': [video_info.get_framerate_num(), + video_info.get_framerate_denom()], + 'tags': get_tags(video_info) + }) + + stored_metadata['common'] = { + 'duration': metadata.get_duration(), + 'tags': get_tags(metadata), + } # Only save this field if there's something to save if len(stored_metadata): - media_entry.media_data_init( - orig_metadata=stored_metadata) + media_entry.media_data_init(orig_metadata=stored_metadata) class CommonVideoProcessor(MediaProcessor): @@ -213,7 +245,11 @@ class CommonVideoProcessor(MediaProcessor): return # Extract metadata and keep a record of it - metadata = self.transcoder.discover(self.process_filename) + metadata = transcoders.discover(self.process_filename) + + # metadata's stream info here is a DiscovererContainerInfo instance, + # it gets split into DiscovererAudioInfo and DiscovererVideoInfo; + # metadata itself has container-related data in tags, like video-codec store_metadata(self.entry, metadata) # Figure out whether or not we need to transcode this video or @@ -221,7 +257,8 @@ class CommonVideoProcessor(MediaProcessor): if skip_transcode(metadata, medium_size): _log.debug('Skipping transcoding') - dst_dimensions = metadata['videowidth'], metadata['videoheight'] + dst_dimensions = (metadata.get_video_streams()[0].get_width(), + 
metadata.get_video_streams()[0].get_height()) # If there is an original and transcoded, delete the transcoded # since it must be of lower quality then the original @@ -236,10 +273,8 @@ class CommonVideoProcessor(MediaProcessor): vorbis_quality=vorbis_quality, progress_callback=progress_callback, dimensions=tuple(medium_size)) - - dst_dimensions = self.transcoder.dst_data.videowidth,\ - self.transcoder.dst_data.videoheight - + video_info = self.transcoder.dst_data.get_video_streams()[0] + dst_dimensions = (video_info.get_width(), video_info.get_height()) self._keep_best() # Push transcoded video to public storage @@ -270,7 +305,7 @@ class CommonVideoProcessor(MediaProcessor): return # We will only use the width so that the correct scale is kept - transcoders.VideoThumbnailerMarkII( + transcoders.capture_thumb( self.process_filename, tmp_thumb, thumb_size[0]) diff --git a/mediagoblin/media_types/video/transcoders.py b/mediagoblin/media_types/video/transcoders.py index 3a3fa97f..3c060fd7 100644 --- a/mediagoblin/media_types/video/transcoders.py +++ b/mediagoblin/media_types/video/transcoders.py @@ -19,16 +19,20 @@ from __future__ import division import os import sys import logging -import urllib import multiprocessing -import gobject + +from mediagoblin.media_types.tools import discover +from mediagoblin.tools.translate import lazy_pass_to_ugettext as _ + +#os.environ['GST_DEBUG'] = '4,python:4' old_argv = sys.argv sys.argv = [] -import pygst -pygst.require('0.10') -import gst +import gi +gi.require_version('Gst', '1.0') +from gi.repository import GObject, Gst, GstPbutils +Gst.init(None) sys.argv = old_argv import struct @@ -37,12 +41,8 @@ try: except ImportError: import Image -from gst.extend import discoverer - _log = logging.getLogger(__name__) -gobject.threads_init() - CPU_COUNT = 2 try: @@ -53,338 +53,92 @@ except NotImplementedError: os.putenv('GST_DEBUG_DUMP_DOT_DIR', '/tmp') -def pixbuf_to_pilbuf(buf): - data = list() - for i in range(0, len(buf), 3): - r, g, b = struct.unpack('BBB', buf[i:i + 3]) - data.append((r, g, b)) - - return data - - -class VideoThumbnailerMarkII(object): - ''' - Creates a thumbnail from a video file. Rewrite of VideoThumbnailer. - - Large parts of the functionality and overall architectue contained within - this object is taken from Participatory Culture Foundation's - `gst_extractor.Extractor` object last seen at - https://github.com/pculture/miro/blob/master/tv/lib/frontends/widgets/gst/gst_extractor.py - in the `miro` codebase. - - The `miro` codebase and the gst_extractor.py are licensed under the GNU - General Public License v2 or later. 
- ''' - STATE_NULL = 0 - STATE_HALTING = 1 - STATE_PROCESSING = 2 - STATE_PROCESSING_THUMBNAIL = 3 - - def __init__(self, source_path, dest_path, width=None, height=None, - position_callback=None): - self.state = self.STATE_NULL - - self.has_reached_playbin_pause = False - - self.thumbnail_pipeline = None - - self.permission_to_take_picture = False - - self.buffer_probes = {} - - self.errors = [] - - self.source_path = os.path.abspath(source_path) - self.dest_path = os.path.abspath(dest_path) - - self.width = width - self.height = height - self.position_callback = position_callback \ - or self.wadsworth_position_callback - - self.mainloop = gobject.MainLoop() - - self.playbin = gst.element_factory_make('playbin') - - self.videosink = gst.element_factory_make('fakesink', 'videosink') - self.audiosink = gst.element_factory_make('fakesink', 'audiosink') - - self.playbin.set_property('video-sink', self.videosink) - self.playbin.set_property('audio-sink', self.audiosink) - - self.playbin_message_bus = self.playbin.get_bus() - - self.playbin_message_bus.add_signal_watch() - self.playbin_bus_watch_id = self.playbin_message_bus.connect( - 'message', - self.on_playbin_message) - - self.playbin.set_property( - 'uri', - 'file:{0}'.format( - urllib.pathname2url(self.source_path))) - - self.playbin.set_state(gst.STATE_PAUSED) - - try: - self.run() - except Exception as exc: - _log.critical( - 'Exception "{0}" caught, shutting down mainloop and re-raising'\ - .format(exc)) - self.disconnect() - raise - - def wadsworth_position_callback(self, duration, gst): - return self.duration / 100 * 30 - - def run(self): - self.mainloop.run() - - def on_playbin_message(self, message_bus, message): - # Silenced to prevent clobbering of output - #_log.debug('playbin message: {0}'.format(message)) - - if message.type == gst.MESSAGE_ERROR: - _log.error('playbin error: {0}'.format(message)) - gobject.idle_add(self.on_playbin_error) - - if message.type == gst.MESSAGE_STATE_CHANGED: - prev_state, cur_state, pending_state = \ - message.parse_state_changed() - - _log.debug('playbin state changed: \nprev: {0}\ncur: {1}\n \ -pending: {2}'.format( - prev_state, - cur_state, - pending_state)) - - if cur_state == gst.STATE_PAUSED: - if message.src == self.playbin: - _log.info('playbin ready') - gobject.idle_add(self.on_playbin_paused) - - def on_playbin_paused(self): - if self.has_reached_playbin_pause: - _log.warn('Has already reached on_playbin_paused. Aborting \ -without doing anything this time.') - return False - - self.has_reached_playbin_pause = True - - # XXX: Why is this even needed at this point? - current_video = self.playbin.get_property('current-video') - - if not current_video: - _log.critical('Could not get any video data \ -from playbin') - else: - _log.info('Got video data from playbin') - - self.duration = self.get_duration(self.playbin) - self.permission_to_take_picture = True - self.buffer_probes = {} - - pipeline = ''.join([ - 'filesrc location="%s" ! decodebin2 ! ' % self.source_path, - 'ffmpegcolorspace ! videoscale ! ', - 'video/x-raw-rgb,depth=24,bpp=24,pixel-aspect-ratio=1/1', - ',width={0}'.format(self.width) if self.width else '', - ',height={0}'.format(self.height) if self.height else '', - ' ! 
', - 'fakesink signal-handoffs=True']) - - _log.debug('thumbnail_pipeline: {0}'.format(pipeline)) - - self.thumbnail_pipeline = gst.parse_launch(pipeline) - self.thumbnail_message_bus = self.thumbnail_pipeline.get_bus() - self.thumbnail_message_bus.add_signal_watch() - self.thumbnail_bus_watch_id = self.thumbnail_message_bus.connect( - 'message', - self.on_thumbnail_message) - - self.thumbnail_pipeline.set_state(gst.STATE_PAUSED) - - gobject.timeout_add(3000, self.on_gobject_timeout) - - return False - - def on_thumbnail_message(self, message_bus, message): - # This is silenced to prevent clobbering of the terminal window - #_log.debug('thumbnail message: {0}'.format(message)) - - if message.type == gst.MESSAGE_ERROR: - _log.error('thumbnail error: {0}'.format(message.parse_error())) - gobject.idle_add(self.on_thumbnail_error, message) - - if message.type == gst.MESSAGE_STATE_CHANGED: - prev_state, cur_state, pending_state = \ - message.parse_state_changed() - - _log.debug('thumbnail state changed: \nprev: {0}\ncur: {1}\n \ -pending: {2}'.format( - prev_state, - cur_state, - pending_state)) - - if cur_state == gst.STATE_PAUSED and \ - not self.state == self.STATE_PROCESSING_THUMBNAIL: - # Find the fakesink sink pad and attach the on_buffer_probe - # handler to it. - seek_amount = self.position_callback(self.duration, gst) - - seek_result = self.thumbnail_pipeline.seek( - 1.0, - gst.FORMAT_TIME, - gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE, - gst.SEEK_TYPE_SET, - seek_amount, - gst.SEEK_TYPE_NONE, - 0) - - if not seek_result: - _log.info('Could not seek.') - else: - _log.info('Seek successful, attaching buffer probe') - self.state = self.STATE_PROCESSING_THUMBNAIL - for sink in self.thumbnail_pipeline.sinks(): - sink_name = sink.get_name() - sink_factory_name = sink.get_factory().get_name() - - if sink_factory_name == 'fakesink': - sink_pad = sink.get_pad('sink') - - self.buffer_probes[sink_name] = sink_pad\ - .add_buffer_probe( - self.on_pad_buffer_probe, - sink_name) - - _log.info('Attached buffer probes: {0}'.format( - self.buffer_probes)) - - break - - - elif self.state == self.STATE_PROCESSING_THUMBNAIL: - _log.info('Already processing thumbnail') - - def on_pad_buffer_probe(self, *args): - _log.debug('buffer probe handler: {0}'.format(args)) - gobject.idle_add(lambda: self.take_snapshot(*args)) - - def take_snapshot(self, pad, buff, name): - if self.state == self.STATE_HALTING: - _log.debug('Pipeline is halting, will not take snapshot') - return False - - _log.info('Taking snapshot! ({0})'.format( - (pad, buff, name))) - try: - caps = buff.caps - if caps is None: - _log.error('No buffer caps present /take_snapshot') - self.disconnect() - - _log.debug('caps: {0}'.format(caps)) - - filters = caps[0] - width = filters['width'] - height = filters['height'] - - im = Image.new('RGB', (width, height)) - - data = pixbuf_to_pilbuf(buff.data) - - im.putdata(data) - - im.save(self.dest_path) - - _log.info('Saved snapshot!') - - self.disconnect() - - except gst.QueryError as exc: - _log.error('take_snapshot - QueryError: {0}'.format(exc)) - - return False - - def on_thumbnail_error(self, message): - scaling_failed = False - - if 'Error calculating the output scaled size - integer overflow' \ - in message.parse_error()[1]: - # GStreamer videoscale sometimes fails to calculate the dimensions - # given only one of the destination dimensions and the source - # dimensions. This is a workaround in case videoscale returns an - # error that indicates this has happened. 
- scaling_failed = True - _log.error('Thumbnailing failed because of videoscale integer' - ' overflow. Will retry with fallback.') - else: - _log.error('Thumbnailing failed: {0}'.format(message.parse_error())) - - # Kill the current mainloop - self.disconnect() - - if scaling_failed: - # Manually scale the destination dimensions - _log.info('Retrying with manually set sizes...') - - info = VideoTranscoder().discover(self.source_path) - - h = info['videoheight'] - w = info['videowidth'] - ratio = 180 / int(w) - h = int(h * ratio) - - self.__init__(self.source_path, self.dest_path, 180, h) - - def disconnect(self): - self.state = self.STATE_HALTING - - if self.playbin is not None: - self.playbin.set_state(gst.STATE_NULL) - - for sink in self.playbin.sinks(): - sink_name = sink.get_name() - sink_factory_name = sink.get_factory().get_name() - - if sink_factory_name == 'fakesink': - sink_pad = sink.get_pad('sink') - sink_pad.remove_buffer_probe(self.buffer_probes[sink_name]) - del self.buffer_probes[sink_name] - - self.playbin = None - - if self.thumbnail_pipeline is not None: - self.thumbnail_pipeline.set_state(gst.STATE_NULL) - self.thumbnail_pipeline = None - - if self.playbin_message_bus is not None: - self.playbin_message_bus.disconnect(self.playbin_bus_watch_id) - self.playbin_message_bus = None - - self.halt() - - def halt(self): - gobject.idle_add(self.mainloop.quit) - - def on_gobject_timeout(self): - _log.critical('Reached gobject timeout') - self.disconnect() - - def get_duration(self, pipeline, attempt=1): - if attempt == 5: - _log.critical('Pipeline duration query retry limit reached.') - return 0 - - try: - return pipeline.query_duration(gst.FORMAT_TIME)[0] - except gst.QueryError as exc: - _log.error('Could not get duration on attempt {0}: {1}'.format( - attempt, - exc)) - return self.get_duration(pipeline, attempt + 1) +def capture_thumb(video_path, dest_path, width=None, height=None, percent=0.5): + def pad_added(element, pad, connect_to): + '''This is a callback to dynamically add element to pipeline''' + caps = pad.query_caps(None) + name = caps.to_string() + _log.debug('on_pad_added: {0}'.format(name)) + if name.startswith('video') and not connect_to.is_linked(): + pad.link(connect_to) + + # construct pipeline: uridecodebin ! videoconvert ! videoscale ! \ + # ! CAPS ! 
appsink + pipeline = Gst.Pipeline() + uridecodebin = Gst.ElementFactory.make('uridecodebin', None) + uridecodebin.set_property('uri', 'file://{0}'.format(video_path)) + videoconvert = Gst.ElementFactory.make('videoconvert', None) + uridecodebin.connect('pad-added', pad_added, + videoconvert.get_static_pad('sink')) + videoscale = Gst.ElementFactory.make('videoscale', None) + + # create caps for video scaling + caps_struct = Gst.Structure.new_empty('video/x-raw') + caps_struct.set_value('pixel-aspect-ratio', Gst.Fraction(1, 1)) + caps_struct.set_value('format', 'RGB') + if height: + caps_struct.set_value('height', height) + if width: + caps_struct.set_value('width', width) + caps = Gst.Caps.new_empty() + caps.append_structure(caps_struct) + + # sink everything to memory + appsink = Gst.ElementFactory.make('appsink', None) + appsink.set_property('caps', caps) + + # add everything to pipeline + elements = [uridecodebin, videoconvert, videoscale, appsink] + for e in elements: + pipeline.add(e) + videoconvert.link(videoscale) + videoscale.link(appsink) + + # pipeline constructed, starting playing, but first some preparations + # seek to 50% of the file is required + pipeline.set_state(Gst.State.PAUSED) + # timeout of 3 seconds below was set experimentally + state = pipeline.get_state(Gst.SECOND * 3) + if state[0] != Gst.StateChangeReturn.SUCCESS: + _log.warning('state change failed, {0}'.format(state)) + return + + # get duration + (success, duration) = pipeline.query_duration(Gst.Format.TIME) + if not success: + _log.warning('query_duration failed') + return + + seek_to = int(duration * int(percent * 100) / 100) + _log.debug('Seeking to {0} of {1}'.format( + float(seek_to) / Gst.SECOND, float(duration) / Gst.SECOND)) + seek = pipeline.seek_simple(Gst.Format.TIME, Gst.SeekFlags.FLUSH, seek_to) + if not seek: + _log.warning('seek failed') + return + + # get sample, retrieve it's format and save + sample = appsink.emit("pull-preroll") + if not sample: + _log.warning('could not get sample') + return + caps = sample.get_caps() + if not caps: + _log.warning('could not get snapshot format') + return + structure = caps.get_structure(0) + (success, width) = structure.get_int('width') + (success, height) = structure.get_int('height') + buffer = sample.get_buffer() + + # get the image from the buffer and save it to disk + im = Image.frombytes('RGB', (width, height), + buffer.extract_dup(0, buffer.get_size())) + im.save(dest_path) + _log.info('thumbnail saved to {0}'.format(dest_path)) + + # cleanup + pipeline.set_state(Gst.State.NULL) class VideoTranscoder(object): @@ -393,16 +147,12 @@ class VideoTranscoder(object): Transcodes the SRC video file to a VP8 WebM video file at DST - - Does the same thing as VideoThumbnailer, but produces a WebM vp8 - and vorbis video file. - - The VideoTranscoder exceeds the VideoThumbnailer in the way - that it was refined afterwards and therefore is done more - correctly. + - Produces a WebM vp8 and vorbis video file. ''' def __init__(self): _log.info('Initializing VideoTranscoder...') self.progress_percentage = None - self.loop = gobject.MainLoop() + self.loop = GObject.MainLoop() def transcode(self, src, dst, **kwargs): ''' @@ -435,153 +185,84 @@ class VideoTranscoder(object): if not type(self.destination_dimensions) == tuple: raise Exception('dimensions must be tuple: (width, height)') - self._setup() - self._run() - - # XXX: This could be a static method. 
- def discover(self, src): - ''' - Discover properties about a media file - ''' - _log.info('Discovering {0}'.format(src)) - - self.source_path = src - self._setup_discover(discovered_callback=self.__on_discovered) - - self.discoverer.discover() - - self.loop.run() - - if hasattr(self, '_discovered_data'): - return self._discovered_data.__dict__ - else: - return None - - def __on_discovered(self, data, is_media): - _log.debug('Discovered: {0}'.format(data)) - if not is_media: - self.__stop() - raise Exception('Could not discover {0}'.format(self.source_path)) - - self._discovered_data = data - - self.__stop_mainloop() - - def _setup(self): - self._setup_discover() self._setup_pipeline() - - def _run(self): - _log.info('Discovering...') - self.discoverer.discover() - _log.info('Done') - - _log.debug('Initializing MainLoop()') - self.loop.run() - - def _setup_discover(self, **kw): - _log.debug('Setting up discoverer') - self.discoverer = discoverer.Discoverer(self.source_path) - - # Connect self.__discovered to the 'discovered' event - self.discoverer.connect( - 'discovered', - kw.get('discovered_callback', self.__discovered)) - - def __discovered(self, data, is_media): - ''' - Callback for media discoverer. - ''' - if not is_media: - self.__stop() - raise Exception('Could not discover {0}'.format(self.source_path)) - - _log.debug('__discovered, data: {0}'.format(data.__dict__)) - - self.data = data - - # Launch things that should be done after discovery + self.data = discover(self.source_path) self._link_elements() self.__setup_videoscale_capsfilter() - - # Tell the transcoding pipeline to start running - self.pipeline.set_state(gst.STATE_PLAYING) + self.pipeline.set_state(Gst.State.PLAYING) _log.info('Transcoding...') + _log.debug('Initializing MainLoop()') + self.loop.run() + def _setup_pipeline(self): _log.debug('Setting up transcoding pipeline') # Create the pipeline bin. 
- self.pipeline = gst.Pipeline('VideoTranscoderPipeline') + self.pipeline = Gst.Pipeline.new('VideoTranscoderPipeline') # Create all GStreamer elements, starting with # filesrc & decoder - self.filesrc = gst.element_factory_make('filesrc', 'filesrc') + self.filesrc = Gst.ElementFactory.make('filesrc', 'filesrc') self.filesrc.set_property('location', self.source_path) self.pipeline.add(self.filesrc) - self.decoder = gst.element_factory_make('decodebin2', 'decoder') - self.decoder.connect('new-decoded-pad', self._on_dynamic_pad) + self.decoder = Gst.ElementFactory.make('decodebin', 'decoder') + self.decoder.connect('pad-added', self._on_dynamic_pad) self.pipeline.add(self.decoder) # Video elements - self.videoqueue = gst.element_factory_make('queue', 'videoqueue') + self.videoqueue = Gst.ElementFactory.make('queue', 'videoqueue') self.pipeline.add(self.videoqueue) - self.videorate = gst.element_factory_make('videorate', 'videorate') + self.videorate = Gst.ElementFactory.make('videorate', 'videorate') self.pipeline.add(self.videorate) - self.ffmpegcolorspace = gst.element_factory_make( - 'ffmpegcolorspace', 'ffmpegcolorspace') - self.pipeline.add(self.ffmpegcolorspace) + self.videoconvert = Gst.ElementFactory.make('videoconvert', + 'videoconvert') + self.pipeline.add(self.videoconvert) - self.videoscale = gst.element_factory_make('ffvideoscale', 'videoscale') - #self.videoscale.set_property('method', 2) # I'm not sure this works - #self.videoscale.set_property('add-borders', 0) + self.videoscale = Gst.ElementFactory.make('videoscale', 'videoscale') self.pipeline.add(self.videoscale) - self.capsfilter = gst.element_factory_make('capsfilter', 'capsfilter') + self.capsfilter = Gst.ElementFactory.make('capsfilter', 'capsfilter') self.pipeline.add(self.capsfilter) - self.vp8enc = gst.element_factory_make('vp8enc', 'vp8enc') - self.vp8enc.set_property('quality', self.vp8_quality) + self.vp8enc = Gst.ElementFactory.make('vp8enc', 'vp8enc') self.vp8enc.set_property('threads', self.vp8_threads) - self.vp8enc.set_property('max-latency', 25) self.pipeline.add(self.vp8enc) # Audio elements - self.audioqueue = gst.element_factory_make('queue', 'audioqueue') + self.audioqueue = Gst.ElementFactory.make('queue', 'audioqueue') self.pipeline.add(self.audioqueue) - self.audiorate = gst.element_factory_make('audiorate', 'audiorate') + self.audiorate = Gst.ElementFactory.make('audiorate', 'audiorate') self.audiorate.set_property('tolerance', 80000000) self.pipeline.add(self.audiorate) - self.audioconvert = gst.element_factory_make('audioconvert', 'audioconvert') + self.audioconvert = Gst.ElementFactory.make('audioconvert', 'audioconvert') self.pipeline.add(self.audioconvert) - - self.audiocapsfilter = gst.element_factory_make('capsfilter', - 'audiocapsfilter') - audiocaps = ['audio/x-raw-float'] - self.audiocapsfilter.set_property( - 'caps', - gst.caps_from_string( - ','.join(audiocaps))) + self.audiocapsfilter = Gst.ElementFactory.make('capsfilter', + 'audiocapsfilter') + audiocaps = Gst.Caps.new_empty() + audiocaps_struct = Gst.Structure.new_empty('audio/x-raw') + audiocaps.append_structure(audiocaps_struct) + self.audiocapsfilter.set_property('caps', audiocaps) self.pipeline.add(self.audiocapsfilter) - self.vorbisenc = gst.element_factory_make('vorbisenc', 'vorbisenc') + self.vorbisenc = Gst.ElementFactory.make('vorbisenc', 'vorbisenc') self.vorbisenc.set_property('quality', self.vorbis_quality) self.pipeline.add(self.vorbisenc) # WebMmux & filesink - self.webmmux = gst.element_factory_make('webmmux', 
'webmmux') + self.webmmux = Gst.ElementFactory.make('webmmux', 'webmmux') self.pipeline.add(self.webmmux) - self.filesink = gst.element_factory_make('filesink', 'filesink') + self.filesink = Gst.ElementFactory.make('filesink', 'filesink') self.filesink.set_property('location', self.destination_path) self.pipeline.add(self.filesink) # Progressreport - self.progressreport = gst.element_factory_make( + self.progressreport = Gst.ElementFactory.make( 'progressreport', 'progressreport') # Update every second self.progressreport.set_property('update-freq', 1) @@ -600,48 +281,41 @@ class VideoTranscoder(object): # 'new-decoded-pad' which links decoded src pads to either a video # or audio sink self.filesrc.link(self.decoder) - - # Link all the video elements in a row to webmmux - gst.element_link_many( - self.videoqueue, - self.videorate, - self.ffmpegcolorspace, - self.videoscale, - self.capsfilter, - self.vp8enc, - self.webmmux) - - if self.data.is_audio: - # Link all the audio elements in a row to webmux - gst.element_link_many( - self.audioqueue, - self.audiorate, - self.audioconvert, - self.audiocapsfilter, - self.vorbisenc, - self.webmmux) - - gst.element_link_many( - self.webmmux, - self.progressreport, - self.filesink) + # link the rest + self.videoqueue.link(self.videorate) + self.videorate.link(self.videoconvert) + self.videoconvert.link(self.videoscale) + self.videoscale.link(self.capsfilter) + self.capsfilter.link(self.vp8enc) + self.vp8enc.link(self.webmmux) + + if self.data.get_audio_streams(): + self.audioqueue.link(self.audiorate) + self.audiorate.link(self.audioconvert) + self.audioconvert.link(self.audiocapsfilter) + self.audiocapsfilter.link(self.vorbisenc) + self.vorbisenc.link(self.webmmux) + self.webmmux.link(self.progressreport) + self.progressreport.link(self.filesink) # Setup the message bus and connect _on_message to the pipeline self._setup_bus() - def _on_dynamic_pad(self, dbin, pad, islast): + def _on_dynamic_pad(self, dbin, pad): ''' - Callback called when ``decodebin2`` has a pad that we can connect to + Callback called when ``decodebin`` has a pad that we can connect to ''' # Intersect the capabilities of the video sink and the pad src # Then check if they have no common capabilities. - if self.ffmpegcolorspace.get_pad_template('sink')\ - .get_caps().intersect(pad.get_caps()).is_empty(): + if (self.videorate.get_static_pad('sink').get_pad_template() + .get_caps().intersect(pad.query_caps()).is_empty()): # It is NOT a video src pad. - pad.link(self.audioqueue.get_pad('sink')) + _log.debug('linking audio to the pad dynamically') + pad.link(self.audioqueue.get_static_pad('sink')) else: # It IS a video src pad. - pad.link(self.videoqueue.get_pad('sink')) + _log.debug('linking video to the pad dynamically') + pad.link(self.videoqueue.get_static_pad('sink')) def _setup_bus(self): self.bus = self.pipeline.get_bus() @@ -652,73 +326,53 @@ class VideoTranscoder(object): ''' Sets up the output format (width, height) for the video ''' - caps = ['video/x-raw-yuv', 'pixel-aspect-ratio=1/1', 'framerate=30/1'] - - if self.data.videoheight > self.data.videowidth: - # Whoa! We have ourselves a portrait video! 
- caps.append('height={0}'.format( - self.destination_dimensions[1])) + caps_struct = Gst.Structure.new_empty('video/x-raw') + caps_struct.set_value('pixel-aspect-ratio', Gst.Fraction(1, 1)) + caps_struct.set_value('framerate', Gst.Fraction(30, 1)) + video_info = self.data.get_video_streams()[0] + if video_info.get_height() > video_info.get_width(): + # portrait + caps_struct.set_value('height', self.destination_dimensions[1]) else: - # It's a landscape, phew, how normal. - caps.append('width={0}'.format( - self.destination_dimensions[0])) - - self.capsfilter.set_property( - 'caps', - gst.caps_from_string( - ','.join(caps))) + # landscape + caps_struct.set_value('width', self.destination_dimensions[0]) + caps = Gst.Caps.new_empty() + caps.append_structure(caps_struct) + self.capsfilter.set_property('caps', caps) def _on_message(self, bus, message): _log.debug((bus, message, message.type)) - - t = message.type - - if message.type == gst.MESSAGE_EOS: - self._discover_dst_and_stop() + if message.type == Gst.MessageType.EOS: + self.dst_data = discover(self.destination_path) + self.__stop() _log.info('Done') - - elif message.type == gst.MESSAGE_ELEMENT: - if message.structure.get_name() == 'progress': - data = dict(message.structure) + elif message.type == Gst.MessageType.ELEMENT: + if message.has_name('progress'): + structure = message.get_structure() # Update progress state if it has changed - if self.progress_percentage != data.get('percent'): - self.progress_percentage = data.get('percent') + (success, percent) = structure.get_int('percent') + if self.progress_percentage != percent and success: + self.progress_percentage = percent if self._progress_callback: - self._progress_callback(data.get('percent')) - - _log.info('{percent}% done...'.format( - percent=data.get('percent'))) - _log.debug(data) - - elif t == gst.MESSAGE_ERROR: - _log.error((bus, message)) + self._progress_callback(percent) + _log.info('{percent}% done...'.format(percent=percent)) + elif message.type == Gst.MessageType.ERROR: + _log.error('Got error: {0}'.format(message.parse_error())) self.__stop() - def _discover_dst_and_stop(self): - self.dst_discoverer = discoverer.Discoverer(self.destination_path) - - self.dst_discoverer.connect('discovered', self.__dst_discovered) - - self.dst_discoverer.discover() - - def __dst_discovered(self, data, is_media): - self.dst_data = data - - self.__stop() - def __stop(self): _log.debug(self.loop) if hasattr(self, 'pipeline'): # Stop executing the pipeline - self.pipeline.set_state(gst.STATE_NULL) + self.pipeline.set_state(Gst.State.NULL) # This kills the loop, mercifully - gobject.idle_add(self.__stop_mainloop) + GObject.idle_add(self.__stop_mainloop) def __stop_mainloop(self): ''' - Wrapper for gobject.MainLoop.quit() + Wrapper for GObject.MainLoop.quit() This wrapper makes us able to see if self.loop.quit has been called ''' @@ -729,7 +383,6 @@ class VideoTranscoder(object): if __name__ == '__main__': os.nice(19) - logging.basicConfig() from optparse import OptionParser parser = OptionParser( diff --git a/mediagoblin/media_types/video/util.py b/mediagoblin/media_types/video/util.py index 29b7f410..604702d7 100644 --- a/mediagoblin/media_types/video/util.py +++ b/mediagoblin/media_types/video/util.py @@ -33,27 +33,33 @@ def skip_transcode(metadata, size): medium_config = mgg.global_config['media:medium'] _log.debug('skip_transcode config: {0}'.format(config)) - - if config['mime_types'] and metadata.get('mimetype'): - if not metadata['mimetype'] in config['mime_types']: + tags = 
metadata.get_tags() + if config['mime_types'] and tags.get_string('mimetype'): + if not tags.get_string('mimetype') in config['mime_types']: return False - if config['container_formats'] and metadata['tags'].get('container-format'): - if not metadata['tags']['container-format'] in config['container_formats']: + if config['container_formats'] and tags.get_string('container-format'): + if not (metadata.get_tags().get_string('container-format') in + config['container_formats']): return False - if config['video_codecs'] and metadata['tags'].get('video-codec'): - if not metadata['tags']['video-codec'] in config['video_codecs']: - return False + if config['video_codecs']: + for video_info in metadata.get_video_streams(): + if not (video_info.get_tags().get_string('video-codec') in + config['video_codecs']): + return False - if config['audio_codecs'] and metadata['tags'].get('audio-codec'): - if not metadata['tags']['audio-codec'] in config['audio_codecs']: - return False + if config['audio_codecs']: + for audio_info in metadata.get_audio_streams(): + if not (audio_info.get_tags().get_string('audio-codec') in + config['audio_codecs']): + return False if config['dimensions_match']: - if not metadata['videoheight'] <= size[1]: - return False - if not metadata['videowidth'] <= size[0]: - return False + for video_info in metadata.get_video_streams(): + if not video_info.get_height() <= size[1]: + return False + if not video_info.get_width() <= size[0]: + return False return True diff --git a/mediagoblin/plugins/api/views.py b/mediagoblin/plugins/api/views.py index ef0b87e3..23341065 100644 --- a/mediagoblin/plugins/api/views.py +++ b/mediagoblin/plugins/api/views.py @@ -26,8 +26,7 @@ from mediagoblin.tools.translate import pass_to_ugettext as _ from mediagoblin.tools.response import json_response from mediagoblin.decorators import require_active_login from mediagoblin.meddleware.csrf import csrf_exempt -from mediagoblin.media_types import \ - InvalidFileType, FileTypeNotSupported +from mediagoblin.media_types import FileTypeNotSupported from mediagoblin.plugins.api.tools import api_auth, get_entry_serializable from mediagoblin.submit.lib import \ check_file_field, submit_media, get_upload_file_limits, \ @@ -83,17 +82,8 @@ def post_entry(request): except UserPastUploadLimit: raise BadRequest( _('Sorry, you have reached your upload limit.')) - - except Exception as e: - ''' - This section is intended to catch exceptions raised in - mediagoblin.media_types - ''' - if isinstance(e, InvalidFileType) or \ - isinstance(e, FileTypeNotSupported): - raise BadRequest(six.text_type(e)) - else: - raise + except FileTypeNotSupported as e: + raise BadRequest(e) @api_auth diff --git a/mediagoblin/processing/__init__.py b/mediagoblin/processing/__init__.py index 5a88ddea..aa8f1447 100644 --- a/mediagoblin/processing/__init__.py +++ b/mediagoblin/processing/__init__.py @@ -309,8 +309,8 @@ def mark_entry_failed(entry_id, exc): store extra information that can be useful for users telling them why their media failed to process. - Args: - - entry_id: The id of the media entry + :param entry_id: The id of the media entry + :param exc: An instance of BaseProcessingFail """ # Was this a BaseProcessingFail? 
In other words, was this a @@ -378,12 +378,11 @@ def store_public(entry, keyname, local_file, target_name=None, entry.media_files[keyname], target_filepath) if delete_if_exists: mgg.public_store.delete_file(entry.media_files[keyname]) - try: mgg.public_store.copy_local_to_storage(local_file, target_filepath) - except: + except Exception as e: + _log.error(u'Exception happened: {0}'.format(e)) raise PublicStoreFail(keyname=keyname) - # raise an error if the file failed to copy if not mgg.public_store.file_exists(target_filepath): raise PublicStoreFail(keyname=keyname) diff --git a/mediagoblin/processing/task.py b/mediagoblin/processing/task.py index 0c254767..5e0e772d 100644 --- a/mediagoblin/processing/task.py +++ b/mediagoblin/processing/task.py @@ -74,8 +74,11 @@ class ProcessMedia(celery.Task): Pass the media entry off to the appropriate processing function (for now just process_image...) + :param media_id: MediaEntry().id :param feed_url: The feed URL that the PuSH server needs to be updated for. + :param reprocess_action: What particular action should be run. For + example, 'initial'. :param reprocess: A dict containing all of the necessary reprocessing info for the media_type. """ diff --git a/mediagoblin/submit/views.py b/mediagoblin/submit/views.py index b0588599..ccdd70bc 100644 --- a/mediagoblin/submit/views.py +++ b/mediagoblin/submit/views.py @@ -29,8 +29,7 @@ from mediagoblin.tools.response import render_to_response, redirect from mediagoblin.decorators import require_active_login, user_has_privilege from mediagoblin.submit import forms as submit_forms from mediagoblin.messages import add_message, SUCCESS -from mediagoblin.media_types import \ - InvalidFileType, FileTypeNotSupported +from mediagoblin.media_types import FileTypeNotSupported from mediagoblin.submit.lib import \ check_file_field, submit_media, get_upload_file_limits, \ FileUploadLimit, UserUploadLimit, UserPastUploadLimit @@ -89,18 +88,10 @@ def submit_start(request): _('Sorry, you have reached your upload limit.')) return redirect(request, "mediagoblin.user_pages.user_home", user=request.user.username) - + except FileTypeNotSupported as e: + submit_form.file.errors.append(e) except Exception as e: - ''' - This section is intended to catch exceptions raised in - mediagoblin.media_types - ''' - if isinstance(e, InvalidFileType) or \ - isinstance(e, FileTypeNotSupported): - submit_form.file.errors.append( - e) - else: - raise + raise return render_to_response( request, diff --git a/mediagoblin/tests/media_tools.py b/mediagoblin/tests/media_tools.py new file mode 100644 index 00000000..8d58c024 --- /dev/null +++ b/mediagoblin/tests/media_tools.py @@ -0,0 +1,61 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2013 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +from contextlib import contextmanager +import tempfile + +import gi +gi.require_version('Gst', '1.0') +from gi.repository import Gst +Gst.init(None) + +@contextmanager +def create_av(make_video=False, make_audio=False): + 'creates audio/video in `path`, throws AssertionError on any error' + media = tempfile.NamedTemporaryFile(suffix='.ogg') + pipeline = Gst.Pipeline() + mux = Gst.ElementFactory.make('oggmux', 'mux') + pipeline.add(mux) + if make_video: + video_src = Gst.ElementFactory.make('videotestsrc', 'video_src') + video_src.set_property('num-buffers', 20) + video_enc = Gst.ElementFactory.make('theoraenc', 'video_enc') + pipeline.add(video_src) + pipeline.add(video_enc) + assert video_src.link(video_enc) + assert video_enc.link(mux) + if make_audio: + audio_src = Gst.ElementFactory.make('audiotestsrc', 'audio_src') + audio_src.set_property('num-buffers', 20) + audio_enc = Gst.ElementFactory.make('vorbisenc', 'audio_enc') + pipeline.add(audio_src) + pipeline.add(audio_enc) + assert audio_src.link(audio_enc) + assert audio_enc.link(mux) + sink = Gst.ElementFactory.make('filesink', 'sink') + sink.set_property('location', media.name) + pipeline.add(sink) + mux.link(sink) + pipeline.set_state(Gst.State.PLAYING) + state = pipeline.get_state(Gst.SECOND) + assert state[0] == Gst.StateChangeReturn.SUCCESS + bus = pipeline.get_bus() + message = bus.timed_pop_filtered( + Gst.SECOND, # one second should be more than enough for 50-buf vid + Gst.MessageType.ERROR | Gst.MessageType.EOS) + assert message.type == Gst.MessageType.EOS + pipeline.set_state(Gst.State.NULL) + yield media.name diff --git a/mediagoblin/tests/test_audio.py b/mediagoblin/tests/test_audio.py new file mode 100644 index 00000000..62d582ff --- /dev/null +++ b/mediagoblin/tests/test_audio.py @@ -0,0 +1,104 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2013 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +import tempfile +import shutil +import os +import pytest +from contextlib import contextmanager +import logging +import imghdr + +#os.environ['GST_DEBUG'] = '4,python:4' + +pytest.importorskip("gi.repository.Gst") +import gi +gi.require_version('Gst', '1.0') +from gi.repository import Gst +Gst.init(None) + +from mediagoblin.media_types.audio.transcoders import (AudioTranscoder, + AudioThumbnailer) +from mediagoblin.media_types.tools import discover + + +@contextmanager +def create_audio(): + audio = tempfile.NamedTemporaryFile() + src = Gst.ElementFactory.make('audiotestsrc', None) + src.set_property('num-buffers', 50) + enc = Gst.ElementFactory.make('flacenc', None) + dst = Gst.ElementFactory.make('filesink', None) + dst.set_property('location', audio.name) + pipeline = Gst.Pipeline() + pipeline.add(src) + pipeline.add(enc) + pipeline.add(dst) + src.link(enc) + enc.link(dst) + pipeline.set_state(Gst.State.PLAYING) + state = pipeline.get_state(3 * Gst.SECOND) + assert state[0] == Gst.StateChangeReturn.SUCCESS + bus = pipeline.get_bus() + bus.timed_pop_filtered( + 3 * Gst.SECOND, + Gst.MessageType.ERROR | Gst.MessageType.EOS) + pipeline.set_state(Gst.State.NULL) + yield (audio.name) + + +@contextmanager +def create_data_for_test(): + with create_audio() as audio_name: + second_file = tempfile.NamedTemporaryFile() + yield (audio_name, second_file.name) + + +def test_transcoder(): + ''' + Tests AudioTransocder's transcode method + ''' + transcoder = AudioTranscoder() + with create_data_for_test() as (audio_name, result_name): + transcoder.transcode(audio_name, result_name, quality=0.3, + progress_callback=None) + info = discover(result_name) + assert len(info.get_audio_streams()) == 1 + transcoder.transcode(audio_name, result_name, quality=0.3, + mux_name='oggmux', progress_callback=None) + info = discover(result_name) + assert len(info.get_audio_streams()) == 1 + + +def test_thumbnails(): + '''Test thumbnails generation. + + The code below heavily repeats + audio.processing.CommonAudioProcessor.create_spectrogram + 1. Create test audio + 2. Convert it to OGG source for spectogram using transcoder + 3. 
Create spectogram in jpg + + ''' + thumbnailer = AudioThumbnailer() + transcoder = AudioTranscoder() + with create_data_for_test() as (audio_name, new_name): + transcoder.transcode(audio_name, new_name, mux_name='oggmux') + thumbnail = tempfile.NamedTemporaryFile(suffix='.jpg') + # fft_size below is copypasted from config_spec.ini + thumbnailer.spectrogram(new_name, thumbnail.name, width=100, + fft_size=4096) + assert imghdr.what(thumbnail.name) == 'jpeg' diff --git a/mediagoblin/tests/test_mgoblin_app.ini b/mediagoblin/tests/test_mgoblin_app.ini index c351d3fc..6ac64321 100644 --- a/mediagoblin/tests/test_mgoblin_app.ini +++ b/mediagoblin/tests/test_mgoblin_app.ini @@ -36,4 +36,6 @@ BROKER_URL = "sqlite:///%(here)s/test_user_dev/kombu.db" [[mediagoblin.plugins.basic_auth]] [[mediagoblin.plugins.openid]] [[mediagoblin.media_types.image]] +[[mediagoblin.media_types.video]] +[[mediagoblin.media_types.audio]] [[mediagoblin.media_types.pdf]] diff --git a/mediagoblin/tests/test_submission.py b/mediagoblin/tests/test_submission.py index 03d255fb..65c4b3a3 100644 --- a/mediagoblin/tests/test_submission.py +++ b/mediagoblin/tests/test_submission.py @@ -26,7 +26,14 @@ import pytest import six.moves.urllib.parse as urlparse +# this gst initialization stuff is really required here +import gi +gi.require_version('Gst', '1.0') +from gi.repository import Gst +Gst.init(None) + from mediagoblin.tests.tools import fixture_add_user +from .media_tools import create_av from mediagoblin import mg_globals from mediagoblin.db.models import MediaEntry, User from mediagoblin.db.base import Session @@ -365,6 +372,18 @@ class TestSubmission: media = self.check_media(None, {"title": u"With GPS data"}, 1) assert media.get_location.position["latitude"] == 59.336666666666666 + def test_audio(self): + with create_av(make_audio=True) as path: + self.check_normal_upload('Audio', path) + + def test_video(self): + with create_av(make_video=True) as path: + self.check_normal_upload('Video', path) + + def test_audio_and_video(self): + with create_av(make_audio=True, make_video=True) as path: + self.check_normal_upload('Audio and Video', path) + def test_processing(self): public_store_dir = mg_globals.global_config[ 'storage:publicstore']['base_dir'] diff --git a/mediagoblin/tests/test_video.py b/mediagoblin/tests/test_video.py new file mode 100644 index 00000000..79244515 --- /dev/null +++ b/mediagoblin/tests/test_video.py @@ -0,0 +1,132 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2013 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +import tempfile +import os +from contextlib import contextmanager +import imghdr + +#os.environ['GST_DEBUG'] = '4,python:4' +import pytest +pytest.importorskip("gi.repository.Gst") + +import gi +gi.require_version('Gst', '1.0') +from gi.repository import Gst +Gst.init(None) + +from mediagoblin.media_types.video.transcoders import (capture_thumb, + VideoTranscoder) +from mediagoblin.media_types.tools import discover + +@contextmanager +def create_data(suffix=None, make_audio=False): + video = tempfile.NamedTemporaryFile() + src = Gst.ElementFactory.make('videotestsrc', None) + src.set_property('num-buffers', 10) + videorate = Gst.ElementFactory.make('videorate', None) + enc = Gst.ElementFactory.make('theoraenc', None) + mux = Gst.ElementFactory.make('oggmux', None) + dst = Gst.ElementFactory.make('filesink', None) + dst.set_property('location', video.name) + pipeline = Gst.Pipeline() + pipeline.add(src) + pipeline.add(videorate) + pipeline.add(enc) + pipeline.add(mux) + pipeline.add(dst) + src.link(videorate) + videorate.link(enc) + enc.link(mux) + mux.link(dst) + if make_audio: + audio_src = Gst.ElementFactory.make('audiotestsrc', None) + audio_src.set_property('num-buffers', 10) + audiorate = Gst.ElementFactory.make('audiorate', None) + audio_enc = Gst.ElementFactory.make('vorbisenc', None) + pipeline.add(audio_src) + pipeline.add(audio_enc) + pipeline.add(audiorate) + audio_src.link(audiorate) + audiorate.link(audio_enc) + audio_enc.link(mux) + pipeline.set_state(Gst.State.PLAYING) + state = pipeline.get_state(3 * Gst.SECOND) + assert state[0] == Gst.StateChangeReturn.SUCCESS + bus = pipeline.get_bus() + message = bus.timed_pop_filtered( + 3 * Gst.SECOND, + Gst.MessageType.ERROR | Gst.MessageType.EOS) + pipeline.set_state(Gst.State.NULL) + if suffix: + result = tempfile.NamedTemporaryFile(suffix=suffix) + else: + result = tempfile.NamedTemporaryFile() + yield (video.name, result.name) + + +#TODO: this should be skipped if video plugin is not enabled +def test_thumbnails(): + ''' + Test thumbnails generation. + 1. Create a video (+audio) from gst's videotestsrc + 2. Capture thumbnail + 3. Everything should get removed because of temp files usage + ''' + #data create_data() as (video_name, thumbnail_name): + test_formats = [('.png', 'png'), ('.jpg', 'jpeg'), ('.gif', 'gif')] + for suffix, format in test_formats: + with create_data(suffix) as (video_name, thumbnail_name): + capture_thumb(video_name, thumbnail_name, width=40) + # check result file format + assert imghdr.what(thumbnail_name) == format + # TODO: check height and width + # FIXME: it doesn't work with small width, say, 10px. 
This should be
+ # fixed somehow
+ suffix, format = test_formats[0]
+ with create_data(suffix, True) as (video_name, thumbnail_name):
+ capture_thumb(video_name, thumbnail_name, width=40)
+ assert imghdr.what(thumbnail_name) == format
+ with create_data(suffix, True) as (video_name, thumbnail_name):
+ capture_thumb(video_name, thumbnail_name, width=10) # smaller width
+ assert imghdr.what(thumbnail_name) == format
+ with create_data(suffix, True) as (video_name, thumbnail_name):
+ capture_thumb(video_name, thumbnail_name, width=100) # bigger width
+ assert imghdr.what(thumbnail_name) == format
+
+
+def test_transcoder():
+ # test without audio
+ with create_data() as (video_name, result_name):
+ transcoder = VideoTranscoder()
+ transcoder.transcode(
+ video_name, result_name,
+ vp8_quality=8,
+ vp8_threads=0, # autodetect
+ vorbis_quality=0.3,
+ dimensions=(640, 640))
+ assert len(discover(result_name).get_video_streams()) == 1
+ # test with audio
+ with create_data(make_audio=True) as (video_name, result_name):
+ transcoder = VideoTranscoder()
+ transcoder.transcode(
+ video_name, result_name,
+ vp8_quality=8,
+ vp8_threads=0, # autodetect
+ vorbis_quality=0.3,
+ dimensions=(640, 640))
+ assert len(discover(result_name).get_video_streams()) == 1
+ assert len(discover(result_name).get_audio_streams()) == 1
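
Editor's note on the metadata API used throughout this port: the hunks above consistently replace GStreamer 0.10's dict-style discoverer metadata with GstPbutils DiscovererInfo objects (see the get_video_streams()/get_tags() calls in skip_transcode and the discover() helper the new tests import from mediagoblin.media_types.tools). The snippet below is a minimal sketch, not part of the commit, showing how such an info object can be obtained and queried with GstPbutils directly; the assumption that the commit's discover() helper wraps GstPbutils in roughly this way is mine, since that helper's body is not shown in these hunks.

import gi
gi.require_version('Gst', '1.0')
gi.require_version('GstPbutils', '1.0')
from gi.repository import Gst, GstPbutils

Gst.init(None)

def probe(path):
    # Discover a local media file and report its streams, mirroring the
    # checks made by the new skip_transcode() code and the test assertions
    # above. GstPbutils.Discoverer is used directly here; the commit's own
    # discover() helper may differ in detail.
    discoverer = GstPbutils.Discoverer.new(10 * Gst.SECOND)
    info = discoverer.discover_uri(Gst.filename_to_uri(path))
    for video in info.get_video_streams():
        print('video stream: {0}x{1}'.format(
            video.get_width(), video.get_height()))
    for audio in info.get_audio_streams():
        print('audio stream: {0} channel(s)'.format(audio.get_channels()))
    return info

Passing the returned info object to skip_transcode(info, size) matches how the reworked util.py reads dimensions and codec tags, though doing so outside a running MediaGoblin app would also require the media:medium configuration to be loaded.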