Rewrite thumbnailer

Previous thumbnailer didn't always work properly. It was also not ready to be ported to GStreamer 1.0 The rewrite makes it shorter, more pythonic and prepares it for porting. - no longer uses playbin2; - is tested - logs some events - previous thumbnailer is removed
author: Boris Bobrov <breton@cynicmansion.ru> 2014-05-29 14:50:32 +0400
committer: Boris Bobrov <breton@cynicmansion.ru> 2015-02-15 05:52:22 +0300
commit: 7e266d5a374fe16355375f82be45e2c5707450d1 (patch)
tree: 532d4ecbdbf6747876f6ea1c4b273e2ea2b0fd45 /mediagoblin/media_types
parent: a02de38f9105bd6ffa54133d4b1ccdd33ec34034 (diff)
download: mediagoblin-7e266d5a374fe16355375f82be45e2c5707450d1.tar.lz
mediagoblin-7e266d5a374fe16355375f82be45e2c5707450d1.tar.xz
mediagoblin-7e266d5a374fe16355375f82be45e2c5707450d1.zip
2 files changed, 72 insertions, 330 deletions
diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py
index bbed4f12..ca9a6ad9 100644
--- a/mediagoblin/media_types/video/processing.py
+++ b/mediagoblin/media_types/video/processing.py
@@ -74,6 +74,13 @@ def store_metadata(media_entry, metadata):
     """
     Store metadata from this video for this media entry.
     """
+    stored_metadata = dict()
+    audio_info_list = metadata.get_audio_streams()
+    if audio_info_list:
+        audio_info = audio_info_list[0]
+        stored_metadata['audiochannels'] = audio_info.get_channels()
+    # video is always there
+    video_info = metadata.get_video_streams()[0]
     # Let's pull out the easy, not having to be converted ones first
     stored_metadata = dict(
         [(key, metadata[key])
@@ -270,7 +277,7 @@ class CommonVideoProcessor(MediaProcessor):
             return
 
         # We will only use the width so that the correct scale is kept
-        transcoders.VideoThumbnailerMarkII(
+        transcoders.capture_thumb(
             self.process_filename,
             tmp_thumb,
             thumb_size[0])
diff --git a/mediagoblin/media_types/video/transcoders.py b/mediagoblin/media_types/video/transcoders.py
index 3a3fa97f..e08b897c 100644
--- a/mediagoblin/media_types/video/transcoders.py
+++ b/mediagoblin/media_types/video/transcoders.py
@@ -55,336 +55,73 @@ os.putenv('GST_DEBUG_DUMP_DOT_DIR', '/tmp')
 
 def pixbuf_to_pilbuf(buf):
     data = list()
-    for i in range(0, len(buf), 3):
-        r, g, b = struct.unpack('BBB', buf[i:i + 3])
+    for i in range(0, len(buf)-4, 4):
+        r, g, b, x = struct.unpack('BBBB', buf[i:i + 4])
+        # XXX: can something be done with the 'X' part of RGBX?
         data.append((r, g, b))
-
     return data
 
-
-class VideoThumbnailerMarkII(object):
-    '''
-    Creates a thumbnail from a video file. Rewrite of VideoThumbnailer.
-
-    Large parts of the functionality and overall architectue contained within
-    this object is taken from Participatory Culture Foundation's
-    `gst_extractor.Extractor` object last seen at
-    https://github.com/pculture/miro/blob/master/tv/lib/frontends/widgets/gst/gst_extractor.py
-    in the `miro` codebase.
-
-    The `miro` codebase and the gst_extractor.py are licensed under the GNU
-    General Public License v2 or later.
-    '''
-    STATE_NULL = 0
-    STATE_HALTING = 1
-    STATE_PROCESSING = 2
-    STATE_PROCESSING_THUMBNAIL = 3
-
-    def __init__(self, source_path, dest_path, width=None, height=None,
-            position_callback=None):
-        self.state = self.STATE_NULL
-
-        self.has_reached_playbin_pause = False
-
-        self.thumbnail_pipeline = None
-
-        self.permission_to_take_picture = False
-
-        self.buffer_probes = {}
-
-        self.errors = []
-
-        self.source_path = os.path.abspath(source_path)
-        self.dest_path = os.path.abspath(dest_path)
-
-        self.width = width
-        self.height = height
-        self.position_callback = position_callback \
-                or self.wadsworth_position_callback
-
-        self.mainloop = gobject.MainLoop()
-
-        self.playbin = gst.element_factory_make('playbin')
-
-        self.videosink = gst.element_factory_make('fakesink', 'videosink')
-        self.audiosink = gst.element_factory_make('fakesink', 'audiosink')
-
-        self.playbin.set_property('video-sink', self.videosink)
-        self.playbin.set_property('audio-sink', self.audiosink)
-
-        self.playbin_message_bus = self.playbin.get_bus()
-
-        self.playbin_message_bus.add_signal_watch()
-        self.playbin_bus_watch_id = self.playbin_message_bus.connect(
-                'message',
-                self.on_playbin_message)
-
-        self.playbin.set_property(
-                'uri',
-                'file:{0}'.format(
-                    urllib.pathname2url(self.source_path)))
-
-        self.playbin.set_state(gst.STATE_PAUSED)
-
-        try:
-            self.run()
-        except Exception as exc:
-            _log.critical(
-                'Exception "{0}" caught, shutting down mainloop and re-raising'\
-                    .format(exc))
-            self.disconnect()
-            raise
-
-    def wadsworth_position_callback(self, duration, gst):
-        return self.duration / 100 * 30
-
-    def run(self):
-        self.mainloop.run()
-
-    def on_playbin_message(self, message_bus, message):
-        # Silenced to prevent clobbering of output
-        #_log.debug('playbin message: {0}'.format(message))
-
-        if message.type == gst.MESSAGE_ERROR:
-            _log.error('playbin error: {0}'.format(message))
-            gobject.idle_add(self.on_playbin_error)
-
-        if message.type == gst.MESSAGE_STATE_CHANGED:
-            prev_state, cur_state, pending_state = \
-                    message.parse_state_changed()
-
-            _log.debug('playbin state changed: \nprev: {0}\ncur: {1}\n \
-pending: {2}'.format(
-    prev_state,
-    cur_state,
-    pending_state))
-
-            if cur_state == gst.STATE_PAUSED:
-                if message.src == self.playbin:
-                    _log.info('playbin ready')
-                    gobject.idle_add(self.on_playbin_paused)
-
-    def on_playbin_paused(self):
-        if self.has_reached_playbin_pause:
-            _log.warn('Has already reached on_playbin_paused. Aborting \
-without doing anything this time.')
-            return False
-
-        self.has_reached_playbin_pause = True
-
-        # XXX: Why is this even needed at this point?
-        current_video = self.playbin.get_property('current-video')
-
-        if not current_video:
-            _log.critical('Could not get any video data \
-from playbin')
-        else:
-            _log.info('Got video data from playbin')
-
-        self.duration = self.get_duration(self.playbin)
-        self.permission_to_take_picture = True
-        self.buffer_probes = {}
-
-        pipeline = ''.join([
-            'filesrc location="%s" ! decodebin2 ! ' % self.source_path,
-            'ffmpegcolorspace ! videoscale ! ',
-            'video/x-raw-rgb,depth=24,bpp=24,pixel-aspect-ratio=1/1',
-            ',width={0}'.format(self.width) if self.width else '',
-            ',height={0}'.format(self.height) if self.height else '',
-            ' ! ',
-            'fakesink signal-handoffs=True'])
-
-        _log.debug('thumbnail_pipeline: {0}'.format(pipeline))
-
-        self.thumbnail_pipeline = gst.parse_launch(pipeline)
-        self.thumbnail_message_bus = self.thumbnail_pipeline.get_bus()
-        self.thumbnail_message_bus.add_signal_watch()
-        self.thumbnail_bus_watch_id = self.thumbnail_message_bus.connect(
-                'message',
-                self.on_thumbnail_message)
-
-        self.thumbnail_pipeline.set_state(gst.STATE_PAUSED)
-
-        gobject.timeout_add(3000, self.on_gobject_timeout)
-
-        return False
-
-    def on_thumbnail_message(self, message_bus, message):
-        # This is silenced to prevent clobbering of the terminal window
-        #_log.debug('thumbnail message: {0}'.format(message))
-
-        if message.type == gst.MESSAGE_ERROR:
-            _log.error('thumbnail error: {0}'.format(message.parse_error()))
-            gobject.idle_add(self.on_thumbnail_error, message)
-
-        if message.type == gst.MESSAGE_STATE_CHANGED:
-            prev_state, cur_state, pending_state = \
-                    message.parse_state_changed()
-
-            _log.debug('thumbnail state changed: \nprev: {0}\ncur: {1}\n \
-pending: {2}'.format(
-    prev_state,
-    cur_state,
-    pending_state))
-
-            if cur_state == gst.STATE_PAUSED and \
-               not self.state == self.STATE_PROCESSING_THUMBNAIL:
-                # Find the fakesink sink pad and attach the on_buffer_probe
-                # handler to it.
-                seek_amount = self.position_callback(self.duration, gst)
-
-                seek_result = self.thumbnail_pipeline.seek(
-                        1.0,
-                        gst.FORMAT_TIME,
-                        gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
-                        gst.SEEK_TYPE_SET,
-                        seek_amount,
-                        gst.SEEK_TYPE_NONE,
-                        0)
-
-                if not seek_result:
-                    _log.info('Could not seek.')
-                else:
-                    _log.info('Seek successful, attaching buffer probe')
-                    self.state = self.STATE_PROCESSING_THUMBNAIL
-                    for sink in self.thumbnail_pipeline.sinks():
-                        sink_name = sink.get_name()
-                        sink_factory_name = sink.get_factory().get_name()
-
-                        if sink_factory_name == 'fakesink':
-                            sink_pad = sink.get_pad('sink')
-
-                            self.buffer_probes[sink_name] = sink_pad\
-                                    .add_buffer_probe(
-                                            self.on_pad_buffer_probe,
-                                            sink_name)
-
-                            _log.info('Attached buffer probes: {0}'.format(
-                                self.buffer_probes))
-
-                            break
-
-
-            elif self.state == self.STATE_PROCESSING_THUMBNAIL:
-                _log.info('Already processing thumbnail')
-
-    def on_pad_buffer_probe(self, *args):
-        _log.debug('buffer probe handler: {0}'.format(args))
-        gobject.idle_add(lambda: self.take_snapshot(*args))
-
-    def take_snapshot(self, pad, buff, name):
-        if self.state == self.STATE_HALTING:
-            _log.debug('Pipeline is halting, will not take snapshot')
-            return False
-
-        _log.info('Taking snapshot! ({0})'.format(
-            (pad, buff, name)))
-        try:
-            caps = buff.caps
-            if caps is None:
-                _log.error('No buffer caps present /take_snapshot')
-                self.disconnect()
-
-            _log.debug('caps: {0}'.format(caps))
-
-            filters = caps[0]
-            width = filters['width']
-            height = filters['height']
-
-            im = Image.new('RGB', (width, height))
-
-            data = pixbuf_to_pilbuf(buff.data)
-
-            im.putdata(data)
-
-            im.save(self.dest_path)
-
-            _log.info('Saved snapshot!')
-
-            self.disconnect()
-
-        except gst.QueryError as exc:
-            _log.error('take_snapshot - QueryError: {0}'.format(exc))
-
-        return False
-
-    def on_thumbnail_error(self, message):
-        scaling_failed = False
-
-        if 'Error calculating the output scaled size - integer overflow' \
-           in message.parse_error()[1]:
-            # GStreamer videoscale sometimes fails to calculate the dimensions
-            # given only one of the destination dimensions and the source
-            # dimensions. This is a workaround in case videoscale returns an
-            # error that indicates this has happened.
-            scaling_failed = True
-            _log.error('Thumbnailing failed because of videoscale integer'
-                       ' overflow. Will retry with fallback.')
-        else:
-            _log.error('Thumbnailing failed: {0}'.format(message.parse_error()))
-
-        # Kill the current mainloop
-        self.disconnect()
-
-        if scaling_failed:
-            # Manually scale the destination dimensions
-            _log.info('Retrying with manually set sizes...')
-
-            info = VideoTranscoder().discover(self.source_path)
-
-            h = info['videoheight']
-            w = info['videowidth']
-            ratio = 180 / int(w)
-            h = int(h * ratio)
-
-            self.__init__(self.source_path, self.dest_path, 180, h)
-
-    def disconnect(self):
-        self.state = self.STATE_HALTING
-
-        if self.playbin is not None:
-            self.playbin.set_state(gst.STATE_NULL)
-
-            for sink in self.playbin.sinks():
-                sink_name = sink.get_name()
-                sink_factory_name = sink.get_factory().get_name()
-
-                if sink_factory_name == 'fakesink':
-                    sink_pad = sink.get_pad('sink')
-                    sink_pad.remove_buffer_probe(self.buffer_probes[sink_name])
-                    del self.buffer_probes[sink_name]
-
-            self.playbin = None
-
-        if self.thumbnail_pipeline is not None:
-            self.thumbnail_pipeline.set_state(gst.STATE_NULL)
-            self.thumbnail_pipeline = None
-
-        if self.playbin_message_bus is not None:
-            self.playbin_message_bus.disconnect(self.playbin_bus_watch_id)
-            self.playbin_message_bus = None
-
-        self.halt()
-
-    def halt(self):
-        gobject.idle_add(self.mainloop.quit)
-
-    def on_gobject_timeout(self):
-        _log.critical('Reached gobject timeout')
-        self.disconnect()
-
-    def get_duration(self, pipeline, attempt=1):
-        if attempt == 5:
-            _log.critical('Pipeline duration query retry limit reached.')
-            return 0
-
-        try:
-            return pipeline.query_duration(gst.FORMAT_TIME)[0]
-        except gst.QueryError as exc:
-            _log.error('Could not get duration on attempt {0}: {1}'.format(
-                attempt,
-                exc))
-            return self.get_duration(pipeline, attempt + 1)
+def capture_thumb(video_path, dest_path, width=None, height=None, percent=0.5):
+    def pad_added(element, pad, connect_to):
+        caps = pad.get_caps()
+        name = caps[0].get_name()
+        _log.debug('on_pad_added: {0}'.format(name))
+        if name.startswith('video') and not connect_to.is_linked():
+            pad.link(connect_to)
+    # construct pipeline: uridecodebin ! ffmpegcolorspace ! videoscale ! \
+    # ! CAPS ! appsink
+    pipeline = gst.Pipeline()
+    uridecodebin = gst.element_factory_make('uridecodebin')
+    uridecodebin.set_property('uri', 'file://{0}'.format(video_path))
+    ffmpegcolorspace = gst.element_factory_make('ffmpegcolorspace')
+    uridecodebin.connect('pad-added', pad_added,
+                         ffmpegcolorspace.get_pad('sink'))
+    videoscale = gst.element_factory_make('videoscale')
+    filter = gst.element_factory_make('capsfilter', 'filter')
+    # create caps for video scaling
+    caps_struct = gst.Structure('video/x-raw-rgb')
+    caps_struct.set_value('pixel-aspect-ratio', gst.Fraction(1, 1))
+    if height:
+        caps_struct.set_value('height', height)
+    if width:
+        caps_struct.set_value('width', width)
+    caps = gst.Caps(caps_struct)
+    filter.set_property('caps', caps)
+    appsink = gst.element_factory_make('appsink')
+    pipeline.add(uridecodebin, ffmpegcolorspace, videoscale, filter, appsink)
+    gst.element_link_many(ffmpegcolorspace, videoscale, filter, appsink)
+    # pipeline constructed, starting playing, but first some preparations
+    if pipeline.set_state(gst.STATE_PAUSED) == gst.STATE_CHANGE_FAILURE:
+        _log.warning('state change failed')
+    pipeline.get_state()
+    duration = pipeline.query_duration(gst.FORMAT_TIME, None)[0]
+    if duration == gst.CLOCK_TIME_NONE:
+        _log.warning('query_duration failed')
+        duration = 0  # XXX
+    seek_to = int(duration * int(percent * 100) / 100)
+    _log.debug('Seeking to {0} of {1}'.format(
+            seek_to / gst.SECOND, duration / gst.SECOND))
+    seek = pipeline.seek_simple(gst.FORMAT_TIME, gst.SEEK_FLAG_FLUSH, seek_to)
+    if not seek:
+        _log.warning('seek failed')
+    # get sample, retrieve it's format and save
+    sample = appsink.emit("pull-preroll")
+    if not sample:
+        _log.warning('could not get sample')
+        return
+    caps = sample.get_caps()
+    if not caps:
+        _log.warning('could not get snapshot format')
+    structure = caps.get_structure(0)
+    (success, width) = structure.get_int('width')
+    (success, height) = structure.get_int('height')
+    buffer = sample.get_buffer()
+    im = Image.frombytes('RGB', (width, height),
+                         buffer.extract_dup(0, buffer.get_size()))
+    im.save(dest_path)
+    _log.info('thumbnail saved to {0}'.format(dest_path))
+    # cleanup
+    pipeline.set_state(gst.STATE_NULL)
 
 
 class VideoTranscoder(object):
@@ -451,7 +188,6 @@ class VideoTranscoder(object):
         self.discoverer.discover()
 
         self.loop.run()
-
         if hasattr(self, '_discovered_data'):
             return self._discovered_data.__dict__
         else:
@@ -729,7 +465,6 @@ class VideoTranscoder(object):
 
 if __name__ == '__main__':
     os.nice(19)
-    logging.basicConfig()
     from optparse import OptionParser
 
     parser = OptionParser(
author	Boris Bobrov <breton@cynicmansion.ru>	2014-05-29 14:50:32 +0400
committer	Boris Bobrov <breton@cynicmansion.ru>	2015-02-15 05:52:22 +0300
commit	7e266d5a374fe16355375f82be45e2c5707450d1 (patch)
tree	532d4ecbdbf6747876f6ea1c4b273e2ea2b0fd45 /mediagoblin/media_types
parent	a02de38f9105bd6ffa54133d4b1ccdd33ec34034 (diff)
download	mediagoblin-7e266d5a374fe16355375f82be45e2c5707450d1.tar.lz mediagoblin-7e266d5a374fe16355375f82be45e2c5707450d1.tar.xz mediagoblin-7e266d5a374fe16355375f82be45e2c5707450d1.zip