From b5d0d817bc8a23ef6dc2a00d1af6fad893143206 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 30 Oct 2013 01:09:44 +0100
Subject: Remove superfluous space

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ce349fe20..cef4dce85 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -63,7 +63,7 @@ class InfoExtractor(object):
                     * ext       Will be calculated from url if missing
                     * format    A human-readable description of the format
                                 ("mp4 container with h264/opus").
-                                Calculated from the format_id, width, height 
+                                Calculated from the format_id, width, height.
                                 and format_note fields if missing.
                     * format_id A short description of the format
                                 ("mp4_h264_opus" or "19")
-- 
cgit v1.2.3


From 9103bbc5cd11957de2e906e4401dcf4df9511d28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 3 Nov 2013 12:11:13 +0100
Subject: Add the 'webpage_url' field to info_dict

The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
---
 youtube_dl/extractor/common.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index cef4dce85..e0ccba533 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,6 +71,9 @@ class InfoExtractor(object):
                                 ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * height    Height of the video, if known
+    webpage_url:    The url to the video webpage, if given to youtube-dl it
+                    should allow to get the same result again. (It will be set
+                    by YoutubeDL if it's missing)
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
-- 
cgit v1.2.3


From a8eeb0597b11dbc9d1b48f95264cc2815311aa15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Cie=C5=9Blak?= <saper@saper.info>
Date: Tue, 5 Nov 2013 23:19:29 +0100
Subject: Fix AssertionError when og property not found

On tvp.pl some webpages contain OpenGraph
metadata and some don't.

If og property is not found, _og_search_description
fails with

WARNING: unable to extract OpenGraph description; please report this issue on http://yt-dl.org/bug
Traceback (most recent call last):
  File "/usr/home/saper/bin/youtube-dl", line 18, in <module>
    youtube_dl.main()
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 766, in main
    _real_main(argv)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 719, in _real_main
    retcode = ydl.download(all_urls)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 715, in download
    videos = self.extract_info(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 348, in extract_info
    ie_result = ie.extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 125, in extract
    return self._real_extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/tvp.py", line 56, in _real_extract
    info['description'] = self._og_search_description(webpage)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 331, in _og_search_description
    return self._og_search_property('description', html, fatal=False, **kargs)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 325, in _og_search_property
    return unescapeHTML(escaped)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/utils.py", line 494, in unescapeHTML
    assert type(s) == type(u'')
AssertionError

The patch allows me to use:

  try:
    info['description'] = self._og_search_description(webpage)
    info['thumbnail'] = self._og_search_thumbnail(webpage)
  except RegexNotFoundError:
    pass
---
 youtube_dl/extractor/common.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e0ccba533..fb2d50a09 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -322,7 +322,9 @@ class InfoExtractor(object):
         if name is None:
             name = 'OpenGraph %s' % prop
         escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
-        return unescapeHTML(escaped)
+        if not escaped is None:
+            return unescapeHTML(escaped)
+        return None
 
     def _og_search_thumbnail(self, html, **kargs):
         return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
-- 
cgit v1.2.3


From eb0a83986642cf660820b168bd83c8770e3e5ce6 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 12 Nov 2013 10:36:23 +0100
Subject: [common] Simplify og_search_property

---
 youtube_dl/extractor/common.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index fb2d50a09..9c20d30b4 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -322,9 +322,9 @@ class InfoExtractor(object):
         if name is None:
             name = 'OpenGraph %s' % prop
         escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
-        if not escaped is None:
-            return unescapeHTML(escaped)
-        return None
+        if escaped is None:
+            return None
+        return unescapeHTML(escaped)
 
     def _og_search_thumbnail(self, html, **kargs):
         return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
-- 
cgit v1.2.3


From ab2d524780736249c8988313db021e83642c24d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Fri, 15 Nov 2013 12:24:54 +0100
Subject: Improve the OpenGraph regex

* Do not accept '>' between the property and content attributes.
* Recognize the properties if the content attribute is before the property attribute using two regexes (fixes the extraction of the description for SlideshareIE).
---
 youtube_dl/extractor/common.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9c20d30b4..e02176852 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -315,13 +315,17 @@ class InfoExtractor(object):
 
     # Helper functions for extracting OpenGraph info
     @staticmethod
-    def _og_regex(prop):
-        return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
+    def _og_regexes(prop):
+        esc_prop = re.escape(prop)
+        return [
+            r'<meta[^>]+?property=[\'"]og:%s[\'"][^>]+?content=(?:"(.+?)"|\'(.+?)\')' % esc_prop,
+            r'<meta[^>]+?content=(?:"(.+?)"|\'(.+?)\')[^>]+?property=[\'"]og:%s[\'"]' % esc_prop,
+        ]
 
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
-        escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+        escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
         if escaped is None:
             return None
         return unescapeHTML(escaped)
@@ -336,8 +340,8 @@ class InfoExtractor(object):
         return self._og_search_property('title', html, **kargs)
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
-        regexes = [self._og_regex('video')]
-        if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+        regexes = self._og_regexes('video')
+        if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
     def _rta_search(self, html):
-- 
cgit v1.2.3


From 78fb87b2837e15124b5855734a951598dfe025fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Fri, 15 Nov 2013 12:54:13 +0100
Subject: Don't accept '>' inside the content attribute in OpenGraph regexes

---
 youtube_dl/extractor/common.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e02176852..45dd01789 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -316,10 +316,12 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        esc_prop = re.escape(prop)
+        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        template = r'<meta[^>]+?%s[^>]+?%s'
         return [
-            r'<meta[^>]+?property=[\'"]og:%s[\'"][^>]+?content=(?:"(.+?)"|\'(.+?)\')' % esc_prop,
-            r'<meta[^>]+?content=(?:"(.+?)"|\'(.+?)\')[^>]+?property=[\'"]og:%s[\'"]' % esc_prop,
+            template % (property_re, content_re),
+            template % (content_re, property_re),
         ]
 
     def _og_search_property(self, prop, html, name=None, **kargs):
-- 
cgit v1.2.3


From 91c7271aabdd74c833ef570db59018e2d9f9d803 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Sat, 16 Nov 2013 01:08:43 +0100
Subject: Add automatic generation of format note based on bitrate and codecs

---
 youtube_dl/extractor/common.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 45dd01789..f787d0a3c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,6 +71,10 @@ class InfoExtractor(object):
                                 ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * height    Height of the video, if known
+                    * abr       Average audio bitrate in KBit/s
+                    * acodec    Name of the audio codec in use
+                    * vbr       Average video bitrate in KBit/s
+                    * vcodec    Name of the video codec in use
     webpage_url:    The url to the video webpage, if given to youtube-dl it
                     should allow to get the same result again. (It will be set
                     by YoutubeDL if it's missing)
-- 
cgit v1.2.3