Merge pull request #140 from WolfganP/patch-1

ITV BTCC new pages' URL update (articles instead of races)
author: Tom-Oliver Heidel <github@tom-oliver.eu> 2020-11-30 02:48:02 +0100
committer: GitHub <noreply@github.com> 2020-11-30 02:48:02 +0100
commit: e29288d667a4ede4339f00313f7d2b7f00fd1281 (patch)
tree: 54e8cff9ec0c6a6908ddd2fe24d77dc88aab3054
parent: 94c29091d049f48962c3e81a1b5b0237ab54827d (diff)
parent: 85da4055c06ee5a2cf3462b2aa8404bcf7197955 (diff)
download: hypervideo-pre-e29288d667a4ede4339f00313f7d2b7f00fd1281.tar.lz
hypervideo-pre-e29288d667a4ede4339f00313f7d2b7f00fd1281.tar.xz
hypervideo-pre-e29288d667a4ede4339f00313f7d2b7f00fd1281.zip
1 file changed, 16 insertions, 5 deletions
diff --git a/youtube_dlc/extractor/itv.py b/youtube_dlc/extractor/itv.py
index ad2f4eca5..20144cd82 100644
--- a/youtube_dlc/extractor/itv.py
+++ b/youtube_dlc/extractor/itv.py
@@ -20,6 +20,7 @@ from ..utils import (
     merge_dicts,
     parse_duration,
     smuggle_url,
+    try_get,
     url_or_none,
     xpath_with_ns,
     xpath_element,
@@ -280,12 +281,12 @@ class ITVIE(InfoExtractor):
 class ITVBTCCIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TEST = {
-        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+        'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
         'info_dict': {
-            'id': 'btcc-2018-all-the-action-from-brands-hatch',
-            'title': 'BTCC 2018: All the action from Brands Hatch',
+            'id': 'btcc-2019-brands-hatch-gp-race-action',
+            'title': 'BTCC 2019: Brands Hatch GP race action',
         },
-        'playlist_mincount': 9,
+        'playlist_count': 12,
     }
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
 
@@ -294,6 +295,16 @@ class ITVBTCCIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
+        json_map = try_get(self._parse_json(self._html_search_regex(
+            '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
+            lambda x: x['props']['pageProps']['article']['body']['content']) or []
+
+        # Discard empty objects
+        video_ids = []
+        for video in json_map:
+            if video['data'].get('id'):
+                video_ids.append(video['data']['id'])
+
         entries = [
             self.url_result(
                 smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
@@ -305,7 +316,7 @@ class ITVBTCCIE(InfoExtractor):
                     'referrer': url,
                 }),
                 ie=BrightcoveNewIE.ie_key(), video_id=video_id)
-            for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+            for video_id in video_ids]
 
         title = self._og_search_title(webpage, fatal=False)
author	Tom-Oliver Heidel <github@tom-oliver.eu>	2020-11-30 02:48:02 +0100
committer	GitHub <noreply@github.com>	2020-11-30 02:48:02 +0100
commit	e29288d667a4ede4339f00313f7d2b7f00fd1281 (patch)
tree	54e8cff9ec0c6a6908ddd2fe24d77dc88aab3054
parent	94c29091d049f48962c3e81a1b5b0237ab54827d (diff)
parent	85da4055c06ee5a2cf3462b2aa8404bcf7197955 (diff)
download	hypervideo-pre-e29288d667a4ede4339f00313f7d2b7f00fd1281.tar.lz hypervideo-pre-e29288d667a4ede4339f00313f7d2b7f00fd1281.tar.xz hypervideo-pre-e29288d667a4ede4339f00313f7d2b7f00fd1281.zip