diff options
author | Bricio <216170+Bricio@users.noreply.github.com> | 2022-02-17 14:38:58 -0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-17 09:38:58 -0800 |
commit | 5b804e39066e01c8cb421957bad1ddbc8daa9831 (patch) | |
tree | a730d164dd755472853adf96831f2a082f730cca | |
parent | 6bb608d055e3dd3d73dcc010f945158153274238 (diff) | |
download | hypervideo-pre-5b804e39066e01c8cb421957bad1ddbc8daa9831.tar.lz hypervideo-pre-5b804e39066e01c8cb421957bad1ddbc8daa9831.tar.xz hypervideo-pre-5b804e39066e01c8cb421957bad1ddbc8daa9831.zip |
[washingtonpost] Fix extractor (#2796)
Closes #2778
Authored by: Bricio
-rw-r--r-- | yt_dlp/extractor/washingtonpost.py | 21 |
1 files changed, 18 insertions, 3 deletions
```diff
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
index 8afb1af83..9d6ae2870 100644
--- a/yt_dlp/extractor/washingtonpost.py
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -5,6 +5,8 @@ import re
 
 from .common import InfoExtractor
 
+from ..utils import traverse_obj
+
 
 class WashingtonPostIE(InfoExtractor):
     IE_NAME = 'washingtonpost'
@@ -50,7 +52,7 @@ class WashingtonPostArticleIE(InfoExtractor):
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
-            'md5': 'b9be794ceb56c7267d410a13f99d801a',
+            'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
             'info_dict': {
                 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -59,9 +61,10 @@ class WashingtonPostArticleIE(InfoExtractor):
                 'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
                 'timestamp': 1395440416,
                 'upload_date': '20140321',
+                'thumbnail': r're:https://[^\.]+.cloudfront\.net/PAPERMINESplash\.jpg',
             },
         }, {
-            'md5': '1fff6a689d8770966df78c8cb6c8c17c',
+            'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
             'info_dict': {
                 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -70,6 +73,7 @@ class WashingtonPostArticleIE(InfoExtractor):
                 'duration': 2220,
                 'timestamp': 1395441819,
                 'upload_date': '20140321',
+                'thumbnail': r're:https://[^\.]+.cloudfront\.net/BoyersSplash\.jpeg',
             },
         }],
     }, {
@@ -88,7 +92,11 @@ class WashingtonPostArticleIE(InfoExtractor):
                 'timestamp': 1419972442,
                 'title': 'Why black boxes don’t transmit data in real time',
             }
-        }]
+        }],
+        'skip': 'Doesnt have a video anymore',
+    }, {
+        'url': 'https://www.washingtonpost.com/nation/2021/08/05/dixie-river-fire-california-climate/',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -106,6 +114,13 @@ class WashingtonPostArticleIE(InfoExtractor):
                 <div\s+class="posttv-video-embed[^>]*?data-uuid=|
                 data-video-uuid=
             )"([^"]+)"''', webpage)
+
+        if not uuids:
+            json_data = self._search_nextjs_data(webpage, page_id)
+            for content_element in traverse_obj(json_data, ('props', 'pageProps', 'globalContent', 'content_elements')):
+                if content_element.get('type') == 'video':
+                    uuids.append(content_element.get('_id'))
+
         entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
 
         return {
```