Date: Thu, 17 Feb 2022 14:38:58 -0300
Subject: [washingtonpost] Fix extractor (#2796)
Closes #2778
Authored by: Bricio
---
yt_dlp/extractor/washingtonpost.py | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
index 8afb1af83..9d6ae2870 100644
--- a/yt_dlp/extractor/washingtonpost.py
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -5,6 +5,8 @@ import re
from .common import InfoExtractor
+from ..utils import traverse_obj
+
class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost'
@@ -50,7 +52,7 @@ class WashingtonPostArticleIE(InfoExtractor):
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
- 'md5': 'b9be794ceb56c7267d410a13f99d801a',
+ 'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -59,9 +61,10 @@ class WashingtonPostArticleIE(InfoExtractor):
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
'timestamp': 1395440416,
'upload_date': '20140321',
+ 'thumbnail': r're:https://[^\.]+.cloudfront\.net/PAPERMINESplash\.jpg',
},
}, {
- 'md5': '1fff6a689d8770966df78c8cb6c8c17c',
+ 'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -70,6 +73,7 @@ class WashingtonPostArticleIE(InfoExtractor):
'duration': 2220,
'timestamp': 1395441819,
'upload_date': '20140321',
+ 'thumbnail': r're:https://[^\.]+.cloudfront\.net/BoyersSplash\.jpeg',
},
}],
}, {
@@ -88,7 +92,11 @@ class WashingtonPostArticleIE(InfoExtractor):
'timestamp': 1419972442,
'title': 'Why black boxes don’t transmit data in real time',
}
- }]
+ }],
+ 'skip': 'Doesnt have a video anymore',
+ }, {
+ 'url': 'https://www.washingtonpost.com/nation/2021/08/05/dixie-river-fire-california-climate/',
+ 'only_matching': True,
}]
@classmethod
@@ -106,6 +114,13 @@ class WashingtonPostArticleIE(InfoExtractor):
]*?data-uuid=|
data-video-uuid=
)"([^"]+)"''', webpage)
+
+ if not uuids:
+ json_data = self._search_nextjs_data(webpage, page_id)
+ for content_element in traverse_obj(json_data, ('props', 'pageProps', 'globalContent', 'content_elements')):
+ if content_element.get('type') == 'video':
+ uuids.append(content_element.get('_id'))
+
entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
return {
--
cgit v1.2.3
From c571b3a6ab981d7287c2d3575b50b8f63dd830d8 Mon Sep 17 00:00:00 2001
From: MinePlayersPE <20515340+MinePlayersPE@users.noreply.github.com>
Date: Fri, 18 Feb 2022 15:12:20 +0700
Subject: [youtube] Fix n-sig extraction for phone player JS (#2816)
Authored by: MinePlayersPE
---
test/test_youtube_signature.py | 4 ++++
yt_dlp/extractor/youtube.py | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index cb07d3e23..bbbba073f 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -90,6 +90,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
),
+ (
+ 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+ 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+ ),
]
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d5f9b6962..45eb9ec57 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2413,7 +2413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
nfunc, idx = self._search_regex(
- r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
+ r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
return nfunc
--
cgit v1.2.3
From 549cb2a8362e1d9b8106da4d3fee4807f0c07a9a Mon Sep 17 00:00:00 2001
From: Bricio <216170+Bricio@users.noreply.github.com>
Date: Fri, 18 Feb 2022 05:15:17 -0300
Subject: [rtvs] Fix extractor (#2795)
Closes #2758
Authored by: Bricio
---
yt_dlp/extractor/rtvs.py | 74 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 58 insertions(+), 16 deletions(-)
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index 6573b260d..3ea0f1883 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,11 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ traverse_obj,
+ unified_timestamp,
+)
+
class RTVSIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'
_TESTS = [{
# radio archive
'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
@@ -13,23 +21,37 @@ class RTVSIE(InfoExtractor):
'info_dict': {
'id': '414872',
'ext': 'mp3',
- 'title': 'Ostrov pokladov 1 časť.mp3'
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'Ostrov pokladov 1 časť.mp3',
+ 'duration': 2854,
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
+ 'display_id': '135331',
}
}, {
# tv archive
'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
- 'md5': '85e2c55cf988403b70cac24f5c086dc6',
'info_dict': {
'id': '63118',
'ext': 'mp4',
'title': 'Amaro Džives - Náš deň',
- 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
- },
- 'params': {
- 'skip_download': True,
+ 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
+ 'timestamp': 1428555900,
+ 'upload_date': '20150409',
+ 'duration': 4986,
+ }
+ }, {
+ # tv archive
+ 'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
+ 'info_dict': {
+ 'id': '18083',
+ 'ext': 'mp4',
+ 'title': 'Robin',
+ 'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
+ 'timestamp': 1636652760,
+ 'display_id': '307655',
+ 'duration': 831,
+ 'upload_date': '20211111',
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
}
}]
@@ -37,11 +59,31 @@ class RTVSIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ iframe_id = self._search_regex(
+ r'