about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/cbc.py
diff options
context:
space:
mode:
author: trainman261 <trainman261@users.noreply.github.com> 2023-08-20 18:35:57 +0200
committer: GitHub <noreply@github.com> 2023-08-20 16:35:57 +0000
commit: ed711897814f3ee0b1822e4205e74133467e8f1c (patch)
tree: 8a57473c80e767e5d967412719bd49ab4c9b4376 /yt_dlp/extractor/cbc.py
parent: a0de8bb8601146b8f87bf7cd562eef8bfb4690be (diff)
download: hypervideo-pre-ed711897814f3ee0b1822e4205e74133467e8f1c.tar.lz
hypervideo-pre-ed711897814f3ee0b1822e4205e74133467e8f1c.tar.xz
hypervideo-pre-ed711897814f3ee0b1822e4205e74133467e8f1c.zip
[ie/CBCPlayerPlaylist] Add extractor (#7870)
Authored by: trainman261
Diffstat (limited to 'yt_dlp/extractor/cbc.py')
-rw-r--r-- yt_dlp/extractor/cbc.py 33
1 files changed, 33 insertions, 0 deletions
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 9413281a5..b3c5471f7 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -2,6 +2,7 @@ import re
import json
import base64
import time
+import urllib.parse
from .common import InfoExtractor
from ..compat import (
@@ -228,6 +229,38 @@ class CBCPlayerIE(InfoExtractor):
}
+class CBCPlayerPlaylistIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:player:playlist'
+ _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 'news/tv shows/the national/latest broadcast',
+ }
+ }, {
+ 'url': 'https://www.cbc.ca/player/news/Canada/North',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 'news/canada/north',
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = urllib.parse.unquote(self._match_id(url)).lower()
+ webpage = self._download_webpage(url, playlist_id)
+ json_content = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id)
+
+ def entries():
+ for video_id in traverse_obj(json_content, (
+ 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id'
+ )):
+ yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE)
+
+ return self.playlist_result(entries(), playlist_id)
+
+
class CBCGemIE(InfoExtractor):
IE_NAME = 'gem.cbc.ca'
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'