aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r--yt_dlp/extractor/abematv.py16
-rw-r--r--yt_dlp/extractor/adobepass.py51
-rw-r--r--yt_dlp/extractor/ant1newsgr.py4
-rw-r--r--yt_dlp/extractor/ard.py19
-rw-r--r--yt_dlp/extractor/ccma.py13
-rw-r--r--yt_dlp/extractor/common.py61
-rw-r--r--yt_dlp/extractor/extractors.py9
-rw-r--r--yt_dlp/extractor/facebook.py9
-rw-r--r--yt_dlp/extractor/fptplay.py102
-rw-r--r--yt_dlp/extractor/frontendmasters.py4
-rw-r--r--yt_dlp/extractor/generic.py13
-rw-r--r--yt_dlp/extractor/mgtv.py59
-rw-r--r--yt_dlp/extractor/mildom.py294
-rw-r--r--yt_dlp/extractor/nrk.py11
-rw-r--r--yt_dlp/extractor/openrec.py68
-rw-r--r--yt_dlp/extractor/panopto.py445
-rw-r--r--yt_dlp/extractor/peertube.py1
-rw-r--r--yt_dlp/extractor/periscope.py2
-rw-r--r--yt_dlp/extractor/pokemon.py40
-rw-r--r--yt_dlp/extractor/rokfin.py4
-rw-r--r--yt_dlp/extractor/soundcloud.py16
-rw-r--r--yt_dlp/extractor/sovietscloset.py2
-rw-r--r--yt_dlp/extractor/tiktok.py4
-rw-r--r--yt_dlp/extractor/xinpianchang.py95
-rw-r--r--yt_dlp/extractor/youtube.py113
-rw-r--r--yt_dlp/extractor/zingmp3.py36
26 files changed, 1190 insertions, 301 deletions
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 66b12c72f..360fa4699 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -8,10 +8,6 @@ import struct
from base64 import urlsafe_b64encode
from binascii import unhexlify
-import typing
-if typing.TYPE_CHECKING:
- from ..YoutubeDL import YoutubeDL
-
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..compat import (
@@ -36,15 +32,15 @@ from ..utils import (
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
-def add_opener(self: 'YoutubeDL', handler):
+def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
- self._opener.add_handler(handler)
+ assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ ydl._opener.add_handler(handler)
-def remove_opener(self: 'YoutubeDL', handler):
+def remove_opener(ydl, handler):
'''
Remove handler(s) for opening URLs
@param handler Either handler object itself or handler type.
@@ -52,8 +48,8 @@ def remove_opener(self: 'YoutubeDL', handler):
'''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- opener = self._opener
- assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
+ opener = ydl._opener
+ assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index bebcafa6b..f0eba8844 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1345,6 +1345,11 @@ MSO_INFO = {
'username_field': 'username',
'password_field': 'password',
},
+ 'Suddenlink': {
+ 'name': 'Suddenlink',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
}
@@ -1636,6 +1641,52 @@ class AdobePassIE(InfoExtractor):
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Suddenlink':
+ # Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
+            # but they also do a dynamic redirect using javascript that has to be followed as well
+ first_bookend_page, urlh = post_form(
+ provider_redirect_page_res, 'Pressing Continue...')
+
+ hidden_data = self._hidden_inputs(first_bookend_page)
+ hidden_data['history_val'] = 1
+
+ provider_login_redirect_page = self._download_webpage(
+ urlh.geturl(), video_id, 'Sending First Bookend',
+ query=hidden_data)
+
+ provider_tryauth_url = self._html_search_regex(
+ r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+
+ provider_tryauth_page = self._download_webpage(
+ provider_tryauth_url, video_id, 'Submitting TryAuth',
+ query=hidden_data)
+
+ provider_login_page_res = self._download_webpage_handle(
+ f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
+ video_id, 'Getting Login Page',
+ query=hidden_data)
+
+ provider_association_redirect, urlh = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password
+ })
+
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_association_redirect, url=urlh.geturl())
+
+ last_bookend_page, urlh = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Auth Association Redirect Page')
+
+ hidden_data = self._hidden_inputs(last_bookend_page)
+ hidden_data['history_val'] = 3
+
+ mvpd_confirm_page_res = self._download_webpage_handle(
+ urlh.geturl(), video_id, 'Sending Final Bookend',
+ query=hidden_data)
+
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
else:
# Some providers (e.g. DIRECTV NOW) have another meta refresh
# based redirect that should be followed.
diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py
index 7d70e0427..1075b461e 100644
--- a/yt_dlp/extractor/ant1newsgr.py
+++ b/yt_dlp/extractor/ant1newsgr.py
@@ -97,8 +97,8 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
- return self.url_result_or_playlist_from_matches(
- embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(),
+ return self.playlist_from_matches(
+ embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 4ad5d6ddd..7ea339b39 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -407,8 +407,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|sammlung))/
- (?:(?P<display_id>[^?#]+)/)?
- (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)'''
+ (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
+ (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
+ (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@@ -437,6 +438,13 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'upload_date': '20211108',
},
}, {
+ 'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
+ 'playlist_count': 6,
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
+ 'title': 'beforeigners/beforeigners/staffel-1',
+ },
+ }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'only_matching': True,
}, {
@@ -561,14 +569,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
break
pageNumber = pageNumber + 1
- return self.playlist_result(entries, playlist_title=display_id)
+ return self.playlist_result(entries, playlist_id, playlist_title=display_id)
def _real_extract(self, url):
- video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
- 'id', 'display_id', 'playlist', 'client')
+ video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
+ 'id', 'display_id', 'playlist', 'client', 'season')
display_id, client = display_id or video_id, client or 'ard'
if playlist_type:
+ # TODO: Extract only specified season
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
player_page = self._download_json(
diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py
index ea98f8688..9dbaabfa0 100644
--- a/yt_dlp/extractor/ccma.py
+++ b/yt_dlp/extractor/ccma.py
@@ -1,17 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
-import calendar
-import datetime
-
from .common import InfoExtractor
from ..utils import (
clean_html,
- extract_timezone,
int_or_none,
parse_duration,
parse_resolution,
try_get,
+ unified_timestamp,
url_or_none,
)
@@ -95,14 +92,8 @@ class CCMAIE(InfoExtractor):
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
tematica = try_get(informacio, lambda x: x['tematica']['text'])
- timestamp = None
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
- try:
- timezone, data_utc = extract_timezone(data_utc)
- timestamp = calendar.timegm((datetime.datetime.strptime(
- data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
- except TypeError:
- pass
+ timestamp = unified_timestamp(data_utc)
subtitles = {}
subtitols = media.get('subtitols') or []
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index f86e7cb3e..354814433 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -226,6 +226,7 @@ class InfoExtractor(object):
The following fields are optional:
+ direct: True if a direct video file was given (must only be set by GenericIE)
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
@@ -274,7 +275,7 @@ class InfoExtractor(object):
* "url": A URL pointing to the subtitles file
It can optionally also have:
* "name": Name or description of the subtitles
- * http_headers: A dictionary of additional HTTP headers
+ * "http_headers": A dictionary of additional HTTP headers
to add to the request.
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles'; contains automatically generated
@@ -425,8 +426,8 @@ class InfoExtractor(object):
title, description etc.
- Subclasses of this one should re-define the _real_initialize() and
- _real_extract() methods and define a _VALID_URL regexp.
+    Subclasses of this should define a _VALID_URL regexp and re-define the
+ _real_extract() and (optionally) _real_initialize() methods.
Probably, they should also be added to the list of extractors.
Subclasses may also override suitable() if necessary, but ensure the function
@@ -661,7 +662,7 @@ class InfoExtractor(object):
return False
def set_downloader(self, downloader):
- """Sets the downloader for this IE."""
+ """Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
def _real_initialize(self):
@@ -670,7 +671,7 @@ class InfoExtractor(object):
def _real_extract(self, url):
"""Real extraction process. Redefine in subclasses."""
- pass
+ raise NotImplementedError('This method must be implemented by subclasses')
@classmethod
def ie_key(cls):
@@ -749,7 +750,7 @@ class InfoExtractor(object):
errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
- raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+ raise ExtractorError(errmsg, cause=err)
else:
self.report_warning(errmsg)
return False
@@ -1661,31 +1662,31 @@ class InfoExtractor(object):
'format_id': {'type': 'alias', 'field': 'id'},
'preference': {'type': 'alias', 'field': 'ie_pref'},
'language_preference': {'type': 'alias', 'field': 'lang'},
-
- # Deprecated
- 'dimension': {'type': 'alias', 'field': 'res'},
- 'resolution': {'type': 'alias', 'field': 'res'},
- 'extension': {'type': 'alias', 'field': 'ext'},
- 'bitrate': {'type': 'alias', 'field': 'br'},
- 'total_bitrate': {'type': 'alias', 'field': 'tbr'},
- 'video_bitrate': {'type': 'alias', 'field': 'vbr'},
- 'audio_bitrate': {'type': 'alias', 'field': 'abr'},
- 'framerate': {'type': 'alias', 'field': 'fps'},
- 'protocol': {'type': 'alias', 'field': 'proto'},
'source_preference': {'type': 'alias', 'field': 'source'},
+ 'protocol': {'type': 'alias', 'field': 'proto'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
- 'filesize_estimate': {'type': 'alias', 'field': 'size'},
- 'samplerate': {'type': 'alias', 'field': 'asr'},
- 'video_ext': {'type': 'alias', 'field': 'vext'},
- 'audio_ext': {'type': 'alias', 'field': 'aext'},
- 'video_codec': {'type': 'alias', 'field': 'vcodec'},
- 'audio_codec': {'type': 'alias', 'field': 'acodec'},
- 'video': {'type': 'alias', 'field': 'hasvid'},
- 'has_video': {'type': 'alias', 'field': 'hasvid'},
- 'audio': {'type': 'alias', 'field': 'hasaud'},
- 'has_audio': {'type': 'alias', 'field': 'hasaud'},
- 'extractor': {'type': 'alias', 'field': 'ie_pref'},
- 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'},
+
+ # Deprecated
+ 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
+ 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
+ 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
+ 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
+ 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
+ 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
+ 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
+ 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
}
def __init__(self, ie, field_preference):
@@ -1785,7 +1786,7 @@ class InfoExtractor(object):
continue
if self._get_field_setting(field, 'type') == 'alias':
alias, field = field, self._get_field_setting(field, 'field')
- if alias not in ('format_id', 'preference', 'language_preference'):
+ if self._get_field_setting(alias, 'deprecated'):
self.ydl.deprecation_warning(
f'Format sorting alias {alias} is deprecated '
f'and may be removed in a future version. Please use {field} instead')
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 5448acf01..09b795c56 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -520,6 +520,7 @@ from .foxnews import (
FoxNewsArticleIE,
)
from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
@@ -848,6 +849,7 @@ from .microsoftvirtualacademy import (
from .mildom import (
MildomIE,
MildomVodIE,
+ MildomClipIE,
MildomUserVodIE,
)
from .minds import (
@@ -1150,6 +1152,11 @@ from .palcomp3 import (
PalcoMP3VideoIE,
)
from .pandoratv import PandoraTVIE
+from .panopto import (
+ PanoptoIE,
+ PanoptoListIE,
+ PanoptoPlaylistIE
+)
from .paramountplus import (
ParamountPlusIE,
ParamountPlusSeriesIE,
@@ -1218,6 +1225,7 @@ from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
+ PokemonSoundLibraryIE,
)
from .pokergo import (
PokerGoIE,
@@ -2010,6 +2018,7 @@ from .ximalaya import (
XimalayaIE,
XimalayaAlbumIE
)
+from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index d39dcc058..ef57b221c 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -18,6 +18,7 @@ from ..utils import (
ExtractorError,
float_or_none,
get_element_by_id,
+ get_first,
int_or_none,
js_to_json,
merge_dicts,
@@ -405,11 +406,9 @@ class FacebookIE(InfoExtractor):
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
- title = traverse_obj(media, (..., 'title', 'text'), get_all=False)
- description = traverse_obj(media, (
- ..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False)
- uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False)
- or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {})
+ title = get_first(media, ('title', 'text'))
+ description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
+ uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
page_title = title or self._html_search_regex((
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
new file mode 100644
index 000000000..a34e90bb1
--- /dev/null
+++ b/yt_dlp/extractor/fptplay.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ join_nonempty,
+)
+
+
+class FptplayIE(InfoExtractor):
+ _VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
+ _GEO_COUNTRIES = ['VN']
+ IE_NAME = 'fptplay'
+ IE_DESC = 'fptplay.vn'
+ _TESTS = [{
+ 'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+ 'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
+ 'info_dict': {
+ 'id': '621a123016f369ebbde55945',
+ 'ext': 'mp4',
+ 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
+ 'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
+ 'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
+ 'info_dict': {
+ 'id': '61f3aa8a6b3b1d2e73c60eb5',
+ 'ext': 'mp4',
+ 'title': 'Má Tôi Là Đại Gia - 3',
+ 'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
+ webpage = self._download_webpage(url, video_id=video_id, fatal=False)
+ info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(
+ self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
+ 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def get_api_with_st_token(self, video_id, episode):
+ path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+ timestamp = int(time.time()) + 10800
+
+ t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+ r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+ n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
+
+ def convert(e):
+ t = ''
+ n = 0
+ i = [0, 0, 0]
+ a = [0, 0, 0, 0]
+ s = len(e)
+ c = 0
+ for z in range(s, 0, -1):
+ if n <= 3:
+ i[n] = e[c]
+ n += 1
+ c += 1
+ if 3 == n:
+ a[0] = (252 & i[0]) >> 2
+ a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+ a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+ a[3] = (63 & i[2])
+ for v in range(4):
+ t += r[a[v]]
+ n = 0
+ if n:
+ for o in range(n, 3):
+ i[o] = 0
+
+ for o in range(n + 1):
+ a[0] = (252 & i[0]) >> 2
+ a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+ a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+ a[3] = (63 & i[2])
+ t += r[a[o]]
+ n += 1
+ while n < 3:
+ t += ''
+ n += 1
+ return t
+
+ st_token = convert(n).replace('+', '-').replace('/', '_').replace('=', '')
+ return f'https://api.fptplay.net{path}?{urllib.parse.urlencode({"st": st_token, "e": timestamp})}'
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index 40b8cb0b4..0d29da29b 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
entries = []
for lesson in lessons:
lesson_name = lesson.get('slug')
- if not lesson_name:
- continue
lesson_id = lesson.get('hash') or lesson.get('statsId')
+ if not lesson_id or not lesson_name:
+ continue
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
title = course.get('title')
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 0ddd050ff..6a8b8543b 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -146,6 +146,7 @@ from .tvp import TVPEmbedIE
from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
from .gfycat import GfycatIE
+from .panopto import PanoptoBaseIE
class GenericIE(InfoExtractor):
@@ -2498,6 +2499,15 @@ class GenericIE(InfoExtractor):
'id': '?vid=2295'
},
'playlist_count': 9
+ },
+ {
+ # Panopto embeds
+ 'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
+ 'info_dict': {
+ 'title': 'Insert a quiz into a Panopto video',
+ 'id': 'insert-a-quiz-into-a-panopto-video'
+ },
+ 'playlist_count': 1
}
]
@@ -3723,6 +3733,9 @@ class GenericIE(InfoExtractor):
if gfycat_urls:
return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
+ panopto_urls = PanoptoBaseIE._extract_urls(webpage)
+ if panopto_urls:
+ return self.playlist_from_matches(panopto_urls, video_id, video_title)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
index cab3aa045..4ac70ea57 100644
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -13,12 +13,15 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
+ try_get,
+ url_or_none,
)
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
+ IE_NAME = 'MangoTV'
_TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
@@ -31,6 +34,32 @@ class MGTVIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
+ 'url': 'https://w.mgtv.com/b/427837/15588271.html',
+ 'info_dict': {
+ 'id': '15588271',
+ 'ext': 'mp4',
+ 'title': '春日迟迟再出发 沉浸版',
+ 'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 4026,
+ },
+ }, {
+ 'url': 'https://w.mgtv.com/b/333652/7329822.html',
+ 'info_dict': {
+ 'id': '7329822',
+ 'ext': 'mp4',
+ 'title': '拜托,请你爱我',
+ 'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 2656,
+ },
+ }, {
+ 'url': 'https://w.mgtv.com/b/427837/15591647.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
+ 'only_matching': True,
+ }, {
'url': 'http://www.mgtv.com/b/301817/3826653.html',
'only_matching': True,
}, {
@@ -40,12 +69,14 @@ class MGTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
+ tk2 = base64.urlsafe_b64encode(
+ f'did={compat_str(uuid.uuid4()).encode()}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
try:
api_data = self._download_json(
'https://pcweb.api.mgtv.com/player/video', video_id, query={
'tk2': tk2,
'video_id': video_id,
+ 'type': 'pch5'
}, headers=self.geo_verification_headers())['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
@@ -61,6 +92,7 @@ class MGTVIE(InfoExtractor):
'pm2': api_data['atc']['pm2'],
'tk2': tk2,
'video_id': video_id,
+ 'src': 'intelmgtv',
}, headers=self.geo_verification_headers())['data']
stream_domain = stream_data['stream_domain'][0]
@@ -71,7 +103,7 @@ class MGTVIE(InfoExtractor):
continue
format_data = self._download_json(
stream_domain + stream_path, video_id,
- note='Download video info for format #%d' % idx)
+ note=f'Download video info for format #{idx}')
format_url = format_data.get('info')
if not format_url:
continue
@@ -79,7 +111,7 @@ class MGTVIE(InfoExtractor):
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({
'format_id': compat_str(tbr or idx),
- 'url': format_url,
+ 'url': url_or_none(format_url),
'ext': 'mp4',
'tbr': tbr,
'protocol': 'm3u8_native',
@@ -97,4 +129,25 @@ class MGTVIE(InfoExtractor):
'description': info.get('desc'),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('thumb'),
+ 'subtitles': self.extract_subtitles(video_id, stream_domain),
}
+
+ def _get_subtitles(self, video_id, domain):
+ info = self._download_json(f'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
+ video_id, fatal=False) or {}
+ subtitles = {}
+ for sub in try_get(info, lambda x: x['data']['title']) or []:
+ url_sub = sub.get('url')
+ if not url_sub:
+ continue
+ locale = sub.get('captionCountrySimpleName')
+ sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
+ note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
+ sub_url = url_or_none(sub.get('info'))
+ if not sub_url:
+ continue
+ subtitles.setdefault(locale or 'en', []).append({
+ 'url': sub_url,
+ 'ext': 'srt'
+ })
+ return subtitles
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index b5a2e17f2..5f2df29c6 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -1,102 +1,42 @@
# coding: utf-8
from __future__ import unicode_literals
-import base64
-from datetime import datetime
-import itertools
+import functools
import json
from .common import InfoExtractor
from ..utils import (
- update_url_query,
- random_uuidv4,
- try_get,
+ determine_ext,
+ dict_get,
+ ExtractorError,
float_or_none,
- dict_get
-)
-from ..compat import (
- compat_str,
+ OnDemandPagedList,
+ random_uuidv4,
+ traverse_obj,
)
class MildomBaseIE(InfoExtractor):
_GUEST_ID = None
- _DISPATCHER_CONFIG = None
-
- def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
- query = query or {}
- if query:
- query['__platform'] = 'web'
- url = update_url_query(url, self._common_queries(query, init=init))
- content = self._download_json(url, video_id, note=note)
- if content['code'] == 0:
- return content['body']
- else:
- self.raise_no_formats(
- f'Video not found or premium content. {content["code"]} - {content["message"]}',
- expected=True)
- def _common_queries(self, query={}, init=False):
- dc = self._fetch_dispatcher_config()
- r = {
- 'timestamp': self.iso_timestamp(),
- '__guest_id': '' if init else self.guest_id(),
- '__location': dc['location'],
- '__country': dc['country'],
- '__cluster': dc['cluster'],
- '__platform': 'web',
- '__la': self.lang_code(),
- '__pcv': 'v2.9.44',
- 'sfr': 'pc',
- 'accessToken': '',
- }
- r.update(query)
- return r
-
- def _fetch_dispatcher_config(self):
- if not self._DISPATCHER_CONFIG:
- tmp = self._download_json(
- 'https://disp.mildom.com/serverListV2', 'initialization',
- note='Downloading dispatcher_config', data=json.dumps({
- 'protover': 0,
- 'data': base64.b64encode(json.dumps({
- 'fr': 'web',
- 'sfr': 'pc',
- 'devi': 'Windows',
- 'la': 'ja',
- 'gid': None,
- 'loc': '',
- 'clu': '',
- 'wh': '1919*810',
- 'rtm': self.iso_timestamp(),
- 'ua': self.get_param('http_headers')['User-Agent'],
- }).encode('utf8')).decode('utf8').replace('\n', ''),
- }).encode('utf8'))
- self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
- return self._DISPATCHER_CONFIG
-
- @staticmethod
- def iso_timestamp():
- 'new Date().toISOString()'
- return datetime.utcnow().isoformat()[0:-3] + 'Z'
-
- def guest_id(self):
- 'getGuestId'
- if self._GUEST_ID:
- return self._GUEST_ID
- self._GUEST_ID = try_get(
- self, (
- lambda x: x._call_api(
- 'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
- note='Downloading guest token', init=True)['guest_id'] or None,
- lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
- lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
- ), compat_str) or ''
- return self._GUEST_ID
-
- def lang_code(self):
- 'getCurrentLangCode'
- return 'ja'
+ def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
+ if not self._GUEST_ID:
+ self._GUEST_ID = f'pc-gp-{random_uuidv4()}'
+
+ content = self._download_json(
+ url, video_id, note=note, data=json.dumps(body).encode() if body else None,
+ headers={'Content-Type': 'application/json'} if body else {},
+ query={
+ '__guest_id': self._GUEST_ID,
+ '__platform': 'web',
+ **(query or {}),
+ })
+
+ if content['code'] != 0:
+ raise ExtractorError(
+ f'Mildom says: {content["message"]} (code {content["code"]})',
+ expected=True)
+ return content['body']
class MildomIE(MildomBaseIE):
@@ -106,31 +46,13 @@ class MildomIE(MildomBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- url = 'https://www.mildom.com/%s' % video_id
-
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)
enterstudio = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
note='Downloading live metadata', query={'user_id': video_id})
result_video_id = enterstudio.get('log_id', video_id)
- title = try_get(
- enterstudio, (
- lambda x: self._html_search_meta('twitter:description', webpage),
- lambda x: x['anchor_intro'],
- ), compat_str)
- description = try_get(
- enterstudio, (
- lambda x: x['intro'],
- lambda x: x['live_intro'],
- ), compat_str)
- uploader = try_get(
- enterstudio, (
- lambda x: self._html_search_meta('twitter:title', webpage),
- lambda x: x['loginname'],
- ), compat_str)
-
servers = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
note='Downloading live server list', query={
@@ -138,17 +60,20 @@ class MildomIE(MildomBaseIE):
'live_server_type': 'hls',
})
- stream_query = self._common_queries({
- 'streamReqId': random_uuidv4(),
- 'is_lhls': '0',
- })
- m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
- formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
- 'Referer': 'https://www.mildom.com/',
- 'Origin': 'https://www.mildom.com',
- }, note='Downloading m3u8 information')
-
- del stream_query['streamReqId'], stream_query['timestamp']
+ playback_token = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
+ note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
+ playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
+ if not playback_token:
+ raise ExtractorError('Failed to obtain live playback token')
+
+ formats = self._extract_m3u8_formats(
+ f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
+ result_video_id, 'mp4', headers={
+ 'Referer': 'https://www.mildom.com/',
+ 'Origin': 'https://www.mildom.com',
+ })
+
for fmt in formats:
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
@@ -156,10 +81,10 @@ class MildomIE(MildomBaseIE):
return {
'id': result_video_id,
- 'title': title,
- 'description': description,
+ 'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
+ 'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
- 'uploader': uploader,
+ 'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
'uploader_id': video_id,
'formats': formats,
'is_live': True,
@@ -168,7 +93,7 @@ class MildomIE(MildomBaseIE):
class MildomVodIE(MildomBaseIE):
IE_NAME = 'mildom:vod'
- IE_DESC = 'Download a VOD in Mildom'
+ IE_DESC = 'VOD in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
_TESTS = [{
'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
@@ -215,11 +140,8 @@ class MildomVodIE(MildomBaseIE):
}]
def _real_extract(self, url):
- m = self._match_valid_url(url)
- user_id, video_id = m.group('user_id'), m.group('id')
- url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
-
- webpage = self._download_webpage(url, video_id)
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
autoplay = self._call_api(
'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
@@ -227,20 +149,6 @@ class MildomVodIE(MildomBaseIE):
'v_id': video_id,
})['playback']
- title = try_get(
- autoplay, (
- lambda x: self._html_search_meta('og:description', webpage),
- lambda x: x['title'],
- ), compat_str)
- description = try_get(
- autoplay, (
- lambda x: x['video_intro'],
- ), compat_str)
- uploader = try_get(
- autoplay, (
- lambda x: x['author_info']['login_name'],
- ), compat_str)
-
formats = [{
'url': autoplay['audio_url'],
'format_id': 'audio',
@@ -265,17 +173,81 @@ class MildomVodIE(MildomBaseIE):
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': float_or_none(autoplay['publish_time'], scale=1000),
- 'duration': float_or_none(autoplay['video_length'], scale=1000),
+ 'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
+ 'description': traverse_obj(autoplay, 'video_intro'),
+ 'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
+ 'duration': float_or_none(autoplay.get('video_length'), scale=1000),
'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
- 'uploader': uploader,
+ 'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
'uploader_id': user_id,
'formats': formats,
}
+class MildomClipIE(MildomBaseIE):
+ IE_NAME = 'mildom:clip'
+ IE_DESC = 'Clip in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
+ 'info_dict': {
+ 'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
+ 'title': '全然違ったよ',
+ 'timestamp': 1619181890,
+ 'duration': 59,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': 'ざきんぽ',
+ 'uploader_id': '10042245',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+ 'info_dict': {
+ 'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+ 'title': 'かっこいい',
+ 'timestamp': 1621094003,
+ 'duration': 59,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': '(ルーキー',
+ 'uploader_id': '10111524',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+ 'info_dict': {
+ 'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+ 'title': 'あ',
+ 'timestamp': 1614769431,
+ 'duration': 31,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
+ 'uploader_id': '10660174',
+ },
+ }]
+
+ def _real_extract(self, url):
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
+
+ clip_detail = self._call_api(
+ 'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
+ note='Downloading playback metadata', query={
+ 'clip_id': video_id,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(
+ ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
+ 'timestamp': float_or_none(clip_detail.get('create_time')),
+ 'duration': float_or_none(clip_detail.get('length')),
+ 'thumbnail': clip_detail.get('cover'),
+ 'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
+ 'uploader_id': user_id,
+
+ 'url': clip_detail['url'],
+ 'ext': determine_ext(clip_detail.get('url'), 'mp4'),
+ }
+
+
class MildomUserVodIE(MildomBaseIE):
IE_NAME = 'mildom:user:vod'
IE_DESC = 'Download all VODs from specific user in Mildom'
@@ -286,29 +258,32 @@ class MildomUserVodIE(MildomBaseIE):
'id': '10093333',
'title': 'Uploads from ねこばたけ',
},
- 'playlist_mincount': 351,
+ 'playlist_mincount': 732,
}, {
'url': 'https://www.mildom.com/profile/10882672',
'info_dict': {
'id': '10882672',
'title': 'Uploads from kson組長(けいそん)',
},
- 'playlist_mincount': 191,
+ 'playlist_mincount': 201,
}]
- def _entries(self, user_id):
- for page in itertools.count(1):
- reply = self._call_api(
- 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
- user_id, note='Downloading page %d' % page, query={
- 'user_id': user_id,
- 'page': page,
- 'limit': '30',
- })
- if not reply:
- break
- for x in reply:
- yield self.url_result('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']))
+ def _fetch_page(self, user_id, page):
+ page += 1
+ reply = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
+ user_id, note=f'Downloading page {page}', query={
+ 'user_id': user_id,
+ 'page': page,
+ 'limit': '30',
+ })
+ if not reply:
+ return
+ for x in reply:
+ v_id = x.get('v_id')
+ if not v_id:
+ continue
+ yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
def _real_extract(self, url):
user_id = self._match_id(url)
@@ -319,4 +294,5 @@ class MildomUserVodIE(MildomBaseIE):
query={'user_id': user_id}, note='Downloading user profile')['user_info']
return self.playlist_result(
- self._entries(user_id), user_id, 'Uploads from %s' % profile['loginname'])
+ OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
+ user_id, f'Uploads from {profile["loginname"]}')
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 49d58a685..4d723e886 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -8,6 +8,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ compat_HTTPError,
determine_ext,
ExtractorError,
int_or_none,
@@ -147,10 +148,14 @@ class NRKIE(NRKBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url).split('/')[-1]
- path_templ = 'playback/%s/program/' + video_id
-
def call_playback_api(item, query=None):
- return self._call_api(path_templ % item, video_id, item, query=query)
+ try:
+ return self._call_api(f'playback/{item}/program/{video_id}', video_id, item, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
+ raise
+
# known values for preferredCdn: akamai, iponly, minicdn and telenor
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 0525b4830..b476c0986 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ get_first,
int_or_none,
traverse_obj,
unified_strdate,
- unified_timestamp
+ unified_timestamp,
)
from ..compat import compat_str
@@ -19,42 +20,34 @@ class OpenRecBaseIE(InfoExtractor):
def _extract_movie(self, webpage, video_id, name, is_live):
window_stores = self._extract_pagestore(webpage, video_id)
- movie_store = traverse_obj(
- window_stores,
- ('v8', 'state', 'movie'),
- ('v8', 'movie'),
- expected_type=dict)
- if not movie_store:
+ movie_stores = [
+ # Extract all three relevant data stores (most of the data is duplicated between them, but with slight differences!)
+ traverse_obj(window_stores, ('v8', 'state', 'movie'), expected_type=dict),
+ traverse_obj(window_stores, ('v8', 'movie'), expected_type=dict),
+ traverse_obj(window_stores, 'movieStore', expected_type=dict),
+ ]
+ if not any(movie_stores):
raise ExtractorError(f'Failed to extract {name} info')
- title = movie_store.get('title')
- description = movie_store.get('introduction')
- thumbnail = movie_store.get('thumbnailUrl')
-
- uploader = traverse_obj(movie_store, ('channel', 'user', 'name'), expected_type=compat_str)
- uploader_id = traverse_obj(movie_store, ('channel', 'user', 'id'), expected_type=compat_str)
-
- timestamp = int_or_none(traverse_obj(movie_store, ('publishedAt', 'time')), scale=1000)
-
- m3u8_playlists = movie_store.get('media') or {}
+ m3u8_playlists = get_first(movie_stores, 'media') or {}
formats = []
for name, m3u8_url in m3u8_playlists.items():
if not m3u8_url:
continue
formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id='hls-%s' % name))
+ m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id=name))
self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'title': get_first(movie_stores, 'title'),
+ 'description': get_first(movie_stores, 'introduction'),
+ 'thumbnail': get_first(movie_stores, 'thumbnailUrl'),
'formats': formats,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'timestamp': timestamp,
+ 'uploader': get_first(movie_stores, ('channel', 'user', 'name')),
+ 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
+ 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
'is_live': is_live,
}
@@ -72,7 +65,7 @@ class OpenRecIE(OpenRecBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)
+ webpage = self._download_webpage(f'https://www.openrec.tv/live/{video_id}', video_id)
return self._extract_movie(webpage, video_id, 'live', True)
@@ -96,7 +89,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)
+ webpage = self._download_webpage(f'https://www.openrec.tv/capture/{video_id}', video_id)
window_stores = self._extract_pagestore(webpage, video_id)
movie_store = window_stores.get('movie')
@@ -104,15 +97,6 @@ class OpenRecCaptureIE(OpenRecBaseIE):
capture_data = window_stores.get('capture')
if not capture_data:
raise ExtractorError('Cannot extract title')
- title = capture_data.get('title')
- thumbnail = capture_data.get('thumbnailUrl')
- upload_date = unified_strdate(capture_data.get('createdAt'))
-
- uploader = traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str)
- uploader_id = traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str)
-
- timestamp = traverse_obj(movie_store, 'createdAt', expected_type=compat_str)
- timestamp = unified_timestamp(timestamp)
formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4')
@@ -120,13 +104,13 @@ class OpenRecCaptureIE(OpenRecBaseIE):
return {
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
+ 'title': capture_data.get('title'),
+ 'thumbnail': capture_data.get('thumbnailUrl'),
'formats': formats,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
+ 'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)),
+ 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
+ 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
+ 'upload_date': unified_strdate(capture_data.get('createdAt')),
}
@@ -148,6 +132,6 @@ class OpenRecMovieIE(OpenRecBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://www.openrec.tv/movie/%s' % video_id, video_id)
+ webpage = self._download_webpage(f'https://www.openrec.tv/movie/{video_id}', video_id)
return self._extract_movie(webpage, video_id, 'movie', False)
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
new file mode 100644
index 000000000..d458dfe50
--- /dev/null
+++ b/yt_dlp/extractor/panopto.py
@@ -0,0 +1,445 @@
+import re
+import calendar
+import json
+import functools
+from datetime import datetime
+from random import random
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_urlparse
+)
+
+from ..utils import (
+ bug_reports_message,
+ ExtractorError,
+ get_first,
+ int_or_none,
+ OnDemandPagedList,
+ parse_qs,
+ traverse_obj,
+)
+
+
+class PanoptoBaseIE(InfoExtractor):
+ BASE_URL_RE = r'(?P<base_url>https?://[\w.]+\.panopto.(?:com|eu)/Panopto)'
+
+ def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs):
+ response = self._download_json(
+ base_url + path, video_id, data=json.dumps(data).encode('utf8') if data else None,
+ fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs)
+ if not response:
+ return
+ error_code = response.get('ErrorCode')
+ if error_code == 2:
+ self.raise_login_required(method='cookies')
+ elif error_code is not None:
+ msg = f'Panopto said: {response.get("ErrorMessage")}'
+ if fatal:
+ raise ExtractorError(msg, video_id=video_id, expected=True)
+ else:
+ self.report_warning(msg, video_id=video_id)
+ return response
+
+ @staticmethod
+ def _parse_fragment(url):
+ return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=["\'](?P<url>%s/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)' % PanoptoIE.BASE_URL_RE,
+ webpage)]
+
+
+class PanoptoIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+ 'info_dict': {
+ 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+ 'title': 'Panopto for Business - Use Cases',
+ 'timestamp': 1459184200,
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb&mode=Delivery&random=[\d.]+',
+ 'upload_date': '20160328',
+ 'ext': 'mp4',
+ 'cast': [],
+ 'duration': 88.17099999999999,
+ 'average_rating': int,
+ 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
+ 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'channel': 'Showcase Videos'
+ },
+ },
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+ 'info_dict': {
+ 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+ 'title': 'Overcoming Top 4 Challenges of Enterprise Video',
+ 'uploader': 'Panopto Support',
+ 'timestamp': 1449409251,
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59&mode=Delivery&random=[\d.]+',
+ 'upload_date': '20151206',
+ 'ext': 'mp4',
+ 'chapters': 'count:21',
+ 'cast': ['Panopto Support'],
+ 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
+ 'average_rating': int,
+ 'description': 'md5:4391837802b3fc856dadf630c4b375d1',
+ 'duration': 1088.2659999999998,
+ 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
+ 'channel': 'Webcasts',
+ },
+ },
+ {
+ # Extra params in URL
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+ 'info_dict': {
+ 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
+ 'ext': 'mp4',
+ 'duration': 129.513,
+ 'cast': ['Kathryn Kelly'],
+ 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
+ 'timestamp': 1569845768,
+ 'tags': ['Viewer', 'Enterprise'],
+ 'upload_date': '20190930',
+ 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=5fa74e93-3d87-4694-b60e-aaa4012214ed&mode=Delivery&random=[\d.]+',
+ 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
+ 'title': 'Getting Started: View a Video',
+ 'average_rating': int,
+ 'uploader': 'Kathryn Kelly',
+ 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
+ 'channel': 'Getting Started',
+ }
+ },
+ {
+ # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream.
+ 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
+ 'info_dict': {
+ 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
+ 'ext': 'mp4',
+ 'cast': ['LTS CLI Script'],
+ 'duration': 2178.45,
+ 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
+ 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
+ 'average_rating': int,
+ 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
+ 'uploader': 'LTS CLI Script',
+ 'timestamp': 1572458134,
+ 'title': 'WW2 Vets Interview 3 Ronald Stanley George',
+ 'thumbnail': r're:https://unisa\.au\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4&mode=Delivery&random=[\d.]+',
+ 'channel': 'World War II Veteran Interviews',
+ 'upload_date': '20191030',
+ },
+ },
+ {
+ 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
+ 'only_matching': True
+ },
+ ]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PanoptoPlaylistIE.suitable(url) else super().suitable(url)
+
+ def _mark_watched(self, base_url, video_id, delivery_info):
+ duration = traverse_obj(delivery_info, ('Delivery', 'Duration'), expected_type=float)
+ invocation_id = delivery_info.get('InvocationId')
+ stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
+ if invocation_id and stream_id and duration:
+ timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/'
+ data = {
+ 'streamRequests': [
+ {
+ 'ClientTimeStamp': timestamp_str,
+ 'ID': 0,
+ 'InvocationID': invocation_id,
+ 'PlaybackSpeed': 1,
+ 'SecondsListened': duration - 1,
+ 'SecondsRejected': 0,
+ 'StartPosition': 0,
+ 'StartReason': 2,
+ 'StopReason': None,
+ 'StreamID': stream_id,
+ 'TimeStamp': timestamp_str,
+ 'UpdatesRejected': 0
+ },
+ ]}
+
+ self._download_webpage(
+ base_url + '/Services/Analytics.svc/AddStreamRequests', video_id,
+ fatal=False, data=json.dumps(data).encode('utf8'), headers={'content-type': 'application/json'},
+ note='Marking watched', errnote='Unable to mark watched')
+
+ @staticmethod
+ def _extract_chapters(delivery):
+ chapters = []
+ for timestamp in delivery.get('Timestamps', []):
+ start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration'))
+ if start is None or duration is None:
+ continue
+ chapters.append({
+ 'start_time': start,
+ 'end_time': start + duration,
+ 'title': timestamp.get('Caption')
+ })
+ return chapters
+
+ def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs):
+ formats = []
+ subtitles = {}
+ for stream in streams or []:
+ stream_formats = []
+ http_stream_url = stream.get('StreamHttpUrl')
+ stream_url = stream.get('StreamUrl')
+
+ if http_stream_url:
+ stream_formats.append({'url': http_stream_url})
+
+ if stream_url:
+ media_type = stream.get('ViewerMediaFileTypeName')
+ if media_type in ('hls', ):
+ m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
+ stream_formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, stream_subtitles)
+ else:
+ stream_formats.append({
+ 'url': stream_url
+ })
+ for fmt in stream_formats:
+ fmt.update({
+ 'format_note': stream.get('Tag'),
+ **fmt_kwargs
+ })
+ formats.extend(stream_formats)
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ base_url, video_id = self._match_valid_url(url).group('base_url', 'id')
+ delivery_info = self._call_api(
+ base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id,
+ query={
+ 'deliveryId': video_id,
+ 'invocationId': '',
+ 'isLiveNotes': 'false',
+ 'refreshAuthCookie': 'true',
+ 'isActiveBroadcast': 'false',
+ 'isEditing': 'false',
+ 'isKollectiveAgentInstalled': 'false',
+ 'isEmbed': 'false',
+ 'responseType': 'json',
+ }
+ )
+
+ delivery = delivery_info['Delivery']
+ session_start_time = int_or_none(delivery.get('SessionStartTime'))
+
+ # The podcast stream usually combines the other streams, so we prefer it by default.
+ podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles(
+ video_id, delivery.get('PodcastStreams'), format_note='PODCAST')
+
+ streams_formats, streams_subtitles = self._extract_streams_formats_and_subtitles(
+ video_id, delivery.get('Streams'), preference=-10)
+
+ formats = podcast_formats + streams_formats
+ subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles)
+ self._sort_formats(formats)
+
+ self.mark_watched(base_url, video_id, delivery_info)
+
+ return {
+ 'id': video_id,
+ 'title': delivery.get('SessionName'),
+ 'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), default=[], expected_type=lambda x: x or None),
+ 'timestamp': session_start_time - 11640000000 if session_start_time else None,
+ 'duration': delivery.get('Duration'),
+ 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
+ 'average_rating': delivery.get('AverageRating'),
+ 'chapters': self._extract_chapters(delivery) or None,
+ 'uploader': delivery.get('OwnerDisplayName') or None,
+ 'uploader_id': delivery.get('OwnerId'),
+ 'description': delivery.get('SessionAbstract'),
+ 'tags': traverse_obj(delivery, ('Tags', ..., 'Content')),
+ 'channel_id': delivery.get('SessionGroupPublicID'),
+ 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class PanoptoPlaylistIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+ 'info_dict': {
+ 'title': 'Featured Video Tutorials',
+ 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
+ 'description': '',
+ },
+ 'playlist_mincount': 36
+ },
+ {
+ 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
+ 'info_dict': {
+ 'title': 'Library Website Introduction Playlist',
+ 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
+ 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
+ },
+ 'playlist_mincount': 4
+ },
+
+ ]
+
+ def _entries(self, base_url, playlist_id, session_list_id):
+ session_list_info = self._call_api(
+ base_url, f'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id)
+
+ items = session_list_info['Items']
+ for item in items:
+ if item.get('TypeName') != 'Session':
+ self.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once=True)
+ continue
+ yield {
+ '_type': 'url',
+ 'id': item.get('Id'),
+ 'url': item.get('ViewerUri'),
+ 'title': item.get('Name'),
+ 'description': item.get('Description'),
+ 'duration': item.get('Duration'),
+ 'channel': traverse_obj(item, ('Parent', 'Name')),
+ 'channel_id': traverse_obj(item, ('Parent', 'Id'))
+ }
+
+ def _real_extract(self, url):
+ base_url, playlist_id = self._match_valid_url(url).group('base_url', 'id')
+
+ video_id = get_first(parse_qs(url), 'id')
+ if video_id:
+ if self.get_param('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id)
+ else:
+ self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
+
+ playlist_info = self._call_api(base_url, f'/Api/Playlists/{playlist_id}', playlist_id)
+ return self.playlist_result(
+ self._entries(base_url, playlist_id, playlist_info['SessionListId']),
+ playlist_id=playlist_id, playlist_title=playlist_info.get('Name'),
+ playlist_description=playlist_info.get('Description'))
+
+
+class PanoptoListIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx'
+ _PAGE_SIZE = 250
+ _TESTS = [
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
+ 'info_dict': {
+ 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'title': 'Showcase Videos'
+ },
+ 'playlist_mincount': 140
+
+ },
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
+ 'info_dict': {
+ 'id': 'panopto_list',
+ 'title': 'panopto_list'
+ },
+ 'playlist_mincount': 300
+ },
+ {
+ # Folder that contains 8 folders and a playlist
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
+ 'info_dict': {
+ 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
+ 'title': 'Video Tutorials'
+ },
+ 'playlist_mincount': 9
+ }
+
+ ]
+
+ def _fetch_page(self, base_url, query_params, display_id, page):
+
+ params = {
+ 'sortColumn': 1,
+ 'getFolderData': True,
+ 'includePlaylists': True,
+ **query_params,
+ 'page': page,
+ 'maxResults': self._PAGE_SIZE,
+ }
+
+ response = self._call_api(
+ base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page+1}',
+ data={'queryParameters': params}, fatal=False)
+
+ for result in get_first(response, 'Results', default=[]):
+ # This could be a video, playlist (or maybe something else)
+ item_id = result.get('DeliveryID')
+ yield {
+ '_type': 'url',
+ 'id': item_id,
+ 'title': result.get('SessionName'),
+ 'url': traverse_obj(result, 'ViewerUrl', 'EmbedUrl', get_all=False) or (base_url + f'/Pages/Viewer.aspx?id={item_id}'),
+ 'duration': result.get('Duration'),
+ 'channel': result.get('FolderName'),
+ 'channel_id': result.get('FolderID'),
+ }
+
+ for folder in get_first(response, 'Subfolders', default=[]):
+ folder_id = folder.get('ID')
+ yield self.url_result(
+ base_url + f'/Pages/Sessions/List.aspx#folderID="{folder_id}"',
+ ie_key=PanoptoListIE.ie_key(), video_id=folder_id, title=folder.get('Name'))
+
+ def _extract_folder_metadata(self, base_url, folder_id):
+ response = self._call_api(
+ base_url, '/Services/Data.svc/GetFolderInfo', folder_id,
+ data={'folderID': folder_id}, fatal=False)
+ return {
+ 'title': get_first(response, 'Name', default=[])
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ base_url = mobj.group('base_url')
+
+ query_params = self._parse_fragment(url)
+ folder_id, display_id = query_params.get('folderID'), 'panopto_list'
+
+ if query_params.get('isSubscriptionsPage'):
+ display_id = 'subscriptions'
+ if not query_params.get('subscribableTypes'):
+ query_params['subscribableTypes'] = [0, 1, 2]
+ elif query_params.get('isSharedWithMe'):
+ display_id = 'sharedwithme'
+ elif folder_id:
+ display_id = folder_id
+
+ query = query_params.get('query')
+ if query:
+ display_id += f': query "{query}"'
+
+ info = {
+ '_type': 'playlist',
+ 'id': display_id,
+ 'title': display_id,
+ }
+ if folder_id:
+ info.update(self._extract_folder_metadata(base_url, folder_id))
+
+ info['entries'] = OnDemandPagedList(
+ functools.partial(self._fetch_page, base_url, query_params, display_id), self._PAGE_SIZE)
+
+ return info
diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
index e0b2ab982..9d6b82178 100644
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -87,6 +87,7 @@ class PeerTubeIE(InfoExtractor):
maindreieck-tv\.de|
mani\.tube|
manicphase\.me|
+ media\.fsfe\.org|
media\.gzevd\.de|
media\.inno3\.cricket|
media\.kaitaia\.life|
diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py
index b93a02b7d..1a292b8ac 100644
--- a/yt_dlp/extractor/periscope.py
+++ b/yt_dlp/extractor/periscope.py
@@ -33,7 +33,7 @@ class PeriscopeBaseIE(InfoExtractor):
return {
'id': broadcast.get('id') or video_id,
- 'title': self._live_title(title) if is_live else title,
+ 'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py
index 402b574a7..b411390e2 100644
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
from .common import InfoExtractor
from ..utils import (
@@ -138,3 +139,42 @@ class PokemonWatchIE(InfoExtractor):
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})
+
+
+class PokemonSoundLibraryIE(InfoExtractor):
+ _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp'
+
+ _TESTS = [{
+ 'url': 'https://soundlibrary.pokemon.co.jp/',
+ 'info_dict': {
+ 'title': 'Pokémon Diamond and Pearl Sound Tracks',
+ },
+ 'playlist_mincount': 149,
+ }]
+
+ def _real_extract(self, url):
+ musicbox_webpage = self._download_webpage(
+ 'https://soundlibrary.pokemon.co.jp/musicbox', None,
+ 'Downloading list of songs')
+ song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)]
+ song_titles = song_titles[4::2]
+
+ # Individual songs have no permalink; instead we return all songs at once
+ song_entries = [{
+ 'id': f'pokemon-soundlibrary-{song_id}',
+ 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav',
+ # note: the server always serves MP3 files, despite the .wav extension in the URL above
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none',
+ 'title': song_title,
+ 'track': song_title,
+ 'artist': 'Nintendo / Creatures Inc. / GAME FREAK inc.',
+ 'uploader': 'Pokémon',
+ 'release_year': 2006,
+ 'release_date': '20060928',
+ 'track_number': song_id,
+ 'album': 'Pokémon Diamond and Pearl',
+ } for song_id, song_title in enumerate(song_titles, 1)]
+
+ return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks')
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index 79a5b2336..0fd65db4b 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -100,7 +100,7 @@ class RokfinIE(InfoExtractor):
video_url, video_id, fatal=False, live=live_status == 'is_live')
if not formats:
- if metadata.get('premiumPlan'):
+ if traverse_obj(metadata, 'premiumPlan', 'premium'):
self.raise_login_required('This video is only available to premium users', True, method='cookies')
elif scheduled:
self.raise_no_formats(
@@ -129,7 +129,7 @@ class RokfinIE(InfoExtractor):
'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none),
'live_status': live_status,
'availability': self._availability(
- needs_premium=bool(metadata.get('premiumPlan')),
+ needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')),
is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False),
# 'comment_count': metadata.get('numComments'), # Data provided by website is wrong
'__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None,
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 8146b3ef5..64b8a71b6 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -59,8 +59,16 @@ class SoundcloudEmbedIE(InfoExtractor):
class SoundcloudBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'soundcloud'
+
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/'
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
+ _access_token = None
+ _HEADERS = {}
def _store_client_id(self, client_id):
self._downloader.cache.store('soundcloud', 'client_id', client_id)
@@ -103,14 +111,6 @@ class SoundcloudBaseIE(InfoExtractor):
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
self._login()
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
- _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
- _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
- _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
- _access_token = None
- _HEADERS = {}
- _NETRC_MACHINE = 'soundcloud'
-
def _login(self):
username, password = self._get_login_info()
if username is None:
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index daf1c7450..4bc2263f0 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -67,6 +67,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'series': 'The Witcher',
'season': 'Misc',
'episode_number': 13,
+ 'episode': 'Episode 13',
},
},
{
@@ -92,6 +93,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'series': 'Arma 3',
'season': 'Zeus Games',
'episode_number': 3,
+ 'episode': 'Episode 3',
},
},
]
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 620973a9f..56cc2dcc6 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -15,6 +15,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
HEADRequest,
+ get_first,
int_or_none,
join_nonempty,
LazyList,
@@ -816,8 +817,7 @@ class DouyinIE(TikTokIE):
render_data = self._parse_json(
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
- return self._parse_aweme_video_web(
- traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url)
+ return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
class TikTokVMIE(InfoExtractor):
diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py
new file mode 100644
index 000000000..9832d2398
--- /dev/null
+++ b/yt_dlp/extractor/xinpianchang.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+ update_url_query,
+ url_or_none,
+)
+
+
+class XinpianchangIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
+ IE_NAME = 'xinpianchang'
+ IE_DESC = 'xinpianchang.com'
+ _TESTS = [{
+ 'url': 'https://www.xinpianchang.com/a11766551',
+ 'info_dict': {
+ 'id': 'a11766551',
+ 'ext': 'mp4',
+ 'title': '北京2022冬奥会闭幕式再见短片-冰墩墩下班了',
+ 'description': 'md5:4a730c10639a82190fabe921c0fa4b87',
+ 'duration': 151,
+ 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+ 'uploader': '正时文创',
+ 'uploader_id': 10357277,
+ 'categories': ['宣传片', '国家城市', '广告', '其他'],
+ 'keywords': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火']
+ },
+ }, {
+ 'url': 'https://www.xinpianchang.com/a11762904',
+ 'info_dict': {
+ 'id': 'a11762904',
+ 'ext': 'mp4',
+ 'title': '冬奥会决胜时刻《法国派出三只鸡?》',
+ 'description': 'md5:55cb139ef8f48f0c877932d1f196df8b',
+ 'duration': 136,
+ 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+ 'uploader': '精品动画',
+ 'uploader_id': 10858927,
+ 'categories': ['动画', '三维CG'],
+ 'keywords': ['France Télévisions', '法国3台', '蠢萌', '冬奥会']
+ },
+ }, {
+ 'url': 'https://www.xinpianchang.com/a11779743?from=IndexPick&part=%E7%BC%96%E8%BE%91%E7%B2%BE%E9%80%89&index=2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id=video_id)
+ domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
+ vid = self.find_value_with_regex(var='vid', webpage=webpage)
+ app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
+ api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
+ data = self._download_json(api, video_id=video_id)['data']
+ formats, subtitles = [], {}
+ for k, v in data.get('resource').items():
+ if k in ('dash', 'hls'):
+ v_url = v.get('url')
+ if not v_url:
+ continue
+ if k == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(v_url, video_id=video_id)
+ elif k == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(v_url, video_id=video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif k == 'progressive':
+ formats.extend([{
+ 'url': url_or_none(prog.get('url')),
+ 'width': int_or_none(prog.get('width')),
+ 'height': int_or_none(prog.get('height')),
+ 'ext': 'mp4',
+ } for prog in v if prog.get('url') or []])
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'duration': int_or_none(data.get('duration')),
+ 'categories': data.get('categories'),
+ 'keywords': data.get('keywords'),
+ 'thumbnail': data.get('cover'),
+ 'uploader': try_get(data, lambda x: x['owner']['username']),
+ 'uploader_id': try_get(data, lambda x: x['owner']['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def find_value_with_regex(self, var, webpage):
+ return self._search_regex(rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"', webpage, name=var)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index ee0277fd7..66bb8d9f0 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -39,6 +39,7 @@ from ..utils import (
ExtractorError,
float_or_none,
format_field,
+ get_first,
int_or_none,
is_html,
join_nonempty,
@@ -72,10 +73,6 @@ from ..utils import (
)
-def get_first(obj, keys, **kwargs):
- return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
-
-
# any clients starting with _ cannot be explicity requested by the user
INNERTUBE_CLIENTS = {
'web': {
@@ -2081,7 +2078,93 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'age_limit': 0,
'channel_follower_count': int
}, 'params': {'format': 'mhtml', 'skip_download': True}
- }
+ }, {
+ # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
+ 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
+ 'info_dict': {
+ 'id': '2NUZ8W2llS4',
+ 'ext': 'mp4',
+ 'title': 'The NP that test your phone performance 🙂',
+ 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
+ 'uploader': 'Leon Nguyen',
+ 'uploader_id': 'VNSXIII',
+ 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
+ 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
+ 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
+ 'duration': 21,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Gaming'],
+ 'tags': 'count:23',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'upload_date': '20220103',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Leon Nguyen',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'channel_follower_count': int
+ }
+ }, {
+ # date text is premiered video, ensure upload date in UTC (published 1641172509)
+ 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
+ 'info_dict': {
+ 'id': 'mzZzzBU6lrM',
+ 'ext': 'mp4',
+ 'title': 'I Met GeorgeNotFound In Real Life...',
+ 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
+ 'uploader': 'Quackity',
+ 'uploader_id': 'QuackityHQ',
+ 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
+ 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
+ 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
+ 'duration': 955,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Entertainment'],
+ 'tags': 'count:26',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'release_timestamp': 1641172509,
+ 'release_date': '20220103',
+ 'upload_date': '20220103',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Quackity',
+ 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
+ 'channel_follower_count': int
+ }
+ },
+ { # continuous livestream. Microformat upload date should be preferred.
+ # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
+ 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+ 'info_dict': {
+ 'id': 'kgx4WGK0oNU',
+ 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'ext': 'mp4',
+ 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+ 'availability': 'public',
+ 'age_limit': 0,
+ 'release_timestamp': 1637975704,
+ 'upload_date': '20210619',
+ 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+ 'live_status': 'is_live',
+ 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
+ 'uploader': '阿鲍Abao',
+ 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+ 'channel': 'Abao in Tokyo',
+ 'channel_follower_count': int,
+ 'release_date': '20211127',
+ 'tags': 'count:39',
+ 'categories': ['People & Blogs'],
+ 'like_count': int,
+ 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+ 'view_count': int,
+ 'playable_in_embed': True,
+ 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+ },
+ 'params': {'skip_download': True}
+ },
]
@classmethod
@@ -3008,6 +3091,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
+ if is_damaged:
+ self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@@ -3027,7 +3112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''),
'language_preference': language_preference,
- 'preference': -10 if is_damaged else None,
+ # Strictly de-prioritize damaged and 3gp formats
+ 'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -3336,9 +3422,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# URL checking if user don't care about getting the best possible thumbnail
'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
'description': video_description,
- 'upload_date': unified_strdate(
- get_first(microformats, 'uploadDate')
- or search_meta('uploadDate')),
'uploader': get_first(video_details, 'author'),
'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
'uploader_url': owner_profile_url,
@@ -3489,6 +3572,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for content in contents:
vpir = content.get('videoPrimaryInfoRenderer')
if vpir:
+ info['upload_date'] = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
stl = vpir.get('superTitleLink')
if stl:
stl = self._get_text(stl)
@@ -3567,6 +3651,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_id': 'uploader_id',
'channel_url': 'uploader_url',
}
+
+ # The upload date for scheduled and current live streams / premieres in microformats
+ # is generally the true upload date. Although not in UTC, we will prefer that in this case.
+ # Note this changes to the published date when the stream/premiere has finished.
+ # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
+ if not info.get('upload_date') or info.get('is_live') or info.get('live_status') == 'is_upcoming':
+ info['upload_date'] = (
+ unified_strdate(get_first(microformats, 'uploadDate'))
+ or unified_strdate(search_meta('uploadDate'))
+ or info.get('upload_date'))
+
for to, frm in fallbacks.items():
if not info.get(to):
info[to] = info.get(frm)
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
index f84ba5cff..419bf30d8 100644
--- a/yt_dlp/extractor/zingmp3.py
+++ b/yt_dlp/extractor/zingmp3.py
@@ -9,7 +9,6 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
- HEADRequest,
)
@@ -106,18 +105,17 @@ class ZingMp3BaseIE(InfoExtractor):
def _real_initialize(self):
if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'):
- self._request_webpage(HEADRequest(self._DOMAIN), None, note='Updating cookies')
+ self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}),
+ None, note='Updating cookies')
def _real_extract(self, url):
song_id, type_url = self._match_valid_url(url).group('id', 'type')
-
api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id})
-
return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url)
def get_api_with_signature(self, name_api, param):
- sha256 = hashlib.sha256(''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest()
-
+ param.update({'ctime': '1'})
+ sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest()
data = {
'apiKey': self._API_KEY,
'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(),
@@ -149,7 +147,7 @@ class ZingMp3IE(ZingMp3BaseIE):
},
}, {
'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
- 'md5': 'e9c972b693aa88301ef981c8151c4343',
+ 'md5': 'c7f23d971ac1a4f675456ed13c9b9612',
'info_dict': {
'id': 'ZO8ZF7C7',
'title': 'Sương Hoa Đưa Lối',
@@ -158,6 +156,22 @@ class ZingMp3IE(ZingMp3BaseIE):
'duration': 207,
'track': 'Sương Hoa Đưa Lối',
'artist': 'K-ICM, RYO',
+ 'album': 'Sương Hoa Đưa Lối (Single)',
+ 'album_artist': 'K-ICM, RYO',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/bai-hat/Nguoi-Yeu-Toi-Lanh-Lung-Sat-Da-Mr-Siro/ZZ6IW7OU.html',
+ 'md5': '3e9f7a9bd0d965573dbff8d7c68b629d',
+ 'info_dict': {
+ 'id': 'ZZ6IW7OU',
+ 'title': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 303,
+ 'track': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
+ 'artist': 'Mr. Siro',
+ 'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)',
+ 'album_artist': 'Mr. Siro',
},
}, {
'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',
@@ -184,6 +198,14 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
},
'playlist_count': 9,
}, {
+ 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ZWZAEZZD',
+ 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
+ },
+ 'playlist_count': 49,
+ }, {
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
'only_matching': True,
}, {