| author | Jesús <heckyel@hyperbola.info> | 2022-05-17 10:10:39 +0800 |
|---|---|---|
| committer | Jesús <heckyel@hyperbola.info> | 2022-05-17 10:10:39 +0800 |
| commit | 4bbf329feb5a820ac21269fa426c95ca14d7af25 (patch) | |
| tree | 2c147a162b4bddc7862ed5895f1f66edd9a675e8 /yt_dlp/extractor | |
| parent | e21342911839b7796a5c788a7c3f13b06d975c64 (diff) | |
| parent | 5faf6528fb701724ac32e0a487f92281c7800bda (diff) | |
| download | hypervideo-pre-4bbf329feb5a820ac21269fa426c95ca14d7af25.tar.lz hypervideo-pre-4bbf329feb5a820ac21269fa426c95ca14d7af25.tar.xz hypervideo-pre-4bbf329feb5a820ac21269fa426c95ca14d7af25.zip |
updated from upstream | 17/05/2022 at 10:10
Diffstat (limited to 'yt_dlp/extractor')
938 files changed, 2853 insertions, 4501 deletions
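Most of this upstream sync is mechanical cleanup: the `# coding: utf-8` and `from __future__ import unicode_literals` headers are dropped from nearly every extractor, several `compat_*` shims are replaced by their stdlib equivalents (`urllib.response`, `uuid`, `xml.etree.ElementTree`), and the lazy-extractor import in `yt_dlp/extractor/__init__.py` switches from a `try`/`except ImportError` block to `contextlib.suppress`. A minimal, self-contained sketch of that suppress pattern (the `lazy_extractors` module name here is only illustrative, not the real package layout):

```python
import contextlib

loaded = False
with contextlib.suppress(ImportError):
    # Equivalent to try/except ImportError: pass — if the optional module
    # is missing, the block exits quietly and `loaded` stays False.
    import lazy_extractors  # hypothetical optional module
    loaded = True

print('lazy extractors available:', loaded)
```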
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py index b35484246..afd3d05ac 100644 --- a/yt_dlp/extractor/__init__.py +++ b/yt_dlp/extractor/__init__.py @@ -1,24 +1,23 @@ +import contextlib import os from ..utils import load_plugins _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): - try: - from .lazy_extractors import * + with contextlib.suppress(ImportError): + from .lazy_extractors import * # noqa: F403 from .lazy_extractors import _ALL_CLASSES _LAZY_LOADER = True - except ImportError: - pass if not _LAZY_LOADER: - from .extractors import * - _ALL_CLASSES = [ + from .extractors import * # noqa: F403 + _ALL_CLASSES = [ # noqa: F811 klass for name, klass in globals().items() if name.endswith('IE') and name != 'GenericIE' ] - _ALL_CLASSES.append(GenericIE) + _ALL_CLASSES.append(GenericIE) # noqa: F405 _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES @@ -38,15 +37,17 @@ def gen_extractors(): return [klass() for klass in gen_extractor_classes()] -def list_extractors(age_limit): - """ - Return a list of extractors that are suitable for the given age, - sorted by extractor ID. - """ +def list_extractor_classes(age_limit=None): + """Return a list of extractors that are suitable for the given age, sorted by extractor name""" + yield from sorted(filter( + lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405 + gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) + yield GenericIE # noqa: F405 + - return sorted( - filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), - key=lambda ie: ie.IE_NAME.lower()) +def list_extractors(age_limit=None): + """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" + return [ie() for ie in list_extractor_classes(age_limit)] def get_info_extractor(ie_name): diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 6fe195e82..03f10ab23 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import hashlib import hmac import re diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py index 296b8cec1..a57295b13 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .amp import AMPIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 5bff46634..44a9f8ca5 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index a839f0c1f..1b9deeae8 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -1,37 +1,34 @@ -import io -import json -import time +import base64 +import binascii import hashlib import hmac +import io +import json import re import struct -from base64 import urlsafe_b64encode -from binascii import unhexlify +import time +import urllib.response +import uuid from .common import InfoExtractor from ..aes import aes_ecb_decrypt -from ..compat import ( - compat_urllib_response, - compat_urllib_parse_urlparse, - compat_urllib_request, -) +from ..compat import compat_urllib_parse_urlparse, compat_urllib_request from ..utils 
import ( ExtractorError, + bytes_to_intlist, decode_base, int_or_none, - random_uuidv4, + intlist_to_bytes, request_to_url, time_seconds, - update_url_query, traverse_obj, - intlist_to_bytes, - bytes_to_intlist, + update_url_query, urljoin, ) - # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) + def add_opener(ydl, handler): ''' Add a handler for opening URLs, like _download_webpage ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 @@ -130,7 +127,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler): encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) h = hmac.new( - unhexlify(self.HKEY), + binascii.unhexlify(self.HKEY), (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -141,7 +138,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler): url = request_to_url(url) ticket = compat_urllib_parse_urlparse(url).netloc response_data = self._get_videokey_from_ticket(ticket) - return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={ + return urllib.response.addinfourl(io.BytesIO(response_data), headers={ 'Content-Length': len(response_data), }, url=url, code=200) @@ -238,7 +235,7 @@ class AbemaTVIE(AbemaTVBaseIE): def mix_twist(nonce): nonlocal tmp - mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce) + mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce) mix_once(self._SECRETKEY) mix_tmp(time_struct.tm_mon) @@ -247,13 +244,13 @@ class AbemaTVIE(AbemaTVBaseIE): mix_twist(ts_1hour_str) mix_tmp(time_struct.tm_hour % 5) - return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') + return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') def _get_device_token(self): if self._USERTOKEN: return self._USERTOKEN - self._DEVICE_ID = random_uuidv4() + self._DEVICE_ID = str(uuid.uuid4()) aks = self._generate_aks(self._DEVICE_ID) user_data = self._download_json( 'https://api.abema.io/v1/users', None, note='Authorizing', diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py index 34095501c..d9691cb5c 100644 --- a/yt_dlp/extractor/academicearth.py +++ b/yt_dlp/extractor/academicearth.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/acast.py b/yt_dlp/extractor/acast.py index 63587c5cf..f2f828f8e 100644 --- a/yt_dlp/extractor/acast.py +++ b/yt_dlp/extractor/acast.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index fca6e605d..18ddc5729 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import binascii import json @@ -85,7 +82,7 @@ class ADNIE(InfoExtractor): # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( compat_b64decode(enc_subtitles[24:]), - binascii.unhexlify(self._K + 'ab9f52f5baae7c72'), + binascii.unhexlify(self._K + '7fac1178830cfe0c'), compat_b64decode(enc_subtitles[:24]))) subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False) if not subtitles_json: diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py index e2e6f93f3..8963b128a 
100644 --- a/yt_dlp/extractor/adobeconnect.py +++ b/yt_dlp/extractor/adobeconnect.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 5d98301b8..a8e6c4363 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1,14 +1,11 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re import time +import urllib.error import xml.etree.ElementTree as etree from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_urlparse, compat_getpass ) @@ -1365,7 +1362,7 @@ class AdobePassIE(InfoExtractor): headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers return super(AdobePassIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): @@ -1439,27 +1436,29 @@ class AdobePassIE(InfoExtractor): if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None if not authn_token: - # TODO add support for other TV Providers mso_id = self.get_param('ap_mso') - if not mso_id: - raise_mvpd_required() - username, password = self._get_login_info('ap_username', 'ap_password', mso_id) - if not username or not password: - raise_mvpd_required() - mso_info = MSO_INFO[mso_id] + if mso_id: + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: + raise_mvpd_required() + mso_info = MSO_INFO[mso_id] - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }) + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + }) + elif not self._cookies_passed: + raise_mvpd_required() - if mso_id == 'Comcast_SSO': + if not mso_id: + pass + elif mso_id == 'Comcast_SSO': # Comcast page flow varies by video site and whether you # are on Comcast's network. 
provider_redirect_page, urlh = provider_redirect_page_res @@ -1715,12 +1714,17 @@ class AdobePassIE(InfoExtractor): if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', - 'requestor_id': requestor_id, - }), headers=mvpd_headers) + try: + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + except ExtractorError as e: + if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401: + raise_mvpd_required() + raise if '<pendingLogout' in session: self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 3cfa1ff55..941254243 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index c97cfc161..1368954bc 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index 8025de5a3..86a10f2dc 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .theplatform import ThePlatformIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 28946e9dd..b0fd158f6 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,11 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re from .common import InfoExtractor -from ..compat import compat_xpath from ..utils import ( ExtractorError, OnDemandPagedList, @@ -282,7 +278,7 @@ class AfreecaTVIE(InfoExtractor): else: raise ExtractorError('Unable to download video info') - video_element = video_xml.findall(compat_xpath('./track/video'))[-1] + video_element = video_xml.findall('./track/video')[-1] if video_element is None or video_element.text is None: raise ExtractorError( 'Video %s does not exist' % video_id, expected=True) @@ -312,7 +308,7 @@ class AfreecaTVIE(InfoExtractor): if not video_url: entries = [] - file_elements = video_element.findall(compat_xpath('./file')) + file_elements = video_element.findall('./file') one = len(file_elements) == 1 for file_num, file_element in enumerate(file_elements, start=1): file_url = url_or_none(file_element.text) diff --git a/yt_dlp/extractor/airmozilla.py b/yt_dlp/extractor/airmozilla.py index 9e38136b4..669556b98 100644 --- a/yt_dlp/extractor/airmozilla.py +++ b/yt_dlp/extractor/airmozilla.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 9722fe9ac..2e83f2eb6 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import 
compat_str from ..utils import ( diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 7bcdb7afb..124bab0d9 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 403a277e9..1f881e2a0 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index 3a6d99f6b..8d5b472d3 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index 4aae6fe74..d16ab496e 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index d2e2df270..b76ccb2a1 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 61d469574..5018710e0 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE from .vimeo import VimeoIE diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 07b1b1861..de4917adc 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index e38e215d3..e04ecf65f 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .theplatform import ThePlatformIE diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index 6e6099a03..f5747cf1e 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 24c684cad..73b72b085 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py index 1c2cc47dd..fe2b70aed 100644 --- a/yt_dlp/extractor/animelab.py +++ b/yt_dlp/extractor/animelab.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( @@ -56,11 +53,6 @@ class AnimeLabBaseIE(InfoExtractor): class 
AnimeLabIE(AnimeLabBaseIE): _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)' - # the following tests require authentication, but a free account will suffice - # just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file - # or you can set 'username' and 'password' there - # the tests also select a specific format so that the same video is downloaded - # regardless of whether the user is premium or not (needs testing on a premium account) _TEST = { 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42', 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f', @@ -79,9 +71,9 @@ class AnimeLabIE(AnimeLabBaseIE): 'season_id': '38', }, 'params': { + # Ensure the same video is downloaded whether the user is premium or not 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]', }, - 'skip': 'All AnimeLab content requires authentication', } def _real_extract(self, url): diff --git a/yt_dlp/extractor/animeondemand.py b/yt_dlp/extractor/animeondemand.py index 2e674d58f..de49db4ea 100644 --- a/yt_dlp/extractor/animeondemand.py +++ b/yt_dlp/extractor/animeondemand.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index 1075b461e..cd0f36856 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import urllib.parse diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0d444fc33..09dfffdb0 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib import json @@ -8,6 +5,7 @@ import random import re import time +from .anvato_token_generator import NFLTokenGenerator from .common import InfoExtractor from ..aes import aes_encrypt from ..compat import compat_str @@ -22,16 +20,6 @@ from ..utils import ( unsmuggle_url, ) -# This import causes a ModuleNotFoundError on some systems for unknown reason. 
-# See issues: -# https://github.com/yt-dlp/yt-dlp/issues/35 -# https://github.com/ytdl-org/youtube-dl/issues/27449 -# https://github.com/animelover1984/youtube-dl/issues/17 -try: - from .anvato_token_generator import NFLTokenGenerator -except ImportError: - NFLTokenGenerator = None - def md5_text(s): if not isinstance(s, compat_str): diff --git a/yt_dlp/extractor/anvato_token_generator/__init__.py b/yt_dlp/extractor/anvato_token_generator/__init__.py index 6e223db9f..6530caf53 100644 --- a/yt_dlp/extractor/anvato_token_generator/__init__.py +++ b/yt_dlp/extractor/anvato_token_generator/__init__.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .nfl import NFLTokenGenerator __all__ = [ diff --git a/yt_dlp/extractor/anvato_token_generator/common.py b/yt_dlp/extractor/anvato_token_generator/common.py index b959a903b..3800b5808 100644 --- a/yt_dlp/extractor/anvato_token_generator/common.py +++ b/yt_dlp/extractor/anvato_token_generator/common.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - class TokenGenerator: def generate(self, anvack, mcp_id): raise NotImplementedError('This method must be implemented by subclasses') diff --git a/yt_dlp/extractor/anvato_token_generator/nfl.py b/yt_dlp/extractor/anvato_token_generator/nfl.py index 97a2b245f..9ee4aa002 100644 --- a/yt_dlp/extractor/anvato_token_generator/nfl.py +++ b/yt_dlp/extractor/anvato_token_generator/nfl.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import TokenGenerator diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 4766a2c77..b67db2adc 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .yahoo import YahooIE diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1736cdf56..847be6edf 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 1057233cf..cd6cd1c79 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( get_element_by_id, diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index 494f8330c..d00b0f906 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( str_to_int, diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 9139ff777..49bbeab82 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 8140e332b..6b63f070d 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2ab3c1beb..c85d5297d 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,6 +1,3 @@ -# 
coding: utf-8 -from __future__ import unicode_literals - import re import json from .common import InfoExtractor @@ -479,7 +476,7 @@ class YoutubeWebArchiveIE(InfoExtractor): def _extract_yt_initial_variable(self, webpage, regex, video_id, name): return self._parse_json(self._search_regex( - (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), + (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex), webpage, name, default='{}'), video_id, fatal=False) def _extract_webpage_title(self, webpage): @@ -597,7 +594,7 @@ class YoutubeWebArchiveIE(InfoExtractor): response = self._call_cdx_api( video_id, f'https://www.youtube.com/watch?v={video_id}', filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or [] - all_captures = sorted([int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None]) + all_captures = sorted(int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None) # Prefer the new polymer UI captures as we support extracting more metadata from them # WBM captures seem to all switch to this layout ~July 2020 diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index 8880e5c95..2e3f3cc5f 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 7ea339b39..f294679ef 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index 4f4f457c1..9da2bfd5e 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 050c252e3..96b134fa0 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index c2f2c1bd3..443b0d4b9 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/asiancrush.py b/yt_dlp/extractor/asiancrush.py index 7f1940fca..23f310edb 100644 --- a/yt_dlp/extractor/asiancrush.py +++ b/yt_dlp/extractor/asiancrush.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 465af4ed3..39d1f1cc5 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/atttechchannel.py b/yt_dlp/extractor/atttechchannel.py index 8f93fb353..6ff4ec0ad 100644 --- a/yt_dlp/extractor/atttechchannel.py +++ b/yt_dlp/extractor/atttechchannel.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import 
unified_strdate diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 481a09737..2311837e9 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime from .common import InfoExtractor diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index 6bd48ef15..c1c4f67d0 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index c51837b40..dc19a3874 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 19775cf0f..5c4160fe4 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import time diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index fa64995d5..189d1224f 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index f5e559c9f..d289f6be3 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 from .common import InfoExtractor diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index dccfeaf73..c2b22922b 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import hashlib import hmac diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index 0168340b9..d1686eed6 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 364fd9459..8786d67e0 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import unescapeHTML diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 3d4d36ec3..92f567c5d 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import math from .common import InfoExtractor diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py index f1bcdef7a..2e3233376 100644 --- a/yt_dlp/extractor/bandaichannel.py +++ b/yt_dlp/extractor/bandaichannel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from ..utils import extract_attributes diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 745055e2d..6f806d84e 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -1,6 +1,3 @@ -# 
coding: utf-8 -from __future__ import unicode_literals - import random import re import time @@ -439,7 +436,7 @@ class BandcampUserIE(InfoExtractor): uploader = self._match_id(url) webpage = self._download_webpage(url, uploader) - discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage) + discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage) or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)) return self.playlist_from_matches( diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 3db1151f6..ec9bdd8ca 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 29ad7ded7..9cb019a49 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1,6 +1,4 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import xml.etree.ElementTree import functools import itertools import json @@ -8,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_etree_Element, compat_HTTPError, compat_str, compat_urllib_error, @@ -318,7 +315,7 @@ class BBCCoUkIE(InfoExtractor): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, compat_etree_Element): + if not isinstance(captions, xml.etree.ElementTree.Element): continue subtitles['en'] = [ { diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index e1cf8b4fe..f71f1f308 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 717fff3a6..5957e370a 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 2c97f9817..ca4498150 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import url_basename diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 904c17ed0..8f9849d9b 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index 2c7144235..6b867d135 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/bfi.py b/yt_dlp/extractor/bfi.py index 60c8944b5..76f0516a4 100644 --- a/yt_dlp/extractor/bfi.py +++ b/yt_dlp/extractor/bfi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index 501f69d80..48526e38b 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -1,6 
+1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py index 56c2bfee8..fd20aadad 100644 --- a/yt_dlp/extractor/bibeltv.py +++ b/yt_dlp/extractor/bibeltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 28e3e59f6..6b2797ca0 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index ddf76ac55..f39e15002 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, urlencode_postdata diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index b8dfbd42b..f3dea33c4 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a9574758c..ead0dd88b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import base64 import hashlib import itertools @@ -20,6 +18,7 @@ from ..utils import ( float_or_none, mimetype2ext, parse_iso8601, + qualities, traverse_obj, parse_count, smuggle_url, @@ -998,3 +997,88 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'), categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none), thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view'))) + + +class BiliLiveIE(InfoExtractor): + _VALID_URL = r'https?://live.bilibili.com/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://live.bilibili.com/196', + 'info_dict': { + 'id': '33989', + 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)", + 'ext': 'flv', + 'title': "太空狼人杀联动,不被爆杀就算赢", + 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg", + 'timestamp': 1650802769, + }, + 'skip': 'not live' + }, { + 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', + 'only_matching': True + }] + + _FORMATS = { + 80: {'format_id': 'low', 'format_note': '流畅'}, + 150: {'format_id': 'high_res', 'format_note': '高清'}, + 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}, + 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}, + 10000: {'format_id': 'source', 'format_note': '原画'}, + 20000: {'format_id': '4K', 'format_note': '4K'}, + 30000: {'format_id': 'dolby', 'format_note': '杜比'}, + } + + _quality = staticmethod(qualities(list(_FORMATS))) + + def _call_api(self, path, room_id, query): + api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query) + if api_result.get('code') != 0: + raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata') + return api_result.get('data') or {} + + def _parse_formats(self, qn, fmt): + for 
codec in fmt.get('codec') or []: + if codec.get('current_qn') != qn: + continue + for url_info in codec['url_info']: + yield { + 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}', + 'ext': fmt.get('format_name'), + 'vcodec': codec.get('codec_name'), + 'quality': self._quality(qn), + **self._FORMATS[qn], + } + + def _real_extract(self, url): + room_id = self._match_id(url) + room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id}) + if room_data.get('live_status') == 0: + raise ExtractorError('Streamer is not live', expected=True) + + formats = [] + for qn in self._FORMATS.keys(): + stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { + 'room_id': room_id, + 'qn': qn, + 'codec': '0,1', + 'format': '0,2', + 'mask': '0', + 'no_playurl': '0', + 'platform': 'web', + 'protocol': '0,1', + }) + for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []: + formats.extend(self._parse_formats(qn, fmt)) + self._sort_formats(formats) + + return { + 'id': room_id, + 'title': room_data.get('title'), + 'description': room_data.get('description'), + 'thumbnail': room_data.get('user_cover'), + 'timestamp': stream_data.get('live_time'), + 'formats': formats, + 'http_headers': { + 'Referer': url, + }, + } diff --git a/yt_dlp/extractor/biobiochiletv.py b/yt_dlp/extractor/biobiochiletv.py index dc86c57c5..180c9656e 100644 --- a/yt_dlp/extractor/biobiochiletv.py +++ b/yt_dlp/extractor/biobiochiletv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py index 2b57bade3..3a4234491 100644 --- a/yt_dlp/extractor/biqle.py +++ b/yt_dlp/extractor/biqle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .vk import VKIE from ..compat import compat_b64decode diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index dcae6f4cc..c831092d4 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/bitwave.py b/yt_dlp/extractor/bitwave.py index e6e093f59..bd8eac1f1 100644 --- a/yt_dlp/extractor/bitwave.py +++ b/yt_dlp/extractor/bitwave.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8ae294198..8f41c897a 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import parse_iso8601 diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index d1bf8e829..8d8fabe33 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .amp import AMPIE from ..utils import ( diff --git a/yt_dlp/extractor/blinkx.py b/yt_dlp/extractor/blinkx.py deleted file mode 100644 index d70a3b30f..000000000 --- a/yt_dlp/extractor/blinkx.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor 
-from ..utils import ( - remove_start, - int_or_none, -) - - -class BlinkxIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' - IE_NAME = 'blinkx' - - _TEST = { - 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', - 'md5': '337cf7a344663ec79bf93a526a2e06c7', - 'info_dict': { - 'id': 'Da0Gw3xc', - 'ext': 'mp4', - 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', - 'uploader': 'IGN News', - 'upload_date': '20150217', - 'timestamp': 1424215740, - 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', - 'duration': 47.743333, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - display_id = video_id[:8] - - api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' - + 'video=%s' % video_id) - data_json = self._download_webpage(api_url, display_id) - data = json.loads(data_json)['api']['results'][0] - duration = None - thumbnails = [] - formats = [] - for m in data['media']: - if m['type'] == 'jpg': - thumbnails.append({ - 'url': m['link'], - 'width': int(m['w']), - 'height': int(m['h']), - }) - elif m['type'] == 'original': - duration = float(m['d']) - elif m['type'] == 'youtube': - yt_id = m['link'] - self.to_screen('Youtube video detected: %s' % yt_id) - return self.url_result(yt_id, 'Youtube', video_id=yt_id) - elif m['type'] in ('flv', 'mp4'): - vcodec = remove_start(m['vcodec'], 'ff') - acodec = remove_start(m['acodec'], 'ff') - vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) - abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) - tbr = vbr + abr if vbr and abr else None - format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) - formats.append({ - 'format_id': format_id, - 'url': m['link'], - 'vcodec': vcodec, - 'acodec': acodec, - 'abr': abr, - 'vbr': vbr, - 'tbr': tbr, - 'width': int_or_none(m.get('w')), - 'height': int_or_none(m.get('h')), - }) - - self._sort_formats(formats) - - return { - 'id': display_id, - 'fullid': video_id, - 'title': data['title'], - 'formats': formats, - 'uploader': data.get('channel_name'), - 'timestamp': data.get('pubdate_epoch'), - 'description': data.get('description'), - 'thumbnails': thumbnails, - 'duration': duration, - } diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index dba131cb0..d7aa7f94e 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from ..utils import ( diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 2fbfad1ba..c0aaeae02 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index 6a89d36f4..0c081750e 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ExtractorError diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index 4e346e7b6..cbef0fc53 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,6 +1,3 @@ -from __future__ import 
unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 57882fbee..92f8ea2cb 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 8214086a6..5842de88a 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py index 8f6ef3cf0..388f1f94f 100644 --- a/yt_dlp/extractor/bpb.py +++ b/yt_dlp/extractor/bpb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -16,7 +13,6 @@ class BpbIE(InfoExtractor): _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', - # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', 'info_dict': { 'id': '297', diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 0155827d8..faac442e8 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 139d51c09..d4895848e 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE diff --git a/yt_dlp/extractor/breakcom.py b/yt_dlp/extractor/breakcom.py index f38789f99..51c8c822f 100644 --- a/yt_dlp/extractor/breakcom.py +++ b/yt_dlp/extractor/breakcom.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index e029aa627..a2b04fcce 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index dcd332b43..936c34e15 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,9 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re import struct +import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor @@ -12,7 +10,6 @@ from ..compat import ( compat_HTTPError, compat_parse_qs, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( clean_html, @@ -166,7 +163,7 @@ class BrightcoveLegacyIE(InfoExtractor): try: object_doc = compat_etree_fromstring(object_str.encode('utf-8')) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: return fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py index 73a57b1e4..4b3f5e68b 100644 --- a/yt_dlp/extractor/businessinsider.py +++ b/yt_dlp/extractor/businessinsider.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/buzzfeed.py b/yt_dlp/extractor/buzzfeed.py index ec411091e..1b4cba63e 100644 --- a/yt_dlp/extractor/buzzfeed.py +++ b/yt_dlp/extractor/buzzfeed.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index f4d5086ed..eca2e294e 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index a853c530c..1d98ea598 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py index 77efdf45a..3200b5677 100644 --- a/yt_dlp/extractor/cableav.py +++ b/yt_dlp/extractor/cableav.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index 1f3b7cfff..fc5da7028 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index 9ac740f7e..e52dfb170 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 2a3931fd0..4256b28e0 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py index 8f0c6c545..c7079e422 100644 --- a/yt_dlp/extractor/camdemy.py +++ b/yt_dlp/extractor/camdemy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 3dc19377b..32fbffcc2 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/camwithher.py b/yt_dlp/extractor/camwithher.py index bbc5205fd..a0b3749ed 100644 --- a/yt_dlp/extractor/camwithher.py +++ b/yt_dlp/extractor/camwithher.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 0365cb2f6..f2ec9355f 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index 407cc8084..c9bb94c40 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import 
InfoExtractor diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index 211ea267a..b184398e2 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( # ExtractorError, diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 8b9903774..8eff4a57c 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -1,4 +1,3 @@ -from __future__ import unicode_literals import json diff --git a/yt_dlp/extractor/carambatv.py b/yt_dlp/extractor/carambatv.py index 7e5cc90fb..087ea8aa0 100644 --- a/yt_dlp/extractor/carambatv.py +++ b/yt_dlp/extractor/carambatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py index 48b33617f..4dd7ac46d 100644 --- a/yt_dlp/extractor/cartoonnetwork.py +++ b/yt_dlp/extractor/cartoonnetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .turner import TurnerBaseIE from ..utils import int_or_none diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index fba8bf965..cac3f1e9d 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json import base64 diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index 2af36ea82..e32539c9e 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .theplatform import ThePlatformFeedIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cbsinteractive.py b/yt_dlp/extractor/cbsinteractive.py index 9d4f75435..7abeecf78 100644 --- a/yt_dlp/extractor/cbsinteractive.py +++ b/yt_dlp/extractor/cbsinteractive.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .cbs import CBSIE from ..utils import int_or_none diff --git a/yt_dlp/extractor/cbslocal.py b/yt_dlp/extractor/cbslocal.py index 3b7e1a8b9..c6495c95f 100644 --- a/yt_dlp/extractor/cbslocal.py +++ b/yt_dlp/extractor/cbslocal.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py index 1285ed65e..76925b4f9 100644 --- a/yt_dlp/extractor/cbsnews.py +++ b/yt_dlp/extractor/cbsnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import zlib diff --git a/yt_dlp/extractor/cbssports.py b/yt_dlp/extractor/cbssports.py index b8a6e5967..56a255149 100644 --- a/yt_dlp/extractor/cbssports.py +++ b/yt_dlp/extractor/cbssports.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - # from .cbs import CBSBaseIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index 36e6dff72..b11e1f74e 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index 9dbaabfa0..ca739f8a1 100644 --- 
a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 0ed5f327b..623cbb342 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 72c47050f..9b257bee9 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import codecs import re import json diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index ddf66b207..331b350f1 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py index 89f173887..aaafa02d1 100644 --- a/yt_dlp/extractor/cgtn.py +++ b/yt_dlp/extractor/cgtn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/channel9.py b/yt_dlp/extractor/channel9.py index 90024dbba..90a1ab2be 100644 --- a/yt_dlp/extractor/channel9.py +++ b/yt_dlp/extractor/channel9.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/charlierose.py b/yt_dlp/extractor/charlierose.py index 42c9af263..27f8b33e5 100644 --- a/yt_dlp/extractor/charlierose.py +++ b/yt_dlp/extractor/charlierose.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import remove_end diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 8da51f919..d39210bf7 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/chilloutzone.py b/yt_dlp/extractor/chilloutzone.py index fd5202b9e..1a2f77c4e 100644 --- a/yt_dlp/extractor/chilloutzone.py +++ b/yt_dlp/extractor/chilloutzone.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index e6841fb8b..7e8c0bfc9 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json @@ -48,6 +45,8 @@ class ChingariBaseIE(InfoExtractor): return { 'id': id, + 'extractor_key': ChingariIE.ie_key(), + 'extractor': 'Chingari', 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), 'duration': media_data.get('duration'), @@ -105,11 +104,11 @@ class ChingariUserIE(ChingariBaseIE): _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' _TESTS = [{ 'url': 'https://chingari.io/dada1023', - 'playlist_mincount': 3, 'info_dict': { 'id': 'dada1023', }, - 'entries': [{ + 'params': {'playlistend': 3}, + 'playlist': [{ 'url': 
'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a', 'info_dict': { 'id': '614781f3ade60b3a0bfff42a', diff --git a/yt_dlp/extractor/chirbit.py b/yt_dlp/extractor/chirbit.py index 8d75cdf19..452711d97 100644 --- a/yt_dlp/extractor/chirbit.py +++ b/yt_dlp/extractor/chirbit.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cinchcast.py b/yt_dlp/extractor/cinchcast.py index b861d54b0..393df3698 100644 --- a/yt_dlp/extractor/cinchcast.py +++ b/yt_dlp/extractor/cinchcast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py index 2c3ff8d4f..54cab2285 100644 --- a/yt_dlp/extractor/cinemax.py +++ b/yt_dlp/extractor/cinemax.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .hbo import HBOBaseIE diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index 349c5eb50..066857817 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 882dae91b..e1aae9bda 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py index 1dea0d7c7..c37a3b848 100644 --- a/yt_dlp/extractor/cjsw.py +++ b/yt_dlp/extractor/cjsw.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/cliphunter.py b/yt_dlp/extractor/cliphunter.py index f2ca7a337..7e5fd3175 100644 --- a/yt_dlp/extractor/cliphunter.py +++ b/yt_dlp/extractor/cliphunter.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index a1a7a774c..006a713b2 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py index d55b26d59..567f77b94 100644 --- a/yt_dlp/extractor/cliprs.py +++ b/yt_dlp/extractor/cliprs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .onet import OnetBaseIE diff --git a/yt_dlp/extractor/clipsyndicate.py b/yt_dlp/extractor/clipsyndicate.py index 6cdb42f5a..606444321 100644 --- a/yt_dlp/extractor/clipsyndicate.py +++ b/yt_dlp/extractor/clipsyndicate.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( find_xpath_attr, diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py index 517e121e0..e78e26a11 100644 --- a/yt_dlp/extractor/closertotruth.py +++ b/yt_dlp/extractor/closertotruth.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 2fdcfbb3a..0333d5def 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re diff --git a/yt_dlp/extractor/cloudy.py b/yt_dlp/extractor/cloudy.py index 85ca20ecc..848643e26 100644 --- a/yt_dlp/extractor/cloudy.py +++ b/yt_dlp/extractor/cloudy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( str_to_int, diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py index 98f9cb596..ce8621296 100644 --- a/yt_dlp/extractor/clubic.py +++ b/yt_dlp/extractor/clubic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py index e6b2ac4d4..c64726ca2 100644 --- a/yt_dlp/extractor/clyp.py +++ b/yt_dlp/extractor/clyp.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py index a4ddb9160..4eec066dd 100644 --- a/yt_dlp/extractor/cmt.py +++ b/yt_dlp/extractor/cmt.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVIE # TODO Remove - Reason: Outdated Site diff --git a/yt_dlp/extractor/cnbc.py b/yt_dlp/extractor/cnbc.py index da3730cc8..68fd025b7 100644 --- a/yt_dlp/extractor/cnbc.py +++ b/yt_dlp/extractor/cnbc.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import smuggle_url diff --git a/yt_dlp/extractor/cnn.py b/yt_dlp/extractor/cnn.py index af11d95b4..96482eaf5 100644 --- a/yt_dlp/extractor/cnn.py +++ b/yt_dlp/extractor/cnn.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from .turner import TurnerBaseIE from ..utils import url_basename diff --git a/yt_dlp/extractor/comedycentral.py b/yt_dlp/extractor/comedycentral.py index 5a12ab5e6..05fc9f2b5 100644 --- a/yt_dlp/extractor/comedycentral.py +++ b/yt_dlp/extractor/comedycentral.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9914910d0..ebeca4395 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,46 +1,46 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import collections import hashlib import itertools import json +import math import netrc import os import random -import re import sys import time -import math +import xml.etree.ElementTree from ..compat import ( compat_cookiejar_Cookie, compat_cookies_SimpleCookie, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_getpass, compat_http_client, compat_os_name, - compat_Pattern, compat_str, compat_urllib_error, compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, - compat_xml_parse_error, + re, ) from ..downloader import FileDownloader -from ..downloader.f4m import ( - get_base_url, - remove_encrypted_media, -) +from ..downloader.f4m import get_base_url, remove_encrypted_media from ..utils import ( + JSON_LD_RE, + NO_DEFAULT, + ExtractorError, + GeoRestrictedError, + GeoUtils, + RegexNotFoundError, 
+ UnsupportedError, age_restricted, base_url, bug_reports_message, + classproperty, clean_html, determine_ext, determine_protocol, @@ -48,20 +48,15 @@ from ..utils import ( encode_data_uri, error_to_compat_str, extract_attributes, - ExtractorError, filter_dict, fix_xml_ampersands, float_or_none, format_field, - GeoRestrictedError, - GeoUtils, int_or_none, join_nonempty, js_to_json, - JSON_LD_RE, mimetype2ext, network_exceptions, - NO_DEFAULT, orderedSet, parse_bitrate, parse_codecs, @@ -69,7 +64,6 @@ from ..utils import ( parse_iso8601, parse_m3u8_attributes, parse_resolution, - RegexNotFoundError, sanitize_filename, sanitized_Request, str_or_none, @@ -78,7 +72,6 @@ from ..utils import ( traverse_obj, try_get, unescapeHTML, - UnsupportedError, unified_strdate, unified_timestamp, update_Request, @@ -93,7 +86,7 @@ from ..utils import ( ) -class InfoExtractor(object): +class InfoExtractor: """Information Extractor class. Information extractors are the classes that, given a URL, extract @@ -111,7 +104,9 @@ class InfoExtractor(object): For a video, the dictionaries must include the following fields: id: Video identifier. - title: Video title, unescaped. + title: Video title, unescaped. Set to an empty string if video has + no title as opposed to "None" which signifies that the + extractor failed to obtain a title Additionally, it must contain either a formats entry or a url one: @@ -216,8 +211,10 @@ class InfoExtractor(object): * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. * has_drm The format has DRM and cannot be downloaded. Boolean - * downloader_options A dictionary of downloader options as - described in FileDownloader (For internal use only) + * downloader_options A dictionary of downloader options + (For internal use only) + * http_chunk_size Chunk size for HTTP downloads + * ffmpeg_args Extra arguments for ffmpeg downloader RTMP formats can also have the additional fields: page_url, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, rtmp_protocol, rtmp_real_time @@ -473,14 +470,18 @@ class InfoExtractor(object): _WORKING = True _NETRC_MACHINE = None IE_DESC = None + SEARCH_KEY = None - _LOGIN_HINTS = { - 'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials', - 'cookies': ( - 'Use --cookies-from-browser or --cookies for the authentication. ' - 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), - 'password': 'Use --username and --password, or --netrc to provide account credentials', - } + def _login_hint(self, method=NO_DEFAULT, netrc=None): + password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials' + return { + None: '', + 'any': f'Use --cookies, --cookies-from-browser, {password_hint}', + 'password': f'Use {password_hint}', + 'cookies': ( + 'Use --cookies-from-browser or --cookies for the authentication. ' + 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), + }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies'] def __init__(self, downloader=None): """Constructor. Receives an optional downloader (a YoutubeDL instance). 
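One change in the hunk above is worth calling out for extractor authors: the class-level _LOGIN_HINTS dict is removed and replaced by the _login_hint() method, which builds the hint text from _NETRC_MACHINE and supports_login() at call time. A minimal standalone sketch of that selection logic, with the class machinery factored out (NO_DEFAULT stands in for yt_dlp.utils.NO_DEFAULT, and netrc_machine/supports_login are assumed inputs rather than the real attributes):

    # Sketch only: mirrors the _login_hint() lookup from the hunk above, outside the class.
    NO_DEFAULT = object()

    def login_hint(method=NO_DEFAULT, netrc=None, netrc_machine=None, supports_login=False):
        password_hint = (f'--username and --password, or --netrc ({netrc or netrc_machine}) '
                         'to provide account credentials')
        hints = {
            None: '',
            'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
            'password': f'Use {password_hint}',
            'cookies': ('Use --cookies-from-browser or --cookies for the authentication. '
                        'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl '
                        'for how to manually pass cookies'),
        }
        # With no explicit method, fall back to 'any' when login is supported,
        # otherwise to 'cookies' -- the same fallback the dict lookup in the patch encodes.
        return hints[method if method is not NO_DEFAULT else 'any' if supports_login else 'cookies']

    print(login_hint('password', netrc_machine='example'))

Call sites such as report_warning in _initialize_pre_login and raise_login_required (later in this file) now interpolate the returned string instead of indexing _LOGIN_HINTS, which is why the subsequent hunks touch those lines.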
@@ -543,7 +544,7 @@ class InfoExtractor(object): if username: self._perform_login(username, password) elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE): - self.report_warning(f'Login with password is not supported for this website. {self._LOGIN_HINTS["cookies"]}') + self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}') self._real_initialize() self._ready = True @@ -629,7 +630,7 @@ class InfoExtractor(object): if country: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country) self._downloader.write_debug( - 'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper())) + f'Using fake IP {self._x_forwarded_for_ip} ({country.upper()}) as X-Forwarded-For') def extract(self, url): """Extracts URL information and returns it in list of dicts.""" @@ -710,9 +711,9 @@ class InfoExtractor(object): """A string for getting the InfoExtractor with get_info_extractor""" return cls.__name__[:-2] - @property - def IE_NAME(self): - return compat_str(type(self).__name__[:-2]) + @classproperty + def IE_NAME(cls): + return cls.__name__[:-2] @staticmethod def __can_accept_status_code(err, expected_status): @@ -742,9 +743,9 @@ class InfoExtractor(object): self.report_download_webpage(video_id) elif note is not False: if video_id is None: - self.to_screen('%s' % (note,)) + self.to_screen(str(note)) else: - self.to_screen('%s: %s' % (video_id, note)) + self.to_screen(f'{video_id}: {note}') # Some sites check X-Forwarded-For HTTP header in order to figure out # the origin of the client behind proxy. This allows bypassing geo @@ -780,7 +781,7 @@ class InfoExtractor(object): if errnote is None: errnote = 'Unable to download webpage' - errmsg = '%s: %s' % (errnote, error_to_compat_str(err)) + errmsg = f'{errnote}: {error_to_compat_str(err)}' if fatal: raise ExtractorError(errmsg, cause=err) else: @@ -861,7 +862,7 @@ class InfoExtractor(object): dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self.get_param('write_pages', False): - basen = '%s_%s' % (video_id, urlh.geturl()) + basen = f'{video_id}_{urlh.geturl()}' trim_length = self.get_param('trim_file_name') or 240 if len(basen) > trim_length: h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() @@ -951,7 +952,7 @@ class InfoExtractor(object): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an compat_etree_Element, URL handle). + Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -972,7 +973,7 @@ class InfoExtractor(object): transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an compat_etree_Element. + Return the xml as an xml.etree.ElementTree.Element. See _download_webpage docstring for arguments specification. 
""" @@ -988,7 +989,7 @@ class InfoExtractor(object): xml_string = transform_source(xml_string) try: return compat_etree_fromstring(xml_string.encode('utf-8')) - except compat_xml_parse_error as ve: + except xml.etree.ElementTree.ParseError as ve: errmsg = '%s: Failed to parse XML ' % video_id if fatal: raise ExtractorError(errmsg, cause=ve) @@ -1099,10 +1100,10 @@ class InfoExtractor(object): def to_screen(self, msg, *args, **kwargs): """Print msg to screen, prefixing it with '[ie_name]'""" - self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs) + self._downloader.to_screen(f'[{self.IE_NAME}] {msg}', *args, **kwargs) def write_debug(self, msg, *args, **kwargs): - self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs) + self._downloader.write_debug(f'[{self.IE_NAME}] {msg}', *args, **kwargs) def get_param(self, name, default=None, *args, **kwargs): if self._downloader: @@ -1135,11 +1136,7 @@ class InfoExtractor(object): self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) return - if method is NO_DEFAULT: - method = 'any' if self.supports_login() else 'cookies' - if method is not None: - assert method in self._LOGIN_HINTS, 'Invalid login method' - msg = '%s. %s' % (msg, self._LOGIN_HINTS[method]) + msg += format_field(self._login_hint(method), template='. %s') raise ExtractorError(msg, expected=True) def raise_geo_restricted( @@ -1205,7 +1202,7 @@ class InfoExtractor(object): """ if string is None: mobj = None - elif isinstance(pattern, (str, compat_Pattern)): + elif isinstance(pattern, (str, re.Pattern)): mobj = re.search(pattern, string, flags) else: for p in pattern: @@ -1258,7 +1255,7 @@ class InfoExtractor(object): else: raise netrc.NetrcParseError( 'No authenticators for %s' % netrc_machine) - except (IOError, netrc.NetrcParseError) as err: + except (OSError, netrc.NetrcParseError) as err: self.report_warning( 'parsing .netrc: %s' % error_to_compat_str(err)) @@ -1928,8 +1925,7 @@ class InfoExtractor(object): def _sort_formats(self, formats, field_preference=[]): if not formats: return - format_sort = self.FormatSort(self, field_preference) - formats.sort(key=lambda f: format_sort.calculate_preference(f)) + formats.sort(key=self.FormatSort(self, field_preference).calculate_preference) def _check_formats(self, formats, video_id): if formats: @@ -1990,17 +1986,19 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None, data=None, headers={}, query={}): - manifest = self._download_xml( + res = self._download_xml_handle( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source, fatal=fatal, data=data, headers=headers, query=query) - - if manifest is False: + if res is False: return [] + manifest, urlh = res + manifest_url = urlh.geturl() + return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id, transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) @@ -2008,7 +2006,7 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, compat_etree_Element) and not fatal: + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: return [] # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy @@ -2408,12 +2406,14 @@ class InfoExtractor(object): return '/'.join(out) def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) - - if smil is False: + res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) + if res is False: assert not fatal return [], {} + smil, urlh = res + smil_url = urlh.geturl() + namespace = self._parse_smil_namespace(smil) fmts = self._parse_smil_formats( @@ -2430,13 +2430,17 @@ class InfoExtractor(object): return fmts def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal) - if smil is False: + res = self._download_smil(smil_url, video_id, fatal=fatal) + if res is False: return {} + + smil, urlh = res + smil_url = urlh.geturl() + return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None): - return self._download_xml( + return self._download_xml_handle( smil_url, video_id, 'Downloading SMIL file', 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source) @@ -2615,11 +2619,15 @@ class InfoExtractor(object): return subtitles def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): - xspf = self._download_xml( + res = self._download_xml_handle( xspf_url, playlist_id, 'Downloading xpsf playlist', 'Unable to download xspf manifest', fatal=fatal) - if xspf is False: + if res is False: return [] + + xspf, urlh = res + xspf_url = urlh.geturl() + return self._parse_xspf( xspf, playlist_id, xspf_url=xspf_url, xspf_base_url=base_url(xspf_url)) @@ -2684,7 +2692,10 @@ class InfoExtractor(object): mpd_doc, urlh = res if mpd_doc is None: return [], {} - mpd_base_url = base_url(urlh.geturl()) + + # We could have been redirected to a new url when we retrieved our mpd file. 
+ mpd_url = urlh.geturl() + mpd_base_url = base_url(mpd_url) return self._parse_mpd_formats_and_subtitles( mpd_doc, mpd_id, mpd_base_url, mpd_url) @@ -2800,7 +2811,7 @@ class InfoExtractor(object): content_type = 'video' elif codecs['acodec'] != 'none': content_type = 'audio' - elif codecs.get('tcodec', 'none') != 'none': + elif codecs.get('scodec', 'none') != 'none': content_type = 'text' elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): content_type = 'text' @@ -3334,7 +3345,7 @@ class InfoExtractor(object): http_f = f.copy() del http_f['manifest_url'] http_url = re.sub( - REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url']) + REPL_REGEX, protocol + fr'://{http_host}/\g<1>{qualities[i]}\3', f['url']) http_f.update({ 'format_id': http_f['format_id'].replace('hls-', protocol + '-'), 'url': http_url, @@ -3355,7 +3366,7 @@ class InfoExtractor(object): formats = [] def manifest_url(manifest): - m_url = '%s/%s' % (http_base_url, manifest) + m_url = f'{http_base_url}/{manifest}' if query: m_url += '?%s' % query return m_url @@ -3392,7 +3403,7 @@ class InfoExtractor(object): for protocol in ('rtmp', 'rtsp'): if protocol not in skip_protocols: formats.append({ - 'url': '%s:%s' % (protocol, url_base), + 'url': f'{protocol}:{url_base}', 'format_id': protocol, 'protocol': protocol, }) @@ -3558,7 +3569,7 @@ class InfoExtractor(object): def _int(self, v, name, fatal=False, **kwargs): res = int_or_none(v, **kwargs) if res is None: - msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + msg = f'Failed to extract {name}: Could not parse value {v!r}' if fatal: raise ExtractorError(msg) else: @@ -3568,7 +3579,7 @@ class InfoExtractor(object): def _float(self, v, name, fatal=False, **kwargs): res = float_or_none(v, **kwargs) if res is None: - msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + msg = f'Failed to extract {name}: Could not parse value {v!r}' if fatal: raise ExtractorError(msg) else: @@ -3606,9 +3617,7 @@ class InfoExtractor(object): for header, cookies in url_handle.headers.items(): if header.lower() != 'set-cookie': continue - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') + cookies = cookies.encode('iso-8859-1').decode('utf-8') cookie_value = re.search( r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies) if cookie_value: @@ -3616,34 +3625,55 @@ class InfoExtractor(object): self._set_cookie(domain, cookie, value) break - def get_testcases(self, include_onlymatching=False): - t = getattr(self, '_TEST', None) + @classmethod + def get_testcases(cls, include_onlymatching=False): + t = getattr(cls, '_TEST', None) if t: - assert not hasattr(self, '_TESTS'), \ - '%s has _TEST and _TESTS' % type(self).__name__ + assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS' tests = [t] else: - tests = getattr(self, '_TESTS', []) + tests = getattr(cls, '_TESTS', []) for t in tests: if not include_onlymatching and t.get('only_matching', False): continue - t['name'] = type(self).__name__[:-len('IE')] + t['name'] = cls.ie_key() yield t - def is_suitable(self, age_limit): - """ Test whether the extractor is generally suitable for the given - age limit (i.e. 
pornographic sites are not, all others usually are) """ - - any_restricted = False - for tc in self.get_testcases(include_onlymatching=False): - if tc.get('playlist', []): - tc = tc['playlist'][0] - is_restricted = age_restricted( - tc.get('info_dict', {}).get('age_limit'), age_limit) - if not is_restricted: - return True - any_restricted = any_restricted or is_restricted - return not any_restricted + @classproperty + def age_limit(cls): + """Get age limit from the testcases""" + return max(traverse_obj( + tuple(cls.get_testcases(include_onlymatching=False)), + (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0]) + + @classmethod + def is_suitable(cls, age_limit): + """Test whether the extractor is generally suitable for the given age limit""" + return not age_restricted(cls.age_limit, age_limit) + + @classmethod + def description(cls, *, markdown=True, search_examples=None): + """Description of the extractor""" + desc = '' + if cls._NETRC_MACHINE: + if markdown: + desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]' + else: + desc += f' [{cls._NETRC_MACHINE}]' + if cls.IE_DESC is False: + desc += ' [HIDDEN]' + elif cls.IE_DESC: + desc += f' {cls.IE_DESC}' + if cls.SEARCH_KEY: + desc += f'; "{cls.SEARCH_KEY}:" prefix' + if search_examples: + _COUNTS = ('', '5', '10', 'all') + desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' + if not cls.working(): + desc += ' (**Currently broken**)' if markdown else ' (Currently broken)' + + name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME + return f'{name}:{desc}' if desc else name def extract_subtitles(self, *args, **kwargs): if (self.get_param('writesubtitles', False) @@ -3688,7 +3718,7 @@ class InfoExtractor(object): def _merge_subtitle_items(subtitle_list1, subtitle_list2): """ Merge subtitle items for one language. Items with duplicated URLs/data will be dropped. 
""" - list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1) + list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1} ret = list(subtitle_list1) ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data) return ret @@ -3712,11 +3742,15 @@ class InfoExtractor(object): def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + @property + def _cookies_passed(self): + """Whether cookies have been passed to YoutubeDL""" + return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None + def mark_watched(self, *args, **kwargs): if not self.get_param('mark_watched', False): return - if (self.supports_login() and self._get_login_info()[0] is not None - or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')): + if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed: self._mark_watched(*args, **kwargs) def _mark_watched(self, *args, **kwargs): @@ -3801,7 +3835,7 @@ class SearchInfoExtractor(InfoExtractor): else: n = int(prefix) if n <= 0: - raise ExtractorError('invalid download number %s for query "%s"' % (n, query)) + raise ExtractorError(f'invalid download number {n} for query "{query}"') elif n > self._MAX_RESULTS: self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) n = self._MAX_RESULTS @@ -3818,6 +3852,6 @@ class SearchInfoExtractor(InfoExtractor): """Returns an iterator of search results""" raise NotImplementedError('This method must be implemented by subclasses') - @property - def SEARCH_KEY(self): - return self._SEARCH_KEY + @classproperty + def SEARCH_KEY(cls): + return cls._SEARCH_KEY diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py index 051269652..62bd51fd7 100644 --- a/yt_dlp/extractor/commonmistakes.py +++ b/yt_dlp/extractor/commonmistakes.py @@ -1,7 +1,3 @@ -from __future__ import unicode_literals - -import sys - from .common import InfoExtractor from ..utils import ExtractorError @@ -35,9 +31,7 @@ class UnicodeBOMIE(InfoExtractor): IE_DESC = False _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' - # Disable test for python 3.2 since BOM is broken in re in this version - # (see https://github.com/ytdl-org/youtube-dl/issues/9751) - _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ + _TESTS = [{ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 'only_matching': True, }] diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py index 3708c6ad2..e8f19b9e0 100644 --- a/yt_dlp/extractor/commonprotocols.py +++ b/yt_dlp/extractor/commonprotocols.py @@ -1,10 +1,5 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) +from ..compat import compat_urlparse class RtmpIE(InfoExtractor): diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py index 54e7af8b0..cf6e40cb8 100644 --- a/yt_dlp/extractor/condenast.py +++ b/yt_dlp/extractor/condenast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py index 84b462d40..50648a536 100644 --- a/yt_dlp/extractor/contv.py +++ b/yt_dlp/extractor/contv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import 
InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index 119461375..7b83c0390 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .theplatform import ThePlatformFeedIE from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py index e90aa1954..b462acaf0 100644 --- a/yt_dlp/extractor/coub.py +++ b/yt_dlp/extractor/coub.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py index d49f1ca74..5ef5afcc2 100644 --- a/yt_dlp/extractor/cozytv.py +++ b/yt_dlp/extractor/cozytv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py index 22741152c..65ac2497f 100644 --- a/yt_dlp/extractor/cpac.py +++ b/yt_dlp/extractor/cpac.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -12,13 +9,6 @@ from ..utils import ( urljoin, ) -# compat_range -try: - if callable(xrange): - range = xrange -except (NameError, TypeError): - pass - class CPACIE(InfoExtractor): IE_NAME = 'cpac' diff --git a/yt_dlp/extractor/cracked.py b/yt_dlp/extractor/cracked.py index f77a68ece..c6aabccc6 100644 --- a/yt_dlp/extractor/cracked.py +++ b/yt_dlp/extractor/cracked.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index db4962c42..319374f3b 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals, division - import hashlib import hmac import re diff --git a/yt_dlp/extractor/craftsy.py b/yt_dlp/extractor/craftsy.py index ed2f4420e..307bfb946 100644 --- a/yt_dlp/extractor/craftsy.py +++ b/yt_dlp/extractor/craftsy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index 7fb782db7..c831a3ae0 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py index 72906afef..75d90b5c5 100644 --- a/yt_dlp/extractor/crowdbunker.py +++ b/yt_dlp/extractor/crowdbunker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 7edb645f8..bb1dbbaad 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -1,18 +1,15 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re import json import zlib +import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from .vrv import VRVBaseIE from ..compat import ( 
compat_b64decode, - compat_etree_Element, compat_etree_fromstring, compat_str, compat_urllib_parse_urlencode, @@ -395,7 +392,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, compat_etree_Element): + if not isinstance(sub_doc, xml.etree.ElementTree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -525,7 +522,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, compat_etree_Element): + if isinstance(streamdata, xml.etree.ElementTree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -536,7 +533,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, compat_etree_Element): + if isinstance(stream_info, xml.etree.ElementTree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -611,7 +608,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text season = episode = episode_number = duration = None - if isinstance(metadata, compat_etree_Element): + if isinstance(metadata, xml.etree.ElementTree.Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index f51159bbe..cb1523617 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index 679f1d92e..cec178f03 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_timestamp from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py index 756bcc2be..f125c1ce9 100644 --- a/yt_dlp/extractor/ctv.py +++ b/yt_dlp/extractor/ctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/ctvnews.py b/yt_dlp/extractor/ctvnews.py index 952f4c747..ad3f0d8e4 100644 --- a/yt_dlp/extractor/ctvnews.py +++ b/yt_dlp/extractor/ctvnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cultureunplugged.py b/yt_dlp/extractor/cultureunplugged.py index 9002e4cef..2fb22800f 100644 --- a/yt_dlp/extractor/cultureunplugged.py +++ b/yt_dlp/extractor/cultureunplugged.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import time from .common import InfoExtractor diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index b8abcf7a5..5b76b29ff 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 73382431b..07239f39c 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py index c278f0fe0..7da581828 100644 --- a/yt_dlp/extractor/cybrary.py +++ b/yt_dlp/extractor/cybrary.py @@ -1,5 +1,4 @@ -# coding: utf-8 -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py index 6037fd9ca..0fe014f76 100644 --- a/yt_dlp/extractor/daftsex.py +++ b/yt_dlp/extractor/daftsex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 67b88fd56..5451dbf00 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index 9cb56185b..3b090d5e0 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json import re diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 456cd35a4..962d9741b 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py index 4362e92cb..a1f197b0b 100644 --- a/yt_dlp/extractor/daum.py +++ b/yt_dlp/extractor/daum.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py index 8e73176a6..2beccd8b5 100644 --- a/yt_dlp/extractor/dbtv.py +++ b/yt_dlp/extractor/dbtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dctp.py b/yt_dlp/extractor/dctp.py index e700f8d86..24bb6aca2 100644 --- a/yt_dlp/extractor/dctp.py +++ b/yt_dlp/extractor/dctp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py index 7ba02e552..bee1c7501 100644 --- a/yt_dlp/extractor/deezer.py +++ b/yt_dlp/extractor/deezer.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/defense.py b/yt_dlp/extractor/defense.py index 9fe144e14..7d73ea862 100644 --- a/yt_dlp/extractor/defense.py +++ b/yt_dlp/extractor/defense.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 5c9c0ecdc..af327e6c6 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - import re import os.path diff --git a/yt_dlp/extractor/dfb.py b/yt_dlp/extractor/dfb.py index 97f70fc7b..5aca72988 100644 --- a/yt_dlp/extractor/dfb.py +++ b/yt_dlp/extractor/dfb.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/dhm.py b/yt_dlp/extractor/dhm.py index aee72a6ed..3d42fc2b0 100644 --- a/yt_dlp/extractor/dhm.py +++ b/yt_dlp/extractor/dhm.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_duration diff --git a/yt_dlp/extractor/digg.py b/yt_dlp/extractor/digg.py index 913c1750f..86e8a6fac 100644 --- a/yt_dlp/extractor/digg.py +++ b/yt_dlp/extractor/digg.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 8398ae30e..c891ad0a6 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index d63204778..5d244cb08 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index fd3ad75c7..fd3fc8fb0 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import random import string diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index 9e7b14a7d..7b4278c88 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/discoveryvr.py b/yt_dlp/extractor/discoveryvr.py deleted file mode 100644 index cb63c2649..000000000 --- a/yt_dlp/extractor/discoveryvr.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import parse_duration - - -class DiscoveryVRIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)' - _TEST = { - 'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction', - 'md5': '32b1929798c464a54356378b7912eca4', - 'info_dict': { - 'id': 'discovery-vr-an-introduction', - 'ext': 'mp4', - 'title': 'Discovery VR - An Introduction', - 'description': 'md5:80d418a10efb8899d9403e61d8790f06', - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - bootstrap_data = self._search_regex( - r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";', - webpage, 'bootstrap data') - bootstrap_data = self._parse_json( - bootstrap_data.encode('utf-8').decode('unicode_escape'), - display_id) - videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos'] - video_data = next(video for video in videos if video.get('slug') == display_id) - - series = video_data.get('showTitle') - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - 
formats = [] - for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')): - f_url = video_data.get(f) - if not f_url: - continue - formats.append({ - 'format_id': format_id, - 'url': f_url, - }) - - return { - 'id': display_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('runTime')), - 'formats': formats, - 'episode': episode, - 'series': series, - } diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index 0ad7b1f46..f9af59a57 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py index 3d651f3ab..d4f3324e7 100644 --- a/yt_dlp/extractor/dispeak.py +++ b/yt_dlp/extractor/dispeak.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py index 7410eb6c8..31b4a568f 100644 --- a/yt_dlp/extractor/dlive.py +++ b/yt_dlp/extractor/dlive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/doodstream.py b/yt_dlp/extractor/doodstream.py index f692127c2..f1001c778 100644 --- a/yt_dlp/extractor/doodstream.py +++ b/yt_dlp/extractor/doodstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import string import random import time diff --git a/yt_dlp/extractor/dotsub.py b/yt_dlp/extractor/dotsub.py index 148605c0b..079f83750 100644 --- a/yt_dlp/extractor/dotsub.py +++ b/yt_dlp/extractor/dotsub.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index 26a8d645c..477f4687c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import time import hashlib import re diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index a25f27c3a..5c4f3c892 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid @@ -11,6 +8,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + remove_start, strip_or_none, try_get, unified_timestamp, @@ -314,7 +312,7 @@ class DPlayIE(DPlayBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') - domain = mobj.group('domain').lstrip('www.') + domain = remove_start(mobj.group('domain'), 'www.') country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') host = 'disco-api.' 
+ domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' return self._get_disco_api_info( diff --git a/yt_dlp/extractor/drbonanza.py b/yt_dlp/extractor/drbonanza.py index ea0f06d3d..dca8c89d0 100644 --- a/yt_dlp/extractor/drbonanza.py +++ b/yt_dlp/extractor/drbonanza.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index 5a07c18f4..80a724607 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .zdf import ZDFIE diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py index 058425095..106e5c457 100644 --- a/yt_dlp/extractor/drooble.py +++ b/yt_dlp/extractor/drooble.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 2559657ad..6ac0c713a 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import os.path import re diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 2fa61950c..475825eb8 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from .vimeo import VHXEmbedIE from ..utils import ( diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index 540b86a16..3149e319f 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 37e4d5b26..708b72fae 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import binascii import hashlib import re @@ -26,7 +23,7 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| + (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*| (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ ) (?P<id>[\da-z_-]+) @@ -54,6 +51,7 @@ class DRTVIE(InfoExtractor): 'release_year': 2016, }, 'expected_warnings': ['Unable to download f4m manifest'], + 'skip': 'this video has been removed', }, { # embed 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', @@ -74,31 +72,41 @@ class DRTVIE(InfoExtractor): # with SignLanguage formats 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 'info_dict': { - 'id': 'historien-om-danmark-stenalder', + 'id': '00831690010', 'ext': 'mp4', 'title': 'Historien om Danmark: Stenalder', 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', 'timestamp': 1546628400, 'upload_date': '20190104', - 'duration': 3502.56, + 'duration': 3504.618, 'formats': 'mincount:20', + 'release_year': 2017, + 'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35', + 'season_number': 1, + 'season': 'Historien om Danmark', + 'series': 'Historien om Danmark', }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'url': 
'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', 'only_matching': True, }, { 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', 'info_dict': { 'id': '00951930010', 'ext': 'mp4', - 'title': 'Bonderøven (1:8)', - 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', - 'timestamp': 1546542000, - 'upload_date': '20190103', + 'title': 'Bonderøven 2019 (1:8)', + 'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd', + 'timestamp': 1603188600, + 'upload_date': '20201020', 'duration': 2576.6, + 'season': 'Bonderøven 2019', + 'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5', + 'release_year': 2019, + 'season_number': 2019, + 'series': 'Frank & Kastaniegaarden' }, 'params': { 'skip_download': True, @@ -112,6 +120,24 @@ class DRTVIE(InfoExtractor): }, { 'url': 'https://www.dr.dk/drtv/program/jagten_220924', 'only_matching': True, + }, { + 'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3', + 'info_dict': { + 'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113', + 'title': "Regionale nyheder", + 'ext': 'mp4', + 'duration': 120.043, + 'series': 'P4 Østjylland regionale nyheder', + 'timestamp': 1651746600, + 'season': 'Regionale nyheder', + 'release_year': 0, + 'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5', + 'description': '', + 'upload_date': '20220505', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py index ad247b7dd..25a98f625 100644 --- a/yt_dlp/extractor/dtube.py +++ b/yt_dlp/extractor/dtube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from socket import timeout diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index a87597873..24403842d 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py index d9d9afdec..dc61115ff 100644 --- a/yt_dlp/extractor/dumpert.py +++ b/yt_dlp/extractor/dumpert.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index 08663cffb..61d469f11 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index 6eaee07b4..ee2365ddd 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py index f86731a0c..e2ecd4b7c 100644 --- a/yt_dlp/extractor/eagleplatform.py +++ b/yt_dlp/extractor/eagleplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ebaumsworld.py b/yt_dlp/extractor/ebaumsworld.py index c97682cd3..0854d0344 100644 --- a/yt_dlp/extractor/ebaumsworld.py +++ b/yt_dlp/extractor/ebaumsworld.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/echomsk.py b/yt_dlp/extractor/echomsk.py index 6b7cc652f..850eabbff 100644 --- a/yt_dlp/extractor/echomsk.py +++ b/yt_dlp/extractor/echomsk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py index b6b86768c..d5c954961 100644 --- a/yt_dlp/extractor/egghead.py +++ b/yt_dlp/extractor/egghead.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/ehow.py b/yt_dlp/extractor/ehow.py index b1cd4f5d4..74469ce36 100644 --- a/yt_dlp/extractor/ehow.py +++ b/yt_dlp/extractor/ehow.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/eighttracks.py b/yt_dlp/extractor/eighttracks.py index 9a44f89f3..3dd9ab1b3 100644 --- a/yt_dlp/extractor/eighttracks.py +++ b/yt_dlp/extractor/eighttracks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import random diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py index 7af279a53..37be68c61 100644 --- a/yt_dlp/extractor/einthusan.py +++ b/yt_dlp/extractor/einthusan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py index ee5ead18b..01a47f6fd 100644 --- a/yt_dlp/extractor/eitb.py +++ b/yt_dlp/extractor/eitb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py index d451bc048..bcd458cdf 100644 --- a/yt_dlp/extractor/ellentube.py +++ b/yt_dlp/extractor/ellentube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py index 9c6aea28e..f99e12250 100644 --- a/yt_dlp/extractor/elonet.py +++ b/yt_dlp/extractor/elonet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import determine_ext diff --git a/yt_dlp/extractor/elpais.py b/yt_dlp/extractor/elpais.py index b89f6db62..7c6c88075 100644 --- a/yt_dlp/extractor/elpais.py +++ b/yt_dlp/extractor/elpais.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import strip_jsonp, unified_strdate diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py index a5820b21e..a8d1f3c55 100644 --- a/yt_dlp/extractor/embedly.py +++ b/yt_dlp/extractor/embedly.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/engadget.py b/yt_dlp/extractor/engadget.py index 733bf322f..e7c5d7bf1 100644 --- a/yt_dlp/extractor/engadget.py +++ b/yt_dlp/extractor/engadget.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py index cd19325bc..89424785e 100644 --- a/yt_dlp/extractor/epicon.py +++ 
b/yt_dlp/extractor/epicon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py index 25a0d9799..6bc70c5c6 100644 --- a/yt_dlp/extractor/eporner.py +++ b/yt_dlp/extractor/eporner.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( encode_base_n, diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py index 5d5e7f244..2b61f3be7 100644 --- a/yt_dlp/extractor/eroprofile.py +++ b/yt_dlp/extractor/eroprofile.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 19ce23f01..507f0a5c1 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/escapist.py b/yt_dlp/extractor/escapist.py index 4cd815ebc..5d9c46f72 100644 --- a/yt_dlp/extractor/escapist.py +++ b/yt_dlp/extractor/escapist.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index dc50f3b8b..8fad70e6b 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/esri.py b/yt_dlp/extractor/esri.py index e9dcaeb1d..1736788db 100644 --- a/yt_dlp/extractor/esri.py +++ b/yt_dlp/extractor/esri.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 60ab2ce13..ea20b4d4d 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/europeantour.py b/yt_dlp/extractor/europeantour.py index e28f067be..1995a745d 100644 --- a/yt_dlp/extractor/europeantour.py +++ b/yt_dlp/extractor/europeantour.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 2759e7436..4435f08e0 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/everyonesmixtape.py b/yt_dlp/extractor/everyonesmixtape.py deleted file mode 100644 index 80cb032be..000000000 --- a/yt_dlp/extractor/everyonesmixtape.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - sanitized_Request, -) - - -class EveryonesMixtapeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$' - - _TESTS = [{ - 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', - 'info_dict': { - 'id': '5bfseWNmlds', - 'ext': 'mp4', - 'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)", - 'uploader': 'FKR.TV', - 
'uploader_id': 'frenchkissrecords', - 'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", - 'upload_date': '20081015' - }, - 'params': { - 'skip_download': True, # This is simply YouTube - } - }, { - 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi', - 'info_dict': { - 'id': 'm7m0jJAbMQi', - 'title': 'Driving', - }, - 'playlist_count': 24 - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - playlist_id = mobj.group('id') - - pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id - pllist_req = sanitized_Request(pllist_url) - pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') - - playlist_list = self._download_json( - pllist_req, playlist_id, note='Downloading playlist metadata') - try: - playlist_no = next(playlist['id'] - for playlist in playlist_list - if playlist['code'] == playlist_id) - except StopIteration: - raise ExtractorError('Playlist id not found') - - pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no - pl_req = sanitized_Request(pl_url) - pl_req.add_header('X-Requested-With', 'XMLHttpRequest') - playlist = self._download_json( - pl_req, playlist_id, note='Downloading playlist info') - - entries = [{ - '_type': 'url', - 'url': t['url'], - 'title': t['title'], - } for t in playlist['tracks']] - - if mobj.group('songnr'): - songnr = int(mobj.group('songnr')) - 1 - return entries[songnr] - - playlist_title = playlist['mixData']['name'] - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': playlist_title, - 'entries': entries, - } diff --git a/yt_dlp/extractor/expotv.py b/yt_dlp/extractor/expotv.py index 95a897782..92eaf4248 100644 --- a/yt_dlp/extractor/expotv.py +++ b/yt_dlp/extractor/expotv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py index dc8b855d2..a1b8e9bc9 100644 --- a/yt_dlp/extractor/expressen.py +++ b/yt_dlp/extractor/expressen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0cb686304..9c5a5f482 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,5 +1,4 @@ -# flake8: noqa -from __future__ import unicode_literals +# flake8: noqa: F401 from .abc import ( ABCIE, @@ -169,6 +168,7 @@ from .bilibili import ( BilibiliChannelIE, BiliIntlIE, BiliIntlSeriesIE, + BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( @@ -501,6 +501,7 @@ from .fc2 import ( FC2LiveIE, ) from .fczenit import FczenitIE +from .fifa import FifaIE from .filmmodu import FilmmoduIE from .filmon import ( FilmOnIE, @@ -590,6 +591,7 @@ from .go import GoIE from .godtube import GodTubeIE from .gofile import GofileIE from .golem import GolemIE +from .goodgame import GoodGameIE from .googledrive import GoogleDriveIE from .googlepodcasts import ( GooglePodcastsIE, @@ -600,7 +602,11 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE -from .gronkh import GronkhIE +from .gronkh import ( + GronkhIE, + 
GronkhFeedIE, + GronkhVodsIE +) from .groupon import GrouponIE from .hbo import HBOIE from .hearthisat import HearThisAtIE @@ -617,6 +623,7 @@ from .hitrecord import HitRecordIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, + HotStarPrefixIE, HotStarPlaylistIE, HotStarSeriesIE, ) @@ -640,6 +647,7 @@ from .hungama import ( HungamaAlbumPlaylistIE, ) from .hypem import HypemIE +from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveIE, IchinanaLiveClipIE, @@ -699,7 +707,11 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE -from .iwara import IwaraIE +from .iwara import ( + IwaraIE, + IwaraPlaylistIE, + IwaraUserIE, +) from .izlesene import IzleseneIE from .jable import ( JableIE, @@ -785,6 +797,10 @@ from .lifenews import ( LifeNewsIE, LifeEmbedIE, ) +from .likee import ( + LikeeIE, + LikeeUserIE +) from .limelight import ( LimelightMediaIE, LimelightChannelIE, @@ -813,7 +829,10 @@ from .lnkgo import ( ) from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE -from .lrt import LRTIE +from .lrt import ( + LRTVODIE, + LRTStreamIE +) from .lynda import ( LyndaIE, LyndaCourseIE @@ -843,6 +862,7 @@ from .markiza import ( MarkizaPageIE, ) from .massengeschmacktv import MassengeschmackTVIE +from .masters import MastersIE from .matchtv import MatchTVIE from .mdr import MDRIE from .medaltv import MedalTVIE @@ -999,7 +1019,8 @@ from .ndr import ( from .ndtv import NDTVIE from .nebula import ( NebulaIE, - NebulaCollectionIE, + NebulaSubscriptionsIE, + NebulaChannelIE, ) from .nerdcubed import NerdCubedFeedIE from .netzkino import NetzkinoIE @@ -1248,6 +1269,7 @@ from .pluralsight import ( PluralsightIE, PluralsightCourseIE, ) +from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( PokemonIE, @@ -1383,6 +1405,7 @@ from .rokfin import ( RokfinIE, RokfinStackIE, RokfinChannelIE, + RokfinSearchIE, ) from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE @@ -1884,10 +1907,7 @@ from .vice import ( from .vidbit import VidbitIE from .viddler import ViddlerIE from .videa import VideaIE -from .videocampus_sachsen import ( - VideocampusSachsenIE, - VideocampusSachsenEmbedIE, -) +from .videocampus_sachsen import VideocampusSachsenIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomore import ( @@ -2111,6 +2131,7 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeMusicSearchURLIE, YoutubeSubscriptionsIE, + YoutubeStoriesIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeYtBeIE, @@ -2145,6 +2166,10 @@ from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3IE, ZingMp3AlbumIE, + ZingMp3ChartHomeIE, + ZingMp3WeekChartIE, + ZingMp3ChartMusicVideoIE, + ZingMp3UserIE, ) from .zoom import ZoomIE from .zype import ZypeIE diff --git a/yt_dlp/extractor/extremetube.py b/yt_dlp/extractor/extremetube.py index acd4090fa..99520b6a0 100644 --- a/yt_dlp/extractor/extremetube.py +++ b/yt_dlp/extractor/extremetube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from ..utils import str_to_int from .keezmovies import KeezMoviesIE diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py index f62ddebae..d8b068e9c 100644 --- a/yt_dlp/extractor/eyedotv.py +++ b/yt_dlp/extractor/eyedotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( xpath_text, diff --git a/yt_dlp/extractor/facebook.py 
b/yt_dlp/extractor/facebook.py index 5e0e2facf..de45f9298 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re @@ -397,10 +394,8 @@ class FacebookIE(InfoExtractor): r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)] post = traverse_obj(post_data, ( ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] - media = traverse_obj( - post, - (..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'), - expected_type=dict) + media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( + k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} @@ -528,7 +523,8 @@ class FacebookIE(InfoExtractor): info = { 'id': v_id, 'formats': formats, - 'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']), + 'thumbnail': traverse_obj( + video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')), 'uploader_id': try_get(video, lambda x: x['owner']['id']), 'timestamp': int_or_none(video.get('publish_time')), 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index 7ea16c61d..9716e581a 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py index 312ee2aee..cc12fda2b 100644 --- a/yt_dlp/extractor/faz.py +++ b/yt_dlp/extractor/faz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index 54a83aa16..225677b00 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -1,16 +1,13 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, ) +from ..dependencies import websockets from ..utils import ( ExtractorError, WebSocketsWrapper, - has_websockets, js_to_json, sanitized_Request, std_headers, @@ -173,7 +170,7 @@ class FC2LiveIE(InfoExtractor): }] def _real_extract(self, url): - if not has_websockets: + if not websockets: raise ExtractorError('websockets library is not available. 
Please install it.', expected=True) video_id = self._match_id(url) webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id) diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py index 8db7c5963..df40888e1 100644 --- a/yt_dlp/extractor/fczenit.py +++ b/yt_dlp/extractor/fczenit.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py new file mode 100644 index 000000000..bdc8d7fbf --- /dev/null +++ b/yt_dlp/extractor/fifa.py @@ -0,0 +1,109 @@ +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + traverse_obj, + unified_timestamp, +) + + +class FifaIE(InfoExtractor): + _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', + 'info_dict': { + 'id': '7on10qPcnyLajDDU3ntg6y', + 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay', + 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Replay'], + 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'duration': 8164, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV', + 'info_dict': { + 'id': '1cg5r5Qt6Qt12ilkDgb1sV', + 'title': 'Brasil x Alemanha | Semifinais | Copa do Mundo FIFA Brasil 2014 | Compacto', + 'description': 'md5:ba4ffcc084802b062beffc3b4c4b19d6', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Highlights'], + 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB', + 'duration': 901, + 'release_timestamp': 1404777600, + 'release_date': '20140708', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp', + 'info_dict': { + 'id': '3C6gQH9C2DLwzNx7BMRQdp', + 'title': 'Le but de Josimar contre le Irlande du Nord | Buts classiques', + 'description': 'md5:16f9f789f09960bfe7220fe67af31f34', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Goal'], + 'duration': 28, + 'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id, locale = self._match_valid_url(url).group('id', 'locale') + webpage = self._download_webpage(url, video_id) + + preconnect_link = self._search_regex( + r'<link[^>]+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + + json_data = self._download_json( + f'{preconnect_link}/video/GetVideoPlayerData/{video_id}', video_id, + 'Downloading Video Player Data', query={'includeIdents': True, 'locale': locale}) + + video_details = self._download_json( + f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) + + preplay_parameters = self._download_json( + f'{preconnect_link}/video/GetVerizonPreplayParameters', video_id, 'Downloading Preplay Parameters', query={ + 'entryId': video_id, + 'assetId': json_data['verizonAssetId'], + 'useExternalId': False, + 'requiresToken': json_data['requiresToken'], + 'adConfig': 'fifaplusvideo', + 'prerollAds': True, + 'adVideoId': json_data['externalVerizonAssetId'], + 
'preIdentId': json_data['preIdentId'], + 'postIdentId': json_data['postIdentId'], + }) + + cid = f'{json_data["preIdentId"]},{json_data["verizonAssetId"]},{json_data["postIdentId"]}' + content_data = self._download_json( + f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ + 'v': preplay_parameters['preplayAPIVersion'], + 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], + 'rn': preplay_parameters['randomNumber'], + 'exp': preplay_parameters['tokenExpirationDate'], + 'ct': preplay_parameters['contentType'], + 'cid': cid, + 'mbtracks': preplay_parameters['tracksAssetNumber'], + 'ad': preplay_parameters['adConfiguration'], + 'ad.preroll': int(preplay_parameters['adPreroll']), + 'ad.cmsid': preplay_parameters['adCMSSourceId'], + 'ad.vid': preplay_parameters['adSourceVideoID'], + 'sig': preplay_parameters['signature'], + }) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': json_data.get('title'), + 'description': json_data.get('description'), + 'duration': int_or_none(json_data.get('duration')), + 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')), + 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)), + 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/filmmodu.py b/yt_dlp/extractor/filmmodu.py index 2746876d5..d74131192 100644 --- a/yt_dlp/extractor/filmmodu.py +++ b/yt_dlp/extractor/filmmodu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index 7b43ecc0f..7040231be 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/filmweb.py b/yt_dlp/extractor/filmweb.py index 5e323b4f8..cfea1f2fb 100644 --- a/yt_dlp/extractor/filmweb.py +++ b/yt_dlp/extractor/filmweb.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py index ccad173b7..99c27e0c3 100644 --- a/yt_dlp/extractor/firsttv.py +++ b/yt_dlp/extractor/firsttv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py index d6bebd19b..448c332b3 100644 --- a/yt_dlp/extractor/fivetv.py +++ b/yt_dlp/extractor/fivetv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py index 2ed6c2bdc..552ecd43a 100644 --- a/yt_dlp/extractor/flickr.py +++ b/yt_dlp/extractor/flickr.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/folketinget.py b/yt_dlp/extractor/folketinget.py index b3df93f28..0e69fa32f 100644 --- a/yt_dlp/extractor/folketinget.py +++ b/yt_dlp/extractor/folketinget.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ( diff --git a/yt_dlp/extractor/footyroom.py b/yt_dlp/extractor/footyroom.py index 118325b6d..4a1316b50 100644 --- a/yt_dlp/extractor/footyroom.py +++ b/yt_dlp/extractor/footyroom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .streamable import StreamableIE diff --git a/yt_dlp/extractor/formula1.py b/yt_dlp/extractor/formula1.py index 67662e6de..0a8ef850e 100644 --- a/yt_dlp/extractor/formula1.py +++ b/yt_dlp/extractor/formula1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/fourtube.py b/yt_dlp/extractor/fourtube.py index d4d955b6b..c6af100f3 100644 --- a/yt_dlp/extractor/fourtube.py +++ b/yt_dlp/extractor/fourtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/fox.py b/yt_dlp/extractor/fox.py index 4c52b9ac6..5996e86bb 100644 --- a/yt_dlp/extractor/fox.py +++ b/yt_dlp/extractor/fox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid diff --git a/yt_dlp/extractor/fox9.py b/yt_dlp/extractor/fox9.py index 91f8f7b8a..dfbafa7dd 100644 --- a/yt_dlp/extractor/fox9.py +++ b/yt_dlp/extractor/fox9.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py index 1c53e0642..4abc2cfd0 100644 --- a/yt_dlp/extractor/foxgay.py +++ b/yt_dlp/extractor/foxgay.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py index 18fa0a5ef..cee4d6b49 100644 --- a/yt_dlp/extractor/foxnews.py +++ b/yt_dlp/extractor/foxnews.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .amp import AMPIE diff --git a/yt_dlp/extractor/foxsports.py b/yt_dlp/extractor/foxsports.py index 2b2cb6c6f..f9d7fe52a 100644 --- a/yt_dlp/extractor/foxsports.py +++ b/yt_dlp/extractor/foxsports.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py index c23fe6c53..1872d8a1c 100644 --- a/yt_dlp/extractor/fptplay.py +++ b/yt_dlp/extractor/fptplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import time import urllib.parse diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py index 9dc28d801..6bd9912f3 100644 --- a/yt_dlp/extractor/franceculture.py +++ b/yt_dlp/extractor/franceculture.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/franceinter.py b/yt_dlp/extractor/franceinter.py index ae822a50e..779249b84 100644 --- a/yt_dlp/extractor/franceinter.py +++ b/yt_dlp/extractor/franceinter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import month_by_name diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 347a766d8..5902eaca0 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -1,8 +1,3 @@ -# coding: utf-8 - -from __future__ 
import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/freesound.py b/yt_dlp/extractor/freesound.py index 138b6bc58..9724dbdf0 100644 --- a/yt_dlp/extractor/freesound.py +++ b/yt_dlp/extractor/freesound.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/freespeech.py b/yt_dlp/extractor/freespeech.py index ea9c3e317..aea551379 100644 --- a/yt_dlp/extractor/freespeech.py +++ b/yt_dlp/extractor/freespeech.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index fc67a8437..e0529b7ba 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py index 4fdfe12ab..d7f49accd 100644 --- a/yt_dlp/extractor/fujitv.py +++ b/yt_dlp/extractor/fujitv.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals from ..utils import HEADRequest from .common import InfoExtractor @@ -19,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076', 'info_dict': { 'id': '5d40110076', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1318 『まる子、まぼろしの洋館を見る』の巻', 'series': 'ちびまる子ちゃん', 'series_id': '5d40', @@ -30,7 +28,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083', 'info_dict': { 'id': '5d40810083', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻', 'description': 'md5:3972d900b896adc8ab1849e310507efa', 'series': 'ちびまる子ちゃん', @@ -47,13 +45,13 @@ class FujiTVFODPlus7IE(InfoExtractor): if token: json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) else: - self.report_warning(f'The token cookie is needed to extract video metadata. {self._LOGIN_HINTS["cookies"]}') + self.report_warning(f'The token cookie is needed to extract video metadata. 
{self._login_hint("cookies")}') formats, subtitles = [], {} src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id) for src in src_json['video_selector']: if not src.get('url'): continue - fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4') + fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts') for f in fmt: f.update(dict(zip(('height', 'width'), self._BITRATE_MAP.get(f.get('tbr'), ())))) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 6aa9bc9ce..12cacd3b4 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string @@ -245,6 +242,9 @@ class FunimationIE(FunimationBaseIE): 'language_preference': language_preference(lang.lower()), }) formats.extend(current_formats) + if not formats and (requested_languages or requested_versions): + self.raise_no_formats( + 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id) self._remove_duplicate_formats(formats) self._sort_formats(formats, ('lang', 'source')) diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py index 2c5cfe864..539d719c5 100644 --- a/yt_dlp/extractor/funk.py +++ b/yt_dlp/extractor/funk.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from .nexx import NexxIE from ..utils import ( diff --git a/yt_dlp/extractor/fusion.py b/yt_dlp/extractor/fusion.py index a3f44b812..46bda49ea 100644 --- a/yt_dlp/extractor/fusion.py +++ b/yt_dlp/extractor/fusion.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/fxnetworks.py b/yt_dlp/extractor/fxnetworks.py deleted file mode 100644 index 00e67426b..000000000 --- a/yt_dlp/extractor/fxnetworks.py +++ /dev/null @@ -1,77 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .adobepass import AdobePassIE -from ..utils import ( - extract_attributes, - int_or_none, - parse_age_limit, - smuggle_url, - update_url_query, -) - - -class FXNetworksIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.fxnetworks.com/video/1032565827847', - 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703', - 'info_dict': { - 'id': 'dRzwHC_MMqIv', - 'ext': 'mp4', - 'title': 'First Look: Better Things - Season 2', - 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.', - 'age_limit': 14, - 'uploader': 'NEWA-FNG-FX', - 'upload_date': '20170825', - 'timestamp': 1503686274, - 'episode_number': 0, - 'season_number': 2, - 'series': 'Better Things', - }, - 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.simpsonsworld.com/video/716094019682', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - if 'The content you are trying to access is not available in your region.' 
in webpage: - self.raise_geo_restricted() - video_data = extract_attributes(self._search_regex( - r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')) - player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) - release_url = video_data['rel'] - title = video_data['data-title'] - rating = video_data.get('data-rating') - query = { - 'mbr': 'true', - } - if player_type == 'movies': - query.update({ - 'manifest': 'm3u', - }) - else: - query.update({ - 'switch': 'http', - }) - if video_data.get('data-req-auth') == '1': - resource = self._get_mvpd_resource( - video_data['data-channel'], title, - video_data.get('data-guid'), rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), - 'series': video_data.get('data-show-title'), - 'episode_number': int_or_none(video_data.get('data-episode')), - 'season_number': int_or_none(video_data.get('data-season')), - 'thumbnail': video_data.get('data-large-thumb'), - 'age_limit': parse_age_limit(rating), - 'ie_key': 'ThePlatform', - } diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index 9ba0b1ca1..7ed81f761 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py index 5b0195c63..4ace0544a 100644 --- a/yt_dlp/extractor/gaia.py +++ b/yt_dlp/extractor/gaia.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/gameinformer.py b/yt_dlp/extractor/gameinformer.py index f1b96c172..2664edb81 100644 --- a/yt_dlp/extractor/gameinformer.py +++ b/yt_dlp/extractor/gameinformer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index a13e528f5..440b832fc 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -1,4 +1,3 @@ -# coding: utf-8 import itertools import json import math diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py index 7a1beae3c..e1d317377 100644 --- a/yt_dlp/extractor/gamespot.py +++ b/yt_dlp/extractor/gamespot.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .once import OnceIE from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/gamestar.py b/yt_dlp/extractor/gamestar.py index e882fa671..e9966f532 100644 --- a/yt_dlp/extractor/gamestar.py +++ b/yt_dlp/extractor/gamestar.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py index 03acd2a73..76ddcc40e 100644 --- a/yt_dlp/extractor/gaskrank.py +++ b/yt_dlp/extractor/gaskrank.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/gazeta.py b/yt_dlp/extractor/gazeta.py index 367187080..c6868a672 100644 --- a/yt_dlp/extractor/gazeta.py +++ 
b/yt_dlp/extractor/gazeta.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/gdcvault.py b/yt_dlp/extractor/gdcvault.py index c3ad6b4ce..2878bbd88 100644 --- a/yt_dlp/extractor/gdcvault.py +++ b/yt_dlp/extractor/gdcvault.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py index ec386c218..4ae5362b4 100644 --- a/yt_dlp/extractor/gedidigital.py +++ b/yt_dlp/extractor/gedidigital.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -14,7 +11,7 @@ from ..utils import ( class GediDigitalIE(InfoExtractor): - _VALID_URL = r'''(?x)(?P<url>(?:https?:)//video\. + _VALID_URL = r'''(?x:(?P<url>(?:https?:)//video\. (?: (?: (?:espresso\.)?repubblica @@ -36,7 +33,7 @@ class GediDigitalIE(InfoExtractor): |corrierealpi |lasentinella )\.gelocal - )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*)''' + )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*))''' _TESTS = [{ 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683', 'md5': '84658d7fb9e55a6e57ecc77b73137494', diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index bd56ad289..f594d02c2 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1,30 +1,126 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import os import re -import sys +import xml.etree.ElementTree +from .ant1newsgr import Ant1NewsGrEmbedIE +from .anvato import AnvatoIE +from .apa import APAIE +from .arcpublishing import ArcPublishingIE +from .arkena import ArkenaIE +from .arte import ArteTVEmbedIE +from .bitchute import BitChuteIE +from .blogger import BloggerIE +from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE +from .channel9 import Channel9IE +from .cloudflarestream import CloudflareStreamIE from .common import InfoExtractor +from .commonprotocols import RtmpIE +from .condenast import CondeNastIE +from .dailymail import DailyMailIE +from .dailymotion import DailymotionIE +from .dbtv import DBTVIE +from .digiteka import DigitekaIE +from .drtuber import DrTuberIE +from .eagleplatform import EaglePlatformIE +from .ertgr import ERTWebtvEmbedIE +from .expressen import ExpressenIE +from .facebook import FacebookIE +from .foxnews import FoxNewsIE +from .gedidigital import GediDigitalIE +from .gfycat import GfycatIE +from .glomex import GlomexEmbedIE +from .googledrive import GoogleDriveIE +from .indavideo import IndavideoEmbedIE +from .instagram import InstagramIE +from .joj import JojIE +from .jwplatform import JWPlatformIE +from .kaltura import KalturaIE +from .kinja import KinjaEmbedIE +from .limelight import LimelightBaseIE +from .mainstreaming import MainStreamingIE +from .medialaan import MedialaanIE +from .mediaset import MediasetIE +from .mediasite import MediasiteIE +from .megaphone import MegaphoneIE +from .megatvcom import MegaTVComEmbedIE +from .mofosex import MofosexEmbedIE +from .mtv import MTVServicesEmbeddedIE +from .myvi import MyviIE +from .nbc import NBCSportsVPlayerIE +from .nexx import NexxEmbedIE, NexxIE +from .odnoklassniki import OdnoklassnikiIE +from .onionstudios import OnionStudiosIE +from .ooyala import OoyalaIE +from .panopto import PanoptoBaseIE +from .peertube import PeerTubeIE +from .piksel import PikselIE +from .pladform import PladformIE 
+from .pornhub import PornHubIE +from .rcs import RCSEmbedsIE +from .redtube import RedTubeIE +from .rumble import RumbleEmbedIE +from .rutube import RutubeIE +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .senategov import SenateISVPIE +from .simplecast import SimplecastIE +from .soundcloud import SoundcloudEmbedIE +from .spankwire import SpankwireIE +from .sportbox import SportBoxIE +from .spotify import SpotifyBaseIE +from .springboardplatform import SpringboardPlatformIE +from .svt import SVTIE +from .teachable import TeachableIE +from .ted import TedEmbedIE +from .theplatform import ThePlatformIE +from .threeqsdn import ThreeQSDNIE +from .tnaflix import TNAFlixNetworkEmbedIE +from .tube8 import Tube8IE +from .tunein import TuneInBaseIE +from .tvc import TVCIE +from .tvopengr import TVOpenGrEmbedIE +from .tvp import TVPEmbedIE +from .twentymin import TwentyMinutenIE +from .udn import UDNEmbedIE +from .ustream import UstreamIE +from .vbox7 import Vbox7IE +from .vice import ViceIE +from .videa import VideaIE +from .videomore import VideomoreIE +from .videopress import VideoPressIE +from .viewlift import ViewLiftEmbedIE +from .vimeo import VHXEmbedIE, VimeoIE +from .viqeo import ViqeoIE +from .vk import VKIE +from .vshare import VShareIE +from .vzaar import VzaarIE +from .washingtonpost import WashingtonPostIE +from .webcaster import WebcasterFeedIE +from .wimtv import WimTVIE +from .wistia import WistiaIE +from .xfileshare import XFileShareIE +from .xhamster import XHamsterEmbedIE +from .yapfiles import YapFilesIE +from .youporn import YouPornIE from .youtube import YoutubeIE +from .zype import ZypeIE from ..compat import ( compat_etree_fromstring, compat_str, compat_urllib_parse_unquote, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( + KNOWN_EXTENSIONS, + ExtractorError, + HEADRequest, + UnsupportedError, determine_ext, dict_get, - ExtractorError, float_or_none, - HEADRequest, int_or_none, is_html, js_to_json, - KNOWN_EXTENSIONS, merge_dicts, mimetype2ext, orderedSet, @@ -36,120 +132,11 @@ from ..utils import ( unescapeHTML, unified_timestamp, unsmuggle_url, - UnsupportedError, url_or_none, xpath_attr, xpath_text, xpath_with_ns, ) -from .commonprotocols import RtmpIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .nexx import ( - NexxIE, - NexxEmbedIE, -) -from .nbc import NBCSportsVPlayerIE -from .ooyala import OoyalaIE -from .rutv import RUTVIE -from .tvc import TVCIE -from .sportbox import SportBoxIE -from .myvi import MyviIE -from .condenast import CondeNastIE -from .udn import UDNEmbedIE -from .senategov import SenateISVPIE -from .svt import SVTIE -from .pornhub import PornHubIE -from .xhamster import XHamsterEmbedIE -from .tnaflix import TNAFlixNetworkEmbedIE -from .drtuber import DrTuberIE -from .redtube import RedTubeIE -from .tube8 import Tube8IE -from .mofosex import MofosexEmbedIE -from .spankwire import SpankwireIE -from .youporn import YouPornIE -from .vimeo import ( - VimeoIE, - VHXEmbedIE, -) -from .dailymotion import DailymotionIE -from .dailymail import DailyMailIE -from .onionstudios import OnionStudiosIE -from .viewlift import ViewLiftEmbedIE -from .mtv import MTVServicesEmbeddedIE -from .pladform import PladformIE -from .videomore import VideomoreIE -from .webcaster import WebcasterFeedIE -from .googledrive import GoogleDriveIE -from .jwplatform import JWPlatformIE -from .digiteka import DigitekaIE -from .arkena import ArkenaIE -from .instagram import InstagramIE -from .threeqsdn import ThreeQSDNIE -from 
.theplatform import ThePlatformIE -from .kaltura import KalturaIE -from .eagleplatform import EaglePlatformIE -from .facebook import FacebookIE -from .soundcloud import SoundcloudEmbedIE -from .tunein import TuneInBaseIE -from .vbox7 import Vbox7IE -from .dbtv import DBTVIE -from .piksel import PikselIE -from .videa import VideaIE -from .twentymin import TwentyMinutenIE -from .ustream import UstreamIE -from .arte import ArteTVEmbedIE -from .videopress import VideoPressIE -from .rutube import RutubeIE -from .glomex import GlomexEmbedIE -from .megatvcom import MegaTVComEmbedIE -from .ant1newsgr import Ant1NewsGrEmbedIE -from .limelight import LimelightBaseIE -from .anvato import AnvatoIE -from .washingtonpost import WashingtonPostIE -from .wistia import WistiaIE -from .mediaset import MediasetIE -from .joj import JojIE -from .megaphone import MegaphoneIE -from .vzaar import VzaarIE -from .channel9 import Channel9IE -from .vshare import VShareIE -from .mediasite import MediasiteIE -from .springboardplatform import SpringboardPlatformIE -from .ted import TedEmbedIE -from .yapfiles import YapFilesIE -from .vice import ViceIE -from .xfileshare import XFileShareIE -from .cloudflarestream import CloudflareStreamIE -from .peertube import PeerTubeIE -from .teachable import TeachableIE -from .indavideo import IndavideoEmbedIE -from .apa import APAIE -from .foxnews import FoxNewsIE -from .viqeo import ViqeoIE -from .expressen import ExpressenIE -from .zype import ZypeIE -from .odnoklassniki import OdnoklassnikiIE -from .vk import VKIE -from .kinja import KinjaEmbedIE -from .gedidigital import GediDigitalIE -from .rcs import RCSEmbedsIE -from .bitchute import BitChuteIE -from .rumble import RumbleEmbedIE -from .arcpublishing import ArcPublishingIE -from .medialaan import MedialaanIE -from .simplecast import SimplecastIE -from .wimtv import WimTVIE -from .tvopengr import TVOpenGrEmbedIE -from .ertgr import ERTWebtvEmbedIE -from .tvp import TVPEmbedIE -from .blogger import BloggerIE -from .mainstreaming import MainStreamingIE -from .gfycat import GfycatIE -from .panopto import PanoptoBaseIE -from .ruutu import RuutuIE class GenericIE(InfoExtractor): @@ -1043,20 +1030,6 @@ class GenericIE(InfoExtractor): 'filesize': 24687186, }, }, - { - 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz', - 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4', - 'info_dict': { - 'id': 'uxjb0lwrcz', - 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1', - 'description': 'a Martin Fowler video from ThoughtWorks', - 'duration': 1715.0, - 'uploader': 'thoughtworks.wistia.com', - 'timestamp': 1401832161, - 'upload_date': '20140603', - }, - }, # Wistia standard embed (async) { 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', @@ -2530,6 +2503,29 @@ class GenericIE(InfoExtractor): 'upload_date': '20220308', }, }, + { + # Multiple Ruutu embeds + 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html', + 'info_dict': { + 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä', + 'id': 'art-2000008762560' + }, + 'playlist_count': 3 + }, + { + # Ruutu embed in hs.fi with a single video + 'url': 'https://www.hs.fi/kotimaa/art-2000008793421.html', + 'md5': 'f8964e65d8fada6e8a562389bf366bb4', + 'info_dict': { + 'id': '4081841', + 'ext': 'mp4', + 'title': 'Puolustusvoimat siirsi panssariajoneuvoja harjoituksiin Niinisaloon 2.5.2022', + 'thumbnail': r're:^https?://.+\.jpg$', + 'duration': 138, + 
'age_limit': 0, + 'upload_date': '20220504', + }, + }, ] def report_following_redirect(self, new_url): @@ -2629,7 +2625,7 @@ class GenericIE(InfoExtractor): entries.append({ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0], - 'title': '%s - %s' % (title, n.tag), + 'title': f'{title} - {n.tag}', 'url': compat_urlparse.urljoin(url, url_n.text), 'duration': float_or_none(n.find('./duration').text), }) @@ -2651,7 +2647,7 @@ class GenericIE(InfoExtractor): for o in range(len(newmagic) - 1, -1, -1): new = '' - l = (o + sum([int(n) for n in license[o:]])) % 32 + l = (o + sum(int(n) for n in license[o:])) % 32 for i in range(0, len(newmagic)): if i == o: @@ -2828,7 +2824,7 @@ class GenericIE(InfoExtractor): try: try: doc = compat_etree_fromstring(webpage) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': self.report_detected('RSS feed') @@ -2863,7 +2859,7 @@ class GenericIE(InfoExtractor): self.report_detected('F4M manifest') self._sort_formats(info_dict['formats']) return info_dict - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: pass # Is it a Camtasia project? @@ -3178,6 +3174,11 @@ class GenericIE(InfoExtractor): if sportbox_urls: return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) + # Look for embedded Spotify player + spotify_urls = SpotifyBaseIE._extract_embed_urls(webpage) + if spotify_urls: + return self.playlist_from_matches(spotify_urls, video_id, video_title) + # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: @@ -3757,9 +3758,9 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches(panopto_urls, video_id, video_title) # Look for Ruutu embeds - ruutu_url = RuutuIE._extract_url(webpage) - if ruutu_url: - return self.url_result(ruutu_url, RuutuIE) + ruutu_urls = RuutuIE._extract_urls(webpage) + if ruutu_urls: + return self.playlist_from_matches(ruutu_urls, video_id, video_title) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') @@ -3773,7 +3774,7 @@ class GenericIE(InfoExtractor): else: for num, entry in enumerate(entries, start=1): entry.update({ - 'id': '%s-%s' % (video_id, num), + 'id': f'{video_id}-{num}', 'title': '%s (%d)' % (video_title, num), }) for entry in entries: @@ -4011,9 +4012,6 @@ class GenericIE(InfoExtractor): # Look also in Refresh HTTP header refresh_header = head_response.headers.get('Refresh') if refresh_header: - # In python 2 response HTTP headers are bytestrings - if sys.version_info < (3, 0) and isinstance(refresh_header, str): - refresh_header = refresh_header.decode('iso-8859-1') found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) @@ -4108,7 +4106,7 @@ class GenericIE(InfoExtractor): entries.append(entry_info_dict) if len(entries) == 1: - return entries[0] + return merge_dicts(entries[0], info_dict) else: for num, e in enumerate(entries, start=1): # 'url' results don't have a title diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 327a4d0b8..9bd6200b6 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( bool_or_none, diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py index 2ad03e2b2..60f06ccd7 
100644 --- a/yt_dlp/extractor/gfycat.py +++ b/yt_dlp/extractor/gfycat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -13,7 +10,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)' + _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { diff --git a/yt_dlp/extractor/giantbomb.py b/yt_dlp/extractor/giantbomb.py index 1920923fc..5d6b208aa 100644 --- a/yt_dlp/extractor/giantbomb.py +++ b/yt_dlp/extractor/giantbomb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py index 5a9992a27..9e835a6da 100644 --- a/yt_dlp/extractor/giga.py +++ b/yt_dlp/extractor/giga.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py index 412178492..c5bc86bb4 100644 --- a/yt_dlp/extractor/gigya.py +++ b/yt_dlp/extractor/gigya.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py index 12af859be..2bffb26dc 100644 --- a/yt_dlp/extractor/glide.py +++ b/yt_dlp/extractor/glide.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index f6aaae1e9..8915ebf48 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib import json diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index d9ef4338f..85ffa4c05 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import urllib.parse diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index f92e16600..07d13d1c3 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE diff --git a/yt_dlp/extractor/godtube.py b/yt_dlp/extractor/godtube.py index 96e68b4d2..697540155 100644 --- a/yt_dlp/extractor/godtube.py +++ b/yt_dlp/extractor/godtube.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index 858bac52c..ddbce2ee8 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -1,4 +1,5 @@ -# coding: utf-8 +import hashlib + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -38,6 +39,15 @@ class GofileIE(InfoExtractor): 'id': 'TMjXd9', }, 'playlist_count': 1, + }, { + 'url': 'https://gofile.io/d/gqOtRf', + 'info_dict': { + 'id': 'gqOtRf', + }, + 'playlist_mincount': 1, + 'params': { + 'videopassword': 'password', + }, }] _TOKEN = None @@ -53,14 +63,22 @@ class GofileIE(InfoExtractor): self._set_cookie('gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): - files 
= self._download_json('https://api.gofile.io/getContent', 'Gofile', note='Getting filelist', query={ + query_params = { 'contentId': file_id, 'token': self._TOKEN, 'websiteToken': 12345, - }) + } + password = self.get_param('videopassword') + if password: + query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest() + files = self._download_json( + 'https://api.gofile.io/getContent', file_id, note='Getting filelist', query=query_params) status = files['status'] - if status != 'ok': + if status == 'error-passwordRequired': + raise ExtractorError( + 'This video is protected by a password, use the --video-password option', expected=True) + elif status != 'ok': raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True) found_files = False diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py index 47a068e74..8416b5aa4 100644 --- a/yt_dlp/extractor/golem.py +++ b/yt_dlp/extractor/golem.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/goodgame.py b/yt_dlp/extractor/goodgame.py new file mode 100644 index 000000000..0866647e6 --- /dev/null +++ b/yt_dlp/extractor/goodgame.py @@ -0,0 +1,58 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + str_or_none, + traverse_obj, +) + + +class GoodGameIE(InfoExtractor): + IE_NAME = 'goodgame:stream' + _VALID_URL = r'https?://goodgame\.ru/channel/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://goodgame.ru/channel/Pomi/#autoplay', + 'info_dict': { + 'id': 'pomi', + 'ext': 'mp4', + 'title': r're:Reynor vs Special \(1/2,bo3\) Wardi Spring EU \- playoff \(финальный день\) \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'channel_id': '1644', + 'channel': 'Pomi', + 'channel_url': 'https://goodgame.ru/channel/Pomi/', + 'description': 'md5:4a87b775ee7b2b57bdccebe285bbe171', + 'thumbnail': r're:^https?://.*\.jpg$', + 'live_status': 'is_live', + 'view_count': int, + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'May not be online', + }] + + def _real_extract(self, url): + channel_name = self._match_id(url) + response = self._download_json(f'https://api2.goodgame.ru/v2/streams/{channel_name}', channel_name) + player_id = response['channel']['gg_player_src'] + + formats, subtitles = [], {} + if response.get('status') == 'Live': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8', + channel_name, 'mp4', live=True) + else: + self.raise_no_formats('User is offline', expected=True, video_id=channel_name) + + self._sort_formats(formats) + return { + 'id': player_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': traverse_obj(response, ('channel', 'title')), + 'channel': channel_name, + 'channel_id': str_or_none(traverse_obj(response, ('channel', 'id'))), + 'channel_url': response.get('url'), + 'description': clean_html(traverse_obj(response, ('channel', 'description'))), + 'thumbnail': traverse_obj(response, ('channel', 'thumb')), + 'is_live': bool(formats), + 'view_count': int_or_none(response.get('viewers')), + 'age_limit': 18 if traverse_obj(response, ('channel', 'adult')) else None, + } diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py index 7b5bf280f..c0905f86a 100644 --- a/yt_dlp/extractor/googledrive.py +++ b/yt_dlp/extractor/googledrive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff 
--git a/yt_dlp/extractor/googlepodcasts.py b/yt_dlp/extractor/googlepodcasts.py index 25631e213..8b2351ba8 100644 --- a/yt_dlp/extractor/googlepodcasts.py +++ b/yt_dlp/extractor/googlepodcasts.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py index 4b8b1bcbb..67ca0e5e0 100644 --- a/yt_dlp/extractor/googlesearch.py +++ b/yt_dlp/extractor/googlesearch.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py index 10cc1aec1..14d6b2187 100644 --- a/yt_dlp/extractor/gopro.py +++ b/yt_dlp/extractor/gopro.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/goshgay.py b/yt_dlp/extractor/goshgay.py index 377981d3e..9a1f32b7e 100644 --- a/yt_dlp/extractor/goshgay.py +++ b/yt_dlp/extractor/goshgay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py index 6aa96106a..112293bef 100644 --- a/yt_dlp/extractor/gotostage.py +++ b/yt_dlp/extractor/gotostage.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/gputechconf.py b/yt_dlp/extractor/gputechconf.py index 73dc62c49..2d13bf491 100644 --- a/yt_dlp/extractor/gputechconf.py +++ b/yt_dlp/extractor/gputechconf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index c9f1dd256..c112c7857 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -1,8 +1,11 @@ -# coding: utf-8 -from __future__ import unicode_literals +import functools from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + OnDemandPagedList, + traverse_obj, + unified_strdate, +) class GronkhIE(InfoExtractor): @@ -44,3 +47,54 @@ class GronkhIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class GronkhFeedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)' + IE_NAME = 'gronkh:feed' + + _TESTS = [{ + 'url': 'https://gronkh.tv/feed', + 'info_dict': { + 'id': 'feed', + }, + 'playlist_count': 16, + }, { + 'url': 'https://gronkh.tv', + 'only_matching': True, + }] + + def _entries(self): + for type_ in ('recent', 'views'): + info = self._download_json( + f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON') + for item in traverse_obj(info, ('discovery', ...)) or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title')) + + def _real_extract(self, url): + return self.playlist_result(self._entries(), 'feed') + + +class GronkhVodsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)' + IE_NAME = 'gronkh:vods' + + _TESTS = [{ + 'url': 'https://gronkh.tv/vods/streams', + 'info_dict': { + 'id': 'vods', + }, + 'playlist_mincount': 150, + }] + _PER_PAGE = 25 + + def _fetch_page(self, page): + items = traverse_obj(self._download_json( + 
'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE}, + note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...)) + for item in items or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title')) + + def _real_extract(self, url): + entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE) + return self.playlist_result(entries, 'vods') diff --git a/yt_dlp/extractor/groupon.py b/yt_dlp/extractor/groupon.py index a6da90931..362d3ff83 100644 --- a/yt_dlp/extractor/groupon.py +++ b/yt_dlp/extractor/groupon.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index 68df748f5..f54628665 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index a3d6a055f..9aa1325af 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/heise.py b/yt_dlp/extractor/heise.py index cbe564a3c..84e5d3023 100644 --- a/yt_dlp/extractor/heise.py +++ b/yt_dlp/extractor/heise.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .kaltura import KalturaIE from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/hellporno.py b/yt_dlp/extractor/hellporno.py index 92d32cdcc..fd0327228 100644 --- a/yt_dlp/extractor/hellporno.py +++ b/yt_dlp/extractor/hellporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/helsinki.py b/yt_dlp/extractor/helsinki.py index 575fb332a..b7c826055 100644 --- a/yt_dlp/extractor/helsinki.py +++ b/yt_dlp/extractor/helsinki.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/hentaistigma.py b/yt_dlp/extractor/hentaistigma.py index 86a93de4d..ca5ffc2ae 100644 --- a/yt_dlp/extractor/hentaistigma.py +++ b/yt_dlp/extractor/hentaistigma.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/hgtv.py b/yt_dlp/extractor/hgtv.py index a4f332565..c40017db1 100644 --- a/yt_dlp/extractor/hgtv.py +++ b/yt_dlp/extractor/hgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 46d7d62ab..a6a71d630 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,4 +1,3 @@ -# coding: utf-8 import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/historicfilms.py b/yt_dlp/extractor/historicfilms.py index 56343e98f..c428feede 100644 --- a/yt_dlp/extractor/historicfilms.py +++ b/yt_dlp/extractor/historicfilms.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_duration diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py index 
0470d0a99..a7e4424b6 100644 --- a/yt_dlp/extractor/hitbox.py +++ b/yt_dlp/extractor/hitbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/hitrecord.py b/yt_dlp/extractor/hitrecord.py index fd5dc2935..902af44fa 100644 --- a/yt_dlp/extractor/hitrecord.py +++ b/yt_dlp/extractor/hitrecord.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index 1f3502b90..4c616d1dd 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/hotnewhiphop.py b/yt_dlp/extractor/hotnewhiphop.py index 4703e1894..f8570cb86 100644 --- a/yt_dlp/extractor/hotnewhiphop.py +++ b/yt_dlp/extractor/hotnewhiphop.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index a0ce1f10a..d9223a416 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import hmac import re @@ -17,6 +14,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + join_nonempty, str_or_none, try_get, url_or_none, @@ -24,6 +22,8 @@ from ..utils import ( class HotStarBaseIE(InfoExtractor): + _BASE_URL = 'https://www.hotstar.com' + _API_URL = 'https://api.hotstar.com' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' def _call_api_impl(self, path, video_id, query, st=None, cookies=None): @@ -36,7 +36,7 @@ class HotStarBaseIE(InfoExtractor): token = cookies.get('userUP').value else: token = self._download_json( - 'https://api.hotstar.com/um/v3/users', + f'{self._API_URL}/um/v3/users', video_id, note='Downloading token', data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'), headers={ @@ -46,12 +46,13 @@ class HotStarBaseIE(InfoExtractor): })['user_identity'] response = self._download_json( - 'https://api.hotstar.com/' + path, video_id, headers={ + f'{self._API_URL}/{path}', video_id, query=query, + headers={ 'hotstarauth': auth, 'x-hs-appversion': '6.72.2', 'x-hs-platform': 'web', 'x-hs-usertoken': token, - }, query=query) + }) if response['message'] != "Playback URL's fetched successfully": raise ExtractorError( @@ -59,17 +60,19 @@ class HotStarBaseIE(InfoExtractor): return response['data'] def _call_api(self, path, video_id, query_name='contentId'): - return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={ - query_name: video_id, - 'tas': 10000, - }, headers={ - 'x-country-code': 'IN', - 'x-platform-code': 'PCTV', - }) + return self._download_json( + f'{self._API_URL}/{path}', video_id=video_id, + query={ + query_name: video_id, + 'tas': 10000, + }, headers={ + 'x-country-code': 'IN', + 'x-platform-code': 'PCTV', + }) def _call_api_v2(self, path, video_id, st=None, cookies=None): return self._call_api_impl( - '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={ + f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ 
'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()), 'os-name': 'Windows', @@ -80,24 +83,15 @@ class HotStarBaseIE(InfoExtractor): class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' _VALID_URL = r'''(?x) - (?: - hotstar\:| - https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) - ) - (?: - (?P<type>movies|sports|episode|(?P<tv>tv)) - (?: - \:| - /[^/?#]+/ - (?(tv) - (?:[^/?#]+/){2}| - (?:[^/?#]+/)* - ) - )| - [^/?#]+/ - )? - (?P<id>\d{10}) - ''' + https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) + (?: + (?P<type>movies|sports|episode|(?P<tv>tv))/ + (?(tv)(?:[^/?#]+/){2}|[^?#]*) + )? + [^/?#]+/ + (?P<id>\d{10}) + ''' + _TESTS = [{ 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'info_dict': { @@ -108,39 +102,9 @@ class HotStarIE(HotStarBaseIE): 'timestamp': 1447248600, 'upload_date': '20151111', 'duration': 381, + 'episode': 'Can You Not Spread Rumours?', }, }, { - 'url': 'hotstar:1000076273', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', - 'info_dict': { - 'id': '1000057157', - 'ext': 'mp4', - 'title': 'Radha Gopalam', - 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22', - 'timestamp': 1140805800, - 'upload_date': '20060224', - 'duration': 9182, - }, - }, { - 'url': 'hotstar:movies:1000057157', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956', - 'only_matching': True, - }, { - # contentData - 'url': 'hotstar:sports:1260065956', - 'only_matching': True, - }, { - # contentData - 'url': 'hotstar:sports:1260066104', - 'only_matching': True, - }, { 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', 'info_dict': { 'id': '1000234847', @@ -158,12 +122,19 @@ class HotStarIE(HotStarBaseIE): 'season_id': 6771, 'episode': 'Janhvi Targets Suman', 'episode_number': 8, - }, + } }, { - 'url': 'hotstar:episode:1000234847', + 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', + 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104', + 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956', 'only_matching': True, }] _GEO_BYPASS = False + _TYPE = { 'movies': 'movie', 'sports': 'match', @@ -172,41 +143,53 @@ class HotStarIE(HotStarBaseIE): None: 'content', } + _IGNORE_MAP = { + 'res': 'resolution', + 'vcodec': 'video_codec', + 'dr': 'dynamic_range', + } + + @classmethod + def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): + assert None in (video_type, root) + if not root: + root = join_nonempty(cls._BASE_URL, video_type, delim='/') + return f'{root}/{slug}/{video_id}' + def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - video_type = mobj.group('type') - cookies = self._get_cookies(url) + video_id, video_type = self._match_valid_url(url).group('id', 'type') video_type = self._TYPE.get(video_type, video_type) - video_data = self._call_api(f'o/v1/{video_type}/detail', 
video_id)['body']['results']['item'] - title = video_data['title'] + cookies = self._get_cookies(url) # Cookies before any request + video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item'] if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): self.report_drm(video_id) - headers = {'Referer': 'https://www.hotstar.com/in'} - formats = [] - subs = {} + # See https://github.com/yt-dlp/yt-dlp/issues/396 + st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date') + geo_restricted = False - _, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id) - # Required to fix https://github.com/yt-dlp/yt-dlp/issues/396 - st = urlh.headers.get('x-origin-date') + formats, subs = [], {} + headers = {'Referer': f'{self._BASE_URL}/in'} + # change to v2 in the future playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets'] for playback_set in playback_sets: if not isinstance(playback_set, dict): continue - dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') + tags = str_or_none(playback_set.get('tagsCombination')) or '' + if any(f'{prefix}:{ignore}' in tags + for key, prefix in self._IGNORE_MAP.items() + for ignore in self._configuration_arg(key)): + continue + format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue - format_url = re.sub( - r'(?<=//staragvod)(\d)', r'web\1', format_url) - tags = str_or_none(playback_set.get('tagsCombination')) or '' - ingored_res, ignored_vcodec, ignored_dr = self._configuration_arg('res'), self._configuration_arg('vcodec'), self._configuration_arg('dr') - if any(f'resolution:{ig_res}' in tags for ig_res in ingored_res) or any(f'video_codec:{ig_vc}' in tags for ig_vc in ignored_vcodec) or any(f'dynamic_range:{ig_dr}' in tags for ig_dr in ignored_dr): - continue + format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) + dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') ext = determine_ext(format_url) + current_formats, current_subs = [], {} try: if 'package:hls' in tags or ext == 'm3u8': @@ -218,8 +201,7 @@ class HotStarIE(HotStarBaseIE): current_formats, current_subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) elif ext == 'f4m': - # produce broken files - pass + pass # XXX: produce broken files else: current_formats = [{ 'url': format_url, @@ -230,6 +212,7 @@ class HotStarIE(HotStarBaseIE): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: geo_restricted = True continue + if tags and 'encryption:plain' not in tags: for f in current_formats: f['has_drm'] = True @@ -238,18 +221,19 @@ class HotStarIE(HotStarBaseIE): for f in current_formats: if not f.get('langauge'): f['language'] = lang + formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) + if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) self._sort_formats(formats) - for f in formats: f.setdefault('http_headers', {}).update(headers) return { 'id': video_id, - 'title': title, + 'title': video_data.get('title'), 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), @@ -261,14 +245,48 @@ class HotStarIE(HotStarBaseIE): 'season': 
video_data.get('seasonName'), 'season_number': int_or_none(video_data.get('seasonNo')), 'season_id': video_data.get('seasonId'), - 'episode': title, + 'episode': video_data.get('title'), 'episode_number': int_or_none(video_data.get('episodeNo')), - 'http_headers': { - 'Referer': 'https://www.hotstar.com/in', - } } +class HotStarPrefixIE(InfoExtractor): + """ The "hotstar:" prefix is no longer in use, but this is kept for backward compatibility """ + IE_DESC = False + _VALID_URL = r'hotstar:(?:(?P<type>\w+):)?(?P<id>\d+)$' + _TESTS = [{ + 'url': 'hotstar:1000076273', + 'only_matching': True, + }, { + 'url': 'hotstar:movies:1000057157', + 'info_dict': { + 'id': '1000057157', + 'ext': 'mp4', + 'title': 'Radha Gopalam', + 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22', + 'timestamp': 1140805800, + 'upload_date': '20060224', + 'duration': 9182, + 'episode': 'Radha Gopalam', + }, + }, { + 'url': 'hotstar:episode:1000234847', + 'only_matching': True, + }, { + # contentData + 'url': 'hotstar:sports:1260065956', + 'only_matching': True, + }, { + # contentData + 'url': 'hotstar:sports:1260066104', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, video_type = self._match_valid_url(url).group('id', 'type') + return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id) + + class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' @@ -288,11 +306,8 @@ class HotStarPlaylistIE(HotStarBaseIE): collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results'] entries = [ - self.url_result( - 'https://www.hotstar.com/%s' % video['contentId'], - ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in collection['assets']['items'] - if video.get('contentId')] + self.url_result(HotStarIE._video_url(video['contentId']), HotStarIE, video['contentId']) + for video in collection['assets']['items'] if video.get('contentId')] return self.playlist_result(entries, playlist_id) @@ -326,16 +341,13 @@ class HotStarSeriesIE(HotStarBaseIE): 'x-country-code': 'IN', 'x-platform-code': 'PCTV', } - detail_json = self._download_json('https://api.hotstar.com/o/v1/show/detail?contentId=' + series_id, - video_id=series_id, headers=headers) - id = compat_str(try_get(detail_json, lambda x: x['body']['results']['item']['id'], int)) - item_json = self._download_json('https://api.hotstar.com/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid=' + id, - video_id=series_id, headers=headers) - entries = [ - self.url_result( - '%s/ignoreme/%d' % (url, video['contentId']), - ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in item_json['body']['results']['items'] - if video.get('contentId')] + detail_json = self._download_json( + f'{self._API_URL}/o/v1/show/detail?contentId={series_id}', series_id, headers=headers) + id = try_get(detail_json, lambda x: x['body']['results']['item']['id'], int) + item_json = self._download_json( + f'{self._API_URL}/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid={id}', series_id, headers=headers) - return self.playlist_result(entries, series_id) + return self.playlist_result([ + self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId']) + for video in item_json['body']['results']['items'] if video.get('contentId') + ], series_id) diff --git a/yt_dlp/extractor/howcast.py b/yt_dlp/extractor/howcast.py index 7e36b85ad..59cf80f1a 100644 --- a/yt_dlp/extractor/howcast.py +++ 
b/yt_dlp/extractor/howcast.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_iso8601 diff --git a/yt_dlp/extractor/howstuffworks.py b/yt_dlp/extractor/howstuffworks.py index cf90ab3c9..c49c0899e 100644 --- a/yt_dlp/extractor/howstuffworks.py +++ b/yt_dlp/extractor/howstuffworks.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( find_xpath_attr, diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py index e39ded254..6f7ed9b4b 100644 --- a/yt_dlp/extractor/hrfensehen.py +++ b/yt_dlp/extractor/hrfensehen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index 36d600773..773ae0c9a 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py index 9144ff8dc..9faf46a5d 100644 --- a/yt_dlp/extractor/hse.py +++ b/yt_dlp/extractor/hse.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/huajiao.py b/yt_dlp/extractor/huajiao.py index 4ca275dda..c498fa330 100644 --- a/yt_dlp/extractor/huajiao.py +++ b/yt_dlp/extractor/huajiao.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py index 54385bafa..7286dbcd7 100644 --- a/yt_dlp/extractor/huffpost.py +++ b/yt_dlp/extractor/huffpost.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py index 821b16e5d..938a24296 100644 --- a/yt_dlp/extractor/hungama.py +++ b/yt_dlp/extractor/hungama.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index 4e96f22fa..9dd5e41b3 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random diff --git a/yt_dlp/extractor/hypem.py b/yt_dlp/extractor/hypem.py index 9ca28d632..54db7b3eb 100644 --- a/yt_dlp/extractor/hypem.py +++ b/yt_dlp/extractor/hypem.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/icareus.py b/yt_dlp/extractor/icareus.py new file mode 100644 index 000000000..dc7a2f0ba --- /dev/null +++ b/yt_dlp/extractor/icareus.py @@ -0,0 +1,180 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + determine_ext, + get_element_by_class, + int_or_none, + merge_dicts, + parse_bitrate, + parse_resolution, + remove_end, + str_or_none, + url_or_none, + urlencode_postdata, +) + + +class IcareusIE(InfoExtractor): + _DOMAINS = '|'.join(map(re.escape, ( + 'asahitv.fi', + 'helsinkikanava.fi', + 'hyvinvointitv.fi', + 'inez.fi', + 'permanto.fi', + 'suite.icareus.com', + 'videos.minifiddlers.org', + ))) + _VALID_URL = 
rf'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894', + 'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a', + 'info_dict': { + 'id': '68021894', + 'ext': 'mp4', + 'title': 'Perheiden parhaaksi', + 'description': 'md5:295785ea408e5ac00708766465cc1325', + 'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501', + 'upload_date': '20200924', + 'timestamp': 1600938300, + }, + }, { # Recorded livestream + 'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489', + 'md5': '014327e69dfa7b949fcc861f6d162d6d', + 'info_dict': { + 'id': '76258304', + 'ext': 'mp4', + 'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020', + 'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c', + 'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630', + 'upload_date': '20201124', + 'timestamp': 1606206600, + }, + }, { # Non-m3u8 stream + 'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389', + 'md5': '72fc04ee971bbedc44405cdf16c990b6', + 'info_dict': { + 'id': '47567389', + 'ext': 'mp4', + 'title': 'Omatoiminen harjoittelu - Laukominen', + 'description': '', + 'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162', + 'upload_date': '20200319', + 'timestamp': 1584658080, + }, + }, { + 'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818', + 'only_matching': True + }, { + 'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730', + 'only_matching': True + }, { + 'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822', + 'only_matching': True + }, { + 'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515', + 'only_matching': True + }, { + 'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759', + 'only_matching': True + }] + + def _real_extract(self, url): + base_url, temp_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, temp_id) + + video_id = self._search_regex(r"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage, 'video_id') + organization_id = self._search_regex(r"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage, 'organization_id') + + assets = self._download_json( + self._search_regex(r'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage, 'api_base'), + video_id, data=urlencode_postdata({ + 'version': '03', + 'action': 'getAssetPlaybackUrls', + 'organizationId': organization_id, + 'assetId': video_id, + 'token': self._search_regex(r"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage, 'icareus_token'), + })) + + subtitles = { + remove_end(sdesc.split(' ')[0], ':'): [{'url': url_or_none(surl)}] + for _, sdesc, surl in assets.get('subtitles') or [] + } + + formats = [{ + 'format': item.get('name'), + 'format_id': 'audio', + 'vcodec': 'none', + 'url': url_or_none(item['url']), + 'tbr': int_or_none(self._search_regex( + r'\((\d+)\s*k\)', item.get('name') or '', 'audio bitrate', default=None)), + } for item in assets.get('audio_urls') or [] if url_or_none(item.get('url'))] + + for item in assets.get('urls') or []: + video_url = url_or_none(item.get('url')) + if video_url is None: + continue + ext = determine_ext(video_url) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video_url, 
video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + fmt = item.get('name') + formats.append({ + 'url': video_url, + 'format': fmt, + 'tbr': parse_bitrate(fmt), + 'format_id': str_or_none(item.get('id')), + **parse_resolution(fmt), + }) + + info, token, live_title = self._search_json_ld(webpage, video_id, default={}), None, None + if not info: + token = self._search_regex( + r'data\s*:\s*{action:"getAsset".*?token:\'([a-f0-9]+)\'}', webpage, 'token', default=None) + if not token: + live_title = get_element_by_class('unpublished-info-item future-event-title', webpage) + + if token: + metadata = self._download_json( + f'{base_url}/icareus-suite-api-portlet/publishing', + video_id, fatal=False, data=urlencode_postdata({ + 'version': '03', + 'action': 'getAsset', + 'organizationId': organization_id, + 'assetId': video_id, + 'languageId': 'en_US', + 'userId': '0', + 'token': token, + })) or {} + info = { + 'title': metadata.get('name'), + 'description': metadata.get('description'), + 'timestamp': int_or_none(metadata.get('date'), scale=1000), + 'duration': int_or_none(metadata.get('duration')), + 'thumbnail': url_or_none(metadata.get('thumbnailMedium')), + } + elif live_title: # Recorded livestream + info = { + 'title': live_title, + 'description': get_element_by_class('unpublished-info-item future-event-description', webpage), + 'timestamp': int_or_none(self._search_regex( + r'var startEvent\s*=\s*(\d+);', webpage, 'uploadDate', fatal=False), scale=1000), + } + + thumbnails = info.get('thumbnails') or [{ + 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')) + }] + + self._sort_formats(formats) + return merge_dicts({ + 'id': video_id, + 'title': None, + 'formats': formats, + 'subtitles': subtitles, + 'description': clean_html(info.get('description')), + 'thumbnails': thumbnails if thumbnails[0]['url'] else None, + }, info) diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index cb39f821c..ffff36cc1 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate from ..compat import compat_str diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index c826eb3ba..bfb1e9d64 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/iheart.py b/yt_dlp/extractor/iheart.py index b54c05eeb..2c6a5b6a1 100644 --- a/yt_dlp/extractor/iheart.py +++ b/yt_dlp/extractor/iheart.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py index 96cee2e2f..74cab7dc1 100644 --- a/yt_dlp/extractor/imdb.py +++ b/yt_dlp/extractor/imdb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import json import re diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index ce7b21ab2..5b8bfda96 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py 
index dfa473752..a3bb47615 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py index b3b2683cb..56038f1ca 100644 --- a/yt_dlp/extractor/ina.py +++ b/yt_dlp/extractor/ina.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/inc.py b/yt_dlp/extractor/inc.py index d5b258a0f..9b3fe9ac1 100644 --- a/yt_dlp/extractor/inc.py +++ b/yt_dlp/extractor/inc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .kaltura import KalturaIE diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py index 4c16243ec..fb041a182 100644 --- a/yt_dlp/extractor/indavideo.py +++ b/yt_dlp/extractor/indavideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 347cc5154..6b31701eb 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,15 +1,13 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from ..compat import ( compat_b64decode, compat_urllib_parse_unquote, compat_urlparse, ) from ..utils import ( + ExtractorError, determine_ext, update_url_query, + traverse_obj, ) from .bokecc import BokeCCBaseIE @@ -38,6 +36,7 @@ class InfoQIE(BokeCCBaseIE): 'ext': 'flv', 'description': 'md5:308d981fb28fa42f49f9568322c683ff', }, + 'skip': 'Sorry, the page you visited does not exist', }, { 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy', 'md5': '0e34642d4d9ef44bf86f66f6399672db', @@ -90,8 +89,10 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_http_audio(self, webpage, video_id): - fields = self._form_hidden_inputs('mp3Form', webpage) - http_audio_url = fields.get('filename') + try: + http_audio_url = traverse_obj(self._form_hidden_inputs('mp3Form', webpage), 'filename') + except ExtractorError: + http_audio_url = None if not http_audio_url: return [] diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 970f2c8ab..05000e2fb 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import itertools import hashlib import json diff --git a/yt_dlp/extractor/internazionale.py b/yt_dlp/extractor/internazionale.py index 45e2af690..c8f70785f 100644 --- a/yt_dlp/extractor/internazionale.py +++ b/yt_dlp/extractor/internazionale.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_timestamp diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py index 880918cd7..6a8e30d73 100644 --- a/yt_dlp/extractor/internetvideoarchive.py +++ b/yt_dlp/extractor/internetvideoarchive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 1a2038453..5e0b523dc 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index d07b39d48..a0298f1a1 100644 --- a/yt_dlp/extractor/iqiyi.py +++ 
b/yt_dlp/extractor/iqiyi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools import re diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py index 64cb4e69a..4ac12603a 100644 --- a/yt_dlp/extractor/itprotv.py +++ b/yt_dlp/extractor/itprotv.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index f1591403f..26d77a469 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 098ab6665..f469a6adf 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py index 44b220846..538a961b7 100644 --- a/yt_dlp/extractor/ivideon.py +++ b/yt_dlp/extractor/ivideon.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlencode, diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index c0e01e352..4b88da35f 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -1,21 +1,28 @@ -# coding: utf-8 -from __future__ import unicode_literals import re +import urllib from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse from ..utils import ( int_or_none, mimetype2ext, remove_end, url_or_none, + urljoin, unified_strdate, strip_or_none, ) -class IwaraIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)' +class IwaraBaseIE(InfoExtractor): + _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)' + + def _extract_playlist(self, base_url, webpage): + for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage): + yield self.url_result(urljoin(base_url, path)) + + +class IwaraIE(IwaraBaseIE): + _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', # md5 is unstable @@ -60,7 +67,7 @@ class IwaraIE(InfoExtractor): webpage, urlh = self._download_webpage_handle(url, video_id) - hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname + hostname = urllib.parse.urlparse(urlh.geturl()).hostname # ecchi is 'sexy' in Japanese age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 @@ -120,3 +127,81 @@ class IwaraIE(InfoExtractor): 'upload_date': upload_date, 'description': description, } + + +class IwaraPlaylistIE(IwaraBaseIE): + _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)' + IE_NAME = 'iwara:playlist' + + _TESTS = [{ + 'url': 'https://ecchi.iwara.tv/playlist/best-enf', + 'info_dict': { + 'title': 'Best enf', + 'uploader': 'Jared98112', + 'id': 'best-enf', + }, + 'playlist_mincount': 1097, + }, { + # urlencoded + 'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2', + 'info_dict': { + 'id': 'プレイリスト-2', + 'title': 'プレイリスト', + 'uploader': 'mainyu', + }, + 'playlist_mincount': 91, + }] + + def _real_extract(self, url): + playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url') + playlist_id = urllib.parse.unquote(playlist_id) + webpage = self._download_webpage(url, playlist_id) + + return 
{ + '_type': 'playlist', + 'id': playlist_id, + 'title': self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False), + 'uploader': self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False), + 'entries': self._extract_playlist(base_url, webpage), + } + + +class IwaraUserIE(IwaraBaseIE): + _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)' + IE_NAME = 'iwara:user' + + _TESTS = [{ + 'url': 'https://ecchi.iwara.tv/users/CuteMMD', + 'info_dict': { + 'id': 'CuteMMD', + }, + 'playlist_mincount': 198, + }, { + # urlencoded + 'url': 'https://ecchi.iwara.tv/users/%E5%92%95%E5%98%BF%E5%98%BF', + 'info_dict': { + 'id': '咕嘿嘿', + }, + 'playlist_mincount': 141, + }] + + def _entries(self, playlist_id, base_url, webpage): + yield from self._extract_playlist(base_url, webpage) + + page_urls = re.findall( + r'class="pager-item"[^>]*>\s*<a[^<]+href="([^"]+)', webpage) + + for n, path in enumerate(page_urls, 2): + yield from self._extract_playlist( + base_url, self._download_webpage( + urljoin(base_url, path), playlist_id, note=f'Downloading playlist page {n}')) + + def _real_extract(self, url): + playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url') + playlist_id = urllib.parse.unquote(playlist_id) + + webpage = self._download_webpage( + f'{base_url}/users/{playlist_id}/videos', playlist_id) + + return self.playlist_result( + self._entries(playlist_id, base_url, webpage), playlist_id) diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py index f8fca6c8f..6520ecf6d 100644 --- a/yt_dlp/extractor/izlesene.py +++ b/yt_dlp/extractor/izlesene.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py index b294aee70..6840654cc 100644 --- a/yt_dlp/extractor/jable.py +++ b/yt_dlp/extractor/jable.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index 755d9703b..d960ee51c 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random @@ -31,10 +28,11 @@ class JamendoIE(InfoExtractor): 'ext': 'flac', # 'title': 'Maya Filipič - Stories from Emona I', 'title': 'Stories from Emona I', - # 'artist': 'Maya Filipič', + 'artist': 'Maya Filipič', + 'album': 'Between two worlds', 'track': 'Stories from Emona I', 'duration': 210, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219', 'timestamp': 1217438117, 'upload_date': '20080730', 'license': 'by-nc-nd', @@ -48,11 +46,11 @@ class JamendoIE(InfoExtractor): 'only_matching': True, }] - def _call_api(self, resource, resource_id): + def _call_api(self, resource, resource_id, fatal=True): path = '/api/%ss' % resource rand = compat_str(random.random()) return self._download_json( - 'https://www.jamendo.com' + path, resource_id, query={ + 'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={ 'id[]': resource_id, }, headers={ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand) @@ -74,6 +72,8 @@ class JamendoIE(InfoExtractor): # if artist_name: # title = '%s - %s' % (artist_name, title) # album = get_model('album') + artist = self._call_api("artist", 
track.get('artistId'), fatal=False) + album = self._call_api("album", track.get('albumId'), fatal=False) formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -121,9 +121,9 @@ class JamendoIE(InfoExtractor): 'title': title, 'description': track.get('description'), 'duration': int_or_none(track.get('duration')), - # 'artist': artist_name, + 'artist': artist.get('name'), 'track': track_name, - # 'album': album.get('name'), + 'album': album.get('name'), 'formats': formats, 'license': '-'.join(license) if license else None, 'timestamp': int_or_none(track.get('dateCreated')), @@ -148,22 +148,38 @@ class JamendoAlbumIE(JamendoIE): 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Shearer - Warmachine', + 'title': 'Warmachine', 'artist': 'Shearer', 'track': 'Warmachine', 'timestamp': 1368089771, 'upload_date': '20130509', + 'view_count': int, + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333', + 'duration': 190, + 'license': 'by', + 'album': 'Duck On Cover', + 'average_rating': 4, + 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'], + 'like_count': int, } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Shearer - Without Your Ghost', + 'title': 'Without Your Ghost', 'artist': 'Shearer', 'track': 'Without Your Ghost', 'timestamp': 1368089771, 'upload_date': '20130509', + 'duration': 192, + 'tags': ['rock', 'drums', 'bass', 'world', 'punk'], + 'album': 'Duck On Cover', + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330', + 'view_count': int, + 'average_rating': 4, + 'license': 'by', + 'like_count': int, } }], 'params': { diff --git a/yt_dlp/extractor/jeuxvideo.py b/yt_dlp/extractor/jeuxvideo.py index 77c0f520c..56ea15cf9 100644 --- a/yt_dlp/extractor/jeuxvideo.py +++ b/yt_dlp/extractor/jeuxvideo.py @@ -1,8 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py index 7350f537c..a01411be1 100644 --- a/yt_dlp/extractor/joj.py +++ b/yt_dlp/extractor/joj.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py index 4b7dfc526..245fe73d4 100644 --- a/yt_dlp/extractor/jove.py +++ b/yt_dlp/extractor/jove.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py index 5aa508bf9..8dbbb2926 100644 --- a/yt_dlp/extractor/jwplatform.py +++ b/yt_dlp/extractor/jwplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 483ab7128..a5014d931 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -109,6 +105,7 @@ class KakaoIE(InfoExtractor): resp = self._parse_json(e.cause.read().decode(), video_id) if resp.get('code') == 'GeoBlocked': self.raise_geo_restricted() + raise fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url')) if not fmt_url: diff --git a/yt_dlp/extractor/kaltura.py 
b/yt_dlp/extractor/kaltura.py index f6dfc9caa..afad279bd 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -1,8 +1,6 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re import base64 +import json +import re from .common import InfoExtractor from ..compat import ( @@ -16,6 +14,7 @@ from ..utils import ( int_or_none, unsmuggle_url, smuggle_url, + traverse_obj, ) @@ -36,7 +35,7 @@ class KalturaIE(InfoExtractor): ) ''' _SERVICE_URL = 'http://cdnapi.kaltura.com' - _SERVICE_BASE = '/api_v3/index.php' + _SERVICE_BASE = '/api_v3/service/multirequest' # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php _CAPTION_TYPES = { 1: 'srt', @@ -172,30 +171,35 @@ class KalturaIE(InfoExtractor): def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] - if len(actions) > 1: - for i, a in enumerate(actions[1:], start=1): - for k, v in a.items(): - params['%d:%s' % (i, k)] = v + params.update({i: a for i, a in enumerate(actions[1:], start=1)}) data = self._download_json( (service_url or self._SERVICE_URL) + self._SERVICE_BASE, - video_id, query=params, *args, **kwargs) + video_id, data=json.dumps(params).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + 'Accept-Encoding': 'gzip, deflate, br', + }, *args, **kwargs) + + for idx, status in enumerate(data): + if not isinstance(status, dict): + continue + if status.get('objectType') == 'KalturaAPIException': + raise ExtractorError( + '%s said: %s (%d)' % (self.IE_NAME, status['message'], idx)) - status = data if len(actions) == 1 else data[0] - if status.get('objectType') == 'KalturaAPIException': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, status['message'])) + data[1] = traverse_obj(data, (1, 'objects', 0)) return data def _get_video_info(self, video_id, partner_id, service_url=None): actions = [ { - 'action': 'null', - 'apiVersion': '3.1.5', - 'clientTag': 'kdp:v3.8.5', + 'apiVersion': '3.3.0', + 'clientTag': 'html5:v3.1.0', 'format': 1, # JSON, 2 = XML, 3 = PHP - 'service': 'multirequest', + 'ks': '', + 'partnerId': partner_id, }, { 'expiry': 86400, @@ -204,12 +208,14 @@ class KalturaIE(InfoExtractor): 'widgetId': '_%s' % partner_id, }, { - 'action': 'get', - 'entryId': video_id, + 'action': 'list', + 'filter': {'redirectFromEntryId': video_id}, 'service': 'baseentry', 'ks': '{1:result:ks}', - 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', - 'responseProfile:type': 1, + 'responseProfile': { + 'type': 1, + 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', + }, }, { 'action': 'getbyentryid', diff --git a/yt_dlp/extractor/kanalplay.py b/yt_dlp/extractor/kanalplay.py deleted file mode 100644 index 5e24f7e21..000000000 --- a/yt_dlp/extractor/kanalplay.py +++ /dev/null @@ -1,96 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - srt_subtitles_timecode, -) - - -class KanalPlayIE(InfoExtractor): - IE_DESC = 'Kanal 5/9/11 Play' - _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', - 'info_dict': { - 'id': '3270012277', - 'ext': 'flv', - 'title': 'Saknar både dusch och avlopp', - 'description': 'md5:6023a95832a06059832ae93bc3c7efb7', - 'duration': 2636.36, - 
}, - 'params': { - # rtmp download - 'skip_download': True, - } - }, { - 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', - 'only_matching': True, - }, { - 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199', - 'only_matching': True, - }] - - def _fix_subtitles(self, subs): - return '\r\n\r\n'.join( - '%s\r\n%s --> %s\r\n%s' - % ( - num, - srt_subtitles_timecode(item['startMillis'] / 1000.0), - srt_subtitles_timecode(item['endMillis'] / 1000.0), - item['text'], - ) for num, item in enumerate(subs, 1)) - - def _get_subtitles(self, channel_id, video_id): - subs = self._download_json( - 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), - video_id, 'Downloading subtitles JSON', fatal=False) - return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - channel_id = mobj.group('channel_id') - - video = self._download_json( - 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id), - video_id) - - reasons_for_no_streams = video.get('reasonsForNoStreams') - if reasons_for_no_streams: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)), - expected=True) - - title = video['title'] - description = video.get('description') - duration = float_or_none(video.get('length'), 1000) - thumbnail = video.get('posterUrl') - - stream_base_url = video['streamBaseUrl'] - - formats = [{ - 'url': stream_base_url, - 'play_path': stream['source'], - 'ext': 'flv', - 'tbr': float_or_none(stream.get('bitrate'), 1000), - 'rtmp_real_time': True, - } for stream in video['streams']] - self._sort_formats(formats) - - subtitles = {} - if video.get('hasSubtitle'): - subtitles = self.extract_subtitles(channel_id, video_id) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/karaoketv.py b/yt_dlp/extractor/karaoketv.py index bfccf89b0..381dc00ad 100644 --- a/yt_dlp/extractor/karaoketv.py +++ b/yt_dlp/extractor/karaoketv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/karrierevideos.py b/yt_dlp/extractor/karrierevideos.py index 7b291e0a0..28d4841aa 100644 --- a/yt_dlp/extractor/karrierevideos.py +++ b/yt_dlp/extractor/karrierevideos.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py index 06dbcbb40..79f9c7fa7 100644 --- a/yt_dlp/extractor/keezmovies.py +++ b/yt_dlp/extractor/keezmovies.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py index 20c26cf48..dea056c12 100644 --- a/yt_dlp/extractor/kelbyone.py +++ b/yt_dlp/extractor/kelbyone.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/ketnet.py b/yt_dlp/extractor/ketnet.py index e0599d02f..ab6276727 100644 --- a/yt_dlp/extractor/ketnet.py +++ b/yt_dlp/extractor/ketnet.py @@ -1,5 +1,3 @@ -from __future__ import 
unicode_literals - from .canvas import CanvasIE from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 87e520378..5333036a8 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor @@ -27,16 +25,21 @@ class KhanAcademyBaseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - component_props = self._parse_json(self._download_json( - 'https://www.khanacademy.org/api/internal/graphql', + content = self._download_json( + 'https://www.khanacademy.org/api/internal/graphql/FetchContentData', display_id, query={ - 'hash': 1604303425, + 'fastly_cacheable': 'persist_until_publish', + 'hash': '4134764944', + 'lang': 'en', 'variables': json.dumps({ 'path': display_id, - 'queryParams': '', + 'queryParams': 'lang=en', + 'isModal': False, + 'followRedirects': True, + 'countryCode': 'US', }), - })['data']['contentJson'], display_id)['componentProps'] - return self._parse_component_props(component_props) + })['data']['contentJson'] + return self._parse_component_props(self._parse_json(content, display_id)['componentProps']) class KhanAcademyIE(KhanAcademyBaseIE): diff --git a/yt_dlp/extractor/kickstarter.py b/yt_dlp/extractor/kickstarter.py index d4da8f484..c0d851d96 100644 --- a/yt_dlp/extractor/kickstarter.py +++ b/yt_dlp/extractor/kickstarter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import smuggle_url diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py index 1be8b4809..c00abfbc1 100644 --- a/yt_dlp/extractor/kinja.py +++ b/yt_dlp/extractor/kinja.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/kinopoisk.py b/yt_dlp/extractor/kinopoisk.py index cdbb642e2..84a2489a3 100644 --- a/yt_dlp/extractor/kinopoisk.py +++ b/yt_dlp/extractor/kinopoisk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/konserthusetplay.py b/yt_dlp/extractor/konserthusetplay.py index dd42bb2f2..1e177c363 100644 --- a/yt_dlp/extractor/konserthusetplay.py +++ b/yt_dlp/extractor/konserthusetplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py index 088db1cb0..892d355ba 100644 --- a/yt_dlp/extractor/koo.py +++ b/yt_dlp/extractor/koo.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/krasview.py b/yt_dlp/extractor/krasview.py index d27d052ff..4323aa429 100644 --- a/yt_dlp/extractor/krasview.py +++ b/yt_dlp/extractor/krasview.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/ku6.py b/yt_dlp/extractor/ku6.py index a574408e5..31b4ea0c6 100644 --- a/yt_dlp/extractor/ku6.py +++ b/yt_dlp/extractor/ku6.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py index 
707fe1821..f1221ef1b 100644 --- a/yt_dlp/extractor/kusi.py +++ b/yt_dlp/extractor/kusi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py index 460a4252f..0c9518e66 100644 --- a/yt_dlp/extractor/kuwo.py +++ b/yt_dlp/extractor/kuwo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py index de985e450..5d52decdb 100644 --- a/yt_dlp/extractor/la7.py +++ b/yt_dlp/extractor/la7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py index b5d27c2f0..4014a9256 100644 --- a/yt_dlp/extractor/laola1tv.py +++ b/yt_dlp/extractor/laola1tv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py index 5215717e8..7ba666d06 100644 --- a/yt_dlp/extractor/lastfm.py +++ b/yt_dlp/extractor/lastfm.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 5d5457c53..953ce2e18 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py index 920872f5c..e7d2f8a24 100644 --- a/yt_dlp/extractor/lci.py +++ b/yt_dlp/extractor/lci.py @@ -1,26 +1,28 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor class LCIIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' - _TEST = { - 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html', - 'md5': '2fdb2538b884d4d695f9bd2bde137e6c', + _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' + _TESTS = [{ + 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', 'info_dict': { - 'id': '13244802', + 'id': '13875948', 'ext': 'mp4', - 'title': 'Hillary Clinton et sa quinte de toux, en plein meeting', - 'description': 'md5:a4363e3a960860132f8124b62f4a01c9', - } - } + 'title': 'md5:660df5481fd418bc3bbb0d070e6fdb5a', + 'thumbnail': 'https://photos.tf1.fr/1280/720/presidentielle-2022-marine-le-pen-et-emmanuel-macron-invites-de-lci-ce-vendredi-9c0e73-e1a036-0@1x.jpg', + 'upload_date': '20220422', + 'duration': 33, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.lci.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._search_regex( - (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'), - webpage, 'wat id') + wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id') return self.url_result('wat:' + wat_id, 'Wat', wat_id) diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py index 
ade27a99e..87543d56f 100644 --- a/yt_dlp/extractor/lcp.py +++ b/yt_dlp/extractor/lcp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .arkena import ArkenaIE diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py index 81b5d41be..bee4e7587 100644 --- a/yt_dlp/extractor/lecture2go.py +++ b/yt_dlp/extractor/lecture2go.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 0ee1eeb4d..c3d0cb193 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index d5e11423c..258e396cb 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import hashlib import re diff --git a/yt_dlp/extractor/lego.py b/yt_dlp/extractor/lego.py index 901f43bcf..7d0238a1f 100644 --- a/yt_dlp/extractor/lego.py +++ b/yt_dlp/extractor/lego.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import uuid from .common import InfoExtractor diff --git a/yt_dlp/extractor/lemonde.py b/yt_dlp/extractor/lemonde.py index 3306892e8..c916791af 100644 --- a/yt_dlp/extractor/lemonde.py +++ b/yt_dlp/extractor/lemonde.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/lenta.py b/yt_dlp/extractor/lenta.py index 2ebd4e577..10aac984e 100644 --- a/yt_dlp/extractor/lenta.py +++ b/yt_dlp/extractor/lenta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py index 03f205144..afe3c98a1 100644 --- a/yt_dlp/extractor/libraryofcongress.py +++ b/yt_dlp/extractor/libraryofcongress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/libsyn.py b/yt_dlp/extractor/libsyn.py index d1fcda4ef..8245a3481 100644 --- a/yt_dlp/extractor/libsyn.py +++ b/yt_dlp/extractor/libsyn.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py index 49a0a5989..8c7d2064d 100644 --- a/yt_dlp/extractor/lifenews.py +++ b/yt_dlp/extractor/lifenews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py new file mode 100644 index 000000000..b53e7a5ca --- /dev/null +++ b/yt_dlp/extractor/likee.py @@ -0,0 +1,193 @@ +import json + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + parse_iso8601, + str_or_none, + traverse_obj, +) + + +class LikeeIE(InfoExtractor): + IE_NAME = 'likee' + _VALID_URL = r'(?x)https?://(www\.)?likee\.video/(?:(?P<channel_name>[^/]+)/video/|v/)(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://likee.video/@huynh_hong_quan_/video/7093444807096327263', + 'info_dict': { + 'id': '7093444807096327263', + 'ext': 'mp4', + 'title': '🤴🤴🤴', + 
'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': 'Huỳnh Hồng Quân ', + 'play_count': int, + 'download_count': int, + 'artist': 'Huỳnh Hồng Quân ', + 'timestamp': 1651571320, + 'upload_date': '20220503', + 'view_count': int, + 'uploader_id': 'huynh_hong_quan_', + 'duration': 12374, + 'comment_count': int, + 'like_count': int, + }, + }, { + 'url': 'https://likee.video/@649222262/video/7093167848050058862', + 'info_dict': { + 'id': '7093167848050058862', + 'ext': 'mp4', + 'title': 'likee video #7093167848050058862', + 'description': 'md5:3f971c8c6ee8a216f2b1a9094c5de99f', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'like_count': int, + 'uploader': 'Vương Phước Nhi', + 'download_count': int, + 'timestamp': 1651506835, + 'upload_date': '20220502', + 'duration': 60024, + 'play_count': int, + 'artist': 'Vương Phước Nhi', + 'uploader_id': '649222262', + 'view_count': int, + }, + }, { + 'url': 'https://likee.video/@fernanda_rivasg/video/6932224568407629502', + 'info_dict': { + 'id': '6932224568407629502', + 'ext': 'mp4', + 'title': 'Un trend viejito🔥 #LIKEE #Ferlovers #trend ', + 'description': 'md5:c42b903a72a99d6d8b73e3d1126fbcef', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'duration': 9684, + 'uploader_id': 'fernanda_rivasg', + 'view_count': int, + 'play_count': int, + 'artist': 'La Cami La✨', + 'download_count': int, + 'like_count': int, + 'uploader': 'Fernanda Rivas🎶', + 'timestamp': 1614034308, + 'upload_date': '20210222', + }, + }, { + 'url': 'https://likee.video/v/k6QcOp', + 'info_dict': { + 'id': 'k6QcOp', + 'ext': 'mp4', + 'title': '#AguaChallenge tú ya lo intentaste?😱🤩', + 'description': 'md5:b0cc462689d4ff2b624daa4dba7640d9', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'duration': 18014, + 'play_count': int, + 'view_count': int, + 'timestamp': 1611694774, + 'like_count': int, + 'uploader': 'Fernanda Rivas🎶', + 'uploader_id': 'fernanda_rivasg', + 'download_count': int, + 'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎', + 'upload_date': '20210126', + }, + }, { + 'url': 'https://www.likee.video/@649222262/video/7093167848050058862', + 'only_matching': True, + }, { + 'url': 'https://www.likee.video/v/k6QcOp', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + info = self._parse_json( + self._search_regex(r'window\.data\s=\s({.+?});', webpage, 'video info'), + video_id, transform_source=js_to_json) + video_url = traverse_obj(info, 'video_url', ('originVideoInfo', 'video_url')) + if not video_url: + self.raise_no_formats('Video was deleted', expected=True) + formats = [{ + 'format_id': 'mp4-with-watermark', + 'url': video_url, + 'height': info.get('video_height'), + 'width': info.get('video_width'), + }, { + 'format_id': 'mp4-without-watermark', + 'url': video_url.replace('_4', ''), + 'height': info.get('video_height'), + 'width': info.get('video_width'), + 'quality': 1, + }] + self._sort_formats(formats) + return { + 'id': video_id, + 'title': info.get('msgText'), + 'description': info.get('share_desc'), + 'view_count': int_or_none(info.get('video_count')), + 'like_count': int_or_none(info.get('likeCount')), + 'play_count': int_or_none(info.get('play_count')), + 'download_count': int_or_none(info.get('download_count')), + 'comment_count': int_or_none(info.get('comment_count')), + 'uploader': str_or_none(info.get('nick_name')), + 'uploader_id': str_or_none(info.get('likeeId')), + 'artist': 
str_or_none(traverse_obj(info, ('sound', 'owner_name'))), + 'timestamp': parse_iso8601(info.get('uploadDate')), + 'thumbnail': info.get('coverUrl'), + 'duration': int_or_none(traverse_obj(info, ('option_data', 'dur'))), + 'formats': formats, + } + + +class LikeeUserIE(InfoExtractor): + IE_NAME = 'likee:user' + _VALID_URL = r'https?://(www\.)?likee\.video/(?P<id>[^/]+)/?$' + _TESTS = [{ + 'url': 'https://likee.video/@fernanda_rivasg', + 'info_dict': { + 'id': '925638334', + 'title': 'fernanda_rivasg', + }, + 'playlist_mincount': 500, + }, { + 'url': 'https://likee.video/@may_hmoob', + 'info_dict': { + 'id': '2943949041', + 'title': 'may_hmoob', + }, + 'playlist_mincount': 80, + }] + _PAGE_SIZE = 50 + _API_GET_USER_VIDEO = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo' + + def _entries(self, user_name, user_id): + last_post_id = '' + while True: + user_videos = self._download_json( + self._API_GET_USER_VIDEO, user_name, + data=json.dumps({ + 'uid': user_id, + 'count': self._PAGE_SIZE, + 'lastPostId': last_post_id, + 'tabType': 0, + }).encode('utf-8'), + headers={'content-type': 'application/json'}, + note=f'Get user info with lastPostId #{last_post_id}') + items = traverse_obj(user_videos, ('data', 'videoList')) + if not items: + break + for item in items: + last_post_id = item['postId'] + yield self.url_result(f'https://likee.video/{user_name}/video/{last_post_id}') + + def _real_extract(self, url): + user_name = self._match_id(url) + webpage = self._download_webpage(url, user_name) + info = self._parse_json( + self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'user info'), + user_name, transform_source=js_to_json) + user_id = traverse_obj(info, ('userinfo', 'uid')) + return self.playlist_result(self._entries(user_name, user_id), user_id, traverse_obj(info, ('userinfo', 'user_name'))) diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index b20681ad1..25667fc07 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py index 987c43430..63b6c002a 100644 --- a/yt_dlp/extractor/line.py +++ b/yt_dlp/extractor/line.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 0f57bfa06..27f1080b4 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from itertools import zip_longest import re diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py index 6aff88e13..bf22855a9 100644 --- a/yt_dlp/extractor/linuxacademy.py +++ b/yt_dlp/extractor/linuxacademy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import random diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py index 16b475a44..31826ac99 100644 --- a/yt_dlp/extractor/litv.py +++ b/yt_dlp/extractor/litv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py index 3a9f4553f..96bd8b233 100644 --- a/yt_dlp/extractor/livejournal.py +++ b/yt_dlp/extractor/livejournal.py @@ -1,6 +1,3 @@ -# 
coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import int_or_none diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py index 45bf26d26..4b90c22c5 100644 --- a/yt_dlp/extractor/livestream.py +++ b/yt_dlp/extractor/livestream.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py index bd2dffac0..3bb52777f 100644 --- a/yt_dlp/extractor/lnkgo.py +++ b/yt_dlp/extractor/lnkgo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/localnews8.py b/yt_dlp/extractor/localnews8.py index c3e9d10fa..6f3f02c70 100644 --- a/yt_dlp/extractor/localnews8.py +++ b/yt_dlp/extractor/localnews8.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/lovehomeporn.py b/yt_dlp/extractor/lovehomeporn.py index ca4b5f375..ba5a13acd 100644 --- a/yt_dlp/extractor/lovehomeporn.py +++ b/yt_dlp/extractor/lovehomeporn.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .nuevo import NuevoBaseIE diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index 4024aef73..a49fd592f 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -1,21 +1,59 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, merge_dicts, + traverse_obj, + url_or_none, ) -class LRTIE(InfoExtractor): - IE_NAME = 'lrt.lt' +class LRTBaseIE(InfoExtractor): + def _extract_js_var(self, webpage, var_name, default=None): + return self._search_regex( + fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1', + webpage, var_name.replace('_', ' '), default, group=2) + + +class LRTStreamIE(LRTBaseIE): + _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/tiesiogiai/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.lrt.lt/mediateka/tiesiogiai/lrt-opus', + 'info_dict': { + 'id': 'lrt-opus', + 'live_status': 'is_live', + 'title': 're:^LRT Opus.+$', + 'ext': 'mp4' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + streams_data = self._download_json(self._extract_js_var(webpage, 'tokenURL'), video_id) + + formats, subtitles = [], {} + for stream_url in traverse_obj(streams_data, ( + 'response', 'data', lambda k, _: k.startswith('content')), expected_type=url_or_none): + fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + self._sort_formats(formats) + + stream_title = self._extract_js_var(webpage, 'video_title', 'LRT') + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + 'title': f'{self._og_search_title(webpage)} - {stream_title}' + } + + +class LRTVODIE(LRTBaseIE): _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))' _TESTS = [{ # m3u8 download 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene', - 'md5': '85cb2bb530f31d91a9c65b479516ade4', 'info_dict': { 'id': '2000127261', 'ext': 'mp4', @@ -24,6 +62,8 @@ class LRTIE(InfoExtractor): 'duration': 3035, 'timestamp': 1604079000, 
'upload_date': '20201030', + 'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'], + 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg' }, }, { # direct mp3 download @@ -40,11 +80,6 @@ class LRTIE(InfoExtractor): }, }] - def _extract_js_var(self, webpage, var_name, default): - return self._search_regex( - r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name, - webpage, var_name.replace('_', ' '), default, group=2) - def _real_extract(self, url): path, video_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py index ce304743f..1ae7f9d4f 100644 --- a/yt_dlp/extractor/lynda.py +++ b/yt_dlp/extractor/lynda.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/m6.py b/yt_dlp/extractor/m6.py index 9806875e8..9dcc60164 100644 --- a/yt_dlp/extractor/m6.py +++ b/yt_dlp/extractor/m6.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/magentamusik360.py b/yt_dlp/extractor/magentamusik360.py index 5c274902f..5d0cb3bfb 100644 --- a/yt_dlp/extractor/magentamusik360.py +++ b/yt_dlp/extractor/magentamusik360.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py index 5d9f80bb3..5f30d0eaa 100644 --- a/yt_dlp/extractor/mailru.py +++ b/yt_dlp/extractor/mailru.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json import re diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py index 0f349a7a3..c144c7592 100644 --- a/yt_dlp/extractor/mainstreaming.py +++ b/yt_dlp/extractor/mainstreaming.py @@ -1,4 +1,3 @@ -# coding: utf-8 import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/malltv.py b/yt_dlp/extractor/malltv.py index fadfd9338..bfd6008b3 100644 --- a/yt_dlp/extractor/malltv.py +++ b/yt_dlp/extractor/malltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py index 68ce138b3..a392e9b54 100644 --- a/yt_dlp/extractor/mangomolo.py +++ b/yt_dlp/extractor/mangomolo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py index d12aa5f60..dc8653f5d 100644 --- a/yt_dlp/extractor/manoto.py +++ b/yt_dlp/extractor/manoto.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index bd24f8853..1f537d267 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/maoritv.py b/yt_dlp/extractor/maoritv.py index 0d23fec75..67780eafc 100644 --- a/yt_dlp/extractor/maoritv.py +++ b/yt_dlp/extractor/maoritv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/markiza.py b/yt_dlp/extractor/markiza.py index def960a0c..53ed79158 100644 --- a/yt_dlp/extractor/markiza.py +++ b/yt_dlp/extractor/markiza.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py index b381d31b4..4508e4391 100644 --- a/yt_dlp/extractor/massengeschmacktv.py +++ b/yt_dlp/extractor/massengeschmacktv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py new file mode 100644 index 000000000..d1ce07f10 --- /dev/null +++ b/yt_dlp/extractor/masters.py @@ -0,0 +1,39 @@ +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import ( + traverse_obj, + unified_strdate, +) + + +class MastersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P<date>\d{4}-\d{2}-\d{2})/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html', + 'info_dict': { + 'id': '16493755593805191', + 'ext': 'mp4', + 'title': 'Sungjae Im: Thursday Interview 2022', + 'upload_date': '20220407', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + + def _real_extract(self, url): + video_id, upload_date = self._match_valid_url(url).group('id', 'date') + content_resp = self._download_json( + f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en', + video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4') + self._sort_formats(formats) + + thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()] + + return { + 'id': video_id, + 'title': content_resp.get('title'), + 'formats': formats, + 'subtitles': subtitles, + 'upload_date': unified_strdate(upload_date), + 'thumbnails': thumbnails, + } diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index e003b8d25..94ae20b26 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py index 3ca174c2b..b44cf809a 100644 --- a/yt_dlp/extractor/mdr.py +++ b/yt_dlp/extractor/mdr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 59cc30736..527b50cb0 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py index b670f0d61..0f9079b11 100644 --- a/yt_dlp/extractor/mediaite.py +++ b/yt_dlp/extractor/mediaite.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index 18ff3befa..f9a449377 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ 
-1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from ..utils import ( unified_strdate ) diff --git a/yt_dlp/extractor/medialaan.py b/yt_dlp/extractor/medialaan.py index 788acf7fb..297f8c4b2 100644 --- a/yt_dlp/extractor/medialaan.py +++ b/yt_dlp/extractor/medialaan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index d6b456c5d..60c454dda 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index fbf9223b2..30464bad0 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/medici.py b/yt_dlp/extractor/medici.py index cd910238e..328ccd2c9 100644 --- a/yt_dlp/extractor/medici.py +++ b/yt_dlp/extractor/medici.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/megaphone.py b/yt_dlp/extractor/megaphone.py index 5bafa6cf4..0c150ef45 100644 --- a/yt_dlp/extractor/megaphone.py +++ b/yt_dlp/extractor/megaphone.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/megatvcom.py b/yt_dlp/extractor/megatvcom.py index 0d6793acd..ec481d016 100644 --- a/yt_dlp/extractor/megatvcom.py +++ b/yt_dlp/extractor/megatvcom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py index 2445b8b39..95b6dfe52 100644 --- a/yt_dlp/extractor/meipai.py +++ b/yt_dlp/extractor/meipai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/melonvod.py b/yt_dlp/extractor/melonvod.py index bd8cf13ab..0cbc961c4 100644 --- a/yt_dlp/extractor/melonvod.py +++ b/yt_dlp/extractor/melonvod.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/meta.py b/yt_dlp/extractor/meta.py index cdb46e163..7c11e6017 100644 --- a/yt_dlp/extractor/meta.py +++ b/yt_dlp/extractor/meta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .pladform import PladformIE from ..utils import ( diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py index 7b2d4a003..31fec86d2 100644 --- a/yt_dlp/extractor/metacafe.py +++ b/yt_dlp/extractor/metacafe.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/metacritic.py b/yt_dlp/extractor/metacritic.py index 1424288e7..543bdffad 100644 --- a/yt_dlp/extractor/metacritic.py +++ b/yt_dlp/extractor/metacritic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mgoon.py b/yt_dlp/extractor/mgoon.py index 184c311be..c41c51384 100644 --- a/yt_dlp/extractor/mgoon.py +++ b/yt_dlp/extractor/mgoon.py @@ 
-1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py index 4ac70ea57..96f3fb982 100644 --- a/yt_dlp/extractor/mgtv.py +++ b/yt_dlp/extractor/mgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import time import uuid diff --git a/yt_dlp/extractor/miaopai.py b/yt_dlp/extractor/miaopai.py index cf0610bdf..329ce3658 100644 --- a/yt_dlp/extractor/miaopai.py +++ b/yt_dlp/extractor/miaopai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 4d5a9df1f..2dde82a75 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from base64 import b64decode from .common import InfoExtractor diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py index 46abd2a6d..f15f00ee5 100644 --- a/yt_dlp/extractor/microsoftvirtualacademy.py +++ b/yt_dlp/extractor/microsoftvirtualacademy.py @@ -1,11 +1,6 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor -from ..compat import ( - compat_xpath, -) from ..utils import ( int_or_none, parse_duration, @@ -70,9 +65,9 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): formats = [] - for sources in settings.findall(compat_xpath('.//MediaSources')): + for sources in settings.findall('.//MediaSources'): sources_type = sources.get('videoType') - for source in sources.findall(compat_xpath('./MediaSource')): + for source in sources.findall('./MediaSource'): video_url = source.text if not video_url or not video_url.startswith('http'): continue @@ -101,7 +96,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): self._sort_formats(formats) subtitles = {} - for source in settings.findall(compat_xpath('.//MarkerResourceSource')): + for source in settings.findall('.//MarkerResourceSource'): subtitle_url = source.text if not subtitle_url: continue diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index 5f2df29c6..c7a61dfa0 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -1,8 +1,6 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json +import uuid from .common import InfoExtractor from ..utils import ( @@ -11,7 +9,6 @@ from ..utils import ( ExtractorError, float_or_none, OnDemandPagedList, - random_uuidv4, traverse_obj, ) @@ -21,7 +18,7 @@ class MildomBaseIE(InfoExtractor): def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{random_uuidv4()}' + self._GUEST_ID = f'pc-gp-{str(uuid.uuid4())}' content = self._download_json( url, video_id, note=note, data=json.dumps(body).encode() if body else None, diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py index 9da07207b..393d20604 100644 --- a/yt_dlp/extractor/minds.py +++ b/yt_dlp/extractor/minds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/ministrygrid.py b/yt_dlp/extractor/ministrygrid.py index 8ad9239c5..053c6726c 100644 --- 
a/yt_dlp/extractor/ministrygrid.py +++ b/yt_dlp/extractor/ministrygrid.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py index 603ce940b..e799cd3bc 100644 --- a/yt_dlp/extractor/minoto.py +++ b/yt_dlp/extractor/minoto.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/miomio.py b/yt_dlp/extractor/miomio.py index 40f72d66f..a0a041ea5 100644 --- a/yt_dlp/extractor/miomio.py +++ b/yt_dlp/extractor/miomio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 2111de615..8192f2b46 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 60e456978..38cc0c274 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index b5937233b..12b2b2432 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .telecinco import TelecincoIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 31f450dfa..3f430a717 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index c2dd078ac..796f268f4 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools from .common import InfoExtractor @@ -9,7 +7,6 @@ from ..compat import ( compat_ord, compat_str, compat_urllib_parse_unquote, - compat_zip ) from ..utils import ( ExtractorError, @@ -76,7 +73,7 @@ class MixcloudIE(MixcloudBaseIE): """Encrypt/Decrypt XOR cipher. 
Both ways are possible because it's XOR.""" return ''.join([ compat_chr(compat_ord(ch) ^ compat_ord(k)) - for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) + for ch, k in zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index b69301d97..5fb97083a 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 1d6d4b804..9383f1358 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mnet.py b/yt_dlp/extractor/mnet.py index 0e26ca1b3..65e3d476a 100644 --- a/yt_dlp/extractor/mnet.py +++ b/yt_dlp/extractor/mnet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/moevideo.py b/yt_dlp/extractor/moevideo.py index a3f1b3866..fda08cae9 100644 --- a/yt_dlp/extractor/moevideo.py +++ b/yt_dlp/extractor/moevideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/mofosex.py b/yt_dlp/extractor/mofosex.py index 5234cac02..66a098c97 100644 --- a/yt_dlp/extractor/mofosex.py +++ b/yt_dlp/extractor/mofosex.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py index 16d94052b..d47ad0742 100644 --- a/yt_dlp/extractor/mojvideo.py +++ b/yt_dlp/extractor/mojvideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/morningstar.py b/yt_dlp/extractor/morningstar.py index 71a22a614..e9fcfe3e2 100644 --- a/yt_dlp/extractor/morningstar.py +++ b/yt_dlp/extractor/morningstar.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index 111c7c544..9e53a8a97 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import datetime import re diff --git a/yt_dlp/extractor/motorsport.py b/yt_dlp/extractor/motorsport.py index c9d1ab64d..b292aeb9a 100644 --- a/yt_dlp/extractor/motorsport.py +++ b/yt_dlp/extractor/motorsport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urlparse, diff --git a/yt_dlp/extractor/movieclips.py b/yt_dlp/extractor/movieclips.py index 5453da1ac..4777f440e 100644 --- a/yt_dlp/extractor/movieclips.py +++ b/yt_dlp/extractor/movieclips.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 4605d3481..ca541567a 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - from .dailymotion import DailymotionIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py index 730da4bd7..5757322d6 100644 --- a/yt_dlp/extractor/moviezine.py +++ b/yt_dlp/extractor/moviezine.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index 4f62d628a..cdd8ba4dc 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unescapeHTML, diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index f34e2102c..6f4935e51 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index be5de0a70..d161c33c1 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -1,13 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_xpath, -) +from ..compat import compat_str from ..utils import ( ExtractorError, find_xpath_attr, @@ -167,9 +161,9 @@ class MTVServicesInfoExtractor(InfoExtractor): itemdoc, './/{http://search.yahoo.com/mrss/}category', 'scheme', 'urn:mtvn:video_title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) + title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//title')) + title_el = itemdoc.find('.//title') if title_el.text is None: title_el = None diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py index a53929e1b..b9681d1bd 100644 --- a/yt_dlp/extractor/muenchentv.py +++ b/yt_dlp/extractor/muenchentv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py index 1eb5de660..508d51247 100644 --- a/yt_dlp/extractor/murrtube.py +++ b/yt_dlp/extractor/murrtube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index 09fadf8d9..289ae5733 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py index 05f722091..4d8e74f6b 100644 --- a/yt_dlp/extractor/musicdex.py +++ b/yt_dlp/extractor/musicdex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( date_from_str, diff --git a/yt_dlp/extractor/mwave.py b/yt_dlp/extractor/mwave.py index a67276596..0cbb16736 100644 --- a/yt_dlp/extractor/mwave.py +++ b/yt_dlp/extractor/mwave.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 3c2afd838..cdc340a80 100644 --- 
a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import try_get diff --git a/yt_dlp/extractor/mychannels.py b/yt_dlp/extractor/mychannels.py index d820d4eb8..8a70c1f7b 100644 --- a/yt_dlp/extractor/mychannels.py +++ b/yt_dlp/extractor/mychannels.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/myspace.py b/yt_dlp/extractor/myspace.py index 4227d4248..63d36c30a 100644 --- a/yt_dlp/extractor/myspace.py +++ b/yt_dlp/extractor/myspace.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/myspass.py b/yt_dlp/extractor/myspass.py index 1775d5f0b..28ac982d6 100644 --- a/yt_dlp/extractor/myspass.py +++ b/yt_dlp/extractor/myspass.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/myvi.py b/yt_dlp/extractor/myvi.py index 75d286365..b31cf4493 100644 --- a/yt_dlp/extractor/myvi.py +++ b/yt_dlp/extractor/myvi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/myvideoge.py b/yt_dlp/extractor/myvideoge.py index 0a1d7d0cb..513d4cb77 100644 --- a/yt_dlp/extractor/myvideoge.py +++ b/yt_dlp/extractor/myvideoge.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/myvidster.py b/yt_dlp/extractor/myvidster.py index 2117d302d..c91f294bf 100644 --- a/yt_dlp/extractor/myvidster.py +++ b/yt_dlp/extractor/myvidster.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index fdb7f32db..cc0ff533e 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nate.py b/yt_dlp/extractor/nate.py index 072faf6ea..c83b2acbd 100644 --- a/yt_dlp/extractor/nate.py +++ b/yt_dlp/extractor/nate.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py index ee12e2b47..f22317d56 100644 --- a/yt_dlp/extractor/nationalgeographic.py +++ b/yt_dlp/extractor/nationalgeographic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .fox import FOXIE from ..utils import ( diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index a6821ba86..a230d9cdd 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 359cc52b7..e95c1b795 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 
109403440..365c2e60d 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import json import re @@ -581,7 +579,7 @@ class NBCOlympicsStreamIE(AdobePassIE): for f in formats: # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to # download with ffmpeg without this option - f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0'] + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']} self._sort_formats(formats) return { diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 1917254b8..de0142ccf 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py index bc3eb9160..fbb033169 100644 --- a/yt_dlp/extractor/ndtv.py +++ b/yt_dlp/extractor/ndtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote_plus diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 77f253519..ff9a2adf0 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json import time @@ -21,9 +18,8 @@ class NebulaBaseIE(InfoExtractor): _nebula_bearer_token = None _zype_access_token = None - def _perform_nebula_auth(self): - username, password = self._get_login_info() - if not (username and password): + def _perform_nebula_auth(self, username, password): + if not username or not password: self.raise_login_required() data = json.dumps({'email': username, 'password': password}).encode('utf8') @@ -54,7 +50,7 @@ class NebulaBaseIE(InfoExtractor): return response['key'] - def _retrieve_nebula_api_token(self): + def _retrieve_nebula_api_token(self, username=None, password=None): """ Check cookie jar for valid token. Try to authenticate using credentials if no valid token can be found in the cookie jar. 
@@ -68,7 +64,7 @@ class NebulaBaseIE(InfoExtractor): if nebula_api_token: return nebula_api_token - return self._perform_nebula_auth() + return self._perform_nebula_auth(username, password) def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''): assert method in ('GET', 'POST',) @@ -149,8 +145,7 @@ class NebulaBaseIE(InfoExtractor): } def _perform_login(self, username=None, password=None): - # FIXME: username should be passed from here to inner functions - self._nebula_api_token = self._retrieve_nebula_api_token() + self._nebula_api_token = self._retrieve_nebula_api_token(username, password) self._nebula_bearer_token = self._fetch_nebula_bearer_token() self._zype_access_token = self._fetch_zype_access_token() @@ -160,7 +155,7 @@ class NebulaIE(NebulaBaseIE): _TESTS = [ { 'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast', - 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e', + 'md5': '14944cfee8c7beeea106320c47560efc', 'info_dict': { 'id': '5c271b40b13fd613090034fd', 'ext': 'mp4', @@ -172,14 +167,21 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', 'uploader_id': 'lindsayellis', - }, - 'params': { - 'usenetrc': True, + 'timestamp': 1533009600, + 'uploader_url': 'https://nebula.app/lindsayellis', + 'series': 'Lindsay Ellis', + 'average_rating': int, + 'display_id': 'that-time-disney-remade-beauty-and-the-beast', + 'channel_url': 'https://nebula.app/lindsayellis', + 'creator': 'Lindsay Ellis', + 'duration': 2212, + 'view_count': int, + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', }, }, { 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', - 'md5': '6d4edd14ce65720fa63aba5c583fb328', + 'md5': 'd05739cf6c38c09322422f696b569c23', 'info_dict': { 'id': '5e7e78171aaf320001fbd6be', 'ext': 'mp4', @@ -191,14 +193,20 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'realengineering', 'uploader': 'Real Engineering', 'uploader_id': 'realengineering', - }, - 'params': { - 'usenetrc': True, + 'view_count': int, + 'series': 'Real Engineering', + 'average_rating': int, + 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'creator': 'Real Engineering', + 'duration': 841, + 'channel_url': 'https://nebula.app/realengineering', + 'uploader_url': 'https://nebula.app/realengineering', + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', }, }, { 'url': 'https://nebula.app/videos/money-episode-1-the-draw', - 'md5': '8c7d272910eea320f6f8e6d3084eecf5', + 'md5': 'ebe28a7ad822b9ee172387d860487868', 'info_dict': { 'id': '5e779ebdd157bc0001d1c75a', 'ext': 'mp4', @@ -210,9 +218,15 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'tom-scott-presents-money', 'uploader': 'Tom Scott Presents: Money', 'uploader_id': 'tom-scott-presents-money', - }, - 'params': { - 'usenetrc': True, + 'uploader_url': 'https://nebula.app/tom-scott-presents-money', + 'duration': 825, + 'channel_url': 'https://nebula.app/tom-scott-presents-money', + 'view_count': int, + 'series': 'Tom Scott Presents: Money', + 'display_id': 'money-episode-1-the-draw', + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', + 'average_rating': int, + 'creator': 'Tom Scott Presents: Money', }, }, { @@ -233,9 +247,37 @@ class NebulaIE(NebulaBaseIE): return self._build_video_info(video) -class NebulaCollectionIE(NebulaBaseIE): - IE_NAME = 'nebula:collection' - _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)' +class 
NebulaSubscriptionsIE(NebulaBaseIE): + IE_NAME = 'nebula:subscriptions' + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/myshows' + _TESTS = [ + { + 'url': 'https://nebula.app/myshows', + 'playlist_mincount': 1, + 'info_dict': { + 'id': 'myshows', + }, + }, + ] + + def _generate_playlist_entries(self): + next_url = 'https://content.watchnebula.com/library/video/?page_size=100' + page_num = 1 + while next_url: + channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer', + note=f'Retrieving subscriptions page {page_num}') + for episode in channel['results']: + yield self._build_video_info(episode) + next_url = channel['next'] + page_num += 1 + + def _real_extract(self, url): + return self.playlist_result(self._generate_playlist_entries(), 'myshows') + + +class NebulaChannelIE(NebulaBaseIE): + IE_NAME = 'nebula:channel' + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!myshows|videos/)(?P<id>[-\w]+)' _TESTS = [ { 'url': 'https://nebula.app/tom-scott-presents-money', @@ -245,9 +287,6 @@ class NebulaCollectionIE(NebulaBaseIE): 'description': 'Tom Scott hosts a series all about trust, negotiation and money.', }, 'playlist_count': 5, - 'params': { - 'usenetrc': True, - }, }, { 'url': 'https://nebula.app/lindsayellis', 'info_dict': { @@ -256,9 +295,6 @@ class NebulaCollectionIE(NebulaBaseIE): 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', }, 'playlist_mincount': 100, - 'params': { - 'usenetrc': True, - }, }, ] diff --git a/yt_dlp/extractor/nerdcubed.py b/yt_dlp/extractor/nerdcubed.py index 9feccc672..7c801b5d3 100644 --- a/yt_dlp/extractor/nerdcubed.py +++ b/yt_dlp/extractor/nerdcubed.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime from .common import InfoExtractor diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 57b4774b6..4def7e76b 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from hashlib import md5 from base64 import b64encode from datetime import datetime diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py index 4ad0d8e96..49b29b67c 100644 --- a/yt_dlp/extractor/netzkino.py +++ b/yt_dlp/extractor/netzkino.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 6525a6d8a..ba24720e3 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/newstube.py b/yt_dlp/extractor/newstube.py index 479141ae0..20db46057 100644 --- a/yt_dlp/extractor/newstube.py +++ b/yt_dlp/extractor/newstube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib diff --git a/yt_dlp/extractor/newsy.py b/yt_dlp/extractor/newsy.py index cf3164100..9fde6c079 100644 --- a/yt_dlp/extractor/newsy.py +++ b/yt_dlp/extractor/newsy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py index 7bd1290bf..1f83089fc 100644 --- a/yt_dlp/extractor/nextmedia.py +++ 
b/yt_dlp/extractor/nextmedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index a521bb6e4..01376be3d 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random import re diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py index a12e503de..79c6aaf0c 100644 --- a/yt_dlp/extractor/nfb.py +++ b/yt_dlp/extractor/nfb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index 802f6caf0..e6f98b036 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index 821276a31..e5810b346 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 3b8efc3e6..cf2ec7b79 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index d3a5e17e9..884f9e2ae 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index ba7da7602..2a228d8de 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .mtv import MTVServicesInfoExtractor from ..utils import update_url_query diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 4eb6ed070..a80b544f8 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import functools import itertools @@ -10,8 +7,6 @@ import time from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, compat_HTTPError, ) from ..utils import ( @@ -35,6 +30,7 @@ from ..utils import ( update_url_query, url_or_none, urlencode_postdata, + urljoin, ) @@ -195,7 +191,7 @@ class NiconicoIE(InfoExtractor): self._request_webpage( 'https://account.nicovideo.jp/login', None, note='Acquiring Login session') - urlh = self._request_webpage( + page = self._download_webpage( 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None, note='Logging in', errnote='Unable to log in', data=urlencode_postdata(login_form_strs), @@ -203,19 +199,32 @@ class NiconicoIE(InfoExtractor): 'Referer': 'https://account.nicovideo.jp/login', 'Content-Type': 'application/x-www-form-urlencoded', }) - if urlh is False: - login_ok = False - else: - parts = compat_urllib_parse_urlparse(urlh.geturl()) - if 
compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': - login_ok = False + if 'oneTimePw' in page: + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url') + page = self._download_webpage( + urljoin('https://account.nicovideo.jp', post_url), None, + note='Performing MFA', errnote='Unable to complete MFA', + data=urlencode_postdata({ + 'otp': self._get_tfa_info('6 digits code') + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + if 'oneTimePw' in page or 'formError' in page: + err_msg = self._html_search_regex( + r'formError["\']+>(.*?)</div>', page, 'form_error', + default='There\'s an error but the message can\'t be parsed.', + flags=re.DOTALL) + self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"') + return False + login_ok = 'class="notice error"' not in page if not login_ok: - self.report_warning('unable to log in: bad username or password') + self.report_warning('Unable to log in: bad username or password') return login_ok def _get_heartbeat_info(self, info_dict): video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/') - dmc_protocol = info_dict['_expected_protocol'] + dmc_protocol = info_dict['expected_protocol'] api_data = ( info_dict.get('_api_data') @@ -369,7 +378,7 @@ class NiconicoIE(InfoExtractor): 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')), 'quality': -2 if 'low' in video_quality['id'] else None, 'protocol': 'niconico_dmc', - '_expected_protocol': dmc_protocol, + 'expected_protocol': dmc_protocol, # XXX: This is not a documented field 'http_headers': { 'Origin': 'https://www.nicovideo.jp', 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 781842721..462caf466 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py index 14390823b..00ca95ea2 100644 --- a/yt_dlp/extractor/ninegag.py +++ b/yt_dlp/extractor/ninegag.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 6043674ba..b970f8ccb 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/nintendo.py b/yt_dlp/extractor/nintendo.py index ff8f70ba6..ed839af25 100644 --- a/yt_dlp/extractor/nintendo.py +++ b/yt_dlp/extractor/nintendo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 8bb709cd7..251bf444f 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py index 68c8c8e52..e761cf257 100644 --- a/yt_dlp/extractor/njpwworld.py +++ 
b/yt_dlp/extractor/njpwworld.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index 4dfdb09d6..35b64530f 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py deleted file mode 100644 index 28af909d5..000000000 --- a/yt_dlp/extractor/noco.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import time -import hashlib - -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - float_or_none, - parse_iso8601, - parse_qs, - sanitized_Request, - urlencode_postdata, -) - - -class NocoIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' - _LOGIN_URL = 'https://noco.tv/do.php' - _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' - _SUB_LANG_TEMPLATE = '&sub_lang=%s' - _NETRC_MACHINE = 'noco' - - _TESTS = [ - { - 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', - 'md5': '0a993f0058ddbcd902630b2047ef710e', - 'info_dict': { - 'id': '11538', - 'ext': 'mp4', - 'title': 'Ami Ami Idol - Hello! France', - 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', - 'upload_date': '20140412', - 'uploader': 'Nolife', - 'uploader_id': 'NOL', - 'duration': 2851.2, - }, - 'skip': 'Requires noco account', - }, - { - 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call', - 'md5': 'c190f1f48e313c55838f1f412225934d', - 'info_dict': { - 'id': '12610', - 'ext': 'mp4', - 'title': 'The Guild #1 - Wake-Up Call', - 'timestamp': 1403863200, - 'upload_date': '20140627', - 'uploader': 'LBL42', - 'uploader_id': 'LBL', - 'duration': 233.023, - }, - 'skip': 'Requires noco account', - } - ] - - def _perform_login(self, username, password): - login = self._download_json( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata({ - 'a': 'login', - 'cookie': '1', - 'username': username, - 'password': password, - }), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - }) - - if 'erreur' in login: - raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) - - @staticmethod - def _ts(): - return int(time.time() * 1000) - - def _call_api(self, path, video_id, note, sub_lang=None): - ts = compat_str(self._ts() + self._ts_offset) - tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() - url = self._API_URL_TEMPLATE % (path, ts, tk) - if sub_lang: - url += self._SUB_LANG_TEMPLATE % sub_lang - - request = sanitized_Request(url) - request.add_header('Referer', self._referer) - - resp = self._download_json(request, video_id, note) - - if isinstance(resp, dict) and resp.get('error'): - self._raise_error(resp['error'], resp['description']) - - return resp - - def _raise_error(self, error, description): - raise ExtractorError( - '%s returned error: %s - %s' % (self.IE_NAME, error, description), - expected=True) - - def _real_extract(self, url): - video_id = self._match_id(url) - - # Timestamp adjustment offset between server time and local time - # must be calculated in order 
to use timestamps closest to server's - # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864) - webpage = self._download_webpage(url, video_id) - - player_url = self._search_regex( - r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1', - webpage, 'noco player', group='player', - default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf') - - qs = parse_qs(player_url) - ts = int_or_none(qs.get('ts', [None])[0]) - self._ts_offset = ts - self._ts() if ts else 0 - self._referer = player_url - - medias = self._call_api( - 'shows/%s/medias' % video_id, - video_id, 'Downloading video JSON') - - show = self._call_api( - 'shows/by_id/%s' % video_id, - video_id, 'Downloading show JSON')[0] - - options = self._call_api( - 'users/init', video_id, - 'Downloading user options JSON')['options'] - audio_lang_pref = options.get('audio_language') or options.get('language', 'fr') - - if audio_lang_pref == 'original': - audio_lang_pref = show['original_lang'] - if len(medias) == 1: - audio_lang_pref = list(medias.keys())[0] - elif audio_lang_pref not in medias: - audio_lang_pref = 'fr' - - qualities = self._call_api( - 'qualities', - video_id, 'Downloading qualities JSON') - - formats = [] - - for audio_lang, audio_lang_dict in medias.items(): - preference = 1 if audio_lang == audio_lang_pref else 0 - for sub_lang, lang_dict in audio_lang_dict['video_list'].items(): - for format_id, fmt in lang_dict['quality_list'].items(): - format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id) - - video = self._call_api( - 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), - video_id, 'Downloading %s video JSON' % format_id_extended, - sub_lang if sub_lang != 'none' else None) - - file_url = video['file'] - if not file_url: - continue - - if file_url in ['forbidden', 'not found']: - popmessage = video['popmessage'] - self._raise_error(popmessage['title'], popmessage['message']) - - formats.append({ - 'url': file_url, - 'format_id': format_id_extended, - 'width': int_or_none(fmt.get('res_width')), - 'height': int_or_none(fmt.get('res_lines')), - 'abr': int_or_none(fmt.get('audiobitrate'), 1000), - 'vbr': int_or_none(fmt.get('videobitrate'), 1000), - 'filesize': int_or_none(fmt.get('filesize')), - 'format_note': qualities[format_id].get('quality_name'), - 'quality': qualities[format_id].get('priority'), - 'language_preference': preference, - }) - - self._sort_formats(formats) - - timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ') - - if timestamp is not None and timestamp < 0: - timestamp = None - - uploader = show.get('partner_name') - uploader_id = show.get('partner_key') - duration = float_or_none(show.get('duration_ms'), 1000) - - thumbnails = [] - for thumbnail_key, thumbnail_url in show.items(): - m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key) - if not m: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - - episode = show.get('show_TT') or show.get('show_OT') - family = show.get('family_TT') or show.get('family_OT') - episode_number = show.get('episode_number') - - title = '' - if family: - title += family - if episode_number: - title += ' #' + compat_str(episode_number) - if episode: - title += ' - ' + compat_str(episode) - - description = show.get('show_resume') or show.get('family_resume') - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 
'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'duration': duration, - 'formats': formats, - } diff --git a/yt_dlp/extractor/nonktube.py b/yt_dlp/extractor/nonktube.py index ca1424e06..f191be33b 100644 --- a/yt_dlp/extractor/nonktube.py +++ b/yt_dlp/extractor/nonktube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .nuevo import NuevoBaseIE diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py index 2f170bbfe..3e04da67e 100644 --- a/yt_dlp/extractor/noodlemagazine.py +++ b/yt_dlp/extractor/noodlemagazine.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py index b40770d07..acbb74c6e 100644 --- a/yt_dlp/extractor/noovo.py +++ b/yt_dlp/extractor/noovo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/normalboots.py b/yt_dlp/extractor/normalboots.py index 61fe571df..07babcd2c 100644 --- a/yt_dlp/extractor/normalboots.py +++ b/yt_dlp/extractor/normalboots.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/nosvideo.py b/yt_dlp/extractor/nosvideo.py index 53c500c35..b6d3ea40c 100644 --- a/yt_dlp/extractor/nosvideo.py +++ b/yt_dlp/extractor/nosvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 00a64f88d..6875d26ba 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py index bfb2c8751..4f1a84651 100644 --- a/yt_dlp/extractor/novaplay.py +++ b/yt_dlp/extractor/novaplay.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none, parse_duration, parse_iso8601 diff --git a/yt_dlp/extractor/nowness.py b/yt_dlp/extractor/nowness.py index b2c715f41..fc9043bce 100644 --- a/yt_dlp/extractor/nowness.py +++ b/yt_dlp/extractor/nowness.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index ccafd7723..b42a56f7e 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,17 +1,11 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_xpath, -) from ..utils import ( int_or_none, find_xpath_attr, xpath_text, update_url_query, ) +from ..compat import compat_urllib_parse_unquote class NozIE(InfoExtractor): @@ -50,7 +44,7 @@ class NozIE(InfoExtractor): duration = int_or_none(xpath_text( doc, './/article/movie/file/duration')) formats = [] - for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')): + for qnode in doc.findall('.//article/movie/file/qualities/qual'): http_url_ele = find_xpath_attr( qnode, './html_urls/video_url', 'format', 'video/mp4') http_url = http_url_ele.text if http_url_ele is not 
None else None diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index a8aaef6f1..0b5f32c2e 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py index 49f062d7a..6d93f154c 100644 --- a/yt_dlp/extractor/npr.py +++ b/yt_dlp/extractor/npr.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 0cf26d598..553c55132 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import random import re diff --git a/yt_dlp/extractor/nrl.py b/yt_dlp/extractor/nrl.py index 0bd5086ae..798d03417 100644 --- a/yt_dlp/extractor/nrl.py +++ b/yt_dlp/extractor/nrl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py index c9af91188..422ec6eb0 100644 --- a/yt_dlp/extractor/ntvcojp.py +++ b/yt_dlp/extractor/ntvcojp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ntvde.py b/yt_dlp/extractor/ntvde.py index 035582ee8..d252ced86 100644 --- a/yt_dlp/extractor/ntvde.py +++ b/yt_dlp/extractor/ntvde.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py index c47d1dfa4..c8df110e8 100644 --- a/yt_dlp/extractor/ntvru.py +++ b/yt_dlp/extractor/ntvru.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index be1e09d37..ec54041f1 100644 --- a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 84fb97d6a..fafcc8f4b 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 99964737d..f388688c4 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hmac import hashlib import base64 diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index e5601b495..7c9efd922 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor diff --git a/yt_dlp/extractor/nzz.py b/yt_dlp/extractor/nzz.py index 61ee77adb..ac3b73156 100644 --- a/yt_dlp/extractor/nzz.py +++ b/yt_dlp/extractor/nzz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/odatv.py 
b/yt_dlp/extractor/odatv.py index 314527f98..24ab93942 100644 --- a/yt_dlp/extractor/odatv.py +++ b/yt_dlp/extractor/odatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 293f1aa60..36a7f5f4e 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/oktoberfesttv.py b/yt_dlp/extractor/oktoberfesttv.py index 276567436..e0ac8563a 100644 --- a/yt_dlp/extractor/oktoberfesttv.py +++ b/yt_dlp/extractor/oktoberfesttv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 784f282c7..42ea94905 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, @@ -56,7 +53,7 @@ class OlympicsReplayIE(InfoExtractor): }) m3u8_url = self._download_json( f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls') self._sort_formats(formats) return { diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py index d4d824430..779becc70 100644 --- a/yt_dlp/extractor/on24.py +++ b/yt_dlp/extractor/on24.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/once.py b/yt_dlp/extractor/once.py index 3e44b7829..460b82d02 100644 --- a/yt_dlp/extractor/once.py +++ b/yt_dlp/extractor/once.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ondemandkorea.py b/yt_dlp/extractor/ondemandkorea.py index e933ea2cc..84687ef47 100644 --- a/yt_dlp/extractor/ondemandkorea.py +++ b/yt_dlp/extractor/ondemandkorea.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py index 826faadd2..41815bef1 100644 --- a/yt_dlp/extractor/onefootball.py +++ b/yt_dlp/extractor/onefootball.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py index 95177a213..ea46d7def 100644 --- a/yt_dlp/extractor/onet.py +++ b/yt_dlp/extractor/onet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/onionstudios.py b/yt_dlp/extractor/onionstudios.py index cf5c39e66..9776b4d97 100644 --- a/yt_dlp/extractor/onionstudios.py +++ b/yt_dlp/extractor/onionstudios.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ooyala.py b/yt_dlp/extractor/ooyala.py index 20cfa0a87..77017f08b 100644 --- 
a/yt_dlp/extractor/ooyala.py +++ b/yt_dlp/extractor/ooyala.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import re diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index cf8d91717..c640224dd 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index fe4740aae..61e3a8b86 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -1,22 +1,17 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import contextlib import json import os import subprocess import tempfile -from ..compat import ( - compat_urlparse, - compat_kwargs, -) +from ..compat import compat_urlparse from ..utils import ( + ExtractorError, + Popen, check_executable, encodeArgument, - ExtractorError, get_exe_version, is_outdated_version, - Popen, ) @@ -37,13 +32,11 @@ def cookie_to_dict(cookie): cookie_dict['secure'] = cookie.secure if cookie.discard is not None: cookie_dict['discard'] = cookie.discard - try: + with contextlib.suppress(TypeError): if (cookie.has_nonstandard_attr('httpOnly') or cookie.has_nonstandard_attr('httponly') or cookie.has_nonstandard_attr('HttpOnly')): cookie_dict['httponly'] = True - except TypeError: - pass return cookie_dict @@ -51,7 +44,7 @@ def cookie_jar_to_list(cookie_jar): return [cookie_to_dict(cookie) for cookie in cookie_jar] -class PhantomJSwrapper(object): +class PhantomJSwrapper: """PhantomJS wrapper class This class is experimental. @@ -135,10 +128,8 @@ class PhantomJSwrapper(object): def __del__(self): for name in self._TMP_FILE_NAMES: - try: + with contextlib.suppress(OSError, KeyError): os.remove(self._TMP_FILES[name].name) - except (IOError, OSError, KeyError): - pass def _save_cookies(self, url): cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) @@ -158,7 +149,7 @@ class PhantomJSwrapper(object): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**compat_kwargs(cookie)) + self.extractor._set_cookie(**cookie) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ @@ -218,9 +209,9 @@ class PhantomJSwrapper(object): f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) if video_id is None: - self.extractor.to_screen('%s' % (note2,)) + self.extractor.to_screen(f'{note2}') else: - self.extractor.to_screen('%s: %s' % (video_id, note2)) + self.extractor.to_screen(f'{video_id}: {note2}') p = Popen( [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index 5eb1cdbad..6c1eb8f3a 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -38,8 +35,8 @@ class OpenRecBaseIE(InfoExtractor): raise ExtractorError(f'Failed to extract {name} info') formats = list(self._expand_media(video_id, get_first(movie_stores, 'media'))) - if not formats and is_live: - # archived livestreams + if not formats: + # archived livestreams or subscriber-only videos cookies = self._get_cookies('https://www.openrec.tv/') detail = self._download_json( 
f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id, diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py index 422d0b330..09b121422 100644 --- a/yt_dlp/extractor/ora.py +++ b/yt_dlp/extractor/ora.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 0628977a0..56309ffcb 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/outsidetv.py b/yt_dlp/extractor/outsidetv.py index c5333b08c..b1fcbd6a7 100644 --- a/yt_dlp/extractor/outsidetv.py +++ b/yt_dlp/extractor/outsidetv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py index 62c52cd6e..51778d8a2 100644 --- a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py index d0a62fb17..4b0801c1a 100644 --- a/yt_dlp/extractor/palcomp3.py +++ b/yt_dlp/extractor/palcomp3.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/pandoratv.py b/yt_dlp/extractor/pandoratv.py index 623005338..3747f31d2 100644 --- a/yt_dlp/extractor/pandoratv.py +++ b/yt_dlp/extractor/pandoratv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 94a9319ea..7987d77c6 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -1,4 +1,3 @@ -from __future__ import unicode_literals import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/parliamentliveuk.py b/yt_dlp/extractor/parliamentliveuk.py index 974d65482..38cb03164 100644 --- a/yt_dlp/extractor/parliamentliveuk.py +++ b/yt_dlp/extractor/parliamentliveuk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py index c85eaa7dc..f31ae576c 100644 --- a/yt_dlp/extractor/parlview.py +++ b/yt_dlp/extractor/parlview.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 963a0d6fb..cce9843d4 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index e48a2b8e0..4e6674e85 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index 1d777221c..e76305acd 100644 --- 
a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -1,12 +1,10 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..utils import ( qualities, unified_timestamp, + traverse_obj, ) @@ -39,6 +37,14 @@ class PearVideoIE(InfoExtractor): } for mobj in re.finditer( r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2', webpage)] + if not formats: + info = self._download_json( + 'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id, + query={'contId': video_id}, headers={'Referer': url}) + formats = [{ + 'format_id': k, + 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v + } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v] self._sort_formats(formats) title = self._search_regex( diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index 4bf68559a..f1c4469d6 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 9d6b82178..0d3bc18a8 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py index 002d33a88..821abe496 100644 --- a/yt_dlp/extractor/peertv.py +++ b/yt_dlp/extractor/peertv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py index 7d832253f..8e50ffc7f 100644 --- a/yt_dlp/extractor/peloton.py +++ b/yt_dlp/extractor/peloton.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/people.py b/yt_dlp/extractor/people.py index 6ca95715e..c5143c3ed 100644 --- a/yt_dlp/extractor/people.py +++ b/yt_dlp/extractor/people.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py index c00d39375..824495f40 100644 --- a/yt_dlp/extractor/performgroup.py +++ b/yt_dlp/extractor/performgroup.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index 1a292b8ac..fc8591a2c 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py index 9f4899c09..22164caaa 100644 --- a/yt_dlp/extractor/philharmoniedeparis.py +++ b/yt_dlp/extractor/philharmoniedeparis.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py index e3ea01443..5fa133afe 100644 --- a/yt_dlp/extractor/phoenix.py +++ b/yt_dlp/extractor/phoenix.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re 
from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/photobucket.py b/yt_dlp/extractor/photobucket.py index 53aebe2d9..71e9a4805 100644 --- a/yt_dlp/extractor/photobucket.py +++ b/yt_dlp/extractor/photobucket.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index ae160623b..d8d9c7801 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index adf21fda8..54999a832 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 84c3de2f0..14a540859 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py index 9f3501f77..313b5cce0 100644 --- a/yt_dlp/extractor/pinkbike.py +++ b/yt_dlp/extractor/pinkbike.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py index 80e9cd00e..171f9e4eb 100644 --- a/yt_dlp/extractor/pinterest.py +++ b/yt_dlp/extractor/pinterest.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py index f0ad0b24a..bfdb8b24e 100644 --- a/yt_dlp/extractor/pixivsketch.py +++ b/yt_dlp/extractor/pixivsketch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index 99ade85ec..301f5c838 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index 07ac15b54..03b9d6aaa 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 17f52e7f4..29d3210ac 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/playfm.py b/yt_dlp/extractor/playfm.py index 4298cbe30..e895ba480 100644 --- a/yt_dlp/extractor/playfm.py +++ b/yt_dlp/extractor/playfm.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/playplustv.py 
b/yt_dlp/extractor/playplustv.py index cad2c3a0f..05dbaf066 100644 --- a/yt_dlp/extractor/playplustv.py +++ b/yt_dlp/extractor/playplustv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/plays.py b/yt_dlp/extractor/plays.py index ddfc6f148..700dfe407 100644 --- a/yt_dlp/extractor/plays.py +++ b/yt_dlp/extractor/plays.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/playstuff.py b/yt_dlp/extractor/playstuff.py index 5a329957f..b424ba187 100644 --- a/yt_dlp/extractor/playstuff.py +++ b/yt_dlp/extractor/playstuff.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index 30c8a599e..f7e5ddbe7 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urlparse, diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py index e1c406b6c..5ffefc934 100644 --- a/yt_dlp/extractor/playvid.py +++ b/yt_dlp/extractor/playvid.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py index 9c9e597b5..ab7f71493 100644 --- a/yt_dlp/extractor/playwire.py +++ b/yt_dlp/extractor/playwire.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 2a5e0e488..b50152ad8 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import collections import json import os diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 26aff1af5..6e8f46fa3 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import uuid diff --git a/yt_dlp/extractor/podchaser.py b/yt_dlp/extractor/podchaser.py new file mode 100644 index 000000000..290c48817 --- /dev/null +++ b/yt_dlp/extractor/podchaser.py @@ -0,0 +1,97 @@ +import functools +import json + +from .common import InfoExtractor +from ..utils import ( + OnDemandPagedList, + float_or_none, + str_or_none, + str_to_int, + traverse_obj, + unified_timestamp, +) + + +class PodchaserIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?' + _PAGE_SIZE = 100 + _TESTS = [{ + 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585', + 'info_dict': { + 'id': '104365585', + 'title': 'Ep. 
285 – freeze me off', + 'description': 'cam ahn', + 'thumbnail': r're:^https?://.*\.jpg$', + 'ext': 'mp3', + 'categories': ['Comedy'], + 'tags': ['comedy', 'dark humor'], + 'series': 'Cum Town', + 'duration': 3708, + 'timestamp': 1636531259, + 'upload_date': '20211110', + 'rating': 4.0 + } + }, { + 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853', + 'info_dict': { + 'id': '28853', + 'title': 'The Bone Zone', + 'description': 'Podcast by The Bone Zone', + }, + 'playlist_count': 275 + }, { + 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes', + 'info_dict': { + 'id': '699349', + 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas', + 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1' + }, + 'playlist_mincount': 225 + }] + + @staticmethod + def _parse_episode(episode, podcast): + return { + 'id': str(episode.get('id')), + 'title': episode.get('title'), + 'description': episode.get('description'), + 'url': episode.get('audio_url'), + 'thumbnail': episode.get('image_url'), + 'duration': str_to_int(episode.get('length')), + 'timestamp': unified_timestamp(episode.get('air_date')), + 'rating': float_or_none(episode.get('rating')), + 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))), + 'tags': traverse_obj(podcast, ('tags', ..., 'text')), + 'series': podcast.get('title'), + } + + def _call_api(self, path, *args, **kwargs): + return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs) + + def _fetch_page(self, podcast_id, podcast, page): + json_response = self._call_api( + 'list/episode', podcast_id, + headers={'Content-Type': 'application/json;charset=utf-8'}, + data=json.dumps({ + 'start': page * self._PAGE_SIZE, + 'count': self._PAGE_SIZE, + 'sort_order': 'SORT_ORDER_RECENT', + 'filters': { + 'podcast_id': podcast_id + }, + 'options': {} + }).encode()) + + for episode in json_response['entities']: + yield self._parse_episode(episode, podcast) + + def _real_extract(self, url): + podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id') + podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id) + if not episode_id: + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE), + str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description')) + + episode = self._call_api(f'episodes/{episode_id}', episode_id) + return self._parse_episode(episode, podcast) diff --git a/yt_dlp/extractor/podomatic.py b/yt_dlp/extractor/podomatic.py index 673a3ab94..985bfae9d 100644 --- a/yt_dlp/extractor/podomatic.py +++ b/yt_dlp/extractor/podomatic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index b411390e2..eef0d02ca 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py index c9e2fed12..5c7baadf2 100644 --- a/yt_dlp/extractor/pokergo.py +++ b/yt_dlp/extractor/pokergo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 from .common import InfoExtractor diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index 1e3f46c07..e44d951e6 100644 
--- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from uuid import uuid4 import json diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index b2b3eb29c..514753b64 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json import math diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py index 5f9d0e720..ed741a07b 100644 --- a/yt_dlp/extractor/popcorntimes.py +++ b/yt_dlp/extractor/popcorntimes.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/popcorntv.py b/yt_dlp/extractor/popcorntv.py index 66d2e5094..77984626f 100644 --- a/yt_dlp/extractor/popcorntv.py +++ b/yt_dlp/extractor/popcorntv.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( extract_attributes, diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py index 20eac647a..af4a0dc9c 100644 --- a/yt_dlp/extractor/porn91.py +++ b/yt_dlp/extractor/porn91.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/porncom.py b/yt_dlp/extractor/porncom.py index 83df22141..2ebd3fa09 100644 --- a/yt_dlp/extractor/porncom.py +++ b/yt_dlp/extractor/porncom.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pornez.py b/yt_dlp/extractor/pornez.py index 713dc0080..df0e44a69 100644 --- a/yt_dlp/extractor/pornez.py +++ b/yt_dlp/extractor/pornez.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py index accf45269..26536bc65 100644 --- a/yt_dlp/extractor/pornflip.py +++ b/yt_dlp/extractor/pornflip.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/pornhd.py b/yt_dlp/extractor/pornhd.py index 9dbd72f1d..06a44ddd1 100644 --- a/yt_dlp/extractor/pornhd.py +++ b/yt_dlp/extractor/pornhd.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 17c8c9100..d296ccacb 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import itertools import math diff --git a/yt_dlp/extractor/pornotube.py b/yt_dlp/extractor/pornotube.py index 1b5b9a320..e0960f4c6 100644 --- a/yt_dlp/extractor/pornotube.py +++ b/yt_dlp/extractor/pornotube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py index 18459fc94..96d2da7c7 100644 --- a/yt_dlp/extractor/pornovoisines.py +++ b/yt_dlp/extractor/pornovoisines.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common 
import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/pornoxo.py b/yt_dlp/extractor/pornoxo.py index 489dc2b25..5104d8a49 100644 --- a/yt_dlp/extractor/pornoxo.py +++ b/yt_dlp/extractor/pornoxo.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( str_to_int, diff --git a/yt_dlp/extractor/presstv.py b/yt_dlp/extractor/presstv.py index bfb2eb71e..26ce74a59 100644 --- a/yt_dlp/extractor/presstv.py +++ b/yt_dlp/extractor/presstv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import remove_start diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py index 9e9867ba5..e4aa4bd35 100644 --- a/yt_dlp/extractor/projectveritas.py +++ b/yt_dlp/extractor/projectveritas.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py index e89bbfd27..cb5ada1b9 100644 --- a/yt_dlp/extractor/prosiebensat1.py +++ b/yt_dlp/extractor/prosiebensat1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from hashlib import sha1 diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py index 80561b80a..5bb183270 100644 --- a/yt_dlp/extractor/prx.py +++ b/yt_dlp/extractor/prx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index ca71665e0..a5dac1dff 100644 --- a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_HTTPError, diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py index 80091b85f..3c13d1f56 100644 --- a/yt_dlp/extractor/puls4.py +++ b/yt_dlp/extractor/puls4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .prosiebensat1 import ProSiebenSat1BaseIE from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/pyvideo.py b/yt_dlp/extractor/pyvideo.py index 869619723..7b25166b2 100644 --- a/yt_dlp/extractor/pyvideo.py +++ b/yt_dlp/extractor/pyvideo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 0106d166f..fa2454df4 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import time diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py index e2202d603..b459efceb 100644 --- a/yt_dlp/extractor/r7.py +++ b/yt_dlp/extractor/r7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index 1e60de153..dbb748715 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -1,29 +1,22 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re import base64 -import calendar -import datetime +import re +import urllib.parse from .common import InfoExtractor from ..utils import 
( ExtractorError, - update_url_query, clean_html, + time_seconds, + try_call, unified_timestamp, + update_url_query, ) -from ..compat import compat_urllib_parse class RadikoBaseIE(InfoExtractor): _FULL_KEY = None def _auth_client(self): - auth_cache = self._downloader.cache.load('radiko', 'auth_data') - if auth_cache: - return auth_cache - _, auth1_handle = self._download_webpage_handle( 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', headers={ @@ -92,8 +85,8 @@ class RadikoBaseIE(InfoExtractor): def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query): m3u8_playlist_data = self._download_xml( - 'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id, - note='Downloading m3u8 information') + f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, + note='Downloading stream information') m3u8_urls = m3u8_playlist_data.findall('.//url') formats = [] @@ -105,7 +98,7 @@ class RadikoBaseIE(InfoExtractor): 'station_id': station, **query, 'l': '15', - 'lsid': '77d0678df93a1034659c14d6fc89f018', + 'lsid': '88ecea37e968c1f17d5413312d9f8003', 'type': 'b', }) if playlist_url in found: @@ -115,20 +108,21 @@ class RadikoBaseIE(InfoExtractor): time_to_skip = None if is_onair else cursor - ft + domain = urllib.parse.urlparse(playlist_url).netloc subformats = self._extract_m3u8_formats( playlist_url, video_id, ext='m4a', - live=True, fatal=False, m3u8_id=None, + live=True, fatal=False, m3u8_id=domain, + note=f'Downloading m3u8 information from {domain}', headers={ 'X-Radiko-AreaId': area_id, 'X-Radiko-AuthToken': auth_token, }) for sf in subformats: - domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc - if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain): + if re.fullmatch(r'[cf]-radiko\.smartstream\.ne\.jp', domain): # Prioritize live radio vs playback based on extractor sf['preference'] = 100 if is_onair else -100 if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: - sf['_ffmpeg_args'] = ['-ss', time_to_skip] + sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]} formats.extend(subformats) self._sort_formats(formats) @@ -154,31 +148,29 @@ class RadikoIE(RadikoBaseIE): def _real_extract(self, url): station, video_id = self._match_valid_url(url).groups() vid_int = unified_timestamp(video_id, False) - - auth_token, area_id = self._auth_client() - prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) - title = prog.find('title').text - description = clean_html(prog.find('info').text) - station_name = station_program.find('.//name').text - - formats = self._extract_formats( - video_id=video_id, station=station, is_onair=False, - ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, - query={ - 'start_at': radio_begin, - 'ft': radio_begin, - 'end_at': radio_end, - 'to': radio_end, - 'seek': video_id, - }) + auth_cache = self._downloader.cache.load('radiko', 'auth_data') + for attempt in range(2): + auth_token, area_id = (not attempt and auth_cache) or self._auth_client() + formats = self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id, + }) + if formats: + break return { 'id': video_id, - 'title': title, - 'description': description, - 'uploader': station_name, + 'title': try_call(lambda: 
prog.find('title').text), + 'description': clean_html(try_call(lambda: prog.find('info').text)), + 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, 'formats': formats, @@ -208,8 +200,7 @@ class RadikoRadioIE(RadikoBaseIE): auth_token, area_id = self._auth_client() # get current time in JST (GMT+9:00 w/o DST) - vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9))) - vid_now = calendar.timegm(vid_now.timetuple()) + vid_now = time_seconds(hours=9) prog, station_program, ft, _, _ = self._find_program(station, station, vid_now) diff --git a/yt_dlp/extractor/radiobremen.py b/yt_dlp/extractor/radiobremen.py index 2c35f9845..99ba050d0 100644 --- a/yt_dlp/extractor/radiobremen.py +++ b/yt_dlp/extractor/radiobremen.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py index 4b4445c30..dd6f899a4 100644 --- a/yt_dlp/extractor/radiocanada.py +++ b/yt_dlp/extractor/radiocanada.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py index 038287363..befb0b72b 100644 --- a/yt_dlp/extractor/radiode.py +++ b/yt_dlp/extractor/radiode.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 082238bbc..8fef54dab 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py index 3f74f0c01..6a6118899 100644 --- a/yt_dlp/extractor/radiojavan.py +++ b/yt_dlp/extractor/radiojavan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py index 2e93e034f..8f9737ac3 100644 --- a/yt_dlp/extractor/radiokapital.py +++ b/yt_dlp/extractor/radiokapital.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py index 2e1ff36c2..67520172e 100644 --- a/yt_dlp/extractor/radiozet.py +++ b/yt_dlp/extractor/radiozet.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 7c72d60c6..31199e32e 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/raywenderlich.py b/yt_dlp/extractor/raywenderlich.py index f04d51f7b..e0e3c3ead 100644 --- a/yt_dlp/extractor/raywenderlich.py +++ b/yt_dlp/extractor/raywenderlich.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rbmaradio.py b/yt_dlp/extractor/rbmaradio.py index 9642fbbe1..86c63dbb7 100644 --- a/yt_dlp/extractor/rbmaradio.py +++ b/yt_dlp/extractor/rbmaradio.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from 
.common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py index ace611bc9..abbc167c0 100644 --- a/yt_dlp/extractor/rcs.py +++ b/yt_dlp/extractor/rcs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index ac42e58d9..0cfecbc9a 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import random import time diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py index 0c497856e..9a2e0d985 100644 --- a/yt_dlp/extractor/rds.py +++ b/yt_dlp/extractor/rds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index 756a3666b..2f0e41c5b 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index a042a59cc..aabc8dba9 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -1,4 +1,5 @@ import random +from urllib.parse import urlparse from .common import InfoExtractor from ..utils import ( @@ -19,6 +20,7 @@ class RedditIE(InfoExtractor): 'info_dict': { 'id': 'zv89llsvexdz', 'ext': 'mp4', + 'display_id': '6rrwyj', 'title': 'That small heart attack.', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:4', @@ -158,6 +160,15 @@ class RedditIE(InfoExtractor): 'duration': int_or_none(reddit_video.get('duration')), } + parsed_url = urlparse(video_url) + if parsed_url.netloc == 'v.redd.it': + self.raise_no_formats('This video is processing', expected=True, video_id=video_id) + return { + **info, + 'id': parsed_url.path.split('/')[1], + 'display_id': video_id, + } + # Not hosted on reddit, must continue extraction return { **info, diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 55196b768..e3712a1d6 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,4 +1,3 @@ -# coding: utf-8 import functools from .common import InfoExtractor diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 7fee54fee..ab7c505da 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/regiotv.py b/yt_dlp/extractor/regiotv.py index e250a52f0..6114841fb 100644 --- a/yt_dlp/extractor/regiotv.py +++ b/yt_dlp/extractor/regiotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py index 7c8909d95..ab47ee552 100644 --- a/yt_dlp/extractor/rentv.py +++ b/yt_dlp/extractor/rentv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py index d47fb45ca..cd3c20d7a 100644 --- a/yt_dlp/extractor/restudy.py +++ 
b/yt_dlp/extractor/restudy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py index 9dc482d21..1428b7cc9 100644 --- a/yt_dlp/extractor/reuters.py +++ b/yt_dlp/extractor/reuters.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index 4cb99c244..06b6c3c2f 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( qualities, diff --git a/yt_dlp/extractor/rice.py b/yt_dlp/extractor/rice.py index cf2bb1b51..9ca47f3d4 100644 --- a/yt_dlp/extractor/rice.py +++ b/yt_dlp/extractor/rice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py index 8bfce3416..8d29b302b 100644 --- a/yt_dlp/extractor/rmcdecouverte.py +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE from ..compat import ( diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py index cd6904bc9..5f1db0f05 100644 --- a/yt_dlp/extractor/rockstargames.py +++ b/yt_dlp/extractor/rockstargames.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 0fd65db4b..ad53d697e 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -1,26 +1,33 @@ -# coding: utf-8 import itertools +import json +import re +import urllib.parse from datetime import datetime -from .common import InfoExtractor +from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, format_field, int_or_none, str_or_none, traverse_obj, + try_get, + unescapeHTML, unified_timestamp, url_or_none, + urlencode_postdata, ) - _API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/' class RokfinIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' + _NETRC_MACHINE = 'rokfin' + _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect' + _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec. 
1.4-5
    _TESTS = [{
        'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change',
        'info_dict': {
@@ -84,8 +91,7 @@ class RokfinIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
-
-        metadata = self._download_json(f'{_API_BASE_URL}{video_id}', video_id)
+        metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id)
 
         scheduled = unified_timestamp(metadata.get('scheduledAt'))
         live_status = ('was_live' if metadata.get('stoppedAt')
@@ -160,6 +166,79 @@ class RokfinIE(InfoExtractor):
             if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False):
                 return
 
+    def _perform_login(self, username, password):
+        # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1)
+        login_page = self._download_webpage(
+            f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid',
+            None, note='loading login page', errnote='error loading login page')
+        authentication_point_url = unescapeHTML(self._search_regex(
+            r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"',
+            login_page, name='Authentication URL'))
+
+        resp_body = self._download_webpage(
+            authentication_point_url, None, note='logging in', fatal=False, expected_status=404,
+            data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''}))
+        if not self._authentication_active():
+            if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''):
+                raise ExtractorError('invalid username/password', expected=True)
+            raise ExtractorError('Login failed')
+
+        urlh = self._request_webpage(
+            f'{self._AUTH_BASE}/auth', None,
+            note='granting user authorization', errnote='user authorization rejected by Rokfin',
+            query={
+                'client_id': 'web',
+                'prompt': 'none',
+                'redirect_uri': 'https://rokfin.com/silent-check-sso.html',
+                'response_mode': 'fragment',
+                'response_type': 'code',
+                'scope': 'openid',
+            })
+        self._access_mgmt_tokens = self._download_json(
+            f'{self._AUTH_BASE}/token', None,
+            note='getting access credentials', errnote='error getting access credentials',
+            data=urlencode_postdata({
+                'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.geturl()).fragment).get('code')[0],
+                'client_id': 'web',
+                'grant_type': 'authorization_code',
+                'redirect_uri': 'https://rokfin.com/silent-check-sso.html'
+            }))
+
+    def _authentication_active(self):
+        return not (
+            {'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'}
+            - set(self._get_cookies(self._AUTH_BASE)))
+
+    def _get_auth_token(self):
+        return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']]))
+
+    def _download_json_using_access_token(self, url_or_request, video_id, headers={}, query={}):
+        assert 'authorization' not in headers
+        headers = headers.copy()
+        auth_token = self._get_auth_token()
+        refresh_token = self._access_mgmt_tokens.get('refresh_token')
+        if auth_token:
+            headers['authorization'] = auth_token
+
+        json_string, urlh = self._download_webpage_handle(
+            url_or_request, video_id, headers=headers, query=query, expected_status=401)
+        if not auth_token or urlh.code != 401 or refresh_token is None:
+            return self._parse_json(json_string, video_id)
+
+        self._access_mgmt_tokens = self._download_json(
+            f'{self._AUTH_BASE}/token', video_id,
+            note='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote='Failed to re-authorize',
+            data=urlencode_postdata({
+                'grant_type': 'refresh_token',
+                'refresh_token': refresh_token,
+                'client_id': 'web'
+            }))
+        headers['authorization'] = self._get_auth_token()
+        if headers['authorization'] is None:
+            raise ExtractorError('User authorization lost', expected=True)
+
+        return self._download_json(url_or_request, video_id, headers=headers, query=query)
+
 
 class RokfinPlaylistBaseIE(InfoExtractor):
     _TYPES = {
@@ -183,6 +262,7 @@ class RokfinPlaylistBaseIE(InfoExtractor):
 
 class RokfinStackIE(RokfinPlaylistBaseIE):
     IE_NAME = 'rokfin:stack'
+    IE_DESC = 'Rokfin Stacks'
     _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020',
@@ -200,6 +280,7 @@ class RokfinStackIE(RokfinPlaylistBaseIE):
 
 class RokfinChannelIE(RokfinPlaylistBaseIE):
     IE_NAME = 'rokfin:channel'
+    IE_DESC = 'Rokfin Channels'
     _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$'
     _TESTS = [{
         'url': 'https://rokfin.com/TheConvoCouch',
@@ -254,3 +335,76 @@ class RokfinChannelIE(RokfinPlaylistBaseIE):
         return self.playlist_result(
             self._entries(channel_id, channel_name, self._TABS[tab]),
             f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description')))
+
+
+class RokfinSearchIE(SearchInfoExtractor):
+    IE_NAME = 'rokfin:search'
+    IE_DESC = 'Rokfin Search'
+    _SEARCH_KEY = 'rkfnsearch'
+    _TYPES = {
+        'video': (('id', 'raw'), 'post'),
+        'audio': (('id', 'raw'), 'post'),
+        'stream': (('content_id', 'raw'), 'stream'),
+        'dead_stream': (('content_id', 'raw'), 'stream'),
+        'stack': (('content_id', 'raw'), 'stack'),
+    }
+    _TESTS = [{
+        'url': 'rkfnsearch5:"zelenko"',
+        'playlist_count': 5,
+        'info_dict': {
+            'id': '"zelenko"',
+            'title': '"zelenko"',
+        }
+    }]
+    _db_url = None
+    _db_access_key = None
+
+    def _real_initialize(self):
+        self._db_url, self._db_access_key = self._downloader.cache.load(self.ie_key(), 'auth', default=(None, None))
+        if not self._db_url:
+            self._get_db_access_credentials()
+
+    def _search_results(self, query):
+        total_pages = None
+        for page_number in itertools.count(1):
+            search_results = self._run_search_query(
+                query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
+                note=f'Downloading page {page_number}{format_field(total_pages, template=" of ~%s")}')
+            total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)
+
+            for result in search_results.get('results') or []:
+                video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None))
+                video_id = traverse_obj(result, video_id_key, expected_type=int_or_none)
+                if video_id and video_type:
+                    yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}')
+            if not search_results.get('results'):
+                return
+
+    def _run_search_query(self, video_id, data, **kwargs):
+        data = json.dumps(data).encode()
+        for attempt in range(2):
+            search_results = self._download_json(
+                self._db_url, video_id, data=data, fatal=(attempt == 1),
+                headers={'authorization': self._db_access_key}, **kwargs)
+            if search_results:
+                return search_results
+            self.write_debug('Updating access credentials')
+            self._get_db_access_credentials(video_id)
+
+    def _get_db_access_credentials(self, video_id=None):
+        auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None}
+        notfound_err_page = self._download_webpage(
+            'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page')
+        for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page):
+            js_content = self._download_webpage(
+                f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False)
+            auth_data.update(re.findall(
+                rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or ''))
+            if not all(auth_data.values()):
+                continue
+
+            self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
+            self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
+            self._downloader.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
+            return
+        raise ExtractorError('Unable to extract access credentials')
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index a55dd4f8b..011dadfaa 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/rottentomatoes.py b/yt_dlp/extractor/rottentomatoes.py
index 14c8e8236..f133c851b 100644
--- a/yt_dlp/extractor/rottentomatoes.py
+++ b/yt_dlp/extractor/rottentomatoes.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .internetvideoarchive import InternetVideoArchiveIE
diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py
index fccf69401..a8189676f 100644
--- a/yt_dlp/extractor/rozhlas.py
+++ b/yt_dlp/extractor/rozhlas.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/rtbf.py b/yt_dlp/extractor/rtbf.py
index 4b61fdb17..a300a2482 100644
--- a/yt_dlp/extractor/rtbf.py
+++ b/yt_dlp/extractor/rtbf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
index 1fbc72915..93faf1b32 100644
--- a/yt_dlp/extractor/rte.py
+++ b/yt_dlp/extractor/rte.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py
index e29171474..afa0d33cf 100644
--- a/yt_dlp/extractor/rtl2.py
+++ b/yt_dlp/extractor/rtl2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py
index 9eaa06f25..ed89554ab 100644
--- a/yt_dlp/extractor/rtlnl.py
+++ b/yt_dlp/extractor/rtlnl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/rtnews.py b/yt_dlp/extractor/rtnews.py
index 68b6044b6..6be9945f7 100644
--- a/yt_dlp/extractor/rtnews.py
+++ b/yt_dlp/extractor/rtnews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py
index c165ade78..5928a207a 100644
--- a/yt_dlp/extractor/rtp.py
+++ b/yt_dlp/extractor/rtp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import
InfoExtractor from ..utils import js_to_json import re diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py index 93d51e8ed..7381d8202 100644 --- a/yt_dlp/extractor/rtrfm.py +++ b/yt_dlp/extractor/rtrfm.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py index 865a73024..e5ba1a26b 100644 --- a/yt_dlp/extractor/rts.py +++ b/yt_dlp/extractor/rts.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .srgssr import SRGSSRIE diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 7a1dc6f32..42a602968 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -1,9 +1,5 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import io -import sys from .common import InfoExtractor from ..compat import ( @@ -20,8 +16,6 @@ from ..utils import ( try_get, ) -_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x)) - class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' @@ -90,7 +84,7 @@ class RTVEALaCartaIE(InfoExtractor): alphabet = [] e = 0 d = 0 - for l in _bytes_to_chr(alphabet_data): + for l in alphabet_data.decode('iso-8859-1'): if d == 0: alphabet.append(l) d = e = (e + 1) % 4 @@ -100,7 +94,7 @@ class RTVEALaCartaIE(InfoExtractor): f = 0 e = 3 b = 1 - for letter in _bytes_to_chr(url_data): + for letter in url_data.decode('iso-8859-1'): if f == 0: l = int(letter) * 10 f = 1 diff --git a/yt_dlp/extractor/rtvnh.py b/yt_dlp/extractor/rtvnh.py index 6a00f7007..58af3dda2 100644 --- a/yt_dlp/extractor/rtvnh.py +++ b/yt_dlp/extractor/rtvnh.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py index 3ea0f1883..fb06efa4b 100644 --- a/yt_dlp/extractor/rtvs.py +++ b/yt_dlp/extractor/rtvs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ruhd.py b/yt_dlp/extractor/ruhd.py index 3c8053a26..abaa3f9ea 100644 --- a/yt_dlp/extractor/ruhd.py +++ b/yt_dlp/extractor/ruhd.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py index a602a9f33..bb113d822 100644 --- a/yt_dlp/extractor/rule34video.py +++ b/yt_dlp/extractor/rule34video.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import re from ..utils import parse_duration diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index a0d5f88d9..50c383d79 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 2f753b41f..ecfcea939 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index 0ea8253fa..adf78ddb0 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 5a30e3360..c6d94c100 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re @@ -41,6 +38,7 @@ class RuutuIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114, 'age_limit': 0, + 'upload_date': '20150508', }, }, { @@ -54,6 +52,9 @@ class RuutuIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 40, 'age_limit': 0, + 'upload_date': '20150507', + 'series': 'Superpesis', + 'categories': ['Urheilu'], }, }, { @@ -66,6 +67,8 @@ class RuutuIE(InfoExtractor): 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, + 'upload_date': '20151012', + 'series': 'Läpivalaisu', }, }, # Episode where <SourceFile> is "NOT-USED", but has other @@ -85,6 +88,9 @@ class RuutuIE(InfoExtractor): 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, + 'upload_date': '20190320', + 'series': 'Mysteeritarinat', + 'duration': 1324, }, 'expected_warnings': [ 'HTTP Error 502: Bad Gateway', @@ -129,14 +135,30 @@ class RuutuIE(InfoExtractor): _API_BASE = 'https://gatling.nelonenmedia.fi' @classmethod - def _extract_url(cls, webpage): + def _extract_urls(cls, webpage): + # nelonen.fi settings = try_call( lambda: json.loads(re.search( r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False)) - video_id = traverse_obj(settings, ( - 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) - if video_id: - return f'http://www.ruutu.fi/video/{video_id}' + if settings: + video_id = traverse_obj(settings, ( + 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] + # hs.fi and is.fi + settings = try_call( + lambda: json.loads(re.search( + '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', + webpage).group(1), strict=False)) + if settings: + video_ids = set(traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or []) + if video_ids: + return [f'http://www.ruutu.fi/video/{v}' for v in video_ids] + video_id = traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] def _real_extract(self, url): video_id = self._match_id(url) @@ -209,10 +231,10 @@ class RuutuIE(InfoExtractor): extract_formats(video_xml.find('./Clip')) def pv(name): - node = find_xpath_attr( - video_xml, './Clip/PassthroughVariables/variable', 'name', name) - if node is not None: - return node.get('value') + value = try_call(lambda: find_xpath_attr( + video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value')) + if value != 'NA': + return value or None if not formats: if (not self.get_param('allow_unplayable_formats') @@ -237,6 +259,6 @@ class RuutuIE(InfoExtractor): 'series': pv('series_name'), 'season_number': int_or_none(pv('season_number')), 'episode_number': int_or_none(pv('episode_number')), - 'categories': themes.split(',') if themes else [], + 'categories': themes.split(',') if themes else None, 'formats': formats, } diff --git a/yt_dlp/extractor/ruv.py b/yt_dlp/extractor/ruv.py index d806ed068..12499d6ca 100644 --- a/yt_dlp/extractor/ruv.py +++ b/yt_dlp/extractor/ruv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index 7b4571daa..450a661e9 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py index 621335ca0..d2f60e92f 100644 --- a/yt_dlp/extractor/saitosan.py +++ b/yt_dlp/extractor/saitosan.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, try_get diff --git a/yt_dlp/extractor/samplefocus.py b/yt_dlp/extractor/samplefocus.py index 806c3c354..e9f5c227b 100644 --- a/yt_dlp/extractor/samplefocus.py +++ b/yt_dlp/extractor/samplefocus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py index df202a3a4..9a601a01c 100644 --- a/yt_dlp/extractor/sapo.py +++ b/yt_dlp/extractor/sapo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/savefrom.py b/yt_dlp/extractor/savefrom.py index 98efdc2a4..9c9e74b6d 100644 --- a/yt_dlp/extractor/savefrom.py +++ b/yt_dlp/extractor/savefrom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import os.path from .common import InfoExtractor diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index 4090f6385..711524406 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py index 69a0d01f3..e3dbaab69 100644 --- a/yt_dlp/extractor/screencast.py +++ b/yt_dlp/extractor/screencast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/screencastomatic.py b/yt_dlp/extractor/screencastomatic.py index 0afdc1715..f2f281f47 100644 --- a/yt_dlp/extractor/screencastomatic.py +++ b/yt_dlp/extractor/screencastomatic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( get_element_by_class, diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index 84918b67f..c3cee6e4a 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import hashlib diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index 7215cf5d1..d839ffcde 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/seeker.py b/yt_dlp/extractor/seeker.py index e5c18c7a5..65eb16a09 100644 --- a/yt_dlp/extractor/seeker.py +++ b/yt_dlp/extractor/seeker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py index 
b295184a1..bced14328 100644 --- a/yt_dlp/extractor/senategov.py +++ b/yt_dlp/extractor/senategov.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index 858547b54..cf4b93d45 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/servus.py b/yt_dlp/extractor/servus.py index 1610ddc2c..ac030ea41 100644 --- a/yt_dlp/extractor/servus.py +++ b/yt_dlp/extractor/servus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py index 9867961f0..8e95bc230 100644 --- a/yt_dlp/extractor/sevenplus.py +++ b/yt_dlp/extractor/sevenplus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py index 3df51520b..000f7e166 100644 --- a/yt_dlp/extractor/sexu.py +++ b/yt_dlp/extractor/sexu.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index eef4975cb..891bfcfee 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index ab45d9ce4..53ca86b73 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import math import re diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py index 93ab2a167..5bc097b0d 100644 --- a/yt_dlp/extractor/shared.py +++ b/yt_dlp/extractor/shared.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index 45c12915a..c0780abe2 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..aes import aes_cbc_decrypt, unpad_pkcs7 from ..compat import ( diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py index 1aada69ac..cd681a035 100644 --- a/yt_dlp/extractor/showroomlive.py +++ b/yt_dlp/extractor/showroomlive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/simplecast.py b/yt_dlp/extractor/simplecast.py index 857e9414f..ecbb6123b 100644 --- a/yt_dlp/extractor/simplecast.py +++ b/yt_dlp/extractor/simplecast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py index b62b0c3e5..d30d57d85 100644 --- a/yt_dlp/extractor/sina.py +++ b/yt_dlp/extractor/sina.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import 
InfoExtractor from ..utils import ( HEADRequest, diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index fd747f59b..b7b7d7d7f 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py index 81aecb311..e02f8cef0 100644 --- a/yt_dlp/extractor/skeb.py +++ b/yt_dlp/extractor/skeb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py index ad1e62d88..0a8b6cc76 100644 --- a/yt_dlp/extractor/sky.py +++ b/yt_dlp/extractor/sky.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index ddb43c075..438fb60e3 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py index 47bbb7632..4292bb2ae 100644 --- a/yt_dlp/extractor/skylinewebcams.py +++ b/yt_dlp/extractor/skylinewebcams.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py index fffc9aa22..6264b04bb 100644 --- a/yt_dlp/extractor/skynewsarabia.py +++ b/yt_dlp/extractor/skynewsarabia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py index 8e079ee31..43a9c82cf 100644 --- a/yt_dlp/extractor/skynewsau.py +++ b/yt_dlp/extractor/skynewsau.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/slideshare.py b/yt_dlp/extractor/slideshare.py index 9b3ad0ad4..ab9dad0ec 100644 --- a/yt_dlp/extractor/slideshare.py +++ b/yt_dlp/extractor/slideshare.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index df6084647..72ca56057 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( bool_or_none, diff --git a/yt_dlp/extractor/slutload.py b/yt_dlp/extractor/slutload.py index 661f9e59d..8e6e89c9a 100644 --- a/yt_dlp/extractor/slutload.py +++ b/yt_dlp/extractor/slutload.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/snotr.py b/yt_dlp/extractor/snotr.py index 0bb548255..6889f1929 100644 --- a/yt_dlp/extractor/snotr.py +++ b/yt_dlp/extractor/snotr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/sohu.py 
b/yt_dlp/extractor/sohu.py index 3bff5c595..c3a135955 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index 5b6849fc9..17d28478f 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -1,7 +1,5 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime +import json import math import random import time @@ -85,21 +83,32 @@ class SonyLIVIE(InfoExtractor): raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}') self.report_login() - data = '''{"mobileNumber":"%s","channelPartnerID":"MSMIND","country":"IN","timestamp":"%s", - "otpSize":6,"loginType":"REGISTERORSIGNIN","isMobileMandatory":true} - ''' % (username, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ")) otp_request_json = self._download_json( 'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2', - None, note='Sending OTP', data=data.encode(), headers=self._HEADERS) + None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({ + 'mobileNumber': username, + 'channelPartnerID': 'MSMIND', + 'country': 'IN', + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'), + 'otpSize': 6, + 'loginType': 'REGISTERORSIGNIN', + 'isMobileMandatory': True, + }).encode()) if otp_request_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) - otp_code = self._get_tfa_info('OTP') - data = '''{"channelPartnerID":"MSMIND","mobileNumber":"%s","country":"IN","otp":"%s", - "dmaId":"IN","ageConfirmation":true,"timestamp":"%s","isMobileMandatory":true} - ''' % (username, otp_code, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ")) + otp_verify_json = self._download_json( 'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2', - None, note='Verifying OTP', data=data.encode(), headers=self._HEADERS) + None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({ + 'channelPartnerID': 'MSMIND', + 'mobileNumber': username, + 'country': 'IN', + 'otp': self._get_tfa_info('OTP'), + 'dmaId': 'IN', + 'ageConfirmation': True, + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'), + 'isMobileMandatory': True, + }).encode()) if otp_verify_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken'] diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index bbc79c2be..6dfa50c60 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re import json @@ -12,7 +9,6 @@ from .common import ( ) from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, ) from ..utils import ( @@ -96,7 +92,7 @@ class SoundcloudBaseIE(InfoExtractor): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **compat_kwargs(kwargs)) + return super()._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): self._store_client_id(None) diff --git a/yt_dlp/extractor/soundgasm.py b/yt_dlp/extractor/soundgasm.py index d608eb7a7..9e59c7c0e 100644 --- a/yt_dlp/extractor/soundgasm.py +++ b/yt_dlp/extractor/soundgasm.py @@ -1,6 +1,3 @@ 
-# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 942a52dcf..855f1d6d3 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 4bc2263f0..fc5a492a6 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index dd849ae13..1aa8eaba1 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/spankwire.py b/yt_dlp/extractor/spankwire.py index e97c1d23e..603f17e9d 100644 --- a/yt_dlp/extractor/spankwire.py +++ b/yt_dlp/extractor/spankwire.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/spiegel.py b/yt_dlp/extractor/spiegel.py index 58f2ed353..3701e295a 100644 --- a/yt_dlp/extractor/spiegel.py +++ b/yt_dlp/extractor/spiegel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/spiegeltv.py b/yt_dlp/extractor/spiegeltv.py deleted file mode 100644 index 6ccf4c342..000000000 --- a/yt_dlp/extractor/spiegeltv.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .nexx import NexxIE - - -class SpiegeltvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/', - 'only_matching': True, - } - - def _real_extract(self, url): - return self.url_result( - 'https://api.nexx.cloud/v3/748/videos/byid/%s' - % self._match_id(url), ie=NexxIE.ie_key()) diff --git a/yt_dlp/extractor/spike.py b/yt_dlp/extractor/spike.py index 5805f3d44..5c1c78d8f 100644 --- a/yt_dlp/extractor/spike.py +++ b/yt_dlp/extractor/spike.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor diff --git a/yt_dlp/extractor/sport5.py b/yt_dlp/extractor/sport5.py index 35c57d62a..f4ac98b6e 100644 --- a/yt_dlp/extractor/sport5.py +++ b/yt_dlp/extractor/sport5.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/sportbox.py b/yt_dlp/extractor/sportbox.py index b9017fd2a..1041cc7d1 100644 --- a/yt_dlp/extractor/sportbox.py +++ b/yt_dlp/extractor/sportbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 15b488ab7..75074b310 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git 
a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py index 826f98cff..a2068a1b6 100644 --- a/yt_dlp/extractor/spotify.py +++ b/yt_dlp/extractor/spotify.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re @@ -22,7 +19,7 @@ class SpotifyBaseIE(InfoExtractor): 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0', 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d', } - _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)' + _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)' def _real_initialize(self): self._ACCESS_TOKEN = self._download_json( @@ -96,11 +93,18 @@ class SpotifyBaseIE(InfoExtractor): 'series': series, } + @classmethod + def _extract_embed_urls(cls, webpage): + return re.findall( + r'<iframe[^>]+src="(https?://open\.spotify.com/embed/[^"]+)"', + webpage) + class SpotifyIE(SpotifyBaseIE): IE_NAME = 'spotify' + IE_DESC = 'Spotify episodes' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode' - _TEST = { + _TESTS = [{ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b', 'info_dict': { @@ -112,7 +116,10 @@ class SpotifyIE(SpotifyBaseIE): 'release_date': '20201217', 'series': "The Guardian's Audio Long Reads", } - } + }, { + 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA', + 'only_matching': True, + }] def _real_extract(self, url): episode_id = self._match_id(url) @@ -125,6 +132,7 @@ class SpotifyIE(SpotifyBaseIE): class SpotifyShowIE(SpotifyBaseIE): IE_NAME = 'spotify:show' + IE_DESC = 'Spotify shows' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show' _TEST = { 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M', diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index 6c7e40ae4..36a9bd291 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py index 49ac1f559..8e156bf1a 100644 --- a/yt_dlp/extractor/springboardplatform.py +++ b/yt_dlp/extractor/springboardplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sprout.py b/yt_dlp/extractor/sprout.py index e243732f2..444a6c270 100644 --- a/yt_dlp/extractor/sprout.py +++ b/yt_dlp/extractor/sprout.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .adobepass import AdobePassIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py index f9919816d..6dd312985 100644 --- a/yt_dlp/extractor/srgssr.py +++ b/yt_dlp/extractor/srgssr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/srmediathek.py b/yt_dlp/extractor/srmediathek.py index 359dadaa3..3cc39870f 100644 --- a/yt_dlp/extractor/srmediathek.py +++ b/yt_dlp/extractor/srmediathek.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .ard import ARDMediathekBaseIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/stanfordoc.py b/yt_dlp/extractor/stanfordoc.py index 
0003075ac..be0f4afc1 100644 --- a/yt_dlp/extractor/stanfordoc.py +++ b/yt_dlp/extractor/stanfordoc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py index 411320ede..bb6e8f1ea 100644 --- a/yt_dlp/extractor/startv.py +++ b/yt_dlp/extractor/startv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py index 4ed0fb592..ab22fdbc6 100644 --- a/yt_dlp/extractor/steam.py +++ b/yt_dlp/extractor/steam.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py index 822782507..2fd200f87 100644 --- a/yt_dlp/extractor/stitcher.py +++ b/yt_dlp/extractor/stitcher.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index e18a59a49..716190220 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 808129649..a2935b04b 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamanity.py b/yt_dlp/extractor/streamanity.py index 2e2d5eedf..f8c37c0dd 100644 --- a/yt_dlp/extractor/streamanity.py +++ b/yt_dlp/extractor/streamanity.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamcloud.py b/yt_dlp/extractor/streamcloud.py index b97bb4374..728980921 100644 --- a/yt_dlp/extractor/streamcloud.py +++ b/yt_dlp/extractor/streamcloud.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py index 4cb9923e2..85fc3a3c3 100644 --- a/yt_dlp/extractor/streamcz.py +++ b/yt_dlp/extractor/streamcz.py @@ -1,4 +1,3 @@ -# coding: utf-8 import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py index 6b190bb3b..93c42942c 100644 --- a/yt_dlp/extractor/streamff.py +++ b/yt_dlp/extractor/streamff.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none, parse_iso8601 diff --git a/yt_dlp/extractor/streetvoice.py b/yt_dlp/extractor/streetvoice.py index f21681ae7..a32c8bc37 100644 --- a/yt_dlp/extractor/streetvoice.py +++ b/yt_dlp/extractor/streetvoice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/stretchinternet.py b/yt_dlp/extractor/stretchinternet.py index ec08eae55..e438dee11 100644 --- a/yt_dlp/extractor/stretchinternet.py +++ b/yt_dlp/extractor/stretchinternet.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/stripchat.py 
b/yt_dlp/extractor/stripchat.py index 0d4a0ce4c..a7c7b0649 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py index ba5661d74..618dc4329 100644 --- a/yt_dlp/extractor/stv.py +++ b/yt_dlp/extractor/stv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( compat_str, diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 59b77bf92..19498701c 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sverigesradio.py b/yt_dlp/extractor/sverigesradio.py index aa0691f0d..4a4b5cf7e 100644 --- a/yt_dlp/extractor/sverigesradio.py +++ b/yt_dlp/extractor/sverigesradio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py index 8ca62e370..e0c436b67 100644 --- a/yt_dlp/extractor/svt.py +++ b/yt_dlp/extractor/svt.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/swrmediathek.py b/yt_dlp/extractor/swrmediathek.py index 0f615979e..deebdd1a4 100644 --- a/yt_dlp/extractor/swrmediathek.py +++ b/yt_dlp/extractor/swrmediathek.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index def7e5a2c..c79d27a0d 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .adobepass import AdobePassIE from ..utils import ( update_url_query, diff --git a/yt_dlp/extractor/sztvhu.py b/yt_dlp/extractor/sztvhu.py index cfad33146..1cbc2a3cf 100644 --- a/yt_dlp/extractor/sztvhu.py +++ b/yt_dlp/extractor/sztvhu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py index 6e03d0a7d..9b9513f07 100644 --- a/yt_dlp/extractor/tagesschau.py +++ b/yt_dlp/extractor/tagesschau.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py index 6d336da78..d20dacfc1 100644 --- a/yt_dlp/extractor/tass.py +++ b/yt_dlp/extractor/tass.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/tastytrade.py b/yt_dlp/extractor/tastytrade.py deleted file mode 100644 index 7fe96bd5f..000000000 --- a/yt_dlp/extractor/tastytrade.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .ooyala import OoyalaIE - - -class TastyTradeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 
'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017', - 'info_dict': { - 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', - 'ext': 'mp4', - 'title': 'A History of Teaming', - 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', - 'duration': 422.255, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - }, { - 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - ooyala_code = self._search_regex( - r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') - - info = self._search_json_ld(webpage, display_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'ie_key': OoyalaIE.ie_key(), - 'url': 'ooyala:%s' % ooyala_code, - 'display_id': display_id, - }) - return info diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index c7d62ff4e..808c6c73d 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/tdslifeway.py b/yt_dlp/extractor/tdslifeway.py index 101c6ee31..3623a68c8 100644 --- a/yt_dlp/extractor/tdslifeway.py +++ b/yt_dlp/extractor/tdslifeway.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 232eaa521..e480d7610 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py index e22f0114c..2bf836abd 100644 --- a/yt_dlp/extractor/teachertube.py +++ b/yt_dlp/extractor/teachertube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/teachingchannel.py b/yt_dlp/extractor/teachingchannel.py index 624cdb3ad..275f6d1f9 100644 --- a/yt_dlp/extractor/teachingchannel.py +++ b/yt_dlp/extractor/teachingchannel.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py index 5793b711f..840702ed9 100644 --- a/yt_dlp/extractor/teamcoco.py +++ b/yt_dlp/extractor/teamcoco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index 64522ec4c..dd802db5b 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/techtalks.py b/yt_dlp/extractor/techtalks.py index 78f07319b..d37de360b 100644 --- a/yt_dlp/extractor/techtalks.py +++ b/yt_dlp/extractor/techtalks.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index f8a27550e..8e35bc85f 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index c7beee153..58d343b44 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .dplay import DPlayIE from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/telebruxelles.py b/yt_dlp/extractor/telebruxelles.py index 9e8c89bd6..8d87b6ec1 100644 --- a/yt_dlp/extractor/telebruxelles.py +++ b/yt_dlp/extractor/telebruxelles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index eecd6a5c9..a9c0755f4 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/telegraaf.py b/yt_dlp/extractor/telegraaf.py index 2dc020537..bc9a8d608 100644 --- a/yt_dlp/extractor/telegraaf.py +++ b/yt_dlp/extractor/telegraaf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py index 2dfa261e9..bb9ca8c45 100644 --- a/yt_dlp/extractor/telegram.py +++ b/yt_dlp/extractor/telegram.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..utils import clean_html, get_element_by_class class TelegramEmbedIE(InfoExtractor): @@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - webpage_embed = self._download_webpage(f'{url}?embed=1', video_id) + webpage = self._download_webpage(url, video_id, query={'embed': 0}) + webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading ermbed page') formats = [{ 'url': self._proto_relative_url(self._search_regex( @@ -29,9 +30,12 @@ class TelegramEmbedIE(InfoExtractor): return { 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True), - 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True), - 'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', - webpage_embed, 'thumbnail'), + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + 'description': self._html_search_meta( + ['og:description', 'twitter:description'], webpage, + default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))), + 'thumbnail': self._search_regex( + r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', + webpage_embed, 'thumbnail'), 'formats': formats, } diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py index ac2d603b6..7e444c0d0 100644 --- a/yt_dlp/extractor/telemb.py +++ b/yt_dlp/extractor/telemb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py index ebcecf55f..64954b8f1 100644 --- a/yt_dlp/extractor/telemundo.py +++ b/yt_dlp/extractor/telemundo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py index 4bef2fe76..e89137269 100644 --- a/yt_dlp/extractor/telequebec.py +++ b/yt_dlp/extractor/telequebec.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py index b9e2ef8ca..a73dd68fb 100644 --- a/yt_dlp/extractor/teletask.py +++ b/yt_dlp/extractor/teletask.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 1207b1a1b..550549f05 100644 --- a/yt_dlp/extractor/telewebion.py +++ b/yt_dlp/extractor/telewebion.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index 58fdecebe..80acaf190 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index 5c7b54531..fc4781447 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from datetime import datetime import base64 diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index 8bc512a9c..32cae429e 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -10,55 +8,36 @@ class TestURLIE(InfoExtractor): """ Allows addressing of the test cases as test:yout.*be_1 """ IE_DESC = False # Do not list - _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$' + _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$' def _real_extract(self, url): - from ..extractor import gen_extractors + from ..extractor import gen_extractor_classes - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - extractor_id = mobj.group('extractor') - all_extractors = gen_extractors() + extractor_id, num = self._match_valid_url(url).group('extractor', 'num') rex = re.compile(extractor_id, flags=re.IGNORECASE) - matching_extractors = [ - e for e in all_extractors if rex.search(e.IE_NAME)] + matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)] if len(matching_extractors) == 0: - raise ExtractorError( - 'No extractors matching %r found' % extractor_id, - expected=True) + raise ExtractorError('No extractors matching {extractor_id!r} found', expected=True) elif len(matching_extractors) > 1: - # Is it obvious which one to pick? 
- try: + try: # Check for exact match extractor = next( ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()) except StopIteration: raise ExtractorError( - ('Found multiple matching extractors: %s' % - ' '.join(ie.IE_NAME for ie in matching_extractors)), + 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), expected=True) else: extractor = matching_extractors[0] - num_str = mobj.group('num') - num = int(num_str) if num_str else 0 - - testcases = [] - t = getattr(extractor, '_TEST', None) - if t: - testcases.append(t) - testcases.extend(getattr(extractor, '_TESTS', [])) - + testcases = tuple(extractor.get_testcases(True)) try: - tc = testcases[num] + tc = testcases[int(num or 0)] except IndexError: raise ExtractorError( - ('Test case %d not found, got only %d tests' % - (num, len(testcases))), - expected=True) - - self.to_screen('Test URL: %s' % tc['url']) + f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True) - return self.url_result(tc['url'], video_id=video_id) + self.to_screen(f'Test URL: {tc["url"]}') + return self.url_result(tc['url']) diff --git a/yt_dlp/extractor/tf1.py b/yt_dlp/extractor/tf1.py index 44785bc65..4cf0322b3 100644 --- a/yt_dlp/extractor/tf1.py +++ b/yt_dlp/extractor/tf1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py index 0631cb7ab..a24789cb3 100644 --- a/yt_dlp/extractor/tfo.py +++ b/yt_dlp/extractor/tfo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py index f23b58713..a991a4dfd 100644 --- a/yt_dlp/extractor/theintercept.py +++ b/yt_dlp/extractor/theintercept.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index c2729f12d..bf7efc013 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time import hmac diff --git a/yt_dlp/extractor/thestar.py b/yt_dlp/extractor/thestar.py index c3f118894..293c34c06 100644 --- a/yt_dlp/extractor/thestar.py +++ b/yt_dlp/extractor/thestar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/thesun.py b/yt_dlp/extractor/thesun.py index 15d4a6932..ba5848283 100644 --- a/yt_dlp/extractor/thesun.py +++ b/yt_dlp/extractor/thesun.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py index 8b6d70a9f..3ec6b9711 100644 --- a/yt_dlp/extractor/theta.py +++ b/yt_dlp/extractor/theta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import try_get diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py index 9e506c9e0..9e94cd1ea 100644 --- a/yt_dlp/extractor/theweatherchannel.py +++ b/yt_dlp/extractor/theweatherchannel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .theplatform import 
ThePlatformIE diff --git a/yt_dlp/extractor/thisamericanlife.py b/yt_dlp/extractor/thisamericanlife.py index 91e45f2c3..9a3d79840 100644 --- a/yt_dlp/extractor/thisamericanlife.py +++ b/yt_dlp/extractor/thisamericanlife.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/thisav.py b/yt_dlp/extractor/thisav.py index 6bb00b3ab..b1cd57d1f 100644 --- a/yt_dlp/extractor/thisav.py +++ b/yt_dlp/extractor/thisav.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import remove_end diff --git a/yt_dlp/extractor/thisoldhouse.py b/yt_dlp/extractor/thisoldhouse.py index 8a1d17311..55b6413ae 100644 --- a/yt_dlp/extractor/thisoldhouse.py +++ b/yt_dlp/extractor/thisoldhouse.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import HEADRequest diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index 00a51dccd..1c0baf5ed 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py index fe6a9554a..ce28a37c0 100644 --- a/yt_dlp/extractor/threespeak.py +++ b/yt_dlp/extractor/threespeak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 987b0c43b..4ba993582 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import random import string diff --git a/yt_dlp/extractor/tinypic.py b/yt_dlp/extractor/tinypic.py index 39056e52e..216208cbd 100644 --- a/yt_dlp/extractor/tinypic.py +++ b/yt_dlp/extractor/tinypic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py index aee2273b8..ffb30c6b8 100644 --- a/yt_dlp/extractor/tmz.py +++ b/yt_dlp/extractor/tmz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -21,8 +18,10 @@ class TMZIE(InfoExtractor): "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet", "description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.", "timestamp": 1467831837, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20160706", + "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg", + "duration": 772.0, }, }, { @@ -33,8 +32,10 @@ class TMZIE(InfoExtractor): "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women", "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... 
he says it's women's fault in the first place.", "timestamp": 1562889485, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20190711", + "thumbnail": "https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg", + "duration": 123.0, }, }, { @@ -46,8 +47,10 @@ class TMZIE(InfoExtractor): "title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake", "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', "timestamp": 1429467813, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20150419", + "duration": 29.0, + "thumbnail": "https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg", }, }, { @@ -59,8 +62,10 @@ class TMZIE(InfoExtractor): "description": "Patti LaBelle made it known loud and clear last night ... NO " "ONE gets on her stage and strips down.", "timestamp": 1442683746, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20150919", + "duration": 104.0, + "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg", }, }, { @@ -71,8 +76,10 @@ class TMZIE(InfoExtractor): "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This", "description": "Two pretty parts of this video with NBA Commish Adam Silver.", "timestamp": 1454010989, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20160128", + "duration": 59.0, + "thumbnail": "https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg", }, }, { @@ -83,8 +90,10 @@ class TMZIE(InfoExtractor): "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!", "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. 
he's ready and willing to go to jail for the crime.", "timestamp": 1477500095, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20161026", + "thumbnail": "https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg", + "duration": 128.0, }, }, { @@ -99,8 +108,10 @@ class TMZIE(InfoExtractor): "swinging their billy clubs at both Anti-Fascist and Pro-Trump " "demonstrators.", "timestamp": 1604182772, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20201031", + "duration": 96.0, + "thumbnail": "https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg", }, }, { @@ -111,8 +122,23 @@ class TMZIE(InfoExtractor): "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing", "uploader": "ESNEWS", "description": "md5:49675bc58883ccf80474b8aa701e1064", - "upload_date": "20201101", + "upload_date": "20201102", "uploader_id": "ESNEWS", + "uploader_url": "http://www.youtube.com/user/ESNEWS", + "like_count": int, + "channel_id": "UCI-Oq7oFGakzSzHFlTtsUsQ", + "channel": "ESNEWS", + "view_count": int, + "duration": 225, + "live_status": "not_live", + "thumbnail": "https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp", + "channel_url": "https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ", + "channel_follower_count": int, + "playable_in_embed": True, + "categories": ["Sports"], + "age_limit": 0, + "tags": "count:10", + "availability": "public", }, }, { @@ -120,12 +146,20 @@ class TMZIE(InfoExtractor): "info_dict": { "id": "1329450007125225473", "ext": "mp4", - "title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", - "uploader": "TheMacLife", + "title": "The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", + "uploader": "The Mac Life", "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69", "upload_date": "20201119", - "uploader_id": "Maclifeofficial", + "uploader_id": "TheMacLife", "timestamp": 1605800556, + "thumbnail": "https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small", + "like_count": int, + "duration": 11.812, + "uploader_url": "https://twitter.com/TheMacLife", + "age_limit": 0, + "repost_count": int, + "tags": [], + "comment_count": int, }, }, ] diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py index d7617f708..6b766f3cc 100644 --- a/yt_dlp/extractor/tnaflix.py +++ b/yt_dlp/extractor/tnaflix.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py index eb873495f..51a51d84b 100644 --- a/yt_dlp/extractor/toggle.py +++ b/yt_dlp/extractor/toggle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/toggo.py b/yt_dlp/extractor/toggo.py index da5f0c4d1..9f98cfaf0 100644 --- a/yt_dlp/extractor/toggo.py +++ b/yt_dlp/extractor/toggo.py @@ -4,7 +4,7 @@ from ..utils import int_or_none, parse_qs class ToggoIE(InfoExtractor): IE_NAME = 'toggo' - _VALID_URL = r'https?://(?:www\.)?toggo\.de/[\w-]+/folge/(?P<id>[\w-]+)' + _VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/folge/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei', 
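The Toggo change above widens _VALID_URL so that toggolino paths and episode slugs containing characters such as apostrophes are accepted; the two URLs added as only_matching tests in this patch exercise exactly those cases. A minimal sketch of the difference, with both patterns copied verbatim from the hunk (only standard-library re behaviour is assumed, nothing Toggo-specific):

import re

OLD_PATTERN = r'https?://(?:www\.)?toggo\.de/[\w-]+/folge/(?P<id>[\w-]+)'
NEW_PATTERN = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/folge/(?P<id>[^/?#]+)'

urls = [
    # apostrophes in the slug: the old id group [\w-]+ stops at the first quote
    "https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock'n'lemming",
    # extra /toggolino/ path segment: the old pattern does not match at all
    'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb',
]

for url in urls:
    print(url)
    for name, pattern in (('old', OLD_PATTERN), ('new', NEW_PATTERN)):
        m = re.match(pattern, url)
        print(f'  {name}: {m.group("id") if m else "no match"}')

Running this shows the old pattern truncating the first id at the apostrophe and rejecting the toggolino URL outright, while the new pattern captures the full slug in both cases.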
'info_dict': { @@ -27,6 +27,12 @@ class ToggoIE(InfoExtractor): 'upload_date': '20200217', }, 'params': {'skip_download': True}, + }, { + 'url': 'https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock\'n\'lemming', + 'only_matching': True, + }, { + 'url': 'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py index 579623fed..a30cabb3c 100644 --- a/yt_dlp/extractor/tokentube.py +++ b/yt_dlp/extractor/tokentube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/tonline.py b/yt_dlp/extractor/tonline.py index 9b6a40db5..720282663 100644 --- a/yt_dlp/extractor/tonline.py +++ b/yt_dlp/extractor/tonline.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none, join_nonempty diff --git a/yt_dlp/extractor/toongoggles.py b/yt_dlp/extractor/toongoggles.py index df13d64c0..1b8fc3acd 100644 --- a/yt_dlp/extractor/toongoggles.py +++ b/yt_dlp/extractor/toongoggles.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py index 1d5da1040..349c0bded 100644 --- a/yt_dlp/extractor/toutv.py +++ b/yt_dlp/extractor/toutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .radiocanada import RadioCanadaIE diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index f705a06c9..bc7336186 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor import re diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py index 514f4793e..5c4a138c4 100644 --- a/yt_dlp/extractor/traileraddict.py +++ b/yt_dlp/extractor/traileraddict.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/trilulilu.py b/yt_dlp/extractor/trilulilu.py index a800449e9..fb97be737 100644 --- a/yt_dlp/extractor/trilulilu.py +++ b/yt_dlp/extractor/trilulilu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 65ea13ddb..c049025a3 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -1,8 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json +import random +import string from .common import InfoExtractor from ..utils import ( @@ -18,10 +17,20 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} - def _call_api(self, video_id, query=None, data=None): - return self._download_json( - 'https://gql.trovo.live/', video_id, query=query, data=data, - headers={'Accept': 'application/json'}) + def _call_api(self, video_id, data): + if 'persistedQuery' in data.get('extensions', {}): + url = 'https://gql.trovo.live' + else: + url = 'https://api-web.trovo.live/graphql' + + resp = self._download_json( + url, video_id, 
data=json.dumps([data]).encode(), headers={'Accept': 'application/json'}, + query={ + 'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)), + })[0] + if 'errors' in resp: + raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}') + return resp['data'][data['operationName']] def _extract_streamer_info(self, data): streamer_info = data.get('streamerInfo') or {} @@ -38,27 +47,14 @@ class TrovoIE(TrovoBaseIE): def _real_extract(self, url): username = self._match_id(url) - live_info = self._call_api(username, query={ - 'query': '''{ - getLiveInfo(params: {userName: "%s"}) { - isLive - programInfo { - coverUrl - id - streamInfo { - desc - playUrl - } - title - } - streamerInfo { - nickName - uid - userName - } - } -}''' % username, - })['data']['getLiveInfo'] + live_info = self._call_api(username, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': username, + }, + }, + }) if live_info.get('isLive') == 0: raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] @@ -93,56 +89,61 @@ class TrovoIE(TrovoBaseIE): class TrovoVodIE(TrovoBaseIE): _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)' _TESTS = [{ - 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', + 'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos', + 'params': {'getcomments': True}, 'info_dict': { - 'id': 'ltv-100095501_100095501_1609596043', + 'id': 'lc-5285890818705062210', 'ext': 'mp4', - 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!', - 'uploader': 'Exsl', - 'timestamp': 1609640305, - 'upload_date': '20210103', - 'uploader_id': '100095501', - 'duration': 43977, + 'title': 'fatal moaning for a super good🤣🤣', + 'uploader': 'OneTappedYou', + 'timestamp': 1621628019, + 'upload_date': '20210521', + 'uploader_id': '100719456', + 'duration': 31, 'view_count': int, 'like_count': int, 'comment_count': int, - 'comments': 'mincount:8', - 'categories': ['Grand Theft Auto V'], + 'comments': 'mincount:1', + 'categories': ['Call of Duty: Mobile'], + 'uploader_url': 'https://trovo.live/OneTappedYou', + 'thumbnail': r're:^https?://.*\.jpg', }, - 'skip': '404' }, { - 'url': 'https://trovo.live/clip/lc-5285890810184026005', + 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', 'only_matching': True, }] def _real_extract(self, url): vid = self._match_id(url) - resp = self._call_api(vid, data=json.dumps([{ - 'query': '''{ - batchGetVodDetailInfo(params: {vids: ["%s"]}) { - VodDetailInfos - } -}''' % vid, - }, { - 'query': '''{ - getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { - commentList { - author { - nickName - uid - } - commentID - content - createdAt - parentID - } - } -}''' % vid, - }]).encode()) - vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid] + + # NOTE: It is also possible to extract this info from the Nuxt data on the website, + # however that seems unreliable - sometimes it randomly doesn't return the data, + # at least when using a non-residential IP. 
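The NOTE above and the rewritten _call_api move Trovo metadata fetching onto Apollo-style persisted GraphQL queries: instead of an inline query string, the client posts only an operation name, its variables and a sha256 hash identifying the registered query, plus a random qid cache-buster. A standalone sketch of the same batchGetVodDetailInfo request, assuming the endpoint, hash and header set shown in the patch are still accepted by the service:

import json
import random
import string
import urllib.request

def trovo_persisted_query(vid):
    # Single-element JSON array, mirroring the payload the patched _call_api sends.
    payload = [{
        'operationName': 'batchGetVodDetailInfo',
        'variables': {'params': {'vids': [vid]}},
        'extensions': {
            'persistedQuery': {
                'version': 1,
                'sha256Hash': 'ceae0355d66476e21a1dd8e8af9f68de95b4019da2cda8b177c9a2255dad31d0',
            },
        },
    }]
    # qid is just a random cache-buster, generated the same way as in the patch.
    qid = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
    req = urllib.request.Request(
        f'https://gql.trovo.live?qid={qid}',
        data=json.dumps(payload).encode(),
        headers={
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'Origin': 'https://trovo.live',
        },
    )
    with urllib.request.urlopen(req) as resp:
        result = json.load(resp)[0]
    if 'errors' in result:
        raise RuntimeError(result['errors'][0]['message'])
    return result['data']['batchGetVodDetailInfo']

Calling trovo_persisted_query('lc-5285890818705062210') should return the same VodDetailInfos mapping the extractor consumes; whether additional headers or cookies are required in practice is not guaranteed by the patch.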
+ resp = self._call_api(vid, data={ + 'operationName': 'batchGetVodDetailInfo', + 'variables': { + 'params': { + 'vids': [vid], + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'ceae0355d66476e21a1dd8e8af9f68de95b4019da2cda8b177c9a2255dad31d0', + }, + }, + }) + vod_detail_info = resp['VodDetailInfos'][vid] vod_info = vod_detail_info['vodInfo'] title = vod_info['title'] + if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'): + playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting'] + if playback_rights_setting == 'SubscriberOnly': + raise ExtractorError('This video is only available for subscribers', expected=True) + else: + raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True) + language = vod_info.get('languageName') formats = [] for play_info in (vod_info.get('playInfos') or []): @@ -166,23 +167,6 @@ class TrovoVodIE(TrovoBaseIE): category = vod_info.get('categoryName') get_count = lambda x: int_or_none(vod_info.get(x + 'Num')) - comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or [] - comments = [] - for comment in comment_list: - content = comment.get('content') - if not content: - continue - author = comment.get('author') or {} - parent = comment.get('parentID') - comments.append({ - 'author': author.get('nickName'), - 'author_id': str_or_none(author.get('uid')), - 'id': str_or_none(comment.get('commentID')), - 'text': content, - 'timestamp': int_or_none(comment.get('createdAt')), - 'parent': 'root' if parent == 0 else str_or_none(parent), - }) - info = { 'id': vid, 'title': title, @@ -193,12 +177,51 @@ class TrovoVodIE(TrovoBaseIE): 'view_count': get_count('watch'), 'like_count': get_count('like'), 'comment_count': get_count('comment'), - 'comments': comments, 'categories': [category] if category else None, + '__post_extractor': self.extract_comments(vid), } info.update(self._extract_streamer_info(vod_detail_info)) return info + def _get_comments(self, vid): + for page in itertools.count(1): + comments_json = self._call_api(vid, data={ + 'operationName': 'getCommentList', + 'variables': { + 'params': { + 'appInfo': { + 'postID': vid, + }, + 'preview': {}, + 'pageSize': 99, + 'page': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'be8e5f9522ddac7f7c604c0d284fd22481813263580849926c4c66fb767eed25', + }, + }, + }) + for comment in comments_json['commentList']: + content = comment.get('content') + if not content: + continue + author = comment.get('author') or {} + parent = comment.get('parentID') + yield { + 'author': author.get('nickName'), + 'author_id': str_or_none(author.get('uid')), + 'id': str_or_none(comment.get('commentID')), + 'text': content, + 'timestamp': int_or_none(comment.get('createdAt')), + 'parent': 'root' if parent == 0 else str_or_none(parent), + } + + if comments_json['lastPage']: + break + class TrovoChannelBaseIE(TrovoBaseIE): def _get_vod_json(self, page, uid): @@ -218,9 +241,15 @@ class TrovoChannelBaseIE(TrovoBaseIE): def _real_extract(self, url): id = self._match_id(url) - uid = str(self._call_api(id, query={ - 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id - })['data']['getLiveInfo']['streamerInfo']['uid']) + live_info = self._call_api(id, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': id, + }, + }, + }) + uid = str(live_info['streamerInfo']['uid']) return 
self.playlist_result(self._entries(uid), playlist_id=uid) @@ -236,13 +265,25 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): }, }] - _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}' _TYPE = 'video' def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelLtvVideoInfos'] + return self._call_api(uid, data={ + 'operationName': 'getChannelLtvVideoInfos', + 'variables': { + 'params': { + 'channelID': int(uid), + 'pageSize': 99, + 'currPage': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': '78fe32792005eab7e922cafcdad9c56bed8bbc5f5df3c7cd24fcb84a744f5f78', + }, + }, + }) class TrovoChannelClipIE(TrovoChannelBaseIE): @@ -257,10 +298,22 @@ class TrovoChannelClipIE(TrovoChannelBaseIE): }, }] - _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}' _TYPE = 'clip' def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelClipVideoInfos'] + return self._call_api(uid, data={ + 'operationName': 'getChannelClipVideoInfos', + 'variables': { + 'params': { + 'channelID': int(uid), + 'pageSize': 99, + 'currPage': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'e7924bfe20059b5c75fc8ff9e7929f43635681a7bdf3befa01072ed22c8eff31', + }, + }, + }) diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py index fc98303ab..696343627 100644 --- a/yt_dlp/extractor/trueid.py +++ b/yt_dlp/extractor/trueid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/trunews.py b/yt_dlp/extractor/trunews.py index cca5b5ceb..d5ce86ece 100644 --- a/yt_dlp/extractor/trunews.py +++ b/yt_dlp/extractor/trunews.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py index c09ff897c..ea0f2f40e 100644 --- a/yt_dlp/extractor/trutv.py +++ b/yt_dlp/extractor/trutv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .turner import TurnerBaseIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py index db93b0182..32e80d9d2 100644 --- a/yt_dlp/extractor/tube8.py +++ b/yt_dlp/extractor/tube8.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from ..utils import ( diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 31feb9a70..9c8e1ac87 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tudou.py b/yt_dlp/extractor/tudou.py deleted file mode 100644 index 7421378a8..000000000 --- a/yt_dlp/extractor/tudou.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TudouPlaylistIE(InfoExtractor): - IE_NAME = 'tudou:playlist' - _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html' - _TESTS = [{ - 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html', - 'info_dict': { - 'id': 'zzdE77v6Mmo', - }, - 
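The Trovo rewrite above also stops fetching comments eagerly: _get_comments is now a generator driven by a page counter and the API's lastPage flag, and it is wired up through '__post_extractor': self.extract_comments(vid), so the extra requests only happen when comments are actually requested (for example with --write-comments). A minimal sketch of that pagination pattern; fetch_page is a hypothetical stand-in for the persisted getCommentList call and is not part of the patch:

import itertools

def paged_comments(fetch_page):
    # fetch_page(page) is assumed to return a mapping with 'commentList' and
    # 'lastPage' keys, mirroring the getCommentList response shape in the hunk.
    for page in itertools.count(1):
        data = fetch_page(page)
        for comment in data.get('commentList') or []:
            content = comment.get('content')
            if not content:
                continue
            author = comment.get('author') or {}
            yield {
                'author': author.get('nickName'),
                'id': comment.get('commentID'),
                'text': content,
            }
        # Stop once the API reports there are no further pages.
        if data.get('lastPage'):
            break

Because the generator is only consumed by the post-extractor, a user who never asks for comments never pays for the extra page requests.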
'playlist_mincount': 209, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - playlist_data = self._download_json( - 'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id) - entries = [self.url_result( - 'http://www.tudou.com/programs/view/%s' % item['icode'], - 'Tudou', item['icode'], - item['kw']) for item in playlist_data['items']] - return self.playlist_result(entries, playlist_id) - - -class TudouAlbumIE(InfoExtractor): - IE_NAME = 'tudou:album' - _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})' - _TESTS = [{ - 'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html', - 'info_dict': { - 'id': 'v5qckFJvNJg', - }, - 'playlist_mincount': 45, - }] - - def _real_extract(self, url): - album_id = self._match_id(url) - album_data = self._download_json( - 'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id) - entries = [self.url_result( - 'http://www.tudou.com/programs/view/%s' % item['icode'], - 'Tudou', item['icode'], - item['kw']) for item in album_data['items']] - return self.playlist_result(entries, album_id) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index 8086f613d..5d6615100 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py index 7e51de89e..e3d3f2a96 100644 --- a/yt_dlp/extractor/tunein.py +++ b/yt_dlp/extractor/tunein.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tunepk.py b/yt_dlp/extractor/tunepk.py index 9d42651ce..2973d15ec 100644 --- a/yt_dlp/extractor/tunepk.py +++ b/yt_dlp/extractor/tunepk.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/turbo.py b/yt_dlp/extractor/turbo.py index f6bbf2529..e3f8941c4 100644 --- a/yt_dlp/extractor/turbo.py +++ b/yt_dlp/extractor/turbo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 519dc323c..fae8b51e7 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE @@ -144,7 +141,7 @@ class TurnerBaseIE(AdobePassIE): m3u8_id=format_id or 'hls', fatal=False) if '/secure/' in video_url and '?hdnea=' in video_url: for f in m3u8_formats: - f['_ffmpeg_args'] = ['-seekable', '0'] + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0']} formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 977da30fe..391baa6c5 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py index ec5cbdf03..0af286312 100644 --- a/yt_dlp/extractor/tv2dk.py +++ b/yt_dlp/extractor/tv2dk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git 
a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index f2104358b..6ac07716b 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -1,6 +1,4 @@ # encoding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py index 4043e6366..e8cdd5c8c 100644 --- a/yt_dlp/extractor/tv4.py +++ b/yt_dlp/extractor/tv4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py index a0832d28f..d449cdc04 100644 --- a/yt_dlp/extractor/tv5mondeplus.py +++ b/yt_dlp/extractor/tv5mondeplus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py index 398b85db5..978255b17 100644 --- a/yt_dlp/extractor/tv5unis.py +++ b/yt_dlp/extractor/tv5unis.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py index 52a4ddf32..9afe23328 100644 --- a/yt_dlp/extractor/tva.py +++ b/yt_dlp/extractor/tva.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py index 1086176a2..b9f5e110e 100644 --- a/yt_dlp/extractor/tvanouvelles.py +++ b/yt_dlp/extractor/tvanouvelles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvc.py b/yt_dlp/extractor/tvc.py index 008f64cc2..4ccc8f522 100644 --- a/yt_dlp/extractor/tvc.py +++ b/yt_dlp/extractor/tvc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index f23af1f14..b04575bd5 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,11 +1,10 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, + join_nonempty, smuggle_url, str_or_none, + strip_or_none, traverse_obj, ) @@ -14,19 +13,16 @@ class TVerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/episodes/ephss8yveb', + 'url': 'https://tver.jp/episodes/ep83nf3w4p', 'info_dict': { - 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', - 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', - }, - 'add_ie': ['BrightcoveNew'], - }, { - 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', - 'info_dict': { - # sorry but this is "correct" - 'title': '4月11日(月)23時06分 ~ 放送予定', - 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + 'title': '家事ヤロウ!!! 
売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'description': 'md5:dc2c06b6acc23f1e7c730c513737719b', + 'series': '家事ヤロウ!!!', + 'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'channel': 'テレビ朝日', + 'onair_label': '5月3日(火)放送分', + 'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分', }, 'add_ie': ['BrightcoveNew'], }, { @@ -81,14 +77,26 @@ class TVerIE(InfoExtractor): 'x-tver-platform-type': 'web' }) + additional_content_info = traverse_obj( + additional_info, ('result', 'episode', 'content'), get_all=False) or {} + episode = strip_or_none(additional_content_info.get('title')) + series = str_or_none(additional_content_info.get('seriesTitle')) + title = ( + join_nonempty(series, episode, delim=' ') + or str_or_none(video_info.get('title'))) + provider = str_or_none(additional_content_info.get('productionProviderName')) + onair_label = str_or_none(additional_content_info.get('broadcastDateLabel')) + return { '_type': 'url_transparent', - 'title': str_or_none(video_info.get('title')), + 'title': title, + 'series': series, + 'episode': episode, + # an another title which is considered "full title" for some viewers + 'alt_title': join_nonempty(title, provider, onair_label, delim=' '), + 'channel': provider, 'description': str_or_none(video_info.get('description')), 'url': smuggle_url( self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), - 'series': traverse_obj( - additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), - get_all=False), 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/tvigle.py b/yt_dlp/extractor/tvigle.py index aa25ba0dc..cc1d35dc2 100644 --- a/yt_dlp/extractor/tvigle.py +++ b/yt_dlp/extractor/tvigle.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/tvland.py b/yt_dlp/extractor/tvland.py index 9ebf57f74..481d5eb19 100644 --- a/yt_dlp/extractor/tvland.py +++ b/yt_dlp/extractor/tvland.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor # TODO: Remove - Reason not used anymore - Service moved to youtube diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index de0fb5063..22b605823 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py index aa1e9d923..5820bb4a7 100644 --- a/yt_dlp/extractor/tvnet.py +++ b/yt_dlp/extractor/tvnet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py index 26a5aeae4..712fbb275 100644 --- a/yt_dlp/extractor/tvnoe.py +++ b/yt_dlp/extractor/tvnoe.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/tvnow.py b/yt_dlp/extractor/tvnow.py index b31818477..4aa558d83 100644 --- a/yt_dlp/extractor/tvnow.py +++ b/yt_dlp/extractor/tvnow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvopengr.py b/yt_dlp/extractor/tvopengr.py index 
a11cdc6b0..aded261f3 100644 --- a/yt_dlp/extractor/tvopengr.py +++ b/yt_dlp/extractor/tvopengr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 48e2c6e76..69168f655 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import random import re diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index b5dbc5526..f815b5137 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 5970596b2..31d70b6b8 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_HTTPError, diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index 2b10d9bca..6d1f92bbb 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/twentyfourvideo.py b/yt_dlp/extractor/twentyfourvideo.py index ae19e11e1..baeb85d47 100644 --- a/yt_dlp/extractor/twentyfourvideo.py +++ b/yt_dlp/extractor/twentyfourvideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/twentymin.py b/yt_dlp/extractor/twentymin.py index a42977f39..616c3c36e 100644 --- a/yt_dlp/extractor/twentymin.py +++ b/yt_dlp/extractor/twentymin.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/twentythreevideo.py b/yt_dlp/extractor/twentythreevideo.py index e8cf5a1e9..290c3761e 100644 --- a/yt_dlp/extractor/twentythreevideo.py +++ b/yt_dlp/extractor/twentythreevideo.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 7f3fa0735..0dbb97a36 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -1,11 +1,8 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re from .common import InfoExtractor -from ..downloader.websocket import has_websockets +from ..dependencies import websockets from ..utils import ( clean_html, ExtractorError, @@ -164,7 +161,7 @@ class TwitCastingIE(InfoExtractor): note='Downloading source quality m3u8', headers=self._M3U8_HEADERS, fatal=False)) - if has_websockets: + if websockets: qq = qualities(['base', 'mobilesource', 'main']) streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {} for mode, ws_url in streams.items(): @@ -190,6 +187,7 @@ class TwitCastingIE(InfoExtractor): infodict = { # No problem here since there's only one manifest 'formats': formats, + 'http_headers': self._M3U8_HEADERS, } else: infodict = { diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 10de74c8e..834350d12 100644 --- a/yt_dlp/extractor/twitch.py +++ 
b/yt_dlp/extractor/twitch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import collections import itertools import json diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 8ccc38e24..af6750333 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 235f89713..d35cd0d43 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -1,11 +1,8 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, compat_urllib_request, compat_urlparse, @@ -132,7 +129,7 @@ class UdemyIE(InfoExtractor): headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers ret = super(UdemyIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) if not ret: return ret webpage, _ = ret diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index 2c8e5c7b4..4fa74b9e8 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ufctv.py b/yt_dlp/extractor/ufctv.py index 3d74ba071..2c1c5e0ff 100644 --- a/yt_dlp/extractor/ufctv.py +++ b/yt_dlp/extractor/ufctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .imggaming import ImgGamingBaseIE diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py index d2626f0d3..aade79f20 100644 --- a/yt_dlp/extractor/ukcolumn.py +++ b/yt_dlp/extractor/ukcolumn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from ..utils import ( unescapeHTML, urljoin, diff --git a/yt_dlp/extractor/uktvplay.py b/yt_dlp/extractor/uktvplay.py index f28fd514d..abea07ab5 100644 --- a/yt_dlp/extractor/uktvplay.py +++ b/yt_dlp/extractor/uktvplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py index c1b65d189..e6ed656b9 100644 --- a/yt_dlp/extractor/umg.py +++ b/yt_dlp/extractor/umg.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/unistra.py b/yt_dlp/extractor/unistra.py index 685d74f35..083c87209 100644 --- a/yt_dlp/extractor/unistra.py +++ b/yt_dlp/extractor/unistra.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/unity.py b/yt_dlp/extractor/unity.py index 73daacf29..d1b0ecbf3 100644 --- a/yt_dlp/extractor/unity.py +++ b/yt_dlp/extractor/unity.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py index 1baee0b10..e3d9127d8 100644 --- a/yt_dlp/extractor/uol.py +++ b/yt_dlp/extractor/uol.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py 
index 9adb96943..04c96f388 100644 --- a/yt_dlp/extractor/uplynk.py +++ b/yt_dlp/extractor/uplynk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py index 020425fc7..296799d38 100644 --- a/yt_dlp/extractor/urort.py +++ b/yt_dlp/extractor/urort.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urllib_parse, diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index eb2ab26e1..30bd3dcbf 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/usanetwork.py b/yt_dlp/extractor/usanetwork.py index d953e460b..d6b58a51c 100644 --- a/yt_dlp/extractor/usanetwork.py +++ b/yt_dlp/extractor/usanetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .nbc import NBCIE diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py index b2103448d..3243f3e3b 100644 --- a/yt_dlp/extractor/usatoday.py +++ b/yt_dlp/extractor/usatoday.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 4a7a8f879..fff21667a 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import random import re diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index 92509d1bf..fd5dad0fc 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 4986635f2..1213ae1bf 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/varzesh3.py b/yt_dlp/extractor/varzesh3.py index 32655b96d..2c13cbdc0 100644 --- a/yt_dlp/extractor/varzesh3.py +++ b/yt_dlp/extractor/varzesh3.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/vbox7.py b/yt_dlp/extractor/vbox7.py index 8152acefd..76c844cb8 100644 --- a/yt_dlp/extractor/vbox7.py +++ b/yt_dlp/extractor/vbox7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/veehd.py b/yt_dlp/extractor/veehd.py index a6dc3c8d8..5ecd88726 100644 --- a/yt_dlp/extractor/veehd.py +++ b/yt_dlp/extractor/veehd.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index d87bb5b47..25d462a7d 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index 
d9afb5617..70280ae85 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index 002047dbf..e9731a941 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index 8a0f29259..bc0187511 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index 9d6090b08..6564b7b0b 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py index 862c5c7dc..41b8a4607 100644 --- a/yt_dlp/extractor/vh1.py +++ b/yt_dlp/extractor/vh1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor # TODO Remove - Reason: Outdated Site diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index c8c30559e..abb4a6fa0 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import hashlib import json diff --git a/yt_dlp/extractor/vidbit.py b/yt_dlp/extractor/vidbit.py index 91f45b7cc..2813032db 100644 --- a/yt_dlp/extractor/vidbit.py +++ b/yt_dlp/extractor/vidbit.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/viddler.py b/yt_dlp/extractor/viddler.py index ecc48246f..f491b67ef 100644 --- a/yt_dlp/extractor/viddler.py +++ b/yt_dlp/extractor/viddler.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 90d705092..251eb78fe 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py index 96e98573f..906412f08 100644 --- a/yt_dlp/extractor/videocampus_sachsen.py +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -1,12 +1,70 @@ -# coding: utf-8 +import re + from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ExtractorError class VideocampusSachsenIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?: + IE_NAME = 'Vimp' + _INSTANCES = ( + 'campus.demo.vimp.com', + 'corporate.demo.vimp.com', + 'dancehalldatabase.com', + 'educhannel.hs-gesundheit.de', + 'emedia.ls.haw-hamburg.de', + 'globale-evolution.net', + 'k210039.vimp.mivitec.net', + 'media.cmslegal.com', + 'media.hs-furtwangen.de', + 'media.hwr-berlin.de', + 'mediathek.dkfz.de', + 'mediathek.htw-berlin.de', + 'mediathek.polizei-bw.de', + 'medien.hs-merseburg.de', + 'mportal.europa-uni.de', + 'pacific.demo.vimp.com', + 'slctv.com', + 'tube.isbonline.cn', + 'univideo.uni-kassel.de', + 
'ursula2.genetics.emory.edu', + 'ursulablicklevideoarchiv.com', + 'v.agrarumweltpaedagogik.at', + 'video.eplay-tv.de', + 'video.fh-dortmund.de', + 'video.hs-offenburg.de', + 'video.hs-pforzheim.de', + 'video.hspv.nrw.de', + 'video.irtshdf.fr', + 'video.pareygo.de', + 'video.tu-freiberg.de', + 'videocampus.sachsen.de', + 'videoportal.uni-freiburg.de', + 'videoportal.vm.uni-freiburg.de', + 'videos.duoc.cl', + 'videos.uni-paderborn.de', + 'vimp-bemus.udk-berlin.de', + 'vimp.aekwl.de', + 'vimp.hs-mittweida.de', + 'vimp.oth-regensburg.de', + 'vimp.ph-heidelberg.de', + 'vimp.sma-events.com', + 'vimp.weka-fachmedien.de', + 'webtv.univ-montp3.fr', + 'www.b-tu.de/media', + 'www.bigcitytv.de', + 'www.cad-videos.de', + 'www.fh-bielefeld.de/medienportal', + 'www.orvovideo.com', + 'www.rwe.tv', + 'www.wenglor-media.com', + 'www2.univ-sba.dz', + ) + _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?: m/(?P<tmp_id>[0-9a-f]+)| - (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32}) - )''' + (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})| + media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?) + )''' % ('|'.join(map(re.escape, _INSTANCES))) _TESTS = [ { @@ -14,6 +72,7 @@ class VideocampusSachsenIE(InfoExtractor): 'info_dict': { 'id': 'e6b9349905c1628631f175712250f2a1', 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7', + 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7', 'ext': 'mp4', }, }, @@ -22,6 +81,7 @@ class VideocampusSachsenIE(InfoExtractor): 'info_dict': { 'id': 'fc99c527e4205b121cb7c74433469262', 'title': 'Was ist selbstgesteuertes Lernen?', + 'description': 'md5:196aa3b0509a526db62f84679522a2f5', 'display_id': 'Was-ist-selbstgesteuertes-Lernen', 'ext': 'mp4', }, @@ -31,43 +91,32 @@ class VideocampusSachsenIE(InfoExtractor): 'info_dict': { 'id': '09d4ed029002eb1bdda610f1103dd54c', 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht', + 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58', 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht', 'ext': 'mp4', }, }, - ] - - def _real_extract(self, url): - video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id') - webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or '' - - if not tmp_id: - video_id = self._html_search_regex( - r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&', - webpage, 'video_id') - - title = self._html_search_regex( - (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')), - webpage, 'title', group='content', fatal=False) - - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', - video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'display_id': display_id, - 'formats': formats, - 'subtitles': subtitles - } - - -class VideocampusSachsenEmbedIE(InfoExtractor): - _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)' - - _TESTS = [ + { + 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3', + 'info_dict': { + 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4', + 'id': '0183356e41af7bfb83d7667b20d9b6a3', + 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 
2021/22', + 'description': 'md5:508958bd93e0ca002ac731d94182a54f', + 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122', + 'ext': 'mp4', + } + }, + { + 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c', + 'info_dict': { + 'id': 'c8816f1cc942c12b6cce57c835cffd7c', + 'title': 'Preisverleihung »Produkte des Jahres 2022«', + 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3', + 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022', + 'ext': 'mp4', + }, + }, { 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262', 'info_dict': { @@ -79,18 +128,41 @@ class VideocampusSachsenEmbedIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) + host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group( + 'host', 'id', 'tmp_id', 'display_id', 'embed_id') + webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or '' + + if not video_id: + video_id = embed_id or self._html_search_regex( + rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?', + webpage, 'video_id') - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', - video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + if not (display_id or tmp_id): + # Title, description from embedded page's meta wouldn't be correct + title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False) + description = None + else: + title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False) + description = self._html_search_meta( + ('og:description', 'twitter:description', 'description'), webpage, default=None) + + formats, subtitles = [], {} + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=True) + except ExtractorError as e: + if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500): + raise + + formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'}) self._sort_formats(formats) return { 'id': video_id, 'title': title, + 'description': description, + 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles, + 'subtitles': subtitles } diff --git a/yt_dlp/extractor/videodetective.py b/yt_dlp/extractor/videodetective.py index fe70db713..7928a41c2 100644 --- a/yt_dlp/extractor/videodetective.py +++ b/yt_dlp/extractor/videodetective.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE diff --git a/yt_dlp/extractor/videofyme.py b/yt_dlp/extractor/videofyme.py index cd3f50a63..1d1c8f7b7 100644 --- a/yt_dlp/extractor/videofyme.py +++ b/yt_dlp/extractor/videofyme.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py index 17ef3b1b9..09d12d192 100644 --- a/yt_dlp/extractor/videomore.py +++ b/yt_dlp/extractor/videomore.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ 
import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/videopress.py b/yt_dlp/extractor/videopress.py index 6376ff096..3c5e27a9d 100644 --- a/yt_dlp/extractor/videopress.py +++ b/yt_dlp/extractor/videopress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 6bfb8d442..599996bf9 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index a63919ff2..b9845affd 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vidzi.py b/yt_dlp/extractor/vidzi.py deleted file mode 100644 index 42ea4952c..000000000 --- a/yt_dlp/extractor/vidzi.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - decode_packed_codes, - js_to_json, - NO_DEFAULT, - PACKED_CODES_RE, -) - - -class VidziIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'http://vidzi.tv/cghql9yq6emu.html', - 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', - 'info_dict': { - 'id': 'cghql9yq6emu', - 'ext': 'mp4', - 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', - 'only_matching': True, - }, { - 'url': 'http://vidzi.cc/cghql9yq6emu.html', - 'only_matching': True, - }, { - 'url': 'https://vidzi.si/rph9gztxj1et.html', - 'only_matching': True, - }, { - 'url': 'http://vidzi.nu/cghql9yq6emu.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://vidzi.tv/%s' % video_id, video_id) - title = self._html_search_regex( - r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') - - codes = [webpage] - codes.extend([ - decode_packed_codes(mobj.group(0)).replace('\\\'', '\'') - for mobj in re.finditer(PACKED_CODES_RE, webpage)]) - for num, code in enumerate(codes, 1): - jwplayer_data = self._parse_json( - self._search_regex( - r'setup\(([^)]+)\)', code, 'jwplayer data', - default=NO_DEFAULT if num == len(codes) else '{}'), - video_id, transform_source=lambda s: js_to_json( - re.sub(r'\s*\+\s*window\[.+?\]', '', s))) - if jwplayer_data: - break - - info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) - info_dict['title'] = title - - return info_dict diff --git a/yt_dlp/extractor/vier.py b/yt_dlp/extractor/vier.py index 94aa350e7..eab894ab6 100644 --- a/yt_dlp/extractor/vier.py +++ b/yt_dlp/extractor/vier.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index 4627f66fd..d081a2f12 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/viidea.py 
b/yt_dlp/extractor/viidea.py index 0da06818b..157ce4d8f 100644 --- a/yt_dlp/extractor/viidea.py +++ b/yt_dlp/extractor/viidea.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 8a930798d..a922b195c 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import hashlib import hmac import json diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 972fb480b..59c5353ab 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import functools import re @@ -8,7 +5,6 @@ import itertools from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -109,7 +105,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): vimeo_config = self._search_regex( r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', - webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + webpage, 'vimeo config', *args, **kwargs) if vimeo_config: return self._parse_json(vimeo_config, video_id) @@ -123,7 +119,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _parse_config(self, config, video_id): video_data = config['video'] - video_title = video_data['title'] + video_title = video_data.get('title') live_event = video_data.get('live_event') or {} is_live = live_event.get('status') == 'started' request = config.get('request') or {} @@ -1337,7 +1333,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): class VimeoWatchLaterIE(VimeoChannelIE): IE_NAME = 'vimeo:watchlater' - IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' + IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)' _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater' _TITLE = 'Watch Later' _LOGIN_REQUIRED = True diff --git a/yt_dlp/extractor/vimm.py b/yt_dlp/extractor/vimm.py index 060b92ba6..3522b8e33 100644 --- a/yt_dlp/extractor/vimm.py +++ b/yt_dlp/extractor/vimm.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/vimple.py b/yt_dlp/extractor/vimple.py index c74b43766..a8b16dd29 100644 --- a/yt_dlp/extractor/vimple.py +++ b/yt_dlp/extractor/vimple.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index e59b1037b..bbf43a83f 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/viqeo.py b/yt_dlp/extractor/viqeo.py index be7dfa814..d214223e9 100644 --- a/yt_dlp/extractor/viqeo.py +++ b/yt_dlp/extractor/viqeo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index ba627ca5b..63b6fd3a1 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json import uuid diff --git 
a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index cbc315961..3b105e6c0 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import collections import re @@ -593,7 +590,6 @@ class VKWallPostIE(VKBaseIE): }], 'params': { 'skip_download': True, - 'usenetrc': True, }, 'skip': 'Requires vk account credentials', }, { @@ -604,9 +600,6 @@ class VKWallPostIE(VKBaseIE): 'title': 'Сергей Горбунов - Wall post 85155021_6319', }, 'playlist_count': 1, - 'params': { - 'usenetrc': True, - }, 'skip': 'Requires vk account credentials', }, { # wall page URL diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index ae35c976c..c60801417 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json diff --git a/yt_dlp/extractor/vodlocker.py b/yt_dlp/extractor/vodlocker.py index 02c9617d2..1c7236ed3 100644 --- a/yt_dlp/extractor/vodlocker.py +++ b/yt_dlp/extractor/vodlocker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/vodpl.py b/yt_dlp/extractor/vodpl.py index 9e919708e..8af1572d0 100644 --- a/yt_dlp/extractor/vodpl.py +++ b/yt_dlp/extractor/vodpl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .onet import OnetBaseIE diff --git a/yt_dlp/extractor/vodplatform.py b/yt_dlp/extractor/vodplatform.py index 74d2257e7..2b45dcd86 100644 --- a/yt_dlp/extractor/vodplatform.py +++ b/yt_dlp/extractor/vodplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unescapeHTML diff --git a/yt_dlp/extractor/voicerepublic.py b/yt_dlp/extractor/voicerepublic.py index a52e40afa..e8cbd0e32 100644 --- a/yt_dlp/extractor/voicerepublic.py +++ b/yt_dlp/extractor/voicerepublic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index 37c7d5685..e4570a03a 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index a9b66b95c..7ac38a813 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py index 661208125..a7bf298aa 100644 --- a/yt_dlp/extractor/voxmedia.py +++ b/yt_dlp/extractor/voxmedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .once import OnceIE from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/vrak.py b/yt_dlp/extractor/vrak.py index daa247cce..198c0a294 100644 --- a/yt_dlp/extractor/vrak.py +++ b/yt_dlp/extractor/vrak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py index 
10dc94abc..26f48bf67 100644 --- a/yt_dlp/extractor/vrt.py +++ b/yt_dlp/extractor/vrt.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( extract_attributes, diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 00e1006c4..35662753e 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import json import hashlib diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py index b4874ac39..8ef75d30e 100644 --- a/yt_dlp/extractor/vshare.py +++ b/yt_dlp/extractor/vshare.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vtm.py b/yt_dlp/extractor/vtm.py index 093f1aa69..6381fd311 100644 --- a/yt_dlp/extractor/vtm.py +++ b/yt_dlp/extractor/vtm.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/vuclip.py b/yt_dlp/extractor/vuclip.py index 55e087bdb..0e562983d 100644 --- a/yt_dlp/extractor/vuclip.py +++ b/yt_dlp/extractor/vuclip.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py index b561f63f7..23ea70c77 100644 --- a/yt_dlp/extractor/vupload.py +++ b/yt_dlp/extractor/vupload.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index 3faa90fbd..ccc44d08a 100644 --- a/yt_dlp/extractor/vvvvid.py +++ b/yt_dlp/extractor/vvvvid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vyborymos.py b/yt_dlp/extractor/vyborymos.py index 4d93666c5..386518795 100644 --- a/yt_dlp/extractor/vyborymos.py +++ b/yt_dlp/extractor/vyborymos.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/vzaar.py b/yt_dlp/extractor/vzaar.py index 54f88bba8..7ce0ba9f5 100644 --- a/yt_dlp/extractor/vzaar.py +++ b/yt_dlp/extractor/vzaar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index a70a71961..155008f8c 100644 --- a/yt_dlp/extractor/wakanim.py +++ b/yt_dlp/extractor/wakanim.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from urllib.parse import unquote from .common import InfoExtractor diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index 00f081bca..6b954c5cc 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py index 38c10dc62..bf1ad65b2 100644 --- a/yt_dlp/extractor/wasdtv.py +++ b/yt_dlp/extractor/wasdtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git 
a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 9d6ae2870..7274eaa39 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py index 9ff4523db..e6a89adf6 100644 --- a/yt_dlp/extractor/wat.py +++ b/yt_dlp/extractor/wat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -57,7 +54,7 @@ class WatIE(InfoExtractor): # 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id) video_data = self._download_json( 'https://mediainfo.tf1.fr/mediainfocombo/' + video_id, - video_id, query={'context': 'MYTF1'}) + video_id, query={'context': 'MYTF1', 'pver': '4020003'}) video_info = video_data['media'] error_desc = video_info.get('error_desc') diff --git a/yt_dlp/extractor/watchbox.py b/yt_dlp/extractor/watchbox.py index d19d80102..e41148d4a 100644 --- a/yt_dlp/extractor/watchbox.py +++ b/yt_dlp/extractor/watchbox.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/watchindianporn.py b/yt_dlp/extractor/watchindianporn.py index a86819173..3ded2d1d4 100644 --- a/yt_dlp/extractor/watchindianporn.py +++ b/yt_dlp/extractor/watchindianporn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index ef58a66c3..d0ad69477 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py index a858e992c..374fe35cd 100644 --- a/yt_dlp/extractor/webcaster.py +++ b/yt_dlp/extractor/webcaster.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/webofstories.py b/yt_dlp/extractor/webofstories.py index f2b8d19b4..fde9300b0 100644 --- a/yt_dlp/extractor/webofstories.py +++ b/yt_dlp/extractor/webofstories.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index dafa2af3b..d5a52ce20 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor import json diff --git a/yt_dlp/extractor/weiqitv.py b/yt_dlp/extractor/weiqitv.py index 7e0befd39..c9ff64154 100644 --- a/yt_dlp/extractor/weiqitv.py +++ b/yt_dlp/extractor/weiqitv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index e4b610d00..21574471c 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py index 4d3d62f95..6c71e9a04 
100644 --- a/yt_dlp/extractor/willow.py +++ b/yt_dlp/extractor/willow.py @@ -1,4 +1,3 @@ -# coding: utf-8 from ..utils import ExtractorError from .common import InfoExtractor diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index ea953bf77..263844d72 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -15,14 +12,14 @@ from ..utils import ( class WimTVIE(InfoExtractor): _player = None _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' - _VALID_URL = r'''(?x) + _VALID_URL = r'''(?x: https?://platform.wim.tv/ (?: (?:embed/)?\? |\#/webtv/.+?/ ) (?P<type>vod|live|cast)[=/] - (?P<id>%s).*?''' % _UUID_RE + (?P<id>%s).*?)''' % _UUID_RE _TESTS = [{ # vod stream 'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a', diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index a170966c3..3cbcb4aa0 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -14,7 +12,7 @@ from ..utils import ( class WistiaBaseIE(InfoExtractor): _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})' - _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/' + _VALID_URL_BASE = r'https?://(?:\w+\.)?wistia\.(?:net|com)/(?:embed/)?' _EMBED_BASE_URL = 'http://fast.wistia.com/embed/' def _download_embed_config(self, config_type, config_id, referer): @@ -175,7 +173,7 @@ class WistiaIE(WistiaBaseIE): class WistiaPlaylistIE(WistiaBaseIE): - _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX) + _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX) _TEST = { 'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc', diff --git a/yt_dlp/extractor/worldstarhiphop.py b/yt_dlp/extractor/worldstarhiphop.py index 82587b4ce..c6948a1eb 100644 --- a/yt_dlp/extractor/worldstarhiphop.py +++ b/yt_dlp/extractor/worldstarhiphop.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 3003a0f10..6349e5326 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 67236f377..8be3645e3 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/wwe.py b/yt_dlp/extractor/wwe.py index bebc77bb5..9bbd477c3 100644 --- a/yt_dlp/extractor/wwe.py +++ b/yt_dlp/extractor/wwe.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xbef.py b/yt_dlp/extractor/xbef.py index 4c41e98b2..ac69528a3 100644 --- a/yt_dlp/extractor/xbef.py +++ b/yt_dlp/extractor/xbef.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/xboxclips.py b/yt_dlp/extractor/xboxclips.py index 9bac982f8..235b567d9 100644 --- a/yt_dlp/extractor/xboxclips.py +++ b/yt_dlp/extractor/xboxclips.py @@ -1,6 +1,3 @@ -# 
coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py index cd97c77dc..28b6ecb6e 100644 --- a/yt_dlp/extractor/xfileshare.py +++ b/yt_dlp/extractor/xfileshare.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 9d4ed47d4..ff15d3707 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xiami.py b/yt_dlp/extractor/xiami.py index 769aab331..71b2956a8 100644 --- a/yt_dlp/extractor/xiami.py +++ b/yt_dlp/extractor/xiami.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import int_or_none diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 802d1bb1b..c3447fba0 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py index 9832d2398..96e23bb8d 100644 --- a/yt_dlp/extractor/xinpianchang.py +++ b/yt_dlp/extractor/xinpianchang.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/xminus.py b/yt_dlp/extractor/xminus.py index 36e5ead1e..5f113810f 100644 --- a/yt_dlp/extractor/xminus.py +++ b/yt_dlp/extractor/xminus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 27f991627..14beb1347 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 792843df5..42bffb071 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xtube.py b/yt_dlp/extractor/xtube.py index abd319188..93a6a3f33 100644 --- a/yt_dlp/extractor/xtube.py +++ b/yt_dlp/extractor/xtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xuite.py b/yt_dlp/extractor/xuite.py index 0276c0dbb..52423a327 100644 --- a/yt_dlp/extractor/xuite.py +++ b/yt_dlp/extractor/xuite.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index d5261b6ab..50b939496 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index 0d536015c..e3e3a9fe6 100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -1,6 +1,3 @@ -from __future__ import 
unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 20504de2c..3fe6192bf 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools import re diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index c15f3a4f3..d87a7f9be 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index a3558cc12..8ea416a1d 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 7d3966bf1..37ff514b3 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/yapfiles.py b/yt_dlp/extractor/yapfiles.py index cfb368de9..8fabdf81c 100644 --- a/yt_dlp/extractor/yapfiles.py +++ b/yt_dlp/extractor/yapfiles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/yesjapan.py b/yt_dlp/extractor/yesjapan.py index 681338c96..b45fa8f14 100644 --- a/yt_dlp/extractor/yesjapan.py +++ b/yt_dlp/extractor/yesjapan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( HEADRequest, diff --git a/yt_dlp/extractor/yinyuetai.py b/yt_dlp/extractor/yinyuetai.py index 1fd8d35c6..b28c39380 100644 --- a/yt_dlp/extractor/yinyuetai.py +++ b/yt_dlp/extractor/yinyuetai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py index c4ae4d88e..444785947 100644 --- a/yt_dlp/extractor/ynet.py +++ b/yt_dlp/extractor/ynet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/youjizz.py b/yt_dlp/extractor/youjizz.py index 111623ffe..cd12be500 100644 --- a/yt_dlp/extractor/youjizz.py +++ b/yt_dlp/extractor/youjizz.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index b50579915..45856fbbe 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index 583aea38d..76d89f3ce 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 5feb568e7..5aea82295 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -1,5 +1,3 @@ -from 
__future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py index 98347491e..38f42a991 100644 --- a/yt_dlp/extractor/yourporn.py +++ b/yt_dlp/extractor/yourporn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py index 9fa772838..def63293a 100644 --- a/yt_dlp/extractor/yourupload.py +++ b/yt_dlp/extractor/yourupload.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import urljoin diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f284487b8..5546aa9a3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1,7 +1,4 @@ -# coding: utf-8 - -from __future__ import unicode_literals - +import base64 import calendar import copy import datetime @@ -14,9 +11,9 @@ import os.path import random import re import sys +import threading import time import traceback -import threading from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -31,12 +28,14 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + NO_DEFAULT, + ExtractorError, bug_reports_message, + classproperty, clean_html, datetime_from_str, dict_get, error_to_compat_str, - ExtractorError, float_or_none, format_field, get_first, @@ -46,7 +45,6 @@ from ..utils import ( js_to_json, mimetype2ext, network_exceptions, - NO_DEFAULT, orderedSet, parse_codecs, parse_count, @@ -72,7 +70,6 @@ from ..utils import ( variadic, ) - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -292,7 +289,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # invidious-redirect websites r'(?:www\.)?redirect\.invidious\.io', r'(?:(?:www|dev)\.)?invidio\.us', - # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md + # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md r'(?:www\.)?invidious\.pussthecat\.org', r'(?:www\.)?invidious\.zee\.li', r'(?:www\.)?invidious\.ethibox\.fr', @@ -352,6 +349,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', + # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances + r'(?:www\.)?piped\.kavin\.rocks', + r'(?:www\.)?piped\.silkky\.cloud', + r'(?:www\.)?piped\.tokhmi\.xyz', + r'(?:www\.)?piped\.moomoo\.me', + r'(?:www\.)?il\.ax', + r'(?:www\.)?piped\.syncpundit\.com', + r'(?:www\.)?piped\.mha\.fi', + r'(?:www\.)?piped\.mint\.lgbt', + r'(?:www\.)?piped\.privacy\.com\.de', ) def _initialize_consent(self): @@ -387,9 +394,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._check_login_required() def _check_login_required(self): - if (self._LOGIN_REQUIRED - and self.get_param('cookiefile') is None - and self.get_param('cookiesfrombrowser') is None): + if self._LOGIN_REQUIRED and not self._cookies_passed: self.raise_login_required('Login details are needed to download this content', method='cookies') _YT_INITIAL_DATA_RE = 
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' @@ -452,7 +457,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return None # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323 sapisidhash = hashlib.sha1( - f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest() + f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest() return f'SAPISIDHASH {time_now}_{sapisidhash}' def _call_api(self, ep, query, video_id, fatal=True, headers=None, @@ -466,14 +471,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if headers: real_headers.update(headers) return self._download_json( - 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep), + f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): data = self._search_regex( - (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), + (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}', self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal) if data: return self._parse_json(data, item_id, fatal=fatal) @@ -657,7 +662,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): warnings.append([alert_type, alert_message]) for alert_type, alert_message in (warnings + errors[:-1]): - self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once) + self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) @@ -2204,7 +2209,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:2ef1d002cad520f65825346e2084e49d', }, 'params': {'skip_download': True} - }, + }, { + # Story. Requires specific player params to work. 
+ # Note: stories get removed after some period of time + 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA', + 'info_dict': { + 'id': 'yN3x1t3sieA', + 'ext': 'mp4', + 'uploader': 'Linus Tech Tips', + 'duration': 13, + 'channel': 'Linus Tech Tips', + 'playable_in_embed': True, + 'tags': [], + 'age_limit': 0, + 'uploader_url': 'http://www.youtube.com/user/LinusTechTips', + 'upload_date': '20220402', + 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp', + 'title': 'Story', + 'live_status': 'not_live', + 'uploader_id': 'LinusTechTips', + 'view_count': int, + 'description': '', + 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw', + 'categories': ['Science & Technology'], + 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw', + 'availability': 'unlisted', + } + } ] @classmethod @@ -2214,10 +2245,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): qs = parse_qs(url) if qs.get('list', [None])[0]: return False - return super(YoutubeIE, cls).suitable(url) + return super().suitable(url) def __init__(self, *args, **kwargs): - super(YoutubeIE, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._code_cache = {} self._player_cache = {} @@ -2413,8 +2444,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_id = self._extract_player_info(player_url) # Read from filesystem cache - func_id = 'js_%s_%s' % ( - player_id, self._signature_cache_id(example_sig)) + func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}' assert os.path.basename(func_id) == func_id cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) @@ -2441,7 +2471,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): starts = '' if start == 0 else str(start) ends = (':%d' % (end + step)) if end + step >= 0 else ':' steps = '' if step == 1 else (':%d' % step) - return 's[%s%s%s]' % (starts, ends, steps) + return f's[{starts}{ends}{steps}]' step = None # Quelch pyflakes warnings - start will be set when step is set @@ -2603,7 +2633,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. 
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) qs.update({ 'ver': ['2'], @@ -2714,7 +2744,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_yt_initial_variable(self, webpage, regex, video_id, name): return self._parse_json(self._search_regex( - (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), + (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex), webpage, name, default='{}'), video_id, fatal=False) def _extract_comment(self, comment_renderer, parent=None): @@ -2812,8 +2842,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): comment_entries_iter = self._comment_entries( comment_replies_renderer, ytcfg, video_id, parent=comment.get('id'), tracker=tracker) - for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))): - yield reply_comment + yield from itertools.islice(comment_entries_iter, min( + max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))) # Keeps track of counts across recursive calls if not tracker: @@ -2837,12 +2867,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) continuation = self._extract_continuation(root_continuation_data) - message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) - if message and not parent: - self.report_warning(message, video_id=video_id) response = None + is_forced_continuation = False is_first_continuation = parent is None + if is_first_continuation and not continuation: + # Sometimes you can get comments by generating the continuation yourself, + # even if YouTube initially reports them being disabled - e.g. stories comments. + # Note: if the comment section is actually disabled, YouTube may return a response with + # required check_get_keys missing. So we will disable that check initially in this case. 
+ continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id)) + is_forced_continuation = True for page_num in itertools.count(0): if not continuation: @@ -2863,8 +2898,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): response = self._extract_response( item_id=None, query=continuation, ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix, - check_get_keys='onResponseReceivedEndpoints') - + check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None) + is_forced_continuation = False continuation_contents = traverse_obj( response, 'onResponseReceivedEndpoints', expected_type=list, default=[]) @@ -2889,6 +2924,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if continuation: break + message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) + if message and not parent and tracker['running_total'] == 0: + self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True) + + @staticmethod + def _generate_comment_continuation(video_id): + """ + Generates initial comment section continuation token from given video id + """ + token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section' + return base64.b64encode(token.encode()).decode() + def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): @@ -2942,7 +2989,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers = self.generate_api_headers( ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client) - yt_query = {'videoId': video_id} + yt_query = { + 'videoId': video_id, + 'params': '8AEB' # enable stories + } yt_query.update(self._generate_player_context(sts)) return self._extract_response( item_id=video_id, ep='player', query=yt_query, @@ -2955,7 +3005,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): requested_clients = [] default = ['android', 'web'] allowed_clients = sorted( - [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'], + (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client in allowed_clients: @@ -3113,7 +3163,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) except ExtractorError as e: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n' + 'nsig extraction failed: You may experience throttling for some formats\n' f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True) throttled = True @@ -3132,7 +3182,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Eg: __2ABJjxzNo, ySuUZEjARPY is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) if is_damaged: - self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) + self.report_warning( + f'{video_id}: Some formats are possibly damaged. 
They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), @@ -3142,7 +3193,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), - 'source_preference': -10 if throttled else -1, + # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 + 'source_preference': -10 if throttled else -5 if itag == '22' else -1, 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, 'quality': q(quality), @@ -3180,6 +3232,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): skip_manifests = self._configuration_arg('skip') if not self.get_param('youtube_include_hls_manifest', True): skip_manifests.append('hls') + if not self.get_param('youtube_include_dash_manifest', True): + skip_manifests.append('dash') get_dash = 'dash' not in skip_manifests and ( not is_live or live_from_start or self._configuration_arg('include_live_dash')) get_hls = not live_from_start and 'hls' not in skip_manifests @@ -3257,7 +3311,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage = None if 'webpage' not in self._configuration_arg('player_skip'): webpage = self._download_webpage( - webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) + webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() @@ -3408,13 +3462,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): original_thumbnails = thumbnails.copy() # The best resolution thumbnails sometimes does not appear in the webpage - # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 + # See: https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029> thumbnail_names = [ - 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', - 'hqdefault', 'hq1', 'hq2', 'hq3', '0', - 'mqdefault', 'mq1', 'mq2', 'mq3', - 'default', '1', '2', '3' + # While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants + # in resolution, these are not the custom thumbnail. 
So de-prioritize them + 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', + 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3' ] n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ @@ -3592,17 +3646,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') - try: - # This will error if there is no livechat + try: # This will error if there is no livechat initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + except (KeyError, IndexError, TypeError): + pass + else: info.setdefault('subtitles', {})['live_chat'] = [{ - 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies + 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies 'video_id': video_id, 'ext': 'json', 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay', }] - except (KeyError, IndexError, TypeError): - pass if initial_data: info['chapters'] = ( @@ -3624,7 +3678,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': info['location'] = stl else: - mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) + mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl) if mobj: info.update({ 'series': mobj.group(1), @@ -3702,7 +3756,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): unified_strdate(get_first(microformats, 'uploadDate')) or unified_strdate(search_meta('uploadDate'))) if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'): - upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') + upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date info['upload_date'] = upload_date for to, frm in fallbacks.items(): @@ -3865,8 +3919,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): # TODO: add support for nested playlists so each shelf is processed # as separate playlist # TODO: this includes only first N items - for entry in self._grid_entries(renderer): - yield entry + yield from self._grid_entries(renderer) renderer = content.get('horizontalListRenderer') if renderer: # TODO @@ -3886,8 +3939,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): title = self._get_text(shelf_renderer, 'title') yield self.url_result(shelf_url, video_title=title) # Shelf may not contain shelf URL, fallback to extraction from content - for entry in self._shelf_entries_from_content(shelf_renderer): - yield entry + yield from self._shelf_entries_from_content(shelf_renderer) def _playlist_entries(self, video_list_renderer): for content in video_list_renderer['contents']: @@ -3965,8 +4017,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): renderer = content.get('backstagePostThreadRenderer') if not isinstance(renderer, dict): continue - for entry in self._post_thread_entries(renderer): - yield entry + yield from self._post_thread_entries(renderer) r''' # unused def _rich_grid_entries(self, contents): @@ -4036,8 +4087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): parent_renderer = ( try_get(tab_content, lambda x: x['sectionListRenderer'], dict) or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}) - for entry in extract_entries(parent_renderer): - 
yield entry + yield from extract_entries(parent_renderer) continuation = continuation_list[0] for page_num in itertools.count(1): @@ -4046,7 +4096,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data) response = self._extract_response( - item_id='%s page %s' % (item_id, page_num), + item_id=f'{item_id} page {page_num}', query=continuation, headers=headers, ytcfg=ytcfg, check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) @@ -4070,8 +4120,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue continuation_renderer = value continuation_list = [None] - for entry in known_continuation_renderers[key](continuation_renderer): - yield entry + yield from known_continuation_renderers[key](continuation_renderer) continuation = continuation_list[0] or self._extract_continuation(continuation_renderer) break if continuation_renderer: @@ -4097,8 +4146,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue video_items_renderer = {known_renderers[key][1]: continuation_items} continuation_list = [None] - for entry in known_renderers[key][0](video_items_renderer): - yield entry + yield from known_renderers[key][0](video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) break if video_items_renderer: @@ -4223,7 +4271,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): self._extract_visitor_data(data, ytcfg)), **metadata) - def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg): + def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg): first_id = last_id = response = None for page_num in itertools.count(1): videos = list(self._playlist_entries(playlist)) @@ -4232,11 +4280,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1 if start >= len(videos): return - for video in videos[start:]: - if video['id'] == first_id: - self.to_screen('First video %s found again; Assuming end of Mix' % first_id) - return - yield video + yield from videos[start:] first_id = first_id or videos[0]['id'] last_id = videos[-1]['id'] watch_endpoint = try_get( @@ -4267,13 +4311,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): playlist_url = urljoin(url, try_get( playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], compat_str)) - if playlist_url and playlist_url != url: + + # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1] + # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg + is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id) + + if playlist_url and playlist_url != url and not is_known_unviewable: return self.url_result( playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title) return self.playlist_result( - self._extract_mix_playlist(playlist, playlist_id, data, ytcfg), + self._extract_inline_playlist(playlist, playlist_id, data, ytcfg), playlist_id=playlist_id, playlist_title=title) def _extract_availability(self, data): @@ -4470,7 +4519,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ('continuationContents', ), ) display_id = f'query "{query}"' - check_get_keys = tuple(set(keys[0] for keys in content_keys)) + check_get_keys = tuple({keys[0] for keys in content_keys}) ytcfg = 
self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} self._report_playlist_authcheck(ytcfg, fatal=False) @@ -5180,8 +5229,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): @classmethod def suitable(cls, url): - return False if YoutubeIE.suitable(url) else super( - YoutubeTabIE, cls).suitable(url) + return False if YoutubeIE.suitable(url) else super().suitable(url) _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$') @@ -5228,7 +5276,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # Handle both video/playlist URLs qs = parse_qs(url) - video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')] + video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list')) if not video_id and mobj['not_channel'].startswith('watch'): if not playlist_id: @@ -5414,7 +5462,7 @@ class YoutubePlaylistIE(InfoExtractor): qs = parse_qs(url) if qs.get('v', [None])[0]: return False - return super(YoutubePlaylistIE, cls).suitable(url) + return super().suitable(url) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -5741,16 +5789,17 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): class YoutubeFeedsInfoExtractor(InfoExtractor): """ Base class for feed extractors - Subclasses must define the _FEED_NAME property. + Subclasses must re-define the _FEED_NAME property. """ _LOGIN_REQUIRED = True + _FEED_NAME = 'feeds' def _real_initialize(self): YoutubeBaseInfoExtractor._check_login_required(self) - @property + @classproperty def IE_NAME(self): - return 'youtube:%s' % self._FEED_NAME + return f'youtube:{self._FEED_NAME}' def _real_extract(self, url): return self.url_result( @@ -5811,6 +5860,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): }] +class YoutubeStoriesIE(InfoExtractor): + IE_DESC = 'YouTube channel stories; "ytstories:" prefix' + IE_NAME = 'youtube:stories' + _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$' + _TESTS = [{ + 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = f'RLTD{self._match_id(url)}' + return self.url_result( + f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', + ie=YoutubeTabIE, video_id=playlist_id) + + class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list @@ -5883,5 +5948,5 @@ class YoutubeTruncatedIDIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) raise ExtractorError( - 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url), + f'Incomplete YouTube ID {video_id}. 
URL {url} looks truncated.', expected=True) diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 161b011ab..a1546fd88 100644 --- a/yt_dlp/extractor/zapiks.py +++ b/yt_dlp/extractor/zapiks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 8614ca23d..16f827a7e 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from uuid import uuid4 diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 5f4d26622..a388ff562 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 9e411d83f..9ff36052e 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor @@ -96,14 +93,14 @@ class Zee5IE(InfoExtractor): def _perform_login(self, username, password): if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None: self.report_login() - otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username), + otp_request_json = self._download_json(f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}', None, note='Sending OTP') if otp_request_json['code'] == 0: self.to_screen(otp_request_json['message']) else: raise ExtractorError(otp_request_json['message'], expected=True) otp_code = self._get_tfa_info('OTP') - otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID), + otp_verify_json = self._download_json(f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web', None, note='Verifying OTP', fatal=False) if not otp_verify_json: raise ExtractorError('Unable to verify OTP.', expected=True) @@ -227,13 +224,13 @@ class Zee5SeriesIE(InfoExtractor): 'X-Access-Token': access_token_request['token'], 'Referer': 'https://www.zee5.com/', } - show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id) + show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN' page_num = 0 show_json = self._download_json(show_url, video_id=show_id, headers=headers) for season in show_json.get('seasons') or []: season_id = try_get(season, lambda x: x['id'], compat_str) - next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id) + next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100' while next_url: page_num += 1 episodes_json = self._download_json( diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index 278a9438e..70eb3ccd1 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import format_field, float_or_none, 
int_or_none diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 419bf30d8..8b2d842ff 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -1,131 +1,77 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import functools import hashlib import hmac +import json import urllib.parse from .common import InfoExtractor from ..utils import ( + OnDemandPagedList, int_or_none, traverse_obj, + urljoin, ) class ZingMp3BaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/]+/(?P<id>\w+)(?:\.html|\?)' + _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/?#]+/(?P<id>\w+)(?:\.html|\?)' _GEO_COUNTRIES = ['VN'] _DOMAIN = 'https://zingmp3.vn' - _SLUG_API = { + _PER_PAGE = 50 + _API_SLUGS = { + # Audio/video 'bai-hat': '/api/v2/page/get/song', 'embed': '/api/v2/page/get/song', 'video-clip': '/api/v2/page/get/video', + 'lyric': '/api/v2/lyric/get/lyric', + 'song-streaming': '/api/v2/song/get/streaming', + # Playlist 'playlist': '/api/v2/page/get/playlist', 'album': '/api/v2/page/get/playlist', - 'lyric': '/api/v2/lyric/get/lyric', - 'song_streaming': '/api/v2/song/get/streaming', + # Chart + 'zing-chart': '/api/v2/page/get/chart-home', + 'zing-chart-tuan': '/api/v2/page/get/week-chart', + 'moi-phat-hanh': '/api/v2/page/get/newrelease-chart', + 'the-loai-video': '/api/v2/video/get/list', + # User + 'info-artist': '/api/v2/page/get/artist', + 'user-list-song': '/api/v2/song/get/list', + 'user-list-video': '/api/v2/video/get/list', } - _API_KEY = '88265e23d4284f25963e6eedac8fbfa3' - _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab' - - def _extract_item(self, item, song_id, type_url, fatal): - item_id = item.get('encodeId') or song_id - title = item.get('title') or item.get('alias') - - if type_url == 'video-clip': - source = item.get('streaming') - else: - api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id}) - source = self._download_json(api, video_id=item_id).get('data') - - formats = [] - for k, v in (source or {}).items(): - if not v: - continue - if k in ('mp4', 'hls'): - for res, video_url in v.items(): - if not video_url: - continue - if k == 'hls': - formats.extend(self._extract_m3u8_formats( - video_url, item_id, 'mp4', - 'm3u8_native', m3u8_id=k, fatal=False)) - elif k == 'mp4': - formats.append({ - 'format_id': 'mp4-' + res, - 'url': video_url, - 'height': int_or_none(self._search_regex( - r'^(\d+)p', res, 'resolution', default=None)), - }) - continue - elif v == 'VIP': - continue - formats.append({ - 'ext': 'mp3', - 'format_id': k, - 'tbr': int_or_none(k), - 'url': self._proto_relative_url(v), - 'vcodec': 'none', - }) - if not formats: - if not fatal: - return - msg = item.get('msg') - if msg == 'Sorry, this content is not available in your country.': - self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) - self.raise_no_formats(msg, expected=True) - self._sort_formats(formats) - - lyric = item.get('lyric') - if not lyric: - api = self.get_api_with_signature(name_api=self._SLUG_API.get("lyric"), param={'id': item_id}) - info_lyric = self._download_json(api, video_id=item_id) - lyric = traverse_obj(info_lyric, ('data', 'file')) - subtitles = { - 'origin': [{ - 'url': lyric, - }], - } if lyric else None - - album = item.get('album') or {} - - return { - 'id': item_id, - 'title': title, - 'formats': formats, - 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), - 'subtitles': subtitles, 
- 'duration': int_or_none(item.get('duration')), - 'track': title, - 'artist': traverse_obj(item, 'artistsNames', 'artists_names'), - 'album': traverse_obj(album, 'name', 'title'), - 'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'), + def _api_url(self, url_type, params): + api_slug = self._API_SLUGS[url_type] + params.update({'ctime': '1'}) + sha256 = hashlib.sha256( + ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest() + data = { + **params, + 'apiKey': '88265e23d4284f25963e6eedac8fbfa3', + 'sig': hmac.new( + b'2aa2d1c561e809b267f3638c4a307aab', f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(), } + return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}' - def _real_initialize(self): - if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'): - self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}), - None, note='Updating cookies') + def _call_api(self, url_type, params, display_id=None, **kwargs): + resp = self._download_json( + self._api_url(url_type, params), display_id or params.get('id'), + note=f'Downloading {url_type} JSON metadata', **kwargs) + return (resp or {}).get('data') or {} - def _real_extract(self, url): - song_id, type_url = self._match_valid_url(url).group('id', 'type') - api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id}) - return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url) + def _real_initialize(self): + if not self._cookies_passed: + self._request_webpage( + self._api_url('bai-hat', {'id': ''}), None, note='Updating cookies') - def get_api_with_signature(self, name_api, param): - param.update({'ctime': '1'}) - sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest() - data = { - 'apiKey': self._API_KEY, - 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(), - **param, - } - return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}' + def _parse_items(self, items): + for url in traverse_obj(items, (..., 'link')) or []: + yield self.url_result(urljoin(self._DOMAIN, url)) class ZingMp3IE(ZingMp3BaseIE): _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed' + IE_NAME = 'zingmp3' + IE_DESC = 'zingmp3.vn' _TESTS = [{ 'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'md5': 'ead7ae13693b3205cbc89536a077daed', @@ -147,7 +93,7 @@ class ZingMp3IE(ZingMp3BaseIE): }, }, { 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', - 'md5': 'c7f23d971ac1a4f675456ed13c9b9612', + 'md5': '3c2081e79471a2f4a3edd90b70b185ea', 'info_dict': { 'id': 'ZO8ZF7C7', 'title': 'Sương Hoa Đưa Lối', @@ -180,11 +126,64 @@ class ZingMp3IE(ZingMp3BaseIE): 'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'only_matching': True, }] - IE_NAME = 'zingmp3' - IE_DESC = 'zingmp3.vn' - def _process_data(self, data, song_id, type_url): - return self._extract_item(data, song_id, type_url, True) + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + item = self._call_api(url_type, {'id': song_id}) + + item_id = item.get('encodeId') or song_id + if url_type == 'video-clip': + source = item.get('streaming') + source['mp4'] = self._download_json( + 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id, + query={'requestdata': json.dumps({'id': item_id})}, + 
note='Downloading mp4 JSON metadata').get('source') + else: + source = self._call_api('song-streaming', {'id': item_id}) + + formats = [] + for k, v in (source or {}).items(): + if not v or v == 'VIP': + continue + if k not in ('mp4', 'hls'): + formats.append({ + 'ext': 'mp3', + 'format_id': k, + 'tbr': int_or_none(k), + 'url': self._proto_relative_url(v), + 'vcodec': 'none', + }) + continue + for res, video_url in v.items(): + if not video_url: + continue + if k == 'hls': + formats.extend(self._extract_m3u8_formats(video_url, item_id, 'mp4', m3u8_id=k, fatal=False)) + continue + formats.append({ + 'format_id': f'mp4-{res}', + 'url': video_url, + 'height': int_or_none(res), + }) + + if not formats and item.get('msg') == 'Sorry, this content is not available in your country.': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + self._sort_formats(formats) + + lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file') + + return { + 'id': item_id, + 'title': traverse_obj(item, 'title', 'alias'), + 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), + 'duration': int_or_none(item.get('duration')), + 'track': traverse_obj(item, 'title', 'alias'), + 'artist': traverse_obj(item, 'artistsNames', 'artists_names'), + 'album': traverse_obj(item, ('album', ('name', 'title')), get_all=False), + 'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')), get_all=False), + 'formats': formats, + 'subtitles': {'origin': [{'url': lyric}]} if lyric else None, + } class ZingMp3AlbumIE(ZingMp3BaseIE): @@ -192,19 +191,17 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): _TESTS = [{ 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html', 'info_dict': { - '_type': 'playlist', 'id': 'ZWZBWDAF', 'title': 'Lâu Đài Tình Ái', }, - 'playlist_count': 9, + 'playlist_mincount': 9, }, { 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html', 'info_dict': { - '_type': 'playlist', 'id': 'ZWZAEZZD', 'title': 'Những Bài Hát Hay Nhất Của Mr. 
Siro', }, - 'playlist_count': 49, + 'playlist_mincount': 49, }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, @@ -214,12 +211,176 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): }] IE_NAME = 'zingmp3:album' - def _process_data(self, data, song_id, type_url): - def entries(): - for item in traverse_obj(data, ('song', 'items')) or []: - entry = self._extract_item(item, song_id, type_url, False) - if entry: - yield entry + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + data = self._call_api(url_type, {'id': song_id}) + return self.playlist_result( + self._parse_items(traverse_obj(data, ('song', 'items'))), + traverse_obj(data, 'id', 'encodeId'), traverse_obj(data, 'name', 'title')) + + +class ZingMp3ChartHomeIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:zing-chart|moi-phat-hanh))/?(?:[#?]|$)' + _TESTS = [{ + 'url': 'https://zingmp3.vn/zing-chart', + 'info_dict': { + 'id': 'zing-chart', + }, + 'playlist_mincount': 100, + }, { + 'url': 'https://zingmp3.vn/moi-phat-hanh', + 'info_dict': { + 'id': 'moi-phat-hanh', + }, + 'playlist_mincount': 100, + }] + IE_NAME = 'zingmp3:chart-home' + + def _real_extract(self, url): + url_type = self._match_id(url) + data = self._call_api(url_type, {'id': url_type}) + items = traverse_obj(data, ('RTChart', 'items') if url_type == 'zing-chart' else 'items') + return self.playlist_result(self._parse_items(items), url_type) + + +class ZingMp3WeekChartIE(ZingMp3BaseIE): + _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'zing-chart-tuan' + IE_NAME = 'zingmp3:week-chart' + _TESTS = [{ + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-Viet-Nam/IWZ9Z08I.html', + 'info_dict': { + 'id': 'IWZ9Z08I', + 'title': 'zing-chart-vn', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-US-UK/IWZ9Z0BW.html', + 'info_dict': { + 'id': 'IWZ9Z0BW', + 'title': 'zing-chart-us', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-KPop/IWZ9Z0BO.html', + 'info_dict': { + 'id': 'IWZ9Z0BO', + 'title': 'zing-chart-korea', + }, + 'playlist_mincount': 10, + }] + + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + data = self._call_api(url_type, {'id': song_id}) + return self.playlist_result( + self._parse_items(data['items']), song_id, f'zing-chart-{data.get("country", "")}') + - return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'), - traverse_obj(data, 'name', 'title')) +class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>the-loai-video)/(?P<regions>[^/]+)/(?P<id>[^\.]+)' + IE_NAME = 'zingmp3:chart-music-video' + _TESTS = [{ + 'url': 'https://zingmp3.vn/the-loai-video/Viet-Nam/IWZ9Z08I.html', + 'info_dict': { + 'id': 'IWZ9Z08I', + 'title': 'the-loai-video_Viet-Nam', + }, + 'playlist_mincount': 400, + }, { + 'url': 'https://zingmp3.vn/the-loai-video/Au-My/IWZ9Z08O.html', + 'info_dict': { + 'id': 'IWZ9Z08O', + 'title': 'the-loai-video_Au-My', + }, + 'playlist_mincount': 40, + }, { + 'url': 'https://zingmp3.vn/the-loai-video/Han-Quoc/IWZ9Z08W.html', + 'info_dict': { + 'id': 'IWZ9Z08W', + 'title': 'the-loai-video_Han-Quoc', + }, + 'playlist_mincount': 30, + }, { + 'url': 'https://zingmp3.vn/the-loai-video/Khong-Loi/IWZ9Z086.html', + 'info_dict': { + 'id': 'IWZ9Z086', + 'title': 'the-loai-video_Khong-Loi', + }, + 'playlist_mincount': 10, + }] + + def 
_fetch_page(self, song_id, url_type, page): + return self._parse_items(self._call_api(url_type, { + 'id': song_id, + 'type': 'genre', + 'page': page + 1, + 'count': self._PER_PAGE + }).get('items')) + + def _real_extract(self, url): + song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type') + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, song_id, url_type), self._PER_PAGE), + song_id, f'{url_type}_{regions}') + + +class ZingMp3UserIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video)/?(?:[?#]|$)' + IE_NAME = 'zingmp3:user' + _TESTS = [{ + 'url': 'https://zingmp3.vn/Mr-Siro/bai-hat', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - bai-hat', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 91, + }, { + 'url': 'https://zingmp3.vn/Mr-Siro/album', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - album', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 3, + }, { + 'url': 'https://zingmp3.vn/Mr-Siro/single', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - single', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 20, + }, { + 'url': 'https://zingmp3.vn/Mr-Siro/video', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - video', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 15, + }] + + def _fetch_page(self, user_id, url_type, page): + url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video' + return self._parse_items(self._call_api(url_type, { + 'id': user_id, + 'type': 'artist', + 'page': page + 1, + 'count': self._PER_PAGE + }, query={'sort': 'new', 'sectionId': 'aSong'}).get('items')) + + def _real_extract(self, url): + user_alias, url_type = self._match_valid_url(url).group('user', 'type') + if not url_type: + url_type = 'bai-hat' + + user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias}) + if url_type in ('bai-hat', 'video'): + entries = OnDemandPagedList( + functools.partial(self._fetch_page, user_info['id'], url_type), self._PER_PAGE) + else: + entries = self._parse_items(traverse_obj(user_info, ( + 'sections', lambda _, v: v['link'] == f'/{user_alias}/{url_type}', 'items', ...))) + return self.playlist_result( + entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography')) diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index c00548839..a455f8c04 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index 7663cb36b..6f2fbb9e9 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor |
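
Note on the ZingMp3 rework above: the new _api_url helper signs every API call by hashing the sorted query parameters with SHA-256 and then computing an HMAC-SHA512 over the endpoint slug plus that digest. Below is a minimal standalone sketch of that scheme, not part of the commit itself; it reuses the apiKey/secret constants and the /api/v2/page/get/song slug shown in the diff, and the song id is taken from one of the extractor's test URLs. Real requests may additionally rely on the cookie bootstrap done in _real_initialize.

import hashlib
import hmac
import urllib.parse

API_KEY = '88265e23d4284f25963e6eedac8fbfa3'      # constant from the diff
SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab'  # constant from the diff
DOMAIN = 'https://zingmp3.vn'


def signed_api_url(api_slug, params):
    # 'ctime' is always added before signing
    params = {**params, 'ctime': '1'}
    # SHA-256 over the concatenation of the sorted "k=v" parameter pairs
    param_hash = hashlib.sha256(
        ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
    # HMAC-SHA512 of the endpoint slug plus that digest, keyed with the static secret
    sig = hmac.new(SECRET_KEY, f'{api_slug}{param_hash}'.encode(), hashlib.sha512).hexdigest()
    return f'{DOMAIN}{api_slug}?' + urllib.parse.urlencode(
        {**params, 'apiKey': API_KEY, 'sig': sig})


# Example: build the signed metadata URL for the song id used in the test cases
print(signed_api_url('/api/v2/page/get/song', {'id': 'ZWZB9WAB'}))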