Diffstat (limited to 'yt_dlp/extractor')
 yt_dlp/extractor/abematv.py         |  4
 yt_dlp/extractor/audius.py          |  4
 yt_dlp/extractor/common.py          | 39
 yt_dlp/extractor/commonprotocols.py |  5
 yt_dlp/extractor/curiositystream.py | 10
 yt_dlp/extractor/espn.py            |  4
 yt_dlp/extractor/generic.py         | 40
 yt_dlp/extractor/giga.py            |  9
 yt_dlp/extractor/hitbox.py          |  6
 yt_dlp/extractor/lnkgo.py           |  2
 yt_dlp/extractor/nrk.py             |  7
 yt_dlp/extractor/puls4.py           |  7
 yt_dlp/extractor/stv.py             |  2
 yt_dlp/extractor/youtube.py         | 99
 14 files changed, 104 insertions(+), 134 deletions(-)
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 0706f8559..a75efdd0f 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -7,13 +7,13 @@ import json
import re
import struct
import time
+import urllib.parse
import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
-from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
bytes_to_intlist,
@@ -137,7 +137,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
def abematv_license_open(self, url):
url = request_to_url(url)
- ticket = compat_urllib_parse_urlparse(url).netloc
+ ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),
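Note: the replacement relies on urllib.parse handling non-http schemes: whatever follows "//" is parsed as the netloc, which is how the handler recovers the license ticket. A minimal sketch (the ticket value is hypothetical):

    import urllib.parse

    # abematv-license://<ticket> -- urlparse() puts the part after "//"
    # into .netloc regardless of scheme, so .netloc is the raw ticket
    url = 'abematv-license://5fe79b59e4b05a'  # hypothetical ticket
    print(urllib.parse.urlparse(url).netloc)  # -> 5fe79b59e4b05a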
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
index 189d1224f..0105d9db8 100644
--- a/yt_dlp/extractor/audius.py
+++ b/yt_dlp/extractor/audius.py
@@ -1,8 +1,8 @@
import random
from .common import InfoExtractor
-from ..utils import ExtractorError, try_get, compat_str, str_or_none
-from ..compat import compat_urllib_parse_unquote
+from ..compat import compat_str, compat_urllib_parse_unquote
+from ..utils import ExtractorError, str_or_none, try_get
class AudiusBaseIE(InfoExtractor):
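Note: here (and in several files below) compat_str merely moves from the legacy re-export in ..utils to its home in ..compat. On this Python-3-only codebase it is a plain alias, which is why other hunks in this commit replace it with str outright; a sketch assuming a yt-dlp checkout from around this commit:

    # compat_str is just str on py3; isinstance() checks are unaffected
    from yt_dlp.compat import compat_str
    assert compat_str is str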
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 4c37044f6..aceb1052a 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -13,19 +13,12 @@ import os
import random
import sys
import time
+import urllib.parse
import urllib.request
import xml.etree.ElementTree
from ..compat import functools, re # isort: split
-from ..compat import (
- compat_etree_fromstring,
- compat_expanduser,
- compat_os_name,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
@@ -834,7 +827,7 @@ class InfoExtractor:
"""
# Strip hashes from the URL (#1038)
- if isinstance(url_or_request, (compat_str, str)):
+ if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
@@ -1427,7 +1420,7 @@ class InfoExtractor:
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
- if isinstance(json_ld, compat_str):
+ if isinstance(json_ld, str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld:
return {}
@@ -1517,7 +1510,7 @@ class InfoExtractor:
# both types can have 'name' property(inherited from 'Thing' type). [1]
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
- 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
@@ -2166,7 +2159,7 @@ class InfoExtractor:
]), m3u8_doc)
def format_url(url):
- return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
+ return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2539,7 +2532,7 @@ class InfoExtractor:
})
continue
- src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
@@ -2562,7 +2555,7 @@ class InfoExtractor:
'plugin': 'flowplayer-3.2.0.1',
}
f4m_url += '&' if '?' in f4m_url else '?'
- f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@@ -2832,7 +2825,7 @@ class InfoExtractor:
if re.match(r'^https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
- base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
+ base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
@@ -3102,7 +3095,7 @@ class InfoExtractor:
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
- track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+ track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
@@ -3121,7 +3114,7 @@ class InfoExtractor:
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
- 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
@@ -3365,7 +3358,7 @@ class InfoExtractor:
return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
- query = compat_urlparse.urlparse(url).query
+ query = urllib.parse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
mobj = re.search(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
@@ -3471,7 +3464,7 @@ class InfoExtractor:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
+ if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
@@ -3544,7 +3537,7 @@ class InfoExtractor:
# Often no height is provided but there is a label in
# format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex(
- r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''),
'height', default=None))
a_format = {
'url': source_url,
@@ -3770,10 +3763,10 @@ class InfoExtractor:
return headers
def _generic_id(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
def _generic_title(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
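Note: every compat_urlparse.urljoin call above resolves a possibly-relative manifest or fragment URL against a base; urllib.parse.urljoin is a drop-in replacement. A standalone sketch with hypothetical URLs:

    import urllib.parse

    base = 'https://example.com/hls/master.m3u8'  # hypothetical manifest URL
    # relative paths resolve against the manifest's directory ...
    print(urllib.parse.urljoin(base, 'seg-1.ts'))
    # -> https://example.com/hls/seg-1.ts
    # ... while absolute URLs pass through, matching the re.match() guards above
    print(urllib.parse.urljoin(base, 'https://cdn.example.com/seg-1.ts'))
    # -> https://cdn.example.com/seg-1.ts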
diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py
index e8f19b9e0..2f93e8ea5 100644
--- a/yt_dlp/extractor/commonprotocols.py
+++ b/yt_dlp/extractor/commonprotocols.py
@@ -1,5 +1,6 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import compat_urlparse
class RtmpIE(InfoExtractor):
@@ -23,7 +24,7 @@ class RtmpIE(InfoExtractor):
'formats': [{
'url': url,
'ext': 'flv',
- 'format_id': compat_urlparse.urlparse(url).scheme,
+ 'format_id': urllib.parse.urlparse(url).scheme,
}],
}
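Note: the format_id here is simply the URL scheme, so rtmp and rtmps inputs yield distinct ids; a sketch:

    import urllib.parse

    # urlparse() extracts the scheme for any protocol, not just http(s)
    for url in ('rtmp://host/app/stream', 'rtmps://host/app/stream'):
        print(urllib.parse.urlparse(url).scheme)  # -> rtmp, then rtmps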
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index be4e53e44..a105b6ce2 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -1,12 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- urlencode_postdata,
- compat_str,
- ExtractorError,
-)
+from ..compat import compat_str
+from ..utils import ExtractorError, int_or_none, urlencode_postdata
class CuriosityStreamBaseIE(InfoExtractor):
@@ -50,7 +46,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://app.curiositystream.com/video/2',
+ 'url': 'http://app.curiositystream.com/video/2',
'info_dict': {
'id': '2',
'ext': 'mp4',
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
index 44e0c0989..451148636 100644
--- a/yt_dlp/extractor/espn.py
+++ b/yt_dlp/extractor/espn.py
@@ -3,8 +3,8 @@ import json
import re
import urllib.parse
-from .common import InfoExtractor
from .adobepass import AdobePassIE
+from .common import InfoExtractor
from .once import OnceIE
from ..utils import (
determine_ext,
@@ -197,7 +197,7 @@ class ESPNArticleIE(InfoExtractor):
@classmethod
def suitable(cls, url):
- return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super(ESPNArticleIE, cls).suitable(url)
+ return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
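Note: the py2-compatible super(ESPNArticleIE, cls) form becomes a bare super(), which Python 3 resolves from the enclosing class. A self-contained sketch with hypothetical classes:

    class Base:
        @classmethod
        def suitable(cls, url):
            return url.startswith('https://')

    class Article(Base):
        @classmethod
        def suitable(cls, url):
            # py3 supplies (Article, cls) implicitly -- same behaviour,
            # but the call site survives a class rename
            return super().suitable(url)

    print(Article.suitable('https://example.com/story'))  # -> True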
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index b8c5be7a0..c2f754453 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1,5 +1,6 @@
import os
import re
+import urllib.parse
import xml.etree.ElementTree
from .ant1newsgr import Ant1NewsGrEmbedIE
@@ -106,12 +107,7 @@ from .yapfiles import YapFilesIE
from .youporn import YouPornIE
from .youtube import YoutubeIE
from .zype import ZypeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@@ -2703,7 +2699,7 @@ class GenericIE(InfoExtractor):
title = self._html_search_meta('DC.title', webpage, fatal=True)
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, video_id,
note='Downloading camtasia configuration',
@@ -2719,7 +2715,7 @@ class GenericIE(InfoExtractor):
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
- 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
@@ -2771,7 +2767,7 @@ class GenericIE(InfoExtractor):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
- parsed_url = compat_urlparse.urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
default_search = self.get_param('default_search')
if default_search is None:
@@ -2847,7 +2843,7 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
- format_id = compat_str(m.group('format_id'))
+ format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
@@ -2966,7 +2962,7 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way
# FIXME: unescaping the whole page may break URLs, commenting out for now.
# There probably should be a second run of generic extractor on unescaped webpage.
- # webpage = compat_urllib_parse_unquote(webpage)
+ # webpage = urllib.parse.unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
@@ -3239,7 +3235,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+ return self.url_result(urllib.parse.unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -3492,7 +3488,7 @@ class GenericIE(InfoExtractor):
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+ urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
# Look for Senate ISVP iframe
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
@@ -3725,7 +3721,7 @@ class GenericIE(InfoExtractor):
if mediasite_urls:
entries = [
self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
+ urllib.parse.urljoin(url, mediasite_url),
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
for mediasite_url in mediasite_urls]
return self.playlist_result(entries, video_id, video_title)
@@ -3920,11 +3916,11 @@ class GenericIE(InfoExtractor):
subtitles = {}
for source in sources:
src = source.get('src')
- if not src or not isinstance(src, compat_str):
+ if not src or not isinstance(src, str):
continue
- src = compat_urlparse.urljoin(url, src)
+ src = urllib.parse.urljoin(url, src)
src_type = source.get('type')
- if isinstance(src_type, compat_str):
+ if isinstance(src_type, str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
@@ -3958,7 +3954,7 @@ class GenericIE(InfoExtractor):
if not src:
continue
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
- 'url': compat_urlparse.urljoin(url, src),
+ 'url': urllib.parse.urljoin(url, src),
'name': sub.get('label'),
'http_headers': {
'Referer': full_response.geturl(),
@@ -3985,7 +3981,7 @@ class GenericIE(InfoExtractor):
return True
if RtmpIE.suitable(vurl):
return True
- vpath = compat_urlparse.urlparse(vurl).path
+ vpath = urllib.parse.urlparse(vurl).path
vext = determine_ext(vpath, None)
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@@ -4113,7 +4109,7 @@ class GenericIE(InfoExtractor):
if refresh_header:
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
return {
@@ -4139,8 +4135,8 @@ class GenericIE(InfoExtractor):
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+ video_url = urllib.parse.urljoin(url, video_url)
+ video_id = urllib.parse.unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):
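Note: the last hunk derives a fallback video_id by unquoting the URL basename; urllib.parse.unquote has the same semantics as the old compat wrapper. A sketch with a made-up URL:

    import os
    import urllib.parse

    video_url = 'https://example.com/media/My%20Clip.mp4'  # hypothetical
    print(urllib.parse.unquote(os.path.basename(video_url)))
    # -> My Clip.mp4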
diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py
index 9e835a6da..e728598f7 100644
--- a/yt_dlp/extractor/giga.py
+++ b/yt_dlp/extractor/giga.py
@@ -1,13 +1,8 @@
import itertools
from .common import InfoExtractor
-from ..utils import (
- qualities,
- compat_str,
- parse_duration,
- parse_iso8601,
- str_to_int,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):
diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py
index a7e4424b6..6ecdd390c 100644
--- a/yt_dlp/extractor/hitbox.py
+++ b/yt_dlp/extractor/hitbox.py
@@ -1,13 +1,13 @@
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- parse_iso8601,
+ determine_ext,
float_or_none,
int_or_none,
- compat_str,
- determine_ext,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py
index 3bb52777f..9ea08ec5a 100644
--- a/yt_dlp/extractor/lnkgo.py
+++ b/yt_dlp/extractor/lnkgo.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- compat_str,
format_field,
int_or_none,
parse_iso8601,
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 553c55132..fcbafe418 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -3,18 +3,17 @@ import random
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
- compat_HTTPError,
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
str_or_none,
try_get,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py
index 3c13d1f56..38c5d1109 100644
--- a/yt_dlp/extractor/puls4.py
+++ b/yt_dlp/extractor/puls4.py
@@ -1,9 +1,6 @@
from .prosiebensat1 import ProSiebenSat1BaseIE
-from ..utils import (
- unified_strdate,
- parse_duration,
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, unified_strdate
class Puls4IE(ProSiebenSat1BaseIE):
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index 618dc4329..c879fb52e 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- compat_str,
float_or_none,
int_or_none,
smuggle_url,
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 37a6d4c75..ebc3381a2 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -13,18 +13,11 @@ import sys
import threading
import time
import traceback
+import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import functools # isort: split
-from ..compat import (
- compat_HTTPError,
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
NO_DEFAULT,
@@ -381,11 +374,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
pref = {}
if pref_cookie:
try:
- pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
+ pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
pref.update({'hl': 'en', 'tz': 'UTC'})
- self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
+ self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
self._initialize_pref()
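Note: the PREF cookie is a query-string-shaped blob, so the hunk round-trips it through parse_qsl/urlencode while forcing an English UI and UTC timestamps. A sketch with a made-up cookie value:

    import urllib.parse

    pref_cookie = 'f1=50000000&hl=de'  # hypothetical PREF value
    pref = dict(urllib.parse.parse_qsl(pref_cookie))
    pref.update({'hl': 'en', 'tz': 'UTC'})
    print(urllib.parse.urlencode(pref))
    # -> f1=50000000&hl=en&tz=UTC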
@@ -413,19 +406,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_client_name(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
+ return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
@@ -497,7 +490,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
- token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
@@ -513,12 +506,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
- delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
+ delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
- lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
+ lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
@@ -552,7 +545,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
- 'X-YouTube-Client-Name': compat_str(
+ 'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
@@ -612,7 +605,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_continuation_ep_data(cls, continuation_ep: dict):
if isinstance(continuation_ep, dict):
continuation = try_get(
- continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ continuation_ep, lambda x: x['continuationCommand']['token'], str)
if not continuation:
return
ctp = continuation_ep.get('clickTrackingParams')
@@ -672,7 +665,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_badges(self, renderer: dict):
badges = set()
for badge in try_get(renderer, lambda x: x['badges'], list) or []:
- label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
+ label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
if label:
badges.add(label.lower())
return badges
@@ -687,7 +680,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
obj = [obj]
for item in obj:
- text = try_get(item, lambda x: x['simpleText'], compat_str)
+ text = try_get(item, lambda x: x['simpleText'], str)
if text:
return text
runs = try_get(item, lambda x: x['runs'], list) or []
@@ -789,20 +782,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if isinstance(e.cause, compat_HTTPError):
+ if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
- lambda x: x['error']['message'], compat_str)
+ lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
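Note: compat_HTTPError was an alias for urllib.error.HTTPError, so the isinstance checks keep working unchanged; the loop above retries everything except 403/429. A sketch constructing the error directly rather than hitting the network:

    import io
    import urllib.error

    err = urllib.error.HTTPError(
        'https://example.com/api', 404, 'Not Found', {}, io.BytesIO(b'{}'))
    # the retry loop gives up only on 403/429
    print(err.code, err.code not in (403, 429))  # -> 404 True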
@@ -2345,7 +2338,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
@@ -2427,7 +2420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_player_url(self, *ytcfgs, webpage=None):
player_url = traverse_obj(
ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not player_url:
return
return urljoin('https://www.youtube.com', player_url)
@@ -2444,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+ return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
@@ -2526,7 +2519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
cache_spec = [ord(c) for c in cache_res]
expr_code = ' + '.join(gen_sig_code(cache_spec))
signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ ', '.join(str(len(p)) for p in example_sig.split('.')))
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
@@ -2649,8 +2642,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not url:
self.report_warning(f'Unable to mark {label}watched')
return
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_urlparse.parse_qs(parsed_url.query)
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
# cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with dummy cpn.
@@ -2675,8 +2668,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'et': video_length,
})
- url = compat_urlparse.urlunparse(
- parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ url = urllib.parse.urlunparse(
+ parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
self._download_webpage(
url, video_id, f'Marking {label}watched',
@@ -2793,12 +2786,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
author = self._get_text(comment_renderer, 'authorText')
author_id = try_get(comment_renderer,
- lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
+ lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
- lambda x: x['likeCount']), compat_str)) or 0
+ lambda x: x['likeCount']), str)) or 0
author_thumbnail = try_get(comment_renderer,
- lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
+ lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
is_favorited = 'creatorHeart' in (try_get(
@@ -3178,7 +3171,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = fmt.get('url')
if not fmt_url:
- sc = compat_parse_qs(fmt.get('signatureCipher'))
+ sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
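Note: signatureCipher is itself URL-encoded form data carrying the encrypted signature (s), the parameter name (sp) and the base URL; urllib.parse.parse_qs splits and unquotes it in one step. A sketch with dummy values:

    import urllib.parse

    sc = urllib.parse.parse_qs(
        's=AOq0QJ8w&sp=sig&url=https%3A%2F%2Fexample.com%2Fvideoplayback')
    print(sc['url'][0])  # -> https://example.com/videoplayback
    print(sc['s'][0])    # -> AOq0QJ8w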
@@ -3419,12 +3412,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Unquote should take place before split on comma (,) since textual
# fields may contain comma as well (see
# https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(
+ feed_data = urllib.parse.parse_qs(
urllib.parse.unquote_plus(feed))
def feed_entry(name):
return try_get(
- feed_data, lambda x: x[name][0], compat_str)
+ feed_data, lambda x: x[name][0], str)
feed_id = feed_entry('id')
if not feed_id:
@@ -3651,9 +3644,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
- parsed_url = compat_urllib_parse_urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
+ query = urllib.parse.parse_qs(component)
for k, v in query.items():
for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
d_k += '_time'
@@ -3946,7 +3939,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get(
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
if ep_url:
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
if ie.suitable(ep_url):
@@ -3990,7 +3983,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _shelf_entries(self, shelf_renderer, skip_channels=False):
ep = try_get(
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str)
+ str)
shelf_url = urljoin('https://www.youtube.com', ep)
if shelf_url:
# Skipping links to other channels; note that checking for
@@ -4050,7 +4043,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield entry
# playlist attachment
playlist_id = try_get(
- post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
+ post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
@@ -4061,7 +4054,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if not isinstance(run, dict):
continue
ep_url = try_get(
- run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
if not ep_url:
continue
if not YoutubeIE.suitable(ep_url):
@@ -4238,10 +4231,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
uploader['uploader'] = self._search_regex(
r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
uploader['uploader_id'] = try_get(
- owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
+ owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str)
uploader['uploader_url'] = urljoin(
'https://www.youtube.com/',
- try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+ try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str))
return {k: v for k, v in uploader.items() if v is not None}
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
@@ -4369,13 +4362,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
title = playlist.get('title') or try_get(
- data, lambda x: x['titleText']['simpleText'], compat_str)
+ data, lambda x: x['titleText']['simpleText'], str)
playlist_id = playlist.get('playlistId') or item_id
# Delegating everything except mix playlists to regular tab-based playlist URL
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
@@ -4446,7 +4439,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
continue
nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
text = try_get(
- nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
+ nav_item_renderer, lambda x: x['text']['simpleText'], str)
if not text or text.lower() != 'show unavailable videos':
continue
browse_endpoint = try_get(
@@ -4488,7 +4481,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
@@ -5301,8 +5294,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
- url = compat_urlparse.urlunparse(
- compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ url = urllib.parse.urlunparse(
+ urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
compat_opts = self.get_param('compat_opts', [])
def get_mobj(url):
@@ -5322,7 +5315,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
mdata = self._extract_tab_endpoint(
f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not murl:
raise ExtractorError('Failed to resolve album to playlist')
return self.url_result(murl, ie=YoutubeTabIE.ie_key())
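Note: the _real_extract hunk above normalises every YouTube host to www.youtube.com by rebuilding the URL around a replaced netloc; urlparse() returns a namedtuple, so _replace() works. A sketch with a hypothetical playlist URL:

    import urllib.parse

    url = 'https://music.youtube.com/playlist?list=PLxxxx'  # hypothetical
    parts = urllib.parse.urlparse(url)._replace(netloc='www.youtube.com')
    print(urllib.parse.urlunparse(parts))
    # -> https://www.youtube.com/playlist?list=PLxxxx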