aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/generic.py')
-rw-r--r--yt_dlp/extractor/generic.py40
1 files changed, 18 insertions, 22 deletions
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index b8c5be7a0..c2f754453 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1,5 +1,6 @@
import os
import re
+import urllib.parse
import xml.etree.ElementTree
from .ant1newsgr import Ant1NewsGrEmbedIE
@@ -106,12 +107,7 @@ from .yapfiles import YapFilesIE
from .youporn import YouPornIE
from .youtube import YoutubeIE
from .zype import ZypeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@@ -2703,7 +2699,7 @@ class GenericIE(InfoExtractor):
title = self._html_search_meta('DC.title', webpage, fatal=True)
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, video_id,
note='Downloading camtasia configuration',
@@ -2719,7 +2715,7 @@ class GenericIE(InfoExtractor):
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
- 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
@@ -2771,7 +2767,7 @@ class GenericIE(InfoExtractor):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
- parsed_url = compat_urlparse.urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
default_search = self.get_param('default_search')
if default_search is None:
@@ -2847,7 +2843,7 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
- format_id = compat_str(m.group('format_id'))
+ format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
@@ -2966,7 +2962,7 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way
# FIXME: unescaping the whole page may break URLs, commenting out for now.
# There probably should be a second run of generic extractor on unescaped webpage.
- # webpage = compat_urllib_parse_unquote(webpage)
+ # webpage = urllib.parse.unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
@@ -3239,7 +3235,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+ return self.url_result(urllib.parse.unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -3492,7 +3488,7 @@ class GenericIE(InfoExtractor):
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+ urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
# Look for Senate ISVP iframe
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
@@ -3725,7 +3721,7 @@ class GenericIE(InfoExtractor):
if mediasite_urls:
entries = [
self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
+ urllib.parse.urljoin(url, mediasite_url),
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
for mediasite_url in mediasite_urls]
return self.playlist_result(entries, video_id, video_title)
@@ -3920,11 +3916,11 @@ class GenericIE(InfoExtractor):
subtitles = {}
for source in sources:
src = source.get('src')
- if not src or not isinstance(src, compat_str):
+ if not src or not isinstance(src, str):
continue
- src = compat_urlparse.urljoin(url, src)
+ src = urllib.parse.urljoin(url, src)
src_type = source.get('type')
- if isinstance(src_type, compat_str):
+ if isinstance(src_type, str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
@@ -3958,7 +3954,7 @@ class GenericIE(InfoExtractor):
if not src:
continue
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
- 'url': compat_urlparse.urljoin(url, src),
+ 'url': urllib.parse.urljoin(url, src),
'name': sub.get('label'),
'http_headers': {
'Referer': full_response.geturl(),
@@ -3985,7 +3981,7 @@ class GenericIE(InfoExtractor):
return True
if RtmpIE.suitable(vurl):
return True
- vpath = compat_urlparse.urlparse(vurl).path
+ vpath = urllib.parse.urlparse(vurl).path
vext = determine_ext(vpath, None)
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@@ -4113,7 +4109,7 @@ class GenericIE(InfoExtractor):
if refresh_header:
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
return {
@@ -4139,8 +4135,8 @@ class GenericIE(InfoExtractor):
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+ video_url = urllib.parse.urljoin(url, video_url)
+ video_id = urllib.parse.unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):