about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- yt_dlp/extractor/bandcamp.py 1
-rw-r--r-- yt_dlp/extractor/common.py 7
-rw-r--r-- yt_dlp/extractor/crunchyroll.py 2
-rw-r--r-- yt_dlp/extractor/daystar.py 48
-rw-r--r-- yt_dlp/extractor/extractors.py 6
-rw-r--r-- yt_dlp/extractor/gettr.py 82
-rw-r--r-- yt_dlp/extractor/instagram.py 3
-rw-r--r-- yt_dlp/extractor/lbry.py 9
-rw-r--r-- yt_dlp/extractor/mildom.py 3
-rw-r--r-- yt_dlp/extractor/openload.py 3
-rw-r--r-- yt_dlp/extractor/rtve.py 3
-rw-r--r-- yt_dlp/extractor/telegram.py 37
-rw-r--r-- yt_dlp/extractor/twitch.py 2
-rw-r--r-- yt_dlp/extractor/videocampus_sachsen.py 96
-rw-r--r-- yt_dlp/extractor/vimeo.py 3
-rw-r--r-- yt_dlp/extractor/youtube.py 10
-rw-r--r-- yt_dlp/extractor/zingmp3.py 135
17 files changed, 354 insertions, 96 deletions
diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
index 42223dab7..745055e2d 100644
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -183,6 +183,7 @@ class BandcampIE(InfoExtractor):
'format_note': f.get('description'),
'filesize': parse_filesize(f.get('size_mb')),
'vcodec': 'none',
+ 'acodec': format_id.split('-')[0],
})
self._sort_formats(formats)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index d8bb21137..dbf5ef8d4 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -639,7 +639,7 @@ class InfoExtractor(object):
}
if hasattr(e, 'countries'):
kwargs['countries'] = e.countries
- raise type(e)(e.msg, **kwargs)
+ raise type(e)(e.orig_msg, **kwargs)
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
@@ -1101,6 +1101,7 @@ class InfoExtractor(object):
if metadata_available and (
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
+ return
if method is not None:
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
raise ExtractorError(msg, expected=True)
@@ -1617,7 +1618,7 @@ class InfoExtractor(object):
'vcodec': {'type': 'ordered', 'regex': True,
'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
'acodec': {'type': 'ordered', 'regex': True,
- 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
+ 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
@@ -3678,7 +3679,7 @@ class InfoExtractor(object):
def mark_watched(self, *args, **kwargs):
if not self.get_param('mark_watched', False):
return
- if (self._get_login_info()[0] is not None
+ if (hasattr(self, '_NETRC_MACHINE') and self._get_login_info()[0] is not None
or self.get_param('cookiefile')
or self.get_param('cookiesfrombrowser')):
self._mark_watched(*args, **kwargs)
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index ffe291098..b6ba5ef56 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -85,7 +85,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'session_id': session_id
}).encode('ascii'))
if login_response['code'] != 'ok':
- raise ExtractorError('Login failed. Bad username or password?', expected=True)
+ raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
diff --git a/yt_dlp/extractor/daystar.py b/yt_dlp/extractor/daystar.py
new file mode 100644
index 000000000..4f59d904f
--- /dev/null
+++ b/yt_dlp/extractor/daystar.py
@@ -0,0 +1,48 @@
+from .common import InfoExtractor
+from ..utils import js_to_json, urljoin
+
+
+class DaystarClipIE(InfoExtractor):
+    """Extractor for clips served from player.daystar.tv."""
+
+    IE_NAME = 'daystar:clip'
+    _VALID_URL = r'https?://player\.daystar\.tv/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://player.daystar.tv/0MTO2ITM',
+        'info_dict': {
+            'id': '0MTO2ITM',
+            'ext': 'mp4',
+            'title': 'The Dark World of COVID Pt. 1 | Aaron Siri',
+            'description': 'a420d320dda734e5f29458df3606c5f4',
+            'thumbnail': r're:^https?://.+\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the clip at ``url``.
+
+        The player page embeds an iframe. The iframe URL, with ``player.php``
+        replaced by ``config2.php``, is downloaded and its ``sources`` array
+        is parsed; every entry whose ``type`` is ``m3u8`` contributes HLS
+        formats and subtitles.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        src_iframe = self._search_regex(r'\<iframe[^>]+src="([^"]+)"', webpage, 'src iframe')
+        # The config page and the manifests are requested with the iframe URL as Referer
+        webpage_iframe = self._download_webpage(
+            src_iframe.replace('player.php', 'config2.php'), video_id, headers={'Referer': src_iframe})
+
+        # The array is run through js_to_json, i.e. it is not assumed to be strict JSON
+        sources = self._parse_json(self._search_regex(
+            r'sources\:\s*(\[.*?\])', webpage_iframe, 'm3u8 source'), video_id, transform_source=js_to_json)
+
+        formats, subtitles = [], {}
+        for source in sources:
+            file_path = source.get('file')
+            if not file_path or source.get('type') != 'm3u8':
+                continue
+            # `file` may be relative; it is resolved against the lightcast embed base
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                urljoin('https://www.lightcast.com/embed/', file_path),
+                video_id, 'mp4', fatal=False, headers={'Referer': src_iframe})
+            formats.extend(fmts)
+            subtitles = self._merge_subtitles(subtitles, subs)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+            # The thumbnail is optional: a missing one must not abort an otherwise playable clip
+            'thumbnail': self._search_regex(
+                r'image:\s*"([^"]+)', webpage_iframe, 'thumbnail', fatal=False),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 0f26dc24f..5ef1901e4 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -348,6 +348,7 @@ from .daum import (
DaumPlaylistIE,
DaumUserIE,
)
+from .daystar import DaystarClipIE
from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import (
@@ -1593,6 +1594,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
+from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (
@@ -1833,6 +1835,10 @@ from .vice import (
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videa import VideaIE
+from .videocampus_sachsen import (
+ VideocampusSachsenIE,
+ VideocampusSachsenEmbedIE,
+)
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videomore import (
diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py
index 9842edd81..327a4d0b8 100644
--- a/yt_dlp/extractor/gettr.py
+++ b/yt_dlp/extractor/gettr.py
@@ -8,8 +8,8 @@ from ..utils import (
dict_get,
float_or_none,
int_or_none,
- remove_end,
str_or_none,
+ traverse_obj,
try_get,
url_or_none,
urljoin,
@@ -36,8 +36,9 @@ class GettrIE(GettrBaseIE):
'ext': 'mp4',
'uploader': 'EpochTV',
'uploader_id': 'epochtv',
+ 'upload_date': '20210927',
'thumbnail': r're:^https?://.+/out\.jpg',
- 'timestamp': 1632782451058,
+ 'timestamp': 1632782451.058,
'duration': 58.5585,
'tags': ['hornofafrica', 'explorations'],
}
@@ -50,43 +51,69 @@ class GettrIE(GettrBaseIE):
'ext': 'mp4',
'uploader': 'Neues Forum Freiheit',
'uploader_id': 'nf_freiheit',
+ 'upload_date': '20210718',
'thumbnail': r're:^https?://.+/out\.jpg',
- 'timestamp': 1626594455017,
+ 'timestamp': 1626594455.017,
'duration': 23,
'tags': 'count:12',
}
+ }, {
+ # quote post
+ 'url': 'https://gettr.com/post/pxn5b743a9',
+ 'only_matching': True,
+ }, {
+ # quote with video
+ 'url': 'https://gettr.com/post/pxtiiz5ca2',
+ 'only_matching': True,
+ }, {
+ # streaming embed
+ 'url': 'https://gettr.com/post/pxlu8p3b13',
+ 'only_matching': True,
+ }, {
+ # youtube embed
+ 'url': 'https://gettr.com/post/pv6wp9e24c',
+ 'only_matching': True,
+ 'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
post_id = self._match_id(url)
webpage = self._download_webpage(url, post_id)
-
api_data = self._call_api('post/%s?incl="poststats|userinfo"' % post_id, post_id)
post_data = api_data.get('data')
- user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']]) or {}
+ user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']], dict) or {}
- if post_data.get('nfound'):
- raise ExtractorError(post_data.get('txt'), expected=True)
+ vid = post_data.get('vid')
+ ovid = post_data.get('ovid')
+
+ if post_data.get('p_type') == 'stream':
+ return self.url_result(f'https://gettr.com/streaming/{post_id}', ie='GettrStreaming', video_id=post_id)
+
+ if not (ovid or vid):
+ embed_url = url_or_none(post_data.get('prevsrc'))
+ shared_post_id = traverse_obj(api_data, ('aux', 'shrdpst', '_id'), ('data', 'rpstIds', 0), expected_type=str)
+
+ if embed_url:
+ return self.url_result(embed_url)
+ elif shared_post_id:
+ return self.url_result(f'https://gettr.com/post/{shared_post_id}', ie='Gettr', video_id=shared_post_id)
+ else:
+ raise ExtractorError('There\'s no video in this post.')
title = description = str_or_none(
post_data.get('txt') or self._og_search_description(webpage))
uploader = str_or_none(
user_data.get('nickname')
- or remove_end(self._og_search_title(webpage), ' on GETTR'))
+ or self._search_regex(r'^(.+?) on GETTR', self._og_search_title(webpage, default=''), 'uploader', fatal=False))
+
if uploader:
title = '%s - %s' % (uploader, title)
- if not dict_get(post_data, ['vid', 'ovid']):
- raise ExtractorError('There\'s no video in this post.')
-
- vid = post_data.get('vid')
- ovid = post_data.get('ovid')
-
- formats = self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else []
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else ([], {})
if ovid:
formats.append({
@@ -103,15 +130,16 @@ class GettrIE(GettrBaseIE):
'id': post_id,
'title': title,
'description': description,
- 'thumbnail': url_or_none(
- urljoin(self._MEDIA_BASE_URL, post_data.get('main'))
- or self._og_search_thumbnail(webpage)),
- 'timestamp': int_or_none(post_data.get('cdate')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'uploader': uploader,
'uploader_id': str_or_none(
dict_get(user_data, ['_id', 'username'])
or post_data.get('uid')),
- 'uploader': uploader,
- 'formats': formats,
+ 'thumbnail': url_or_none(
+ urljoin(self._MEDIA_BASE_URL, post_data.get('main'))
+ or self._html_search_meta(['og:image', 'image'], webpage, 'thumbnail', fatal=False)),
+ 'timestamp': float_or_none(dict_get(post_data, ['cdate', 'udate']), scale=1000),
'duration': float_or_none(post_data.get('vid_dur')),
'tags': post_data.get('htgs'),
}
@@ -165,19 +193,19 @@ class GettrStreamingIE(GettrBaseIE):
thumbnails = [{
'url': urljoin(self._MEDIA_BASE_URL, thumbnail),
- } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs']) or []]
+ } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []]
self._sort_formats(formats)
return {
'id': video_id,
- 'title': try_get(video_info, lambda x: x['postData']['ttl']),
- 'description': try_get(video_info, lambda x: x['postData']['dsc']),
+ 'title': try_get(video_info, lambda x: x['postData']['ttl'], str),
+ 'description': try_get(video_info, lambda x: x['postData']['dsc'], str),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
- 'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname']),
- 'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id']),
+ 'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname'], str),
+ 'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id'], str),
'view_count': int_or_none(live_info.get('viewsCount')),
'timestamp': float_or_none(live_info.get('startAt'), scale=1000),
'duration': float_or_none(live_info.get('duration'), scale=1000),
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index a2cc9f748..3bb786d6a 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -17,7 +17,6 @@ from ..utils import (
get_element_by_attribute,
int_or_none,
lowercase_escape,
- std_headers,
str_or_none,
str_to_int,
traverse_obj,
@@ -503,7 +502,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
'%s' % rhx_gis,
'',
'%s:%s' % (rhx_gis, csrf_token),
- '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
+ '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']),
]
# try all of the ways to generate a GIS query, and not only use the
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index 1405ce0c7..5d5457c53 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -17,6 +17,7 @@ from ..utils import (
parse_qs,
OnDemandPagedList,
try_get,
+ UnsupportedError,
urljoin,
)
@@ -196,11 +197,11 @@ class LBRYIE(LBRYBaseIE):
live_data = self._download_json(
f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id,
note='Downloading livestream JSON metadata')['data']
- if not live_data['live']:
- raise ExtractorError('This stream is not live', expected=True)
- streaming_url = final_url = live_data['url']
+ streaming_url = final_url = live_data.get('url')
+ if not final_url and not live_data.get('live'):
+ self.raise_no_formats('This stream is not live', True, claim_id)
else:
- raise ExtractorError('Unsupported URL', expected=True)
+ raise UnsupportedError(url)
info = self._parse_stream(result, url)
if determine_ext(final_url) == 'm3u8':
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index ddeaa7021..b5a2e17f2 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -8,7 +8,6 @@ import json
from .common import InfoExtractor
from ..utils import (
- std_headers,
update_url_query,
random_uuidv4,
try_get,
@@ -70,7 +69,7 @@ class MildomBaseIE(InfoExtractor):
'clu': '',
'wh': '1919*810',
'rtm': self.iso_timestamp(),
- 'ua': std_headers['User-Agent'],
+ 'ua': self.get_param('http_headers')['User-Agent'],
}).encode('utf8')).decode('utf8').replace('\n', ''),
}).encode('utf8'))
self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
index 6ec54509b..36927009d 100644
--- a/yt_dlp/extractor/openload.py
+++ b/yt_dlp/extractor/openload.py
@@ -16,7 +16,6 @@ from ..utils import (
ExtractorError,
get_exe_version,
is_outdated_version,
- std_headers,
Popen,
)
@@ -208,7 +207,7 @@ class PhantomJSwrapper(object):
replaces = self.options
replaces['url'] = url
- user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+ user_agent = headers.get('User-Agent') or self.get_param('http_headers')['User-Agent']
replaces['ua'] = user_agent.replace('"', '\\"')
replaces['jscode'] = jscode
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index af1bb943d..7a1dc6f32 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -17,7 +17,6 @@ from ..utils import (
qualities,
remove_end,
remove_start,
- std_headers,
try_get,
)
@@ -71,7 +70,7 @@ class RTVEALaCartaIE(InfoExtractor):
}]
def _real_initialize(self):
- user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+ user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8')
self._manager = self._download_json(
'http://www.rtve.es/odin/loki/' + user_agent_b64,
None, 'Fetching manager info')['manager']
diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py
new file mode 100644
index 000000000..2dfa261e9
--- /dev/null
+++ b/yt_dlp/extractor/telegram.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+
+
+class TelegramEmbedIE(InfoExtractor):
+    """Extractor for videos attached to public t.me channel posts."""
+
+    IE_NAME = 'telegram:embed'
+    _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://t.me/europa_press/613',
+        'info_dict': {
+            'id': '613',
+            'ext': 'mp4',
+            'title': 'Europa Press',
+            'description': '6ce2d7e8d56eda16d80607b23db7b252',
+            'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the post at ``url``.
+
+        Title and description are read from the meta tags of the regular
+        post page; the video URL and the thumbnail are read from the same
+        URL requested with ``embed=1``.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # _VALID_URL is not anchored at the end, so `url` may already carry a
+        # query string; passing `query` merges the parameter instead of
+        # appending a second '?'
+        webpage_embed = self._download_webpage(
+            url, video_id, note='Downloading embed page', query={'embed': 1})
+
+        formats = [{
+            'url': self._proto_relative_url(self._search_regex(
+                r'<video[^>]+src="([^"]+)"', webpage_embed, 'source')),
+            'ext': 'mp4',
+        }]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True),
+            # Description and thumbnail are optional: their absence must not
+            # abort extraction once a video source has been found
+            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+            'thumbnail': self._search_regex(
+                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
+                webpage_embed, 'thumbnail', fatal=False),
+            'formats': formats,
+        }
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index 54e500edd..bee26c3a3 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -1048,7 +1048,7 @@ class TwitchClipsIE(TwitchBaseIE):
'title': clip.get('title') or video_id,
'formats': formats,
'duration': int_or_none(clip.get('durationSeconds')),
- 'views': int_or_none(clip.get('viewCount')),
+ 'view_count': int_or_none(clip.get('viewCount')),
'timestamp': unified_timestamp(clip.get('createdAt')),
'thumbnails': thumbnails,
'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py
new file mode 100644
index 000000000..96e98573f
--- /dev/null
+++ b/yt_dlp/extractor/videocampus_sachsen.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from .common import InfoExtractor
+
+
+class VideocampusSachsenIE(InfoExtractor):
+    """Extractor for video pages on videocampus.sachsen.de.
+
+    Two URL shapes are accepted: short ``/m/<tmp_id>`` links, which carry no
+    video key, and ``[/category]/video/<display_id>/<id>`` links, whose last
+    component is the 32-digit hexadecimal video key.
+    """
+
+    _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?:
+        m/(?P<tmp_id>[0-9a-f]+)|
+        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})
+    )'''
+
+    _TESTS = [
+        {
+            'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b',
+            'info_dict': {
+                'id': 'e6b9349905c1628631f175712250f2a1',
+                'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+                'ext': 'mp4',
+            },
+        },
+        {
+            'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262',
+            'info_dict': {
+                'id': 'fc99c527e4205b121cb7c74433469262',
+                'title': 'Was ist selbstgesteuertes Lernen?',
+                'display_id': 'Was-ist-selbstgesteuertes-Lernen',
+                'ext': 'mp4',
+            },
+        },
+        {
+            'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100',
+            'info_dict': {
+                'id': '09d4ed029002eb1bdda610f1103dd54c',
+                'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
+                'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
+                'ext': 'mp4',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``.
+
+        The page download is non-fatal: when the video key is already part of
+        the URL, the HLS manifest can be requested even without the page (the
+        title is then left unset).
+        """
+        video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id')
+        webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
+
+        # Only the short '/m/<tmp_id>' links lack the video key; for those it is
+        # taken from the embed iframe on the page. A key that is already known
+        # from the URL is kept as is.
+        if not video_id:
+            video_id = self._html_search_regex(
+                r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&',
+                webpage, 'video_id')
+
+        # First try the <h1> heading, then the 'title' meta tag
+        title = self._html_search_regex(
+            (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')),
+            webpage, 'title', group='content', fatal=False)
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
+            video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'display_id': display_id,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class VideocampusSachsenEmbedIE(InfoExtractor):
+    """Extractor for videocampus.sachsen.de embed URLs (``/media/embed?key=<id>``)."""
+
+    # The dots of the host name are escaped so that they match literally
+    _VALID_URL = r'https?://videocampus\.sachsen\.de/media/embed\?key=(?P<id>[0-9a-f]+)'
+
+    _TESTS = [
+        {
+            'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
+            'info_dict': {
+                'id': 'fc99c527e4205b121cb7c74433469262',
+                'title': 'Was ist selbstgesteuertes Lernen?',
+                'ext': 'mp4',
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the embed page at ``url``.
+
+        The video key is the ``key`` query parameter; the title is read from
+        the ``title`` attribute of an ``<img>`` tag and is optional.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
+            video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 1a9fd00e4..77ffb4bfb 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -28,7 +28,6 @@ from ..utils import (
parse_qs,
sanitized_Request,
smuggle_url,
- std_headers,
str_or_none,
try_get,
unified_timestamp,
@@ -758,7 +757,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
- headers = std_headers.copy()
+ headers = self.get_param('http_headers').copy()
if 'http_headers' in data:
headers.update(data['http_headers'])
if 'Referer' not in headers:
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 602d48e3c..4e812af99 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -240,13 +240,15 @@ def build_innertube_clients():
base_client, *variant = client.split('_')
ytcfg['priority'] = 10 * priority(base_client)
- if variant == ['embedded']:
- ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
- INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
+ if not variant:
+ INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
+ agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
agegate_ytcfg['priority'] -= 1
+ elif variant == ['embedded']:
+ ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
ytcfg['priority'] -= 2
- elif variant:
+ else:
ytcfg['priority'] -= 3
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
index a3edc158f..f84ba5cff 100644
--- a/yt_dlp/extractor/zingmp3.py
+++ b/yt_dlp/extractor/zingmp3.py
@@ -1,22 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
+import hmac
+import urllib.parse
+
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ traverse_obj,
+ HEADRequest,
)
class ZingMp3BaseIE(InfoExtractor):
- _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?:%s)/[^/]+/(?P<id>\w+)\.html'
+ _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/]+/(?P<id>\w+)(?:\.html|\?)'
_GEO_COUNTRIES = ['VN']
+ _DOMAIN = 'https://zingmp3.vn'
+ _SLUG_API = {
+ 'bai-hat': '/api/v2/page/get/song',
+ 'embed': '/api/v2/page/get/song',
+ 'video-clip': '/api/v2/page/get/video',
+ 'playlist': '/api/v2/page/get/playlist',
+ 'album': '/api/v2/page/get/playlist',
+ 'lyric': '/api/v2/lyric/get/lyric',
+ 'song_streaming': '/api/v2/song/get/streaming',
+ }
+
+ _API_KEY = '88265e23d4284f25963e6eedac8fbfa3'
+ _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab'
- def _extract_item(self, item, fatal):
- item_id = item['id']
- title = item.get('name') or item['title']
+ def _extract_item(self, item, song_id, type_url, fatal):
+ item_id = item.get('encodeId') or song_id
+ title = item.get('title') or item.get('alias')
+
+ if type_url == 'video-clip':
+ source = item.get('streaming')
+ else:
+ api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id})
+ source = self._download_json(api, video_id=item_id).get('data')
formats = []
- for k, v in (item.get('source') or {}).items():
+ for k, v in (source or {}).items():
if not v:
continue
if k in ('mp4', 'hls'):
@@ -34,31 +59,35 @@ class ZingMp3BaseIE(InfoExtractor):
'height': int_or_none(self._search_regex(
r'^(\d+)p', res, 'resolution', default=None)),
})
- else:
- formats.append({
- 'ext': 'mp3',
- 'format_id': k,
- 'tbr': int_or_none(k),
- 'url': self._proto_relative_url(v),
- 'vcodec': 'none',
- })
+ continue
+ elif v == 'VIP':
+ continue
+ formats.append({
+ 'ext': 'mp3',
+ 'format_id': k,
+ 'tbr': int_or_none(k),
+ 'url': self._proto_relative_url(v),
+ 'vcodec': 'none',
+ })
if not formats:
if not fatal:
return
- msg = item['msg']
+ msg = item.get('msg')
if msg == 'Sorry, this content is not available in your country.':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
self.raise_no_formats(msg, expected=True)
self._sort_formats(formats)
- subtitles = None
lyric = item.get('lyric')
- if lyric:
- subtitles = {
- 'origin': [{
- 'url': lyric,
- }],
- }
+ if not lyric:
+ api = self.get_api_with_signature(name_api=self._SLUG_API.get("lyric"), param={'id': item_id})
+ info_lyric = self._download_json(api, video_id=item_id)
+ lyric = traverse_obj(info_lyric, ('data', 'file'))
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }],
+ } if lyric else None
album = item.get('album') or {}
@@ -66,30 +95,41 @@ class ZingMp3BaseIE(InfoExtractor):
'id': item_id,
'title': title,
'formats': formats,
- 'thumbnail': item.get('thumbnail'),
+ 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'),
'subtitles': subtitles,
'duration': int_or_none(item.get('duration')),
'track': title,
- 'artist': item.get('artists_names'),
- 'album': album.get('name') or album.get('title'),
- 'album_artist': album.get('artists_names'),
+ 'artist': traverse_obj(item, 'artistsNames', 'artists_names'),
+ 'album': traverse_obj(album, 'name', 'title'),
+ 'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'),
}
+ def _real_initialize(self):
+ # Unless the user supplied cookies ('cookiefile' or 'cookiesfrombrowser'),
+ # send a HEAD request to the site root before extracting; the request is
+ # reported as 'Updating cookies'.
+ # NOTE(review): presumably the API calls need the cookies set by this
+ # response - confirm.
+ if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'):
+ self._request_webpage(HEADRequest(self._DOMAIN), None, note='Updating cookies')
+
def _real_extract(self, url):
- page_id = self._match_id(url)
- webpage = self._download_webpage(
- url.replace('://zingmp3.vn/', '://mp3.zing.vn/'),
- page_id, query={'play_song': 1})
- data_path = self._search_regex(
- r'data-xml="([^"]+)', webpage, 'data path')
- return self._process_data(self._download_json(
- 'https://mp3.zing.vn/xhr' + data_path, page_id)['data'])
+ song_id, type_url = self._match_valid_url(url).group('id', 'type')
+
+ api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id})
+
+ return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url)
+
+ def get_api_with_signature(self, name_api, param):
+ # Build the signed request URL for the API endpoint path `name_api`.
+ # `param` is a dict of query parameters; it is both signed and sent.
+ # Returns _DOMAIN + name_api + '?' + the url-encoded query string.
+ #
+ # Step 1: SHA-256 hex digest of the parameters joined as 'key=value'
+ # with no separator, in the dict's iteration order.
+ sha256 = hashlib.sha256(''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest()
+
+ # Step 2: 'sig' is the HMAC-SHA512 hex digest, keyed with _SECRET_KEY, of
+ # the endpoint path immediately followed by the digest from step 1.
+ data = {
+ 'apiKey': self._API_KEY,
+ 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(),
+ **param,
+ }
+ return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}'
class ZingMp3IE(ZingMp3BaseIE):
- _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip'
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed'
_TESTS = [{
- 'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+ 'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
'md5': 'ead7ae13693b3205cbc89536a077daed',
'info_dict': {
'id': 'ZWZB9WAB',
@@ -108,7 +148,7 @@ class ZingMp3IE(ZingMp3BaseIE):
'album_artist': 'Bảo Thy',
},
}, {
- 'url': 'https://mp3.zing.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
+ 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
'md5': 'e9c972b693aa88301ef981c8151c4343',
'info_dict': {
'id': 'ZO8ZF7C7',
@@ -120,14 +160,17 @@ class ZingMp3IE(ZingMp3BaseIE):
'artist': 'K-ICM, RYO',
},
}, {
+ 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',
+ 'only_matching': True,
+ }, {
'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
'only_matching': True,
}]
IE_NAME = 'zingmp3'
- IE_DESC = 'mp3.zing.vn'
+ IE_DESC = 'zingmp3.vn'
- def _process_data(self, data):
- return self._extract_item(data, True)
+ def _process_data(self, data, song_id, type_url):
+ return self._extract_item(data, song_id, type_url, True)
class ZingMp3AlbumIE(ZingMp3BaseIE):
@@ -139,7 +182,7 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
'id': 'ZWZBWDAF',
'title': 'Lâu Đài Tình Ái',
},
- 'playlist_count': 10,
+ 'playlist_count': 9,
}, {
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
'only_matching': True,
@@ -149,12 +192,12 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
}]
IE_NAME = 'zingmp3:album'
- def _process_data(self, data):
+ def _process_data(self, data, song_id, type_url):
def entries():
- for item in (data.get('items') or []):
- entry = self._extract_item(item, False)
+ for item in traverse_obj(data, ('song', 'items')) or []:
+ entry = self._extract_item(item, song_id, type_url, False)
if entry:
yield entry
- info = data.get('info') or {}
- return self.playlist_result(
- entries(), info.get('id'), info.get('name') or info.get('title'))
+
+ return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'),
+ traverse_obj(data, 'name', 'title'))