aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/build.yml32
-rw-r--r--docs/supportedsites.md1
-rw-r--r--youtube_dlc/YoutubeDL.py9
-rw-r--r--youtube_dlc/downloader/common.py24
-rw-r--r--youtube_dlc/extractor/brightcove.py13
-rw-r--r--youtube_dlc/extractor/extractors.py5
-rw-r--r--youtube_dlc/extractor/mtv.py7
-rw-r--r--youtube_dlc/extractor/newgrounds.py107
-rw-r--r--youtube_dlc/extractor/nitter.py167
-rw-r--r--youtube_dlc/extractor/skyitalia.py119
-rw-r--r--youtube_dlc/extractor/xtube.py47
-rw-r--r--youtube_dlc/extractor/youtube.py50
-rw-r--r--youtube_dlc/options.py2
-rw-r--r--youtube_dlc/update.py20
14 files changed, 510 insertions, 93 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8db7e92f2..f5d94dc49 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -57,7 +57,7 @@ jobs:
id: sha2_file
env:
SHA2: ${{ hashFiles('youtube-dlc') }}
- run: echo "::set-output name=sha2_unix::${env:SHA2}"
+ run: echo "::set-output name=sha2_unix::$SHA2"
- name: Install dependencies for pypi
run: |
python -m pip install --upgrade pip
@@ -98,12 +98,12 @@ jobs:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./dist/youtube-dlc.exe
asset_name: youtube-dlc.exe
- asset_content_type: application/octet-stream
+ asset_content_type: application/vnd.microsoft.portable-executable
- name: Get SHA2-256SUMS for youtube-dlc.exe
id: sha2_file_win
env:
- SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }}
- run: echo "::set-output name=sha2_windows::${env:SHA2}"
+ SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }}
+ run: echo "::set-output name=sha2_windows::$SHA2_win"
build_windows32:
@@ -133,12 +133,12 @@ jobs:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./dist/youtube-dlc_x86.exe
asset_name: youtube-dlc_x86.exe
- asset_content_type: application/octet-stream
+ asset_content_type: application/vnd.microsoft.portable-executable
- name: Get SHA2-256SUMS for youtube-dlc_x86.exe
id: sha2_file_win32
env:
- SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
- run: echo "::set-output name=sha2_windows32::${env:SHA2}"
+ SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
+ run: echo "::set-output name=sha2_windows32::$SHA2_win32"
- name: Make SHA2-256SUMS file
env:
SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }}
@@ -146,6 +146,18 @@ jobs:
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
run: |
- echo "$SHA2_WINDOWS youtube-dlc.exe" > SHA2-256SUMS
- echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS
- echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS
+ echo "version:$YTDLC_VERSION" >> SHA2-256SUMS
+ echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS
+ echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS
+ echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS
+
+ - name: Upload 256SUMS file
+ id: upload-sums
+ uses: actions/upload-release-asset@v1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+ upload_url: ${{ needs.build_unix.outputs.upload_url }}
+ asset_path: ./SHA2-256SUMS
+ asset_name: SHA2-256SUMS
+ asset_content_type: text/plain
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index c46d122ff..3b98e7a12 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -582,6 +582,7 @@
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **Nintendo**
+ - **Nitter**
- **njoy**: N-JOY
- **njoy:embed**
- **NJPWWorld**: 新日本プロレスワールド
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
index f959a4e47..dd55ba0f2 100644
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -1857,13 +1857,13 @@ class YoutubeDL(object):
self.report_error('Cannot write annotations file: ' + annofn)
return
- def dl(name, info):
+ def dl(name, info, subtitle=False):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+ return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -1872,7 +1872,7 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
+ # ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1891,6 +1891,8 @@ class YoutubeDL(object):
return
else:
try:
+ dl(sub_filename, sub_info, subtitle=True)
+ '''
if self.params.get('sleep_interval_subtitles', False):
dl(sub_filename, sub_info)
else:
@@ -1898,6 +1900,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
+ '''
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py
index 31c286458..460364a0b 100644
--- a/youtube_dlc/downloader/common.py
+++ b/youtube_dlc/downloader/common.py
@@ -326,7 +326,7 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
- def download(self, filename, info_dict):
+ def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
@@ -353,16 +353,22 @@ class FileDownloader(object):
})
return True
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ if subtitle is False:
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+ else:
+ sleep_interval_sub = self.params.get('sleep_interval_subtitles')
self.to_screen(
'[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
-
+ int(sleep_interval_sub)))
+ time.sleep(sleep_interval_sub)
return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict):
diff --git a/youtube_dlc/extractor/brightcove.py b/youtube_dlc/extractor/brightcove.py
index 2aa9f4782..638673c31 100644
--- a/youtube_dlc/extractor/brightcove.py
+++ b/youtube_dlc/extractor/brightcove.py
@@ -471,12 +471,17 @@ class BrightcoveNewIE(AdobePassIE):
title = json_data['name'].strip()
formats = []
+ sources_num = len(json_data.get('sources'))
+ key_systems_present = 0
for source in json_data.get('sources', []):
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
- # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
- if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
+ # https://apis.support.brightcove.com/playback/references/playback-api-video-fields-reference.html
+ if source.get('key_systems'):
+ key_systems_present += 1
+ continue
+ elif ext == 'ism' or container == 'WVM':
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:
@@ -533,6 +538,10 @@ class BrightcoveNewIE(AdobePassIE):
'format_id': build_format_id('rtmp'),
})
formats.append(f)
+
+ if sources_num == key_systems_present:
+ raise ExtractorError('This video is DRM protected', expected=True)
+
if not formats:
# for sonyliv.com DRM protected videos
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
index d31edd7c8..666134d86 100644
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -751,6 +751,7 @@ from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
+from .nitter import NitterIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .noco import NocoIE
@@ -1037,6 +1038,10 @@ from .sky import (
SkyNewsIE,
SkySportsIE,
)
+from .skyitalia import (
+ SkyArteItaliaIE,
+ SkyItaliaIE,
+)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py
index 6b3658397..04cc95b6a 100644
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -289,7 +289,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return mgid
- def _extract_mgid(self, webpage, url, data_zone=None):
+ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@@ -300,7 +300,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
except RegexNotFoundError:
mgid = None
- title = self._match_id(url)
+ if not title:
+ title = url_basename(url)
try:
window_data = self._parse_json(self._search_regex(
@@ -336,7 +337,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
- mgid = self._extract_mgid(webpage, url)
+ mgid = self._extract_mgid(webpage, url, title=title)
videos_info = self._get_videos_info(mgid, url=url)
return videos_info
diff --git a/youtube_dlc/extractor/newgrounds.py b/youtube_dlc/extractor/newgrounds.py
index 82e7cf522..b9f01235f 100644
--- a/youtube_dlc/extractor/newgrounds.py
+++ b/youtube_dlc/extractor/newgrounds.py
@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
extract_attributes,
int_or_none,
parse_duration,
@@ -20,22 +21,22 @@ class NewgroundsIE(InfoExtractor):
'info_dict': {
'id': '549479',
'ext': 'mp3',
- 'title': 'B7 - BusMode',
+ 'title': 'Burn7 - B7 - BusMode',
'uploader': 'Burn7',
'timestamp': 1378878540,
'upload_date': '20130911',
'duration': 143,
},
}, {
- 'url': 'https://www.newgrounds.com/portal/view/673111',
- 'md5': '3394735822aab2478c31b1004fe5e5bc',
+ 'url': 'https://www.newgrounds.com/portal/view/1',
+ 'md5': 'fbfb40e2dc765a7e830cb251d370d981',
'info_dict': {
- 'id': '673111',
+ 'id': '1',
'ext': 'mp4',
- 'title': 'Dancin',
- 'uploader': 'Squirrelman82',
- 'timestamp': 1460256780,
- 'upload_date': '20160410',
+ 'title': 'Brian-Beaton - Scrotum 1',
+ 'uploader': 'Brian-Beaton',
+ 'timestamp': 955064100,
+ 'upload_date': '20000406',
},
}, {
# source format unavailable, additional mp4 formats
@@ -43,7 +44,7 @@ class NewgroundsIE(InfoExtractor):
'info_dict': {
'id': '689400',
'ext': 'mp4',
- 'title': 'ZTV News Episode 8',
+ 'title': 'Bennettthesage - ZTV News Episode 8',
'uploader': 'BennettTheSage',
'timestamp': 1487965140,
'upload_date': '20170224',
@@ -55,42 +56,73 @@ class NewgroundsIE(InfoExtractor):
def _real_extract(self, url):
media_id = self._match_id(url)
-
+ formats = []
+ uploader = None
webpage = self._download_webpage(url, media_id)
title = self._html_search_regex(
r'<title>([^>]+)</title>', webpage, 'title')
- media_url = self._parse_json(self._search_regex(
- r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
-
- formats = [{
- 'url': media_url,
- 'format_id': 'source',
- 'quality': 1,
- }]
-
- max_resolution = int_or_none(self._search_regex(
- r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
- default=None))
- if max_resolution:
- url_base = media_url.rpartition('.')[0]
- for resolution in (360, 720, 1080):
- if resolution > max_resolution:
- break
- formats.append({
- 'url': '%s.%dp.mp4' % (url_base, resolution),
- 'format_id': '%dp' % resolution,
- 'height': resolution,
- })
+ media_url_string = self._search_regex(
+ r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False)
+
+ if media_url_string:
+ media_url = self._parse_json(media_url_string, media_id)
+ formats = [{
+ 'url': media_url,
+ 'format_id': 'source',
+ 'quality': 1,
+ }]
+
+ max_resolution = int_or_none(self._search_regex(
+ r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
+ default=None))
+ if max_resolution:
+ url_base = media_url.rpartition('.')[0]
+ for resolution in (360, 720, 1080):
+ if resolution > max_resolution:
+ break
+ formats.append({
+ 'url': '%s.%dp.mp4' % (url_base, resolution),
+ 'format_id': '%dp' % resolution,
+ 'height': resolution,
+ })
+ else:
+ video_id = int_or_none(self._search_regex(
+ r'data-movie-id=\\"([0-9]+)\\"', webpage, ''))
+ if not video_id:
+ raise ExtractorError('Could not extract media data')
+
+ url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id
+ headers = {
+ 'Accept': 'application/json',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest'
+ }
+ json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False)
+ if not json_video:
+ raise ExtractorError('Could not fetch media data')
+
+ uploader = json_video.get('author')
+ title = json_video.get('title')
+ media_formats = json_video.get('sources', [])
+ for media_format in media_formats:
+ media_sources = media_formats[media_format]
+ for source in media_sources:
+ formats.append({
+ 'format_id': media_format,
+ 'quality': int_or_none(media_format[:-1]),
+ 'url': source.get('src')
+ })
self._check_formats(formats, media_id)
self._sort_formats(formats)
- uploader = self._html_search_regex(
- (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
- r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
- fatal=False)
+ if not uploader:
+ uploader = self._html_search_regex(
+ (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
+ fatal=False)
timestamp = unified_timestamp(self._html_search_regex(
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
@@ -109,6 +141,9 @@ class NewgroundsIE(InfoExtractor):
if '<dd>Song' in webpage:
formats[0]['vcodec'] = 'none'
+ if uploader:
+ title = "%s - %s" % (uploader, title)
+
return {
'id': media_id,
'title': title,
diff --git a/youtube_dlc/extractor/nitter.py b/youtube_dlc/extractor/nitter.py
new file mode 100644
index 000000000..3191543ed
--- /dev/null
+++ b/youtube_dlc/extractor/nitter.py
@@ -0,0 +1,167 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ parse_count,
+ unified_strdate,
+ unified_timestamp,
+ remove_end,
+ determine_ext,
+)
+import re
+
+
+class NitterIE(InfoExtractor):
+ # Taken from https://github.com/zedeus/nitter/wiki/Instances
+ INSTANCES = ('nitter.net',
+ 'nitter.snopyta.org',
+ 'nitter.42l.fr',
+ 'nitter.nixnet.services',
+ 'nitter.13ad.de',
+ 'nitter.pussthecat.org',
+ 'nitter.mastodont.cat',
+ 'nitter.dark.fail',
+ 'nitter.tedomum.net',
+ 'nitter.cattube.org',
+ 'nitter.fdn.fr',
+ 'nitter.1d4.us',
+ 'nitter.kavin.rocks',
+ 'tweet.lambda.dance',
+ 'nitter.cc',
+ 'nitter.weaponizedhumiliation.com',
+ '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
+ 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
+ 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')
+
+ _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
+ _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
+ current_instance = INSTANCES[0] # the test and official instance
+ _TESTS = [
+ {
+ # GIF (wrapped in mp4)
+ 'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
+ 'info_dict': {
+ 'id': '1314279897502629888',
+ 'ext': 'mp4',
+ 'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
+ 'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Firefox 🔥',
+ 'uploader_id': 'firefox',
+ 'uploader_url': 'https://' + current_instance + '/firefox',
+ 'upload_date': '20201008',
+ 'timestamp': 1602183720,
+ },
+ }, { # normal video
+ 'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
+ 'info_dict': {
+ 'id': '1299715685392756737',
+ 'ext': 'mp4',
+ 'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
+ 'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Le Doc',
+ 'uploader_id': 'Le___Doc',
+ 'uploader_url': 'https://' + current_instance + '/Le___Doc',
+ 'upload_date': '20200829',
+ 'timestamp': 1598711341,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ }, { # video embed in a "Streaming Political Ads" box
+ 'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
+ 'info_dict': {
+ 'id': '1321147074491092994',
+ 'ext': 'mp4',
+ 'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
+ 'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mozilla',
+ 'uploader_id': 'mozilla',
+ 'uploader_url': 'https://' + current_instance + '/mozilla',
+ 'upload_date': '20201027',
+ 'timestamp': 1603820982
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ parsed_url = compat_urlparse.urlparse(url)
+ base_url = parsed_url.scheme + '://' + parsed_url.netloc
+
+ self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
+ ext = determine_ext(video_url)
+
+ if ext == 'unknown_video':
+ formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ else:
+ formats = [{
+ 'url': video_url,
+ 'ext': ext
+ }]
+
+ title = (
+ self._og_search_description(webpage).replace('\n', ' ')
+ or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
+ description = title
+
+ mobj = re.match(self._VALID_URL, url)
+ uploader_id = (
+ mobj.group('uploader_id')
+ or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))
+
+ if uploader_id:
+ uploader_url = base_url + '/' + uploader_id
+
+ uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+
+ if uploader:
+ title = uploader + ' - ' + title
+
+ view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
+ like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
+ repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
+ comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
+
+ thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url')
+ or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
+
+ thumbnail = remove_end(thumbnail, '%3Asmall') # if parsed with regex, it should contain this
+
+ thumbnails = []
+ thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig')
+ for id in thumbnail_ids:
+ thumbnails.append({
+ 'id': id,
+ 'url': thumbnail + '%3A' + id,
+ })
+
+ date = self._html_search_regex(r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
+ upload_date = unified_strdate(date)
+ timestamp = unified_timestamp(date)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'repost_count': repost_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }
diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py
new file mode 100644
index 000000000..3c7bd465d
--- /dev/null
+++ b/youtube_dlc/extractor/skyitalia.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SkyItaliaBaseIE(InfoExtractor):
+ _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
+ _RES = {
+ 'low': [426, 240],
+ 'med': [640, 360],
+ 'high': [854, 480],
+ 'hd': [1280, 720]
+ }
+
+ def _extract_video_id(self, url):
+ webpage = self._download_webpage(url, 'skyitalia')
+ video_id = self._html_search_regex(
+ [r'data-videoid=\"(\d+)\"',
+ r'http://player\.sky\.it/social\?id=(\d+)\&'],
+ webpage, 'video_id')
+ if video_id:
+ return video_id
+ raise ExtractorError('Video ID not found.')
+
+ def _get_formats(self, video_id, token):
+ data_url = self._GET_VIDEO_DATA.replace('{id}', video_id)
+ data_url = data_url.replace('{token}', token)
+ video_data = self._parse_json(
+ self._download_webpage(data_url, video_id),
+ video_id)
+
+ formats = []
+ for q, r in self._RES.items():
+ key = 'web_%s_url' % q
+ if key not in video_data:
+ continue
+ formats.append({
+ 'url': video_data.get(key),
+ 'format_id': q,
+ 'width': r[0],
+ 'height': r[1]
+ })
+
+ self._sort_formats(formats)
+ title = video_data.get('title')
+ thumb = video_data.get('thumb')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumb,
+ 'formats': formats
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if video_id == 'None':
+ video_id = self._extract_video_id(url)
+ return self._get_formats(video_id, self._TOKEN)
+
+
+class SkyItaliaIE(SkyItaliaBaseIE):
+ IE_NAME = 'sky.it'
+ _VALID_URL = r'''(?x)https?://
+ (?P<ie>sport|tg24|video)
+ \.sky\.it/(?:.+?)
+ (?P<id>[0-9]{6})?
+ (?:$|\?)'''
+
+ _TESTS = [{
+ 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
+ 'md5': '9c03b590b06e5952d8051f0e02b0feca',
+ 'info_dict': {
+ 'id': '616162',
+ 'ext': 'mp4',
+ 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
+ }
+ }, {
+ 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
+ 'md5': '9c03b590b06e5952d8051f0e02b0feca',
+ 'info_dict': {
+ 'id': '616162',
+ 'ext': 'mp4',
+ 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
+ }
+ }, {
+ 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
+ 'md5': 'caa25e62dadb529bc5e0b078da99f854',
+ 'info_dict': {
+ 'id': '615904',
+ 'ext': 'mp4',
+ 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
+ }
+ }, {
+ 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
+ 'only_matching': True,
+ }]
+ _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'
+
+
+class SkyArteItaliaIE(SkyItaliaBaseIE):
+ IE_NAME = 'arte.sky.it'
+ _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'
+ _TEST = {
+ 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
+ 'md5': '2f22513a89f45142f2746f878d690647',
+ 'info_dict': {
+ 'id': '612888',
+ 'ext': 'mp4',
+ 'title': 'I maestri del cinema Federico Felini',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
+ }
+ }
+ _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'
diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py
index 01b253dcb..081c5e2e7 100644
--- a/youtube_dlc/extractor/xtube.py
+++ b/youtube_dlc/extractor/xtube.py
@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
int_or_none,
js_to_json,
orderedSet,
@@ -33,28 +34,12 @@ class XTubeIE(InfoExtractor):
'title': 'strange erotica',
'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
- 'duration': 450,
+ 'duration': 449,
'view_count': int,
'comment_count': int,
'age_limit': 18,
}
}, {
- # FLV videos with duplicated formats
- 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
- 'md5': 'a406963eb349dd43692ec54631efd88b',
- 'info_dict': {
- 'id': '9299752',
- 'display_id': 'A-Super-Run-Part-1-YT',
- 'ext': 'flv',
- 'title': 'A Super Run - Part 1 (YT)',
- 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
- 'uploader': 'tshirtguy59',
- 'duration': 579,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- },
- }, {
# new URL schema
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
'only_matching': True,
@@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor):
title, thumbnail, duration = [None] * 3
- config = self._parse_json(self._search_regex(
- r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
- default='{}'), video_id, transform_source=js_to_json, fatal=False)
- if config:
- config = config.get('mainRoll')
- if isinstance(config, dict):
- title = config.get('title')
- thumbnail = config.get('poster')
- duration = int_or_none(config.get('duration'))
- sources = config.get('sources') or config.get('format')
+ json_config_string = self._search_regex(
+ r'playerConf=({.+?}),loaderConf',
+ webpage, 'config', default=None)
+ if not json_config_string:
+ raise ExtractorError("Could not extract video player data")
+
+ json_config_string = json_config_string.replace("!0", "true").replace("!1", "false")
+
+ config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False)
+ if not config:
+ raise ExtractorError("Could not extract video player data")
+
+ config = config.get('mainRoll')
+ if isinstance(config, dict):
+ title = config.get('title')
+ thumbnail = config.get('poster')
+ duration = int_or_none(config.get('duration'))
+ sources = config.get('sources') or config.get('format')
if not isinstance(sources, dict):
sources = self._parse_json(self._search_regex(
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 5fd22081a..d605f1e74 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -1366,14 +1366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': ext,
})
sub_lang_list[lang] = sub_formats
- """ if has_live_chat_replay:
+ if has_live_chat_replay:
sub_lang_list['live_chat'] = [
{
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat_replay',
},
- ] """
+ ]
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
@@ -1406,6 +1406,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
+ def _get_music_metadata_from_yt_initial(self, yt_initial):
+ music_metadata = []
+ key_map = {
+ 'Album': 'album',
+ 'Artist': 'artist',
+ 'Song': 'track'
+ }
+ contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
+ if type(contents) is list:
+ for content in contents:
+ music_track = {}
+ if type(content) is not dict:
+ continue
+ videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
+ if type(videoSecondaryInfoRenderer) is not dict:
+ continue
+ rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
+ if type(rows) is not list:
+ continue
+ for row in rows:
+ metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
+ if type(metadataRowRenderer) is not dict:
+ continue
+ key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
+ value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
+ try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
+ if type(key) is not str or type(value) is not str:
+ continue
+ if key in key_map:
+ if key_map[key] in music_track:
+ # we've started on a new track
+ music_metadata.append(music_track)
+ music_track = {}
+ music_track[key_map[key]] = value
+ if len(music_track.keys()):
+ music_metadata.append(music_track)
+ return music_metadata
+
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
@@ -2328,6 +2366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if release_year:
release_year = int(release_year)
+ yt_initial = self._get_yt_initial_data(video_id, video_webpage)
+ if yt_initial:
+ music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
+ if len(music_metadata):
+ album = music_metadata[0].get('album')
+ artist = music_metadata[0].get('artist')
+ track = music_metadata[0].get('track')
+
m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
video_webpage)
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
index 1d7a7fed2..66b45220c 100644
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -582,7 +582,7 @@ def parseOpts(overrideArguments=None):
'along with --min-sleep-interval.'))
workarounds.add_option(
'--sleep-subtitles',
- dest='sleep_interval_subtitles', action='store_true', default=False,
+ dest='sleep_interval_subtitles', action='store_true', default=0,
help='Enforce sleep interval on subtitles as well')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
diff --git a/youtube_dlc/update.py b/youtube_dlc/update.py
index e49e09c17..b358e902b 100644
--- a/youtube_dlc/update.py
+++ b/youtube_dlc/update.py
@@ -37,10 +37,26 @@ def update_self(to_screen, verbose, opener):
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
+ def sha256sum():
+ h = hashlib.sha256()
+ b = bytearray(128 * 1024)
+ mv = memoryview(b)
+ with open(os.path.realpath(sys.executable), 'rb', buffering=0) as f:
+ for n in iter(lambda: f.readinto(mv), 0):
+ h.update(mv[:n])
+ return h.hexdigest()
+
+ to_screen('Current Build Hash %s' % sha256sum())
+
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
+ # compiled file.exe can find itself by
+ # to_screen(os.path.basename(sys.executable))
+ # and path to py or exe
+ # to_screen(os.path.realpath(sys.executable))
+
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
@@ -48,6 +64,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
if newversion == __version__:
to_screen('youtube-dlc is up-to-date (' + __version__ + ')')
@@ -61,6 +78,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
if 'signature' not in versions_info:
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
@@ -109,6 +127,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
@@ -155,6 +174,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()