about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .github/workflows/build.yml 32
-rw-r--r-- README.md 4
-rw-r--r-- youtube_dlc/YoutubeDL.py 18
-rw-r--r-- youtube_dlc/downloader/common.py 24
-rw-r--r-- youtube_dlc/extractor/bandcamp.py 180
-rw-r--r-- youtube_dlc/extractor/extractors.py 4
-rw-r--r-- youtube_dlc/extractor/mtv.py 7
-rw-r--r-- youtube_dlc/extractor/skyitalia.py 119
-rw-r--r-- youtube_dlc/extractor/tvland.py 2
-rw-r--r-- youtube_dlc/extractor/urplay.py 49
-rw-r--r-- youtube_dlc/extractor/xtube.py 47
-rw-r--r-- youtube_dlc/extractor/youtube.py 2
-rw-r--r-- youtube_dlc/options.py 2
-rw-r--r-- youtube_dlc/update.py 20
14 files changed, 346 insertions, 164 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8db7e92f2..f5d94dc49 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -57,7 +57,7 @@ jobs:
id: sha2_file
env:
SHA2: ${{ hashFiles('youtube-dlc') }}
- run: echo "::set-output name=sha2_unix::${env:SHA2}"
+ run: echo "::set-output name=sha2_unix::$SHA2"
- name: Install dependencies for pypi
run: |
python -m pip install --upgrade pip
@@ -98,12 +98,12 @@ jobs:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./dist/youtube-dlc.exe
asset_name: youtube-dlc.exe
- asset_content_type: application/octet-stream
+ asset_content_type: application/vnd.microsoft.portable-executable
- name: Get SHA2-256SUMS for youtube-dlc.exe
id: sha2_file_win
env:
- SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }}
- run: echo "::set-output name=sha2_windows::${env:SHA2}"
+ SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }}
+ run: echo "::set-output name=sha2_windows::$SHA2_win"
build_windows32:
@@ -133,12 +133,12 @@ jobs:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./dist/youtube-dlc_x86.exe
asset_name: youtube-dlc_x86.exe
- asset_content_type: application/octet-stream
+ asset_content_type: application/vnd.microsoft.portable-executable
- name: Get SHA2-256SUMS for youtube-dlc_x86.exe
id: sha2_file_win32
env:
- SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
- run: echo "::set-output name=sha2_windows32::${env:SHA2}"
+ SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
+ run: echo "::set-output name=sha2_windows32::$SHA2_win32"
- name: Make SHA2-256SUMS file
env:
SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }}
@@ -146,6 +146,18 @@ jobs:
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
run: |
- echo "$SHA2_WINDOWS youtube-dlc.exe" > SHA2-256SUMS
- echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS
- echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS
+ echo "version:$YTDLC_VERSION" >> SHA2-256SUMS
+ echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS
+ echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS
+ echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS
+
+ - name: Upload 256SUMS file
+ id: upload-sums
+ uses: actions/upload-release-asset@v1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+ upload_url: ${{ needs.build_unix.outputs.upload_url }}
+ asset_path: ./SHA2-256SUMS
+ asset_name: SHA2-256SUMS
+ asset_content_type: text/plain
diff --git a/README.md b/README.md
index 9d40d2631..08bddaa18 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.com/blackjack4494/youtube-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/youtube-dlc)
+[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
@@ -7,7 +7,7 @@
youtube-dlc - download videos from youtube.com or other video platforms.
-youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://github.com/ytdl-org/youtube-dl/issues/26462)
+youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
- [INSTALLATION](#installation)
- [DESCRIPTION](#description)
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
index fc351db0d..dd55ba0f2 100644
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -801,7 +801,7 @@ class YoutubeDL(object):
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
@@ -836,6 +836,11 @@ class YoutubeDL(object):
'_type': 'compat_list',
'entries': ie_result,
}
+ if info_dict:
+ if info_dict.get('id'):
+ ie_result['id'] = info_dict['id']
+ if info_dict.get('title'):
+ ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
@@ -898,7 +903,7 @@ class YoutubeDL(object):
# We have to add extra_info to the results because it may be
# contained in a playlist
return self.extract_info(ie_result['url'],
- download,
+ download, info_dict=ie_result,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'url_transparent':
@@ -1852,13 +1857,13 @@ class YoutubeDL(object):
self.report_error('Cannot write annotations file: ' + annofn)
return
- def dl(name, info):
+ def dl(name, info, subtitle=False):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+ return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -1867,7 +1872,7 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
+ # ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1886,6 +1891,8 @@ class YoutubeDL(object):
return
else:
try:
+ dl(sub_filename, sub_info, subtitle=True)
+ '''
if self.params.get('sleep_interval_subtitles', False):
dl(sub_filename, sub_info)
else:
@@ -1893,6 +1900,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
+ '''
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py
index 31c286458..460364a0b 100644
--- a/youtube_dlc/downloader/common.py
+++ b/youtube_dlc/downloader/common.py
@@ -326,7 +326,7 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
- def download(self, filename, info_dict):
+ def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
@@ -353,16 +353,22 @@ class FileDownloader(object):
})
return True
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ if subtitle is False:
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+ else:
+ sleep_interval_sub = self.params.get('sleep_interval_subtitles')
self.to_screen(
'[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
-
+ int(sleep_interval_sub)))
+ time.sleep(sleep_interval_sub)
return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict):
diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py
index 9dbafe86d..0e7492764 100644
--- a/youtube_dlc/extractor/bandcamp.py
+++ b/youtube_dlc/extractor/bandcamp.py
@@ -25,10 +25,48 @@ from ..utils import (
)
-class BandcampIE(InfoExtractor):
+class BandcampBaseIE(InfoExtractor):
+ """Provide base functions for Bandcamp extractors"""
+
+ def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id):
+ json_string = self._html_search_regex(
+ r' data-%s="([^"]*)' % suffix,
+ webpage, '%s json' % suffix, default='{}')
+
+ return self._parse_json(json_string, video_id)
+
+ def _parse_json_track(self, json):
+ formats = []
+ file_ = json.get('file')
+ if isinstance(file_, dict):
+ for format_id, format_url in file_.items():
+ if not url_or_none(format_url):
+ continue
+ ext, abr_str = format_id.split('-', 1)
+ formats.append({
+ 'format_id': format_id,
+ 'url': self._proto_relative_url(format_url, 'http:'),
+ 'ext': ext,
+ 'vcodec': 'none',
+ 'acodec': ext,
+ 'abr': int_or_none(abr_str),
+ })
+
+ return {
+ 'duration': float_or_none(json.get('duration')),
+ 'id': str_or_none(json.get('track_id') or json.get('id')),
+ 'title': json.get('title'),
+ 'title_link': json.get('title_link'),
+ 'number': int_or_none(json.get('track_num')),
+ 'formats': formats
+ }
+
+
+class BandcampIE(BandcampBaseIE):
+ IE_NAME = "Bandcamp:track"
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
_TESTS = [{
- 'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song',
+ 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
'info_dict': {
'id': '1812978515',
@@ -85,52 +123,32 @@ class BandcampIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
+ url_track_title = title
webpage = self._download_webpage(url, title)
thumbnail = self._html_search_meta('og:image', webpage, default=None)
- track_id = None
- track = None
- track_number = None
- duration = None
+ json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title)
+ json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title)
- formats = []
- trackinfo_block = self._html_search_regex(
- r'trackinfo(?:["\']|&quot;):\[\s*({.+?})\s*\],(?:["\']|&quot;)',
- webpage, 'track info', default='{}')
-
- track_info = self._parse_json(trackinfo_block, title)
- if track_info:
- file_ = track_info.get('file')
- if isinstance(file_, dict):
- for format_id, format_url in file_.items():
- if not url_or_none(format_url):
- continue
- ext, abr_str = format_id.split('-', 1)
- formats.append({
- 'format_id': format_id,
- 'url': self._proto_relative_url(format_url, 'http:'),
- 'ext': ext,
- 'vcodec': 'none',
- 'acodec': ext,
- 'abr': int_or_none(abr_str),
- })
-
- track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
- track_number = int_or_none(track_info.get('track_num'))
- duration = float_or_none(track_info.get('duration'))
-
- def extract(key):
- data = self._html_search_regex(
- r',(["\']|&quot;)%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key,
- webpage, key, default=None, group='value')
- return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data
-
- track = extract('title')
- artist = extract('artist')
- album = extract('album_title')
- timestamp = unified_timestamp(
- extract('publish_date') or extract('album_publish_date'))
- release_date = unified_strdate(extract('album_release_date'))
+ json_tracks = json_tralbum.get('trackinfo')
+ if not json_tracks:
+ raise ExtractorError('Could not extract track')
+
+ track = self._parse_json_track(json_tracks[0])
+ artist = json_tralbum.get('artist')
+ album_title = json_embed.get('album_title')
+
+ json_album = json_tralbum.get('packages')
+ if json_album:
+ json_album = json_album[0]
+ album_publish_date = json_album.get('album_publish_date')
+ album_release_date = json_album.get('album_release_date')
+ else:
+ album_publish_date = None
+ album_release_date = json_tralbum.get('album_release_date')
+
+ timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date)
+ release_date = unified_strdate(album_release_date)
download_link = self._search_regex(
r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
@@ -155,8 +173,6 @@ class BandcampIE(InfoExtractor):
if info:
downloads = info.get('downloads')
if isinstance(downloads, dict):
- if not track:
- track = info.get('title')
if not artist:
artist = info.get('artist')
if not thumbnail:
@@ -190,7 +206,7 @@ class BandcampIE(InfoExtractor):
retry_url = url_or_none(stat.get('retry_url'))
if not retry_url:
continue
- formats.append({
+ track['formats'].append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
@@ -199,32 +215,28 @@ class BandcampIE(InfoExtractor):
'vcodec': 'none',
})
- self._sort_formats(formats)
+ self._sort_formats(track['formats'])
- title = '%s - %s' % (artist, track) if artist else track
-
- if not duration:
- duration = float_or_none(self._html_search_meta(
- 'duration', webpage, default=None))
+ title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title')
return {
- 'id': track_id,
- 'title': title,
+ 'album': album_title,
+ 'artist': artist,
+ 'duration': track['duration'],
+ 'formats': track['formats'],
+ 'id': track['id'],
+ 'release_date': release_date,
'thumbnail': thumbnail,
- 'uploader': artist,
'timestamp': timestamp,
- 'release_date': release_date,
- 'duration': duration,
- 'track': track,
- 'track_number': track_number,
- 'track_id': track_id,
- 'artist': artist,
- 'album': album,
- 'formats': formats,
+ 'title': title,
+ 'track': track['title'],
+ 'track_id': track['id'],
+ 'track_number': track['number'],
+ 'uploader': artist
}
-class BandcampAlbumIE(InfoExtractor):
+class BandcampAlbumIE(BandcampBaseIE):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
@@ -305,34 +317,32 @@ class BandcampAlbumIE(InfoExtractor):
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
- track_elements = re.findall(
- r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
- if not track_elements:
- raise ExtractorError('The page doesn\'t contain any tracks')
+
+ json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id)
+ json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id)
+
+ json_tracks = json_tralbum.get('trackinfo')
+ if not json_tracks:
+ raise ExtractorError('Could not extract album tracks')
+
+ album_title = json_embed.get('album_title')
+
# Only tracks with duration info have songs
+ tracks = [self._parse_json_track(track) for track in json_tracks]
entries = [
self.url_result(
- compat_urlparse.urljoin(url, t_path),
- ie=BandcampIE.ie_key(),
- video_title=self._search_regex(
- r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
- elem_content, 'track title', fatal=False))
- for elem_content, t_path in track_elements
- if self._html_search_meta('duration', elem_content, default=None)]
-
- title = self._html_search_regex(
- r'album_title\s*(?:&quot;|["\']):\s*(&quot;|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1',
- webpage, 'title', fatal=False, group='album')
-
- if title:
- title = title.replace(r'\"', '"')
+ compat_urlparse.urljoin(url, track['title_link']),
+ ie=BandcampIE.ie_key(), video_id=track['id'],
+ video_title=track['title'])
+ for track in tracks
+ if track.get('duration')]
return {
'_type': 'playlist',
'uploader_id': uploader_id,
'id': playlist_id,
- 'title': title,
- 'entries': entries,
+ 'title': album_title,
+ 'entries': entries
}
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
index d31edd7c8..a0c7d0f42 100644
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -1037,6 +1037,10 @@ from .sky import (
SkyNewsIE,
SkySportsIE,
)
+from .skyitalia import (
+ SkyArteItaliaIE,
+ SkyItaliaIE,
+)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py
index 6b3658397..04cc95b6a 100644
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -289,7 +289,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return mgid
- def _extract_mgid(self, webpage, url, data_zone=None):
+ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@@ -300,7 +300,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
except RegexNotFoundError:
mgid = None
- title = self._match_id(url)
+ if not title:
+ title = url_basename(url)
try:
window_data = self._parse_json(self._search_regex(
@@ -336,7 +337,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
- mgid = self._extract_mgid(webpage, url)
+ mgid = self._extract_mgid(webpage, url, title=title)
videos_info = self._get_videos_info(mgid, url=url)
return videos_info
diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py
new file mode 100644
index 000000000..3c7bd465d
--- /dev/null
+++ b/youtube_dlc/extractor/skyitalia.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SkyItaliaBaseIE(InfoExtractor):
+ _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
+ _RES = {
+ 'low': [426, 240],
+ 'med': [640, 360],
+ 'high': [854, 480],
+ 'hd': [1280, 720]
+ }
+
+ def _extract_video_id(self, url):
+ webpage = self._download_webpage(url, 'skyitalia')
+ video_id = self._html_search_regex(
+ [r'data-videoid=\"(\d+)\"',
+ r'http://player\.sky\.it/social\?id=(\d+)\&'],
+ webpage, 'video_id')
+ if video_id:
+ return video_id
+ raise ExtractorError('Video ID not found.')
+
+ def _get_formats(self, video_id, token):
+ data_url = self._GET_VIDEO_DATA.replace('{id}', video_id)
+ data_url = data_url.replace('{token}', token)
+ video_data = self._parse_json(
+ self._download_webpage(data_url, video_id),
+ video_id)
+
+ formats = []
+ for q, r in self._RES.items():
+ key = 'web_%s_url' % q
+ if key not in video_data:
+ continue
+ formats.append({
+ 'url': video_data.get(key),
+ 'format_id': q,
+ 'width': r[0],
+ 'height': r[1]
+ })
+
+ self._sort_formats(formats)
+ title = video_data.get('title')
+ thumb = video_data.get('thumb')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumb,
+ 'formats': formats
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if video_id == 'None':
+ video_id = self._extract_video_id(url)
+ return self._get_formats(video_id, self._TOKEN)
+
+
+class SkyItaliaIE(SkyItaliaBaseIE):
+ IE_NAME = 'sky.it'
+ _VALID_URL = r'''(?x)https?://
+ (?P<ie>sport|tg24|video)
+ \.sky\.it/(?:.+?)
+ (?P<id>[0-9]{6})?
+ (?:$|\?)'''
+
+ _TESTS = [{
+ 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
+ 'md5': '9c03b590b06e5952d8051f0e02b0feca',
+ 'info_dict': {
+ 'id': '616162',
+ 'ext': 'mp4',
+ 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
+ }
+ }, {
+ 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
+ 'md5': '9c03b590b06e5952d8051f0e02b0feca',
+ 'info_dict': {
+ 'id': '616162',
+ 'ext': 'mp4',
+ 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
+ }
+ }, {
+ 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
+ 'md5': 'caa25e62dadb529bc5e0b078da99f854',
+ 'info_dict': {
+ 'id': '615904',
+ 'ext': 'mp4',
+ 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
+ }
+ }, {
+ 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
+ 'only_matching': True,
+ }]
+ _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'
+
+
+class SkyArteItaliaIE(SkyItaliaBaseIE):
+ IE_NAME = 'arte.sky.it'
+ _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'
+ _TEST = {
+ 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
+ 'md5': '2f22513a89f45142f2746f878d690647',
+ 'info_dict': {
+ 'id': '612888',
+ 'ext': 'mp4',
+ 'title': 'I maestri del cinema Federico Felini',
+ 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
+ }
+ }
+ _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'
diff --git a/youtube_dlc/extractor/tvland.py b/youtube_dlc/extractor/tvland.py
index 791144128..225b6b078 100644
--- a/youtube_dlc/extractor/tvland.py
+++ b/youtube_dlc/extractor/tvland.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
from .spike import ParamountNetworkIE
+# TODO: Remove - Reason not used anymore - Service moved to youtube
+
class TVLandIE(ParamountNetworkIE):
IE_NAME = 'tvland.com'
diff --git a/youtube_dlc/extractor/urplay.py b/youtube_dlc/extractor/urplay.py
index 6030b7cb5..4bc2b78fb 100644
--- a/youtube_dlc/extractor/urplay.py
+++ b/youtube_dlc/extractor/urplay.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_timestamp
+import re
class URPlayIE(InfoExtractor):
@@ -13,10 +14,10 @@ class URPlayIE(InfoExtractor):
'info_dict': {
'id': '203704',
'ext': 'mp4',
- 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
+ 'title': 'Om vetenskap, kritiskt tänkande och motstånd',
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
- 'timestamp': 1513512768,
- 'upload_date': '20171217',
+ 'timestamp': 1513292400,
+ 'upload_date': '20171214',
},
}, {
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
@@ -37,35 +38,41 @@ class URPlayIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- urplayer_data = self._parse_json(self._search_regex(
- r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
- host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
+ urplayer_data = re.sub("&quot;", "\"", self._search_regex(
+ r'components\/Player\/Player\" data-react-props=\"({.+?})\"',
+ webpage, 'urplayer data'))
+ urplayer_data = self._parse_json(urplayer_data, video_id)
+ for i in range(len(urplayer_data['accessibleEpisodes'])):
+ if urplayer_data.get('accessibleEpisodes', {})[i].get('id') == int(video_id):
+ urplayer_data = urplayer_data['accessibleEpisodes'][i]
+ break
+ host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
formats = []
- for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
- file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
- if file_http:
+ urplayer_streams = urplayer_data.get("streamingInfo")
+ for quality in ('sd'), ('hd'):
+ location = (urplayer_streams.get("raw", {}).get(quality, {}).get("location")
+ or urplayer_streams.get("sweComplete", {}).get(quality, {}).get("location"))
+ if location:
formats.extend(self._extract_wowza_formats(
- 'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp']))
+ 'http://%s/%s/playlist.m3u8' % (host, location), video_id,
+ skip_protocols=['f4m', 'rtmp', 'rtsp']))
self._sort_formats(formats)
-
subtitles = {}
- for subtitle in urplayer_data.get('subtitles', []):
- subtitle_url = subtitle.get('file')
- kind = subtitle.get('kind')
- if not subtitle_url or (kind and kind != 'captions'):
- continue
- subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
- 'url': subtitle_url,
+ subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location")
+ if subs:
+ subtitles.setdefault('Svenska', []).append({
+ 'url': subs,
})
return {
'id': video_id,
'title': urplayer_data['title'],
'description': self._og_search_description(webpage),
- 'thumbnail': urplayer_data.get('image'),
- 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')),
- 'series': urplayer_data.get('series_title'),
+ 'thumbnail': urplayer_data.get('image', {}).get('1280x720'),
+ 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'),
+ webpage, 'timestamp')),
+ 'series': urplayer_data.get('seriesTitle'),
'subtitles': subtitles,
'formats': formats,
}
diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py
index 01b253dcb..081c5e2e7 100644
--- a/youtube_dlc/extractor/xtube.py
+++ b/youtube_dlc/extractor/xtube.py
@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
int_or_none,
js_to_json,
orderedSet,
@@ -33,28 +34,12 @@ class XTubeIE(InfoExtractor):
'title': 'strange erotica',
'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
- 'duration': 450,
+ 'duration': 449,
'view_count': int,
'comment_count': int,
'age_limit': 18,
}
}, {
- # FLV videos with duplicated formats
- 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
- 'md5': 'a406963eb349dd43692ec54631efd88b',
- 'info_dict': {
- 'id': '9299752',
- 'display_id': 'A-Super-Run-Part-1-YT',
- 'ext': 'flv',
- 'title': 'A Super Run - Part 1 (YT)',
- 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
- 'uploader': 'tshirtguy59',
- 'duration': 579,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- },
- }, {
# new URL schema
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
'only_matching': True,
@@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor):
title, thumbnail, duration = [None] * 3
- config = self._parse_json(self._search_regex(
- r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
- default='{}'), video_id, transform_source=js_to_json, fatal=False)
- if config:
- config = config.get('mainRoll')
- if isinstance(config, dict):
- title = config.get('title')
- thumbnail = config.get('poster')
- duration = int_or_none(config.get('duration'))
- sources = config.get('sources') or config.get('format')
+ json_config_string = self._search_regex(
+ r'playerConf=({.+?}),loaderConf',
+ webpage, 'config', default=None)
+ if not json_config_string:
+ raise ExtractorError("Could not extract video player data")
+
+ json_config_string = json_config_string.replace("!0", "true").replace("!1", "false")
+
+ config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False)
+ if not config:
+ raise ExtractorError("Could not extract video player data")
+
+ config = config.get('mainRoll')
+ if isinstance(config, dict):
+ title = config.get('title')
+ thumbnail = config.get('poster')
+ duration = int_or_none(config.get('duration'))
+ sources = config.get('sources') or config.get('format')
if not isinstance(sources, dict):
sources = self._parse_json(self._search_regex(
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 4fb49b864..ccfaa733d 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -2051,7 +2051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if cipher:
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+ ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
jsplayer_url_json = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
index 1d7a7fed2..66b45220c 100644
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -582,7 +582,7 @@ def parseOpts(overrideArguments=None):
'along with --min-sleep-interval.'))
workarounds.add_option(
'--sleep-subtitles',
- dest='sleep_interval_subtitles', action='store_true', default=False,
+ dest='sleep_interval_subtitles', action='store_true', default=0,
help='Enforce sleep interval on subtitles as well')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
diff --git a/youtube_dlc/update.py b/youtube_dlc/update.py
index e49e09c17..b358e902b 100644
--- a/youtube_dlc/update.py
+++ b/youtube_dlc/update.py
@@ -37,10 +37,26 @@ def update_self(to_screen, verbose, opener):
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
+ def sha256sum():
+ h = hashlib.sha256()
+ b = bytearray(128 * 1024)
+ mv = memoryview(b)
+ with open(os.path.realpath(sys.executable), 'rb', buffering=0) as f:
+ for n in iter(lambda: f.readinto(mv), 0):
+ h.update(mv[:n])
+ return h.hexdigest()
+
+ to_screen('Current Build Hash %s' % sha256sum())
+
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
+ # compiled file.exe can find itself by
+ # to_screen(os.path.basename(sys.executable))
+ # and path to py or exe
+ # to_screen(os.path.realpath(sys.executable))
+
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
@@ -48,6 +64,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
if newversion == __version__:
to_screen('youtube-dlc is up-to-date (' + __version__ + ')')
@@ -61,6 +78,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
if 'signature' not in versions_info:
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
@@ -109,6 +127,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
@@ -155,6 +174,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
+ to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()