diff options
-rw-r--r-- | .github/workflows/build.yml | 32 | ||||
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | youtube_dlc/YoutubeDL.py | 18 | ||||
-rw-r--r-- | youtube_dlc/downloader/common.py | 24 | ||||
-rw-r--r-- | youtube_dlc/extractor/bandcamp.py | 180 | ||||
-rw-r--r-- | youtube_dlc/extractor/extractors.py | 4 | ||||
-rw-r--r-- | youtube_dlc/extractor/mtv.py | 7 | ||||
-rw-r--r-- | youtube_dlc/extractor/skyitalia.py | 119 | ||||
-rw-r--r-- | youtube_dlc/extractor/tvland.py | 2 | ||||
-rw-r--r-- | youtube_dlc/extractor/urplay.py | 49 | ||||
-rw-r--r-- | youtube_dlc/extractor/xtube.py | 47 | ||||
-rw-r--r-- | youtube_dlc/extractor/youtube.py | 2 | ||||
-rw-r--r-- | youtube_dlc/options.py | 2 | ||||
-rw-r--r-- | youtube_dlc/update.py | 20 |
14 files changed, 346 insertions, 164 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8db7e92f2..f5d94dc49 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -57,7 +57,7 @@ jobs: id: sha2_file env: SHA2: ${{ hashFiles('youtube-dlc') }} - run: echo "::set-output name=sha2_unix::${env:SHA2}" + run: echo "::set-output name=sha2_unix::$SHA2" - name: Install dependencies for pypi run: | python -m pip install --upgrade pip @@ -98,12 +98,12 @@ jobs: upload_url: ${{ needs.build_unix.outputs.upload_url }} asset_path: ./dist/youtube-dlc.exe asset_name: youtube-dlc.exe - asset_content_type: application/octet-stream + asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for youtube-dlc.exe id: sha2_file_win env: - SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }} - run: echo "::set-output name=sha2_windows::${env:SHA2}" + SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }} + run: echo "::set-output name=sha2_windows::$SHA2_win" build_windows32: @@ -133,12 +133,12 @@ jobs: upload_url: ${{ needs.build_unix.outputs.upload_url }} asset_path: ./dist/youtube-dlc_x86.exe asset_name: youtube-dlc_x86.exe - asset_content_type: application/octet-stream + asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for youtube-dlc_x86.exe id: sha2_file_win32 env: - SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }} - run: echo "::set-output name=sha2_windows32::${env:SHA2}" + SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }} + run: echo "::set-output name=sha2_windows32::$SHA2_win32" - name: Make SHA2-256SUMS file env: SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }} @@ -146,6 +146,18 @@ jobs: SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }} YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }} run: | - echo "$SHA2_WINDOWS youtube-dlc.exe" > SHA2-256SUMS - echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS - echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS + echo "version:$YTDLC_VERSION" >> SHA2-256SUMS + echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS + echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS + echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS + + - name: Upload 256SUMS file + id: upload-sums + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./SHA2-256SUMS + asset_name: SHA2-256SUMS + asset_content_type: text/plain @@ -1,4 +1,4 @@ -[](https://travis-ci.com/blackjack4494/youtube-dlc) +[](https://travis-ci.com/blackjack4494/yt-dlc) [](https://pypi.org/project/youtube-dlc) [](https://pepy.tech/project/youtube-dlc) @@ -7,7 +7,7 @@ youtube-dlc - download videos from youtube.com or other video platforms. -youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://github.com/ytdl-org/youtube-dl/issues/26462) +youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462) - [INSTALLATION](#installation) - [DESCRIPTION](#description) diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index fc351db0d..dd55ba0f2 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -801,7 +801,7 @@ class YoutubeDL(object): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}, + def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={}, process=True, force_generic_extractor=False): ''' Returns a list with a dictionary for each video we find. @@ -836,6 +836,11 @@ class YoutubeDL(object): '_type': 'compat_list', 'entries': ie_result, } + if info_dict: + if info_dict.get('id'): + ie_result['id'] = info_dict['id'] + if info_dict.get('title'): + ie_result['title'] = info_dict['title'] self.add_default_extra_info(ie_result, ie, url) if process: return self.process_ie_result(ie_result, download, extra_info) @@ -898,7 +903,7 @@ class YoutubeDL(object): # We have to add extra_info to the results because it may be # contained in a playlist return self.extract_info(ie_result['url'], - download, + download, info_dict=ie_result, ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'url_transparent': @@ -1852,13 +1857,13 @@ class YoutubeDL(object): self.report_error('Cannot write annotations file: ' + annofn) return - def dl(name, info): + def dl(name, info, subtitle=False): fd = get_suitable_downloader(info, self.params)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) + return fd.download(name, info, subtitle) subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) @@ -1867,7 +1872,7 @@ class YoutubeDL(object): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] - ie = self.get_info_extractor(info_dict['extractor_key']) + # ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -1886,6 +1891,8 @@ class YoutubeDL(object): return else: try: + dl(sub_filename, sub_info, subtitle=True) + ''' if self.params.get('sleep_interval_subtitles', False): dl(sub_filename, sub_info) else: @@ -1893,6 +1900,7 @@ class YoutubeDL(object): sub_info['url'], info_dict['id'], note=False).read() with io.open(encodeFilename(sub_filename), 'wb') as subfile: subfile.write(sub_data) + ''' except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py index 31c286458..460364a0b 100644 --- a/youtube_dlc/downloader/common.py +++ b/youtube_dlc/downloader/common.py @@ -326,7 +326,7 @@ class FileDownloader(object): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') - def download(self, filename, info_dict): + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ @@ -353,16 +353,22 @@ class FileDownloader(object): }) return True - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + if subtitle is False: + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + else: + sleep_interval_sub = self.params.get('sleep_interval_subtitles') self.to_screen( '[download] Sleeping %s seconds...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) - + int(sleep_interval_sub))) + time.sleep(sleep_interval_sub) return self.real_download(filename, info_dict) def real_download(self, filename, info_dict): diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py index 9dbafe86d..0e7492764 100644 --- a/youtube_dlc/extractor/bandcamp.py +++ b/youtube_dlc/extractor/bandcamp.py @@ -25,10 +25,48 @@ from ..utils import ( ) -class BandcampIE(InfoExtractor): +class BandcampBaseIE(InfoExtractor): + """Provide base functions for Bandcamp extractors""" + + def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id): + json_string = self._html_search_regex( + r' data-%s="([^"]*)' % suffix, + webpage, '%s json' % suffix, default='{}') + + return self._parse_json(json_string, video_id) + + def _parse_json_track(self, json): + formats = [] + file_ = json.get('file') + if isinstance(file_, dict): + for format_id, format_url in file_.items(): + if not url_or_none(format_url): + continue + ext, abr_str = format_id.split('-', 1) + formats.append({ + 'format_id': format_id, + 'url': self._proto_relative_url(format_url, 'http:'), + 'ext': ext, + 'vcodec': 'none', + 'acodec': ext, + 'abr': int_or_none(abr_str), + }) + + return { + 'duration': float_or_none(json.get('duration')), + 'id': str_or_none(json.get('track_id') or json.get('id')), + 'title': json.get('title'), + 'title_link': json.get('title_link'), + 'number': int_or_none(json.get('track_num')), + 'formats': formats + } + + +class BandcampIE(BandcampBaseIE): + IE_NAME = "Bandcamp:track" _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)' _TESTS = [{ - 'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song', + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'md5': 'c557841d5e50261777a6585648adf439', 'info_dict': { 'id': '1812978515', @@ -85,52 +123,32 @@ class BandcampIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') + url_track_title = title webpage = self._download_webpage(url, title) thumbnail = self._html_search_meta('og:image', webpage, default=None) - track_id = None - track = None - track_number = None - duration = None + json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title) + json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title) - formats = [] - trackinfo_block = self._html_search_regex( - r'trackinfo(?:["\']|"):\[\s*({.+?})\s*\],(?:["\']|")', - webpage, 'track info', default='{}') - - track_info = self._parse_json(trackinfo_block, title) - if track_info: - file_ = track_info.get('file') - if isinstance(file_, dict): - for format_id, format_url in file_.items(): - if not url_or_none(format_url): - continue - ext, abr_str = format_id.split('-', 1) - formats.append({ - 'format_id': format_id, - 'url': self._proto_relative_url(format_url, 'http:'), - 'ext': ext, - 'vcodec': 'none', - 'acodec': ext, - 'abr': int_or_none(abr_str), - }) - - track_id = str_or_none(track_info.get('track_id') or track_info.get('id')) - track_number = int_or_none(track_info.get('track_num')) - duration = float_or_none(track_info.get('duration')) - - def extract(key): - data = self._html_search_regex( - r',(["\']|")%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key, - webpage, key, default=None, group='value') - return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data - - track = extract('title') - artist = extract('artist') - album = extract('album_title') - timestamp = unified_timestamp( - extract('publish_date') or extract('album_publish_date')) - release_date = unified_strdate(extract('album_release_date')) + json_tracks = json_tralbum.get('trackinfo') + if not json_tracks: + raise ExtractorError('Could not extract track') + + track = self._parse_json_track(json_tracks[0]) + artist = json_tralbum.get('artist') + album_title = json_embed.get('album_title') + + json_album = json_tralbum.get('packages') + if json_album: + json_album = json_album[0] + album_publish_date = json_album.get('album_publish_date') + album_release_date = json_album.get('album_release_date') + else: + album_publish_date = None + album_release_date = json_tralbum.get('album_release_date') + + timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date) + release_date = unified_strdate(album_release_date) download_link = self._search_regex( r'freeDownloadPage(?:["\']|"):\s*(["\']|")(?P<url>(?:(?!\1).)+)\1', webpage, @@ -155,8 +173,6 @@ class BandcampIE(InfoExtractor): if info: downloads = info.get('downloads') if isinstance(downloads, dict): - if not track: - track = info.get('title') if not artist: artist = info.get('artist') if not thumbnail: @@ -190,7 +206,7 @@ class BandcampIE(InfoExtractor): retry_url = url_or_none(stat.get('retry_url')) if not retry_url: continue - formats.append({ + track['formats'].append({ 'url': self._proto_relative_url(retry_url, 'http:'), 'ext': download_formats.get(format_id), 'format_id': format_id, @@ -199,32 +215,28 @@ class BandcampIE(InfoExtractor): 'vcodec': 'none', }) - self._sort_formats(formats) + self._sort_formats(track['formats']) - title = '%s - %s' % (artist, track) if artist else track - - if not duration: - duration = float_or_none(self._html_search_meta( - 'duration', webpage, default=None)) + title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title') return { - 'id': track_id, - 'title': title, + 'album': album_title, + 'artist': artist, + 'duration': track['duration'], + 'formats': track['formats'], + 'id': track['id'], + 'release_date': release_date, 'thumbnail': thumbnail, - 'uploader': artist, 'timestamp': timestamp, - 'release_date': release_date, - 'duration': duration, - 'track': track, - 'track_number': track_number, - 'track_id': track_id, - 'artist': artist, - 'album': album, - 'formats': formats, + 'title': title, + 'track': track['title'], + 'track_id': track['id'], + 'track_number': track['number'], + 'uploader': artist } -class BandcampAlbumIE(InfoExtractor): +class BandcampAlbumIE(BandcampBaseIE): IE_NAME = 'Bandcamp:album' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' @@ -305,34 +317,32 @@ class BandcampAlbumIE(InfoExtractor): album_id = mobj.group('album_id') playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) - track_elements = re.findall( - r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage) - if not track_elements: - raise ExtractorError('The page doesn\'t contain any tracks') + + json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id) + json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id) + + json_tracks = json_tralbum.get('trackinfo') + if not json_tracks: + raise ExtractorError('Could not extract album tracks') + + album_title = json_embed.get('album_title') + # Only tracks with duration info have songs + tracks = [self._parse_json_track(track) for track in json_tracks] entries = [ self.url_result( - compat_urlparse.urljoin(url, t_path), - ie=BandcampIE.ie_key(), - video_title=self._search_regex( - r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', - elem_content, 'track title', fatal=False)) - for elem_content, t_path in track_elements - if self._html_search_meta('duration', elem_content, default=None)] - - title = self._html_search_regex( - r'album_title\s*(?:"|["\']):\s*("|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1', - webpage, 'title', fatal=False, group='album') - - if title: - title = title.replace(r'\"', '"') + compat_urlparse.urljoin(url, track['title_link']), + ie=BandcampIE.ie_key(), video_id=track['id'], + video_title=track['title']) + for track in tracks + if track.get('duration')] return { '_type': 'playlist', 'uploader_id': uploader_id, 'id': playlist_id, - 'title': title, - 'entries': entries, + 'title': album_title, + 'entries': entries } diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index d31edd7c8..a0c7d0f42 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1037,6 +1037,10 @@ from .sky import ( SkyNewsIE, SkySportsIE, ) +from .skyitalia import ( + SkyArteItaliaIE, + SkyItaliaIE, +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py index 6b3658397..04cc95b6a 100644 --- a/youtube_dlc/extractor/mtv.py +++ b/youtube_dlc/extractor/mtv.py @@ -289,7 +289,7 @@ class MTVServicesInfoExtractor(InfoExtractor): return mgid - def _extract_mgid(self, webpage, url, data_zone=None): + def _extract_mgid(self, webpage, url, title=None, data_zone=None): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -300,7 +300,8 @@ class MTVServicesInfoExtractor(InfoExtractor): except RegexNotFoundError: mgid = None - title = self._match_id(url) + if not title: + title = url_basename(url) try: window_data = self._parse_json(self._search_regex( @@ -336,7 +337,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _real_extract(self, url): title = url_basename(url) webpage = self._download_webpage(url, title) - mgid = self._extract_mgid(webpage, url) + mgid = self._extract_mgid(webpage, url, title=title) videos_info = self._get_videos_info(mgid, url=url) return videos_info diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py new file mode 100644 index 000000000..3c7bd465d --- /dev/null +++ b/youtube_dlc/extractor/skyitalia.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SkyItaliaBaseIE(InfoExtractor): + _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' + _RES = { + 'low': [426, 240], + 'med': [640, 360], + 'high': [854, 480], + 'hd': [1280, 720] + } + + def _extract_video_id(self, url): + webpage = self._download_webpage(url, 'skyitalia') + video_id = self._html_search_regex( + [r'data-videoid=\"(\d+)\"', + r'http://player\.sky\.it/social\?id=(\d+)\&'], + webpage, 'video_id') + if video_id: + return video_id + raise ExtractorError('Video ID not found.') + + def _get_formats(self, video_id, token): + data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) + data_url = data_url.replace('{token}', token) + video_data = self._parse_json( + self._download_webpage(data_url, video_id), + video_id) + + formats = [] + for q, r in self._RES.items(): + key = 'web_%s_url' % q + if key not in video_data: + continue + formats.append({ + 'url': video_data.get(key), + 'format_id': q, + 'width': r[0], + 'height': r[1] + }) + + self._sort_formats(formats) + title = video_data.get('title') + thumb = video_data.get('thumb') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumb, + 'formats': formats + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if video_id == 'None': + video_id = self._extract_video_id(url) + return self._get_formats(video_id, self._TOKEN) + + +class SkyItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'sky.it' + _VALID_URL = r'''(?x)https?:// + (?P<ie>sport|tg24|video) + \.sky\.it/(?:.+?) + (?P<id>[0-9]{6})? + (?:$|\?)''' + + _TESTS = [{ + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', + 'md5': 'caa25e62dadb529bc5e0b078da99f854', + 'info_dict': { + 'id': '615904', + 'ext': 'mp4', + 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', + } + }, { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', + 'only_matching': True, + }] + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' + + +class SkyArteItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'arte.sky.it' + _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$' + _TEST = { + 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', + 'md5': '2f22513a89f45142f2746f878d690647', + 'info_dict': { + 'id': '612888', + 'ext': 'mp4', + 'title': 'I maestri del cinema Federico Felini', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', + } + } + _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' diff --git a/youtube_dlc/extractor/tvland.py b/youtube_dlc/extractor/tvland.py index 791144128..225b6b078 100644 --- a/youtube_dlc/extractor/tvland.py +++ b/youtube_dlc/extractor/tvland.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .spike import ParamountNetworkIE +# TODO: Remove - Reason not used anymore - Service moved to youtube + class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' diff --git a/youtube_dlc/extractor/urplay.py b/youtube_dlc/extractor/urplay.py index 6030b7cb5..4bc2b78fb 100644 --- a/youtube_dlc/extractor/urplay.py +++ b/youtube_dlc/extractor/urplay.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import unified_timestamp +import re class URPlayIE(InfoExtractor): @@ -13,10 +14,10 @@ class URPlayIE(InfoExtractor): 'info_dict': { 'id': '203704', 'ext': 'mp4', - 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'title': 'Om vetenskap, kritiskt tänkande och motstånd', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', - 'timestamp': 1513512768, - 'upload_date': '20171217', + 'timestamp': 1513292400, + 'upload_date': '20171214', }, }, { 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', @@ -37,35 +38,41 @@ class URPlayIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - urplayer_data = self._parse_json(self._search_regex( - r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) - host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] + urplayer_data = re.sub(""", "\"", self._search_regex( + r'components\/Player\/Player\" data-react-props=\"({.+?})\"', + webpage, 'urplayer data')) + urplayer_data = self._parse_json(urplayer_data, video_id) + for i in range(len(urplayer_data['accessibleEpisodes'])): + if urplayer_data.get('accessibleEpisodes', {})[i].get('id') == int(video_id): + urplayer_data = urplayer_data['accessibleEpisodes'][i] + break + host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] formats = [] - for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): - file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) - if file_http: + urplayer_streams = urplayer_data.get("streamingInfo") + for quality in ('sd'), ('hd'): + location = (urplayer_streams.get("raw", {}).get(quality, {}).get("location") + or urplayer_streams.get("sweComplete", {}).get(quality, {}).get("location")) + if location: formats.extend(self._extract_wowza_formats( - 'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp'])) + 'http://%s/%s/playlist.m3u8' % (host, location), video_id, + skip_protocols=['f4m', 'rtmp', 'rtsp'])) self._sort_formats(formats) - subtitles = {} - for subtitle in urplayer_data.get('subtitles', []): - subtitle_url = subtitle.get('file') - kind = subtitle.get('kind') - if not subtitle_url or (kind and kind != 'captions'): - continue - subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ - 'url': subtitle_url, + subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location") + if subs: + subtitles.setdefault('Svenska', []).append({ + 'url': subs, }) return { 'id': video_id, 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), - 'thumbnail': urplayer_data.get('image'), - 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), - 'series': urplayer_data.get('series_title'), + 'thumbnail': urplayer_data.get('image', {}).get('1280x720'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), + webpage, 'timestamp')), + 'series': urplayer_data.get('seriesTitle'), 'subtitles': subtitles, 'formats': formats, } diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py index 01b253dcb..081c5e2e7 100644 --- a/youtube_dlc/extractor/xtube.py +++ b/youtube_dlc/extractor/xtube.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, js_to_json, orderedSet, @@ -33,28 +34,12 @@ class XTubeIE(InfoExtractor): 'title': 'strange erotica', 'description': 'contains:an ET kind of thing', 'uploader': 'greenshowers', - 'duration': 450, + 'duration': 449, 'view_count': int, 'comment_count': int, 'age_limit': 18, } }, { - # FLV videos with duplicated formats - 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752', - 'md5': 'a406963eb349dd43692ec54631efd88b', - 'info_dict': { - 'id': '9299752', - 'display_id': 'A-Super-Run-Part-1-YT', - 'ext': 'flv', - 'title': 'A Super Run - Part 1 (YT)', - 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616', - 'uploader': 'tshirtguy59', - 'duration': 579, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - }, - }, { # new URL schema 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', 'only_matching': True, @@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor): title, thumbnail, duration = [None] * 3 - config = self._parse_json(self._search_regex( - r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config', - default='{}'), video_id, transform_source=js_to_json, fatal=False) - if config: - config = config.get('mainRoll') - if isinstance(config, dict): - title = config.get('title') - thumbnail = config.get('poster') - duration = int_or_none(config.get('duration')) - sources = config.get('sources') or config.get('format') + json_config_string = self._search_regex( + r'playerConf=({.+?}),loaderConf', + webpage, 'config', default=None) + if not json_config_string: + raise ExtractorError("Could not extract video player data") + + json_config_string = json_config_string.replace("!0", "true").replace("!1", "false") + + config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False) + if not config: + raise ExtractorError("Could not extract video player data") + + config = config.get('mainRoll') + if isinstance(config, dict): + title = config.get('title') + thumbnail = config.get('poster') + duration = int_or_none(config.get('duration')) + sources = config.get('sources') or config.get('format') if not isinstance(sources, dict): sources = self._parse_json(self._search_regex( diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 4fb49b864..ccfaa733d 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -2051,7 +2051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if cipher: if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): - ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' + ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))' jsplayer_url_json = self._search_regex( ASSETS_RE, embed_webpage if age_gate else video_webpage, diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 1d7a7fed2..66b45220c 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -582,7 +582,7 @@ def parseOpts(overrideArguments=None): 'along with --min-sleep-interval.')) workarounds.add_option( '--sleep-subtitles', - dest='sleep_interval_subtitles', action='store_true', default=False, + dest='sleep_interval_subtitles', action='store_true', default=0, help='Enforce sleep interval on subtitles as well') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') diff --git a/youtube_dlc/update.py b/youtube_dlc/update.py index e49e09c17..b358e902b 100644 --- a/youtube_dlc/update.py +++ b/youtube_dlc/update.py @@ -37,10 +37,26 @@ def update_self(to_screen, verbose, opener): JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) + def sha256sum(): + h = hashlib.sha256() + b = bytearray(128 * 1024) + mv = memoryview(b) + with open(os.path.realpath(sys.executable), 'rb', buffering=0) as f: + for n in iter(lambda: f.readinto(mv), 0): + h.update(mv[:n]) + return h.hexdigest() + + to_screen('Current Build Hash %s' % sha256sum()) + if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'): to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.') return + # compiled file.exe can find itself by + # to_screen(os.path.basename(sys.executable)) + # and path to py or exe + # to_screen(os.path.realpath(sys.executable)) + # Check if there is a new version try: newversion = opener.open(VERSION_URL).read().decode('utf-8').strip() @@ -48,6 +64,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: can\'t find the current version. Please try again later.') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return if newversion == __version__: to_screen('youtube-dlc is up-to-date (' + __version__ + ')') @@ -61,6 +78,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: can\'t obtain versions info. Please try again later.') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return if 'signature' not in versions_info: to_screen('ERROR: the versions file is not signed or corrupted. Aborting.') @@ -109,6 +127,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: unable to download latest version') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return newcontent_hash = hashlib.sha256(newcontent).hexdigest() @@ -155,6 +174,7 @@ start /b "" cmd /c del "%%~f0"&exit /b" if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: unable to download latest version') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return newcontent_hash = hashlib.sha256(newcontent).hexdigest() |