diff options
Diffstat (limited to 'youtube_dlc')
-rw-r--r-- | youtube_dlc/YoutubeDL.py | 24 | ||||
-rw-r--r-- | youtube_dlc/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dlc/extractor/kakao.py | 35 | ||||
-rw-r--r-- | youtube_dlc/extractor/ndr.py | 15 | ||||
-rw-r--r-- | youtube_dlc/extractor/youtube.py | 2 | ||||
-rw-r--r-- | youtube_dlc/version.py | 2 |
6 files changed, 48 insertions, 31 deletions
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index f79d31deb..4cec2298c 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -105,6 +105,7 @@ from .postprocessor import ( FFmpegFixupStretchedPP, FFmpegMergerPP, FFmpegPostProcessor, + FFmpegSubtitlesConvertorPP, get_postprocessor, ) from .version import __version__ @@ -1846,6 +1847,29 @@ class YoutubeDL(object): (sub_lang, error_to_compat_str(err))) continue + if self.params.get('skip_download', False): + if self.params.get('convertsubtitles', False): + subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles')) + filename_real_ext = os.path.splitext(filename)[1][1:] + filename_wo_ext = ( + os.path.splitext(filename)[0] + if filename_real_ext == info_dict['ext'] + else filename) + afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles')) + if subconv.available: + info_dict.setdefault('__postprocessors', []) + # info_dict['__postprocessors'].append(subconv) + if os.path.exists(encodeFilename(afilename)): + self.to_screen( + '[download] %s has already been downloaded and ' + 'converted' % afilename) + else: + try: + self.post_process(filename, info_dict) + except (PostProcessingError) as err: + self.report_error('postprocessing: %s' % str(err)) + return + if self.params.get('writeinfojson', False): infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py index a663417da..fc11642b9 100644 --- a/youtube_dlc/__init__.py +++ b/youtube_dlc/__init__.py @@ -315,6 +315,7 @@ def _real_main(argv=None): else match_filter_func(opts.match_filter)) ydl_opts = { + 'convertsubtitles': opts.convertsubtitles, 'usenetrc': opts.usenetrc, 'username': opts.username, 'password': opts.password, diff --git a/youtube_dlc/extractor/kakao.py b/youtube_dlc/extractor/kakao.py index 32935bb28..fefd8a215 100644 --- a/youtube_dlc/extractor/kakao.py +++ b/youtube_dlc/extractor/kakao.py @@ -8,13 +8,13 @@ from ..utils import ( int_or_none, strip_or_none, unified_timestamp, - update_url_query, ) class KakaoIE(InfoExtractor): _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)' - _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/' + _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/playmeta/cliplink/%s/' + _CDN_API = 'https://tv.kakao.com/katz/v1/ft/cliplink/%s/readyNplay?' _TESTS = [{ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', @@ -45,18 +45,8 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - display_id = video_id.rstrip('@my') api_base = self._API_BASE_TMPL % video_id - - player_header = { - 'Referer': update_url_query( - 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, { - 'service': 'kakao_tv', - 'autoplay': '1', - 'profile': 'HIGH', - 'wmode': 'transparent', - }) - } + cdn_api_base = self._CDN_API % video_id query = { 'player': 'monet_html5', @@ -73,17 +63,14 @@ class KakaoIE(InfoExtractor): 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label']) } - impress = self._download_json( - api_base + 'impress', display_id, 'Downloading video info', - query=query, headers=player_header) + api_json = self._download_json( + api_base, video_id, 'Downloading video info') - clip_link = impress['clipLink'] + clip_link = api_json['clipLink'] clip = clip_link['clip'] title = clip.get('title') or clip_link.get('displayTitle') - query['tid'] = impress.get('tid', '') - formats = [] for fmt in clip.get('videoOutputList', []): try: @@ -94,15 +81,17 @@ class KakaoIE(InfoExtractor): 'profile': profile_name, 'fields': '-*,url', }) + fmt_url_json = self._download_json( - api_base + 'raw/videolocation', display_id, + cdn_api_base, video_id, 'Downloading video URL for profile %s' % profile_name, - query=query, headers=player_header, fatal=False) + query=query, fatal=False) if fmt_url_json is None: continue - fmt_url = fmt_url_json['url'] + fmt_vidLocation = fmt_url_json['videoLocation'] + fmt_url = fmt_vidLocation['url'] formats.append({ 'url': fmt_url, 'format_id': profile_name, @@ -131,7 +120,7 @@ class KakaoIE(InfoExtractor): }) return { - 'id': display_id, + 'id': video_id, 'title': title, 'description': strip_or_none(clip.get('description')), 'uploader': clip_link.get('channel', {}).get('name'), diff --git a/youtube_dlc/extractor/ndr.py b/youtube_dlc/extractor/ndr.py index 2447c812e..f3897c71b 100644 --- a/youtube_dlc/extractor/ndr.py +++ b/youtube_dlc/extractor/ndr.py @@ -19,14 +19,15 @@ class NDRBaseIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = next(group for group in mobj.groups() if group) + id = mobj.group('id') webpage = self._download_webpage(url, display_id) - return self._extract_embed(webpage, display_id) + return self._extract_embed(webpage, display_id, id) class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', @@ -86,12 +87,14 @@ class NDRIE(NDRBaseIE): 'only_matching': True, }] - def _extract_embed(self, webpage, display_id): + def _extract_embed(self, webpage, display_id, id): embed_url = self._html_search_meta( 'embedURL', webpage, 'embed URL', default=None) or self._search_regex( r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'embed URL', group='url') + 'embed URL', fatal=False, group='url') + if embed_url is None: + return self.url_result('ndr:%s' % id, ie=NDREmbedBaseIE.ie_key()) description = self._search_regex( r'<p[^>]+itemprop="description">([^<]+)</p>', webpage, 'description', default=None) or self._og_search_description(webpage) @@ -152,7 +155,7 @@ class NJoyIE(NDRBaseIE): 'only_matching': True, }] - def _extract_embed(self, webpage, display_id): + def _extract_embed(self, webpage, display_id, id): video_id = self._search_regex( r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') description = self._search_regex( @@ -253,7 +256,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index d3ba4c73c..a97921060 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -549,7 +549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } - _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt', 'json3') + _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False diff --git a/youtube_dlc/version.py b/youtube_dlc/version.py index 9dd9adf08..af04ea8b9 100644 --- a/youtube_dlc/version.py +++ b/youtube_dlc/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.09.12' +__version__ = '2020.09.13' |