From fb1a3531c59f5d9cee406295bbe006730695c249 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 17:20:02 -0700 Subject: Extraction: Fix url prefixing --- youtube/channel.py | 7 +++---- youtube/util.py | 4 ++++ youtube/yt_data_extract.py | 16 ++++++++++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube/channel.py b/youtube/channel.py index de75eaa..79b7c9b 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -219,8 +219,7 @@ def extract_info(polymer_json, tab): else: items = contents # for search - # TODO: Fix this URL prefixing shit - additional_info = {'author': info['channel_name'], 'author_url': '/channel/' + channel_id} + additional_info = {'author': info['channel_name'], 'author_url': 'https://www.youtube.com/channel/' + channel_id} info['items'] = [yt_data_extract.renderer_info(renderer, additional_info) for renderer in items] elif tab == 'about': @@ -258,8 +257,8 @@ def extract_info(polymer_json, tab): return info def post_process_channel_info(info): - info['avatar'] = '/' + info['avatar'] - info['channel_url'] = '/' + info['channel_url'] + info['avatar'] = util.prefix_url(info['avatar']) + info['channel_url'] = util.prefix_url(info['channel_url']) for item in info['items']: yt_data_extract.prefix_urls(item) yt_data_extract.add_extra_html_info(item) diff --git a/youtube/util.py b/youtube/util.py index 2205645..a81ae83 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -317,3 +317,7 @@ def uppercase_escape(s): return re.sub( r'\\U([0-9a-fA-F]{8})', lambda m: chr(int(m.group(1), base=16)), s) + +def prefix_url(url): + url = url.lstrip('/') # some urls have // before them, which has a special meaning + return '/' + url diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 5419084..663edc4 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -2,6 +2,7 @@ from youtube import util import html import json +import re # videos (all of type str): @@ -152,15 +153,22 @@ def ajax_info(item_json): raise +youtube_url_re = re.compile(r'^(?:(?:(?:https?:)?//)?(?:www\.)?youtube\.com)?(/.*)$') +def normalize_url(url): + match = youtube_url_re.fullmatch(url) + if match is None: + raise Exception() + + return 'https://www.youtube.com' + match.group(1) def prefix_urls(item): try: - item['thumbnail'] = '/' + item['thumbnail'].lstrip('/') + item['thumbnail'] = util.prefix_url(item['thumbnail']) except KeyError: pass try: - item['author_url'] = util.URL_ORIGIN + item['author_url'] + item['author_url'] = util.prefix_url(item['author_url']) except KeyError: pass @@ -219,7 +227,7 @@ def renderer_info(renderer, additional_info={}): if 'ownerText' in renderer: info['author'] = renderer['ownerText']['runs'][0]['text'] - info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + info['author_url'] = normalize_url(renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']) try: overlays = renderer['thumbnailOverlays'] except KeyError: @@ -241,7 +249,7 @@ def renderer_info(renderer, additional_info={}): if key in ('longBylineText', 'shortBylineText'): info['author'] = get_text(node) try: - info['author_url'] = get_url(node) + info['author_url'] = normalize_url(get_url(node)) except KeyError: pass -- cgit v1.2.3 From bd343ed71f628e0f1dd1eb3f45fb4e04887f223f Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 17:28:11 -0700 Subject: Extraction: Move channel extraction to yt_data_extract --- 
youtube/channel.py | 122 +-------------------------------------------- youtube/subscriptions.py | 2 +- youtube/yt_data_extract.py | 121 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 121 deletions(-) diff --git a/youtube/channel.py b/youtube/channel.py index 79b7c9b..16d0a3f 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -137,124 +137,6 @@ def get_channel_search_json(channel_id, query, page): return polymer_json -def extract_info(polymer_json, tab): - response = polymer_json[1]['response'] - try: - microformat = response['microformat']['microformatDataRenderer'] - - # channel doesn't exist or was terminated - # example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org - except KeyError: - if 'alerts' in response and len(response['alerts']) > 0: - result = '' - for alert in response['alerts']: - result += alert['alertRenderer']['text']['simpleText'] + '\n' - flask.abort(200, result) - elif 'errors' in response['responseContext']: - for error in response['responseContext']['errors']['error']: - if error['code'] == 'INVALID_VALUE' and error['location'] == 'browse_id': - flask.abort(404, 'This channel does not exist') - raise - - - info = {} - info['current_tab'] = tab - - - # stuff from microformat (info given by youtube for every page on channel) - info['short_description'] = microformat['description'] - info['channel_name'] = microformat['title'] - info['avatar'] = microformat['thumbnail']['thumbnails'][0]['url'] - channel_url = microformat['urlCanonical'].rstrip('/') - channel_id = channel_url[channel_url.rfind('/')+1:] - info['channel_id'] = channel_id - info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id - - info['items'] = [] - - # empty channel - if 'contents' not in response and 'continuationContents' not in response: - return info - - - # find the tab with content - # example channel where tabs do not have definite index: https://www.youtube.com/channel/UC4gQ8i3FD7YbhOgqUkeQEJg - # TODO: maybe use the 'selected' attribute for this? 
- if 'continuationContents' not in response: - tab_renderer = None - tab_content = None - for tab_json in response['contents']['twoColumnBrowseResultsRenderer']['tabs']: - try: - tab_renderer = tab_json['tabRenderer'] - except KeyError: - tab_renderer = tab_json['expandableTabRenderer'] - try: - tab_content = tab_renderer['content'] - break - except KeyError: - pass - else: # didn't break - raise Exception("No tabs found with content") - assert tab == tab_renderer['title'].lower() - - - # extract tab-specific info - if tab in ('videos', 'playlists', 'search'): # find the list of items - if 'continuationContents' in response: - try: - items = response['continuationContents']['gridContinuation']['items'] - except KeyError: - items = response['continuationContents']['sectionListContinuation']['contents'] # for search - else: - contents = tab_content['sectionListRenderer']['contents'] - if 'itemSectionRenderer' in contents[0]: - item_section = contents[0]['itemSectionRenderer']['contents'][0] - try: - items = item_section['gridRenderer']['items'] - except KeyError: - if "messageRenderer" in item_section: - items = [] - else: - raise Exception('gridRenderer missing but messageRenderer not found') - else: - items = contents # for search - - additional_info = {'author': info['channel_name'], 'author_url': 'https://www.youtube.com/channel/' + channel_id} - info['items'] = [yt_data_extract.renderer_info(renderer, additional_info) for renderer in items] - - elif tab == 'about': - channel_metadata = tab_content['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'] - - - info['links'] = [] - for link_json in channel_metadata.get('primaryLinks', ()): - url = link_json['navigationEndpoint']['urlEndpoint']['url'] - if url.startswith('/redirect'): # youtube puts these on external links to do tracking - query_string = url[url.find('?')+1: ] - url = urllib.parse.parse_qs(query_string)['q'][0] - - text = yt_data_extract.get_plain_text(link_json['title']) - - info['links'].append( (text, url) ) - - - info['stats'] = [] - for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'): - try: - stat = channel_metadata[stat_name] - except KeyError: - continue - info['stats'].append(yt_data_extract.get_plain_text(stat)) - - if 'description' in channel_metadata: - info['description'] = yt_data_extract.get_text(channel_metadata['description']) - else: - info['description'] = '' - - else: - raise NotImplementedError('Unknown or unsupported channel tab: ' + tab) - - return info def post_process_channel_info(info): info['avatar'] = util.prefix_url(info['avatar']) @@ -303,7 +185,7 @@ def get_channel_page(channel_id, tab='videos'): flask.abort(404, 'Unknown channel tab: ' + tab) - info = extract_info(json.loads(polymer_json), tab) + info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab) post_process_channel_info(info) if tab in ('videos', 'search'): info['number_of_videos'] = number_of_videos @@ -343,7 +225,7 @@ def get_channel_page_general_url(base_url, tab, request): flask.abort(404, 'Unknown channel tab: ' + tab) - info = extract_info(json.loads(polymer_json), tab) + info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab) post_process_channel_info(info) if tab in ('videos', 'search'): info['number_of_videos'] = 1000 diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 56bdf93..175622f 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -455,7 +455,7 @@ def 
_get_upstream_videos(channel_id): print('Failed to read atoma feed for ' + channel_status_name) traceback.print_exc() - videos = channel.extract_info(json.loads(channel_tab), 'videos')['items'] + videos = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos')['items'] for i, video_item in enumerate(videos): if 'description' not in video_item: video_item['description'] = '' diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 663edc4..c666ede 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -3,6 +3,7 @@ from youtube import util import html import json import re +import urllib # videos (all of type str): @@ -279,3 +280,123 @@ def parse_info_prepare_for_html(renderer, additional_info={}): return item +def extract_channel_info(polymer_json, tab): + response = polymer_json[1]['response'] + try: + microformat = response['microformat']['microformatDataRenderer'] + + # channel doesn't exist or was terminated + # example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org + except KeyError: + if 'alerts' in response and len(response['alerts']) > 0: + result = '' + for alert in response['alerts']: + result += alert['alertRenderer']['text']['simpleText'] + '\n' + flask.abort(200, result) + elif 'errors' in response['responseContext']: + for error in response['responseContext']['errors']['error']: + if error['code'] == 'INVALID_VALUE' and error['location'] == 'browse_id': + flask.abort(404, 'This channel does not exist') + raise + + + info = {} + info['current_tab'] = tab + + + # stuff from microformat (info given by youtube for every page on channel) + info['short_description'] = microformat['description'] + info['channel_name'] = microformat['title'] + info['avatar'] = microformat['thumbnail']['thumbnails'][0]['url'] + channel_url = microformat['urlCanonical'].rstrip('/') + channel_id = channel_url[channel_url.rfind('/')+1:] + info['channel_id'] = channel_id + info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id + + info['items'] = [] + + # empty channel + if 'contents' not in response and 'continuationContents' not in response: + return info + + + # find the tab with content + # example channel where tabs do not have definite index: https://www.youtube.com/channel/UC4gQ8i3FD7YbhOgqUkeQEJg + # TODO: maybe use the 'selected' attribute for this? 
+ if 'continuationContents' not in response: + tab_renderer = None + tab_content = None + for tab_json in response['contents']['twoColumnBrowseResultsRenderer']['tabs']: + try: + tab_renderer = tab_json['tabRenderer'] + except KeyError: + tab_renderer = tab_json['expandableTabRenderer'] + try: + tab_content = tab_renderer['content'] + break + except KeyError: + pass + else: # didn't break + raise Exception("No tabs found with content") + assert tab == tab_renderer['title'].lower() + + + # extract tab-specific info + if tab in ('videos', 'playlists', 'search'): # find the list of items + if 'continuationContents' in response: + try: + items = response['continuationContents']['gridContinuation']['items'] + except KeyError: + items = response['continuationContents']['sectionListContinuation']['contents'] # for search + else: + contents = tab_content['sectionListRenderer']['contents'] + if 'itemSectionRenderer' in contents[0]: + item_section = contents[0]['itemSectionRenderer']['contents'][0] + try: + items = item_section['gridRenderer']['items'] + except KeyError: + if "messageRenderer" in item_section: + items = [] + else: + raise Exception('gridRenderer missing but messageRenderer not found') + else: + items = contents # for search + + additional_info = {'author': info['channel_name'], 'author_url': 'https://www.youtube.com/channel/' + channel_id} + info['items'] = [renderer_info(renderer, additional_info) for renderer in items] + + elif tab == 'about': + channel_metadata = tab_content['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'] + + + info['links'] = [] + for link_json in channel_metadata.get('primaryLinks', ()): + url = link_json['navigationEndpoint']['urlEndpoint']['url'] + if url.startswith('/redirect'): # youtube puts these on external links to do tracking + query_string = url[url.find('?')+1: ] + url = urllib.parse.parse_qs(query_string)['q'][0] + + text = get_plain_text(link_json['title']) + + info['links'].append( (text, url) ) + + + info['stats'] = [] + for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'): + try: + stat = channel_metadata[stat_name] + except KeyError: + continue + info['stats'].append(get_plain_text(stat)) + + if 'description' in channel_metadata: + info['description'] = get_text(channel_metadata['description']) + else: + info['description'] = '' + + else: + raise NotImplementedError('Unknown or unsupported channel tab: ' + tab) + + return info + + -- cgit v1.2.3 From 216231f9a6ca9ed48389e797a0c30d7d3b01e379 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 17:48:02 -0700 Subject: Extraction: Proper error handling for terminated or non-existant channels --- youtube/channel.py | 5 +++++ youtube/subscriptions.py | 7 ++++++- youtube/yt_data_extract.py | 13 +++++++------ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/youtube/channel.py b/youtube/channel.py index 16d0a3f..3a2a0b3 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -186,6 +186,8 @@ def get_channel_page(channel_id, tab='videos'): info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab) + if info['errors']: + return flask.render_template('error.html', error_message = '\n'.join(info['errors'])) post_process_channel_info(info) if tab in ('videos', 'search'): info['number_of_videos'] = number_of_videos @@ -226,6 +228,9 @@ def get_channel_page_general_url(base_url, tab, request): info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab) 
+ if info['errors']: + return flask.render_template('error.html', error_message = '\n'.join(info['errors'])) + post_process_channel_info(info) if tab in ('videos', 'search'): info['number_of_videos'] = 1000 diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 175622f..87e1659 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -455,7 +455,12 @@ def _get_upstream_videos(channel_id): print('Failed to read atoma feed for ' + channel_status_name) traceback.print_exc() - videos = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos')['items'] + channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos') + if channel_info['errors']: + print('Error checking channel ' + channel_status_name + ': ' + ', '.join(channel_info['errors'])) + return + + videos = channel_info['items'] for i, video_item in enumerate(videos): if 'description' not in video_item: video_item['description'] = '' diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index c666ede..f0c89cb 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -281,6 +281,7 @@ def parse_info_prepare_for_html(renderer, additional_info={}): def extract_channel_info(polymer_json, tab): + info = {'errors': []} response = polymer_json[1]['response'] try: microformat = response['microformat']['microformatDataRenderer'] @@ -289,18 +290,18 @@ def extract_channel_info(polymer_json, tab): # example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org except KeyError: if 'alerts' in response and len(response['alerts']) > 0: - result = '' for alert in response['alerts']: - result += alert['alertRenderer']['text']['simpleText'] + '\n' - flask.abort(200, result) + info['errors'].append(alert['alertRenderer']['text']['simpleText']) + return info elif 'errors' in response['responseContext']: for error in response['responseContext']['errors']['error']: if error['code'] == 'INVALID_VALUE' and error['location'] == 'browse_id': - flask.abort(404, 'This channel does not exist') - raise + info['errors'].append('This channel does not exist') + return info + info['errors'].append('Failure getting microformat') + return info - info = {} info['current_tab'] = tab -- cgit v1.2.3 From c362a5e834d88524c154cb010be9dc909dcbe25d Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 18:06:30 -0700 Subject: Extraction: Move search extraction to yt_data_extract --- youtube/search.py | 58 +++++++--------------------------------------- youtube/yt_data_extract.py | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 49 deletions(-) diff --git a/youtube/search.py b/youtube/search.py index e167279..81a69f2 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -5,7 +5,6 @@ import settings import json import urllib import base64 -from math import ceil import mimetypes from flask import request import flask @@ -74,59 +73,20 @@ def get_search_page(): filters['time'] = int(request.args.get("time", "0")) filters['type'] = int(request.args.get("type", "0")) filters['duration'] = int(request.args.get("duration", "0")) - info = get_search_json(query, page, autocorrect, sort, filters) - - estimated_results = int(info[1]['response']['estimatedResults']) - estimated_pages = ceil(estimated_results/20) + polymer_json = get_search_json(query, page, autocorrect, sort, filters) - # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that 
becomes the first "section" and the search results are in the second. So just join all of them for resiliency - results = [] - for section in info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']: - results += section['itemSectionRenderer']['contents'] - - parsed_results = [] - corrections = {'type': None} - for renderer in results: - type = list(renderer.keys())[0] - if type == 'shelfRenderer': - continue - if type == 'didYouMeanRenderer': - renderer = renderer[type] - corrected_query_string = request.args.to_dict(flat=False) - corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] - corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) - - corrections = { - 'type': 'did_you_mean', - 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - 'corrected_query_url': corrected_query_url, - } - continue - if type == 'showingResultsForRenderer': - renderer = renderer[type] - no_autocorrect_query_string = request.args.to_dict(flat=False) - no_autocorrect_query_string['autocorrect'] = ['0'] - no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) - - corrections = { - 'type': 'showing_results_for', - 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - 'original_query_url': no_autocorrect_query_url, - 'original_query': renderer['originalQuery']['simpleText'], - } - continue - - info = yt_data_extract.parse_info_prepare_for_html(renderer) - if info['type'] != 'unsupported': - parsed_results.append(info) + search_info = yt_data_extract.extract_search_info(polymer_json) + for item_info in search_info['items']: + yt_data_extract.prefix_urls(item_info) + yt_data_extract.add_extra_html_info(item_info) return flask.render_template('search.html', header_playlist_names = local_playlist.get_playlist_names(), query = query, - estimated_results = estimated_results, - estimated_pages = estimated_pages, - corrections = corrections, - results = parsed_results, + estimated_results = search_info['estimated_results'], + estimated_pages = search_info['estimated_pages'], + corrections = search_info['corrections'], + results = search_info['items'], parameters_dictionary = request.args, ) diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index f0c89cb..95c68bc 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -4,6 +4,7 @@ import html import json import re import urllib +from math import ceil # videos (all of type str): @@ -400,4 +401,51 @@ def extract_channel_info(polymer_json, tab): return info +def extract_search_info(polymer_json): + info = {} + info['estimated_results'] = int(polymer_json[1]['response']['estimatedResults']) + info['estimated_pages'] = ceil(info['estimated_results']/20) + + # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that becomes the first "section" and the search results are in the second. 
So just join all of them for resiliency + results = [] + for section in polymer_json[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']: + results += section['itemSectionRenderer']['contents'] + + info['items'] = [] + info['corrections'] = {'type': None} + for renderer in results: + type = list(renderer.keys())[0] + if type == 'shelfRenderer': + continue + if type == 'didYouMeanRenderer': + renderer = renderer[type] + corrected_query_string = request.args.to_dict(flat=False) + corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] + corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) + + info['corrections'] = { + 'type': 'did_you_mean', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'corrected_query_url': corrected_query_url, + } + continue + if type == 'showingResultsForRenderer': + renderer = renderer[type] + no_autocorrect_query_string = request.args.to_dict(flat=False) + no_autocorrect_query_string['autocorrect'] = ['0'] + no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) + + info['corrections'] = { + 'type': 'showing_results_for', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'original_query_url': no_autocorrect_query_url, + 'original_query': renderer['originalQuery']['simpleText'], + } + continue + + item_info = renderer_info(renderer) + if item_info['type'] != 'unsupported': + info['items'].append(item_info) + + return info -- cgit v1.2.3 From 89e5761f8d9ae4221c4a97eca3c0fce3405a5bc4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 18:42:08 -0700 Subject: Extraction: Move playlist extraction to yt_data_extract --- youtube/playlist.py | 28 ++++++++++------------------ youtube/templates/playlist.html | 2 +- youtube/yt_data_extract.py | 27 +++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/youtube/playlist.py b/youtube/playlist.py index 3e5b0d2..2f7abdc 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -89,28 +89,20 @@ def get_playlist_page(): ) gevent.joinall(tasks) first_page_json, this_page_json = tasks[0].value, tasks[1].value - - try: # first page - video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] - except KeyError: # other pages - video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] - - parsed_video_list = [yt_data_extract.parse_info_prepare_for_html(video_json) for video_json in video_list] - - - metadata = yt_data_extract.renderer_info(first_page_json['response']['header']) - yt_data_extract.prefix_urls(metadata) - if 'description' not in metadata: - metadata['description'] = '' + info = yt_data_extract.extract_playlist_info(this_page_json) + if page != '1': + info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) - video_count = int(metadata['size'].replace(',', '')) - metadata['size'] += ' videos' + yt_data_extract.prefix_urls(info['metadata']) + for item in info['items']: + yt_data_extract.prefix_urls(item) + yt_data_extract.add_extra_html_info(item) return flask.render_template('playlist.html', - video_list = 
parsed_video_list,
- num_pages = math.ceil(video_count/20),
+ video_list = info['items'],
+ num_pages = math.ceil(info['metadata']['size']/20),
  parameters_dictionary = request.args,
- **metadata
+ **info['metadata']
  ).encode('utf-8')
diff --git a/youtube/templates/playlist.html b/youtube/templates/playlist.html
index ab2640f..52c468e 100644
--- a/youtube/templates/playlist.html
+++ b/youtube/templates/playlist.html
@@ -55,7 +55,7 @@
{{ author }}
{{ views }}
- {{ size }}
+ {{ size }} videos
{{ common_elements.text_runs(description) }}
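The playlist.py and playlist.html hunks above assume that 'size' coming out of the extractor is now a plain integer (the yt_data_extract.py hunk below strips the commas and calls int() on it), so the page arithmetic stays in the caller and the " videos" suffix moves into the template. A minimal sketch of that caller-side math, with a made-up video count for illustration:

    import math
    metadata = {'size': 1342}                      # e.g. parsed from "1,342" by extract_playlist_metadata
    num_pages = math.ceil(metadata['size'] / 20)   # 68 pages at 20 videos per page
    label = '%d videos' % metadata['size']         # the " videos" suffix is now added by playlist.html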
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 95c68bc..e7a2f1e 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -449,3 +449,30 @@ def extract_search_info(polymer_json): return info + +def extract_playlist_metadata(polymer_json): + metadata = renderer_info(polymer_json['response']['header']) + + if 'description' not in metadata: + metadata['description'] = '' + + metadata['size'] = int(metadata['size'].replace(',', '')) + + return metadata + +def extract_playlist_info(polymer_json): + info = {} + try: # first page + video_list = polymer_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] + first_page = True + except KeyError: # other pages + video_list = polymer_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] + first_page = False + + info['items'] = [renderer_info(renderer) for renderer in video_list] + + if first_page: + info['metadata'] = extract_playlist_metadata(polymer_json) + + return info + -- cgit v1.2.3 From dc6c370152d063ad4198c747fc12eb06fc1ec0e4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Sep 2019 21:39:53 -0700 Subject: Extraction: refactor response extraction to work with both mobile & desktop respones, also improve errors --- youtube/channel.py | 8 +++---- youtube/playlist.py | 3 +++ youtube/search.py | 3 +++ youtube/subscriptions.py | 4 ++-- youtube/yt_data_extract.py | 59 +++++++++++++++++++++++++++++++++------------- 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/youtube/channel.py b/youtube/channel.py index 3a2a0b3..67a79ad 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -186,8 +186,8 @@ def get_channel_page(channel_id, tab='videos'): info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab) - if info['errors']: - return flask.render_template('error.html', error_message = '\n'.join(info['errors'])) + if info['error']: + return flask.render_template('error.html', error_message = info['error']) post_process_channel_info(info) if tab in ('videos', 'search'): info['number_of_videos'] = number_of_videos @@ -228,8 +228,8 @@ def get_channel_page_general_url(base_url, tab, request): info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab) - if info['errors']: - return flask.render_template('error.html', error_message = '\n'.join(info['errors'])) + if info['error']: + return flask.render_template('error.html', error_message = info['error']) post_process_channel_info(info) if tab in ('videos', 'search'): diff --git a/youtube/playlist.py b/youtube/playlist.py index 2f7abdc..bc2c417 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -91,6 +91,9 @@ def get_playlist_page(): first_page_json, this_page_json = tasks[0].value, tasks[1].value info = yt_data_extract.extract_playlist_info(this_page_json) + if info['error']: + return flask.render_template('error.html', error_message = info['error']) + if page != '1': info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) diff --git a/youtube/search.py b/youtube/search.py index 81a69f2..ba40f0b 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -76,6 +76,9 @@ def get_search_page(): polymer_json = get_search_json(query, page, autocorrect, sort, filters) search_info = yt_data_extract.extract_search_info(polymer_json) + if search_info['error']: + return 
flask.render_template('error.html', error_message = search_info['error']) + for item_info in search_info['items']: yt_data_extract.prefix_urls(item_info) yt_data_extract.add_extra_html_info(item_info) diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 87e1659..e0c71f5 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -456,8 +456,8 @@ def _get_upstream_videos(channel_id): traceback.print_exc() channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos') - if channel_info['errors']: - print('Error checking channel ' + channel_status_name + ': ' + ', '.join(channel_info['errors'])) + if channel_info['error']: + print('Error checking channel ' + channel_status_name + ': ' + channel_info['error']) return videos = channel_info['items'] diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index e7a2f1e..440cc0d 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -280,10 +280,29 @@ def parse_info_prepare_for_html(renderer, additional_info={}): return item +def get_response(polymer_json): + '''return response, error''' + + # responses returned for desktop version + try: + return polymer_json[1]['response'], None + except (TypeError, KeyError, IndexError): + pass + + # responses returned for mobile version + try: + return polymer_json['response'], None + except (TypeError, KeyError): + pass + + return None, 'Failed to extract response' + def extract_channel_info(polymer_json, tab): - info = {'errors': []} - response = polymer_json[1]['response'] + response, err = get_response(polymer_json) + if err: + return {'error': err} + try: microformat = response['microformat']['microformatDataRenderer'] @@ -291,18 +310,14 @@ def extract_channel_info(polymer_json, tab): # example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org except KeyError: if 'alerts' in response and len(response['alerts']) > 0: - for alert in response['alerts']: - info['errors'].append(alert['alertRenderer']['text']['simpleText']) - return info + return {'error': ' '.join(alert['alertRenderer']['text']['simpleText'] for alert in response['alerts']) } elif 'errors' in response['responseContext']: for error in response['responseContext']['errors']['error']: if error['code'] == 'INVALID_VALUE' and error['location'] == 'browse_id': - info['errors'].append('This channel does not exist') - return info - info['errors'].append('Failure getting microformat') - return info - + return {'error': 'This channel does not exist'} + return {'error': 'Failure getting microformat'} + info = {'error': None} info['current_tab'] = tab @@ -402,13 +417,16 @@ def extract_channel_info(polymer_json, tab): return info def extract_search_info(polymer_json): - info = {} - info['estimated_results'] = int(polymer_json[1]['response']['estimatedResults']) + response, err = get_response(polymer_json) + if err: + return {'error': err} + info = {'error': None} + info['estimated_results'] = int(response['estimatedResults']) info['estimated_pages'] = ceil(info['estimated_results']/20) # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that becomes the first "section" and the search results are in the second. 
So just join all of them for resiliency results = [] - for section in polymer_json[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']: + for section in response['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']: results += section['itemSectionRenderer']['contents'] info['items'] = [] @@ -451,7 +469,11 @@ def extract_search_info(polymer_json): return info def extract_playlist_metadata(polymer_json): - metadata = renderer_info(polymer_json['response']['header']) + response, err = get_response(polymer_json) + if err: + return {'error': err} + metadata = renderer_info(response['header']) + metadata['error'] = None if 'description' not in metadata: metadata['description'] = '' @@ -461,12 +483,15 @@ def extract_playlist_metadata(polymer_json): return metadata def extract_playlist_info(polymer_json): - info = {} + response, err = get_response(polymer_json) + if err: + return {'error': err} + info = {'error': None} try: # first page - video_list = polymer_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] + video_list = response['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] first_page = True except KeyError: # other pages - video_list = polymer_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] + video_list = response['continuationContents']['playlistVideoListContinuation']['contents'] first_page = False info['items'] = [renderer_info(renderer) for renderer in video_list] -- cgit v1.2.3 From 61c50e0b540fa7ebabadb870c6aeb38b87d4912c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 19 Sep 2019 11:41:16 -0700 Subject: Extraction: Move comment extraction to yt_data_extract --- youtube/comments.py | 99 +++------------------------------------ youtube/util.py | 9 ---- youtube/yt_data_extract.py | 113 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 114 insertions(+), 107 deletions(-) diff --git a/youtube/comments.py b/youtube/comments.py index 3b1ef86..250a95f 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -48,24 +48,6 @@ def comment_replies_ctoken(video_id, comment_id, max_results=500): result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, params) return base64.urlsafe_b64encode(result).decode('ascii') -def ctoken_metadata(ctoken): - result = dict() - params = proto.parse(proto.b64_to_bytes(ctoken)) - result['video_id'] = proto.parse(params[2])[2].decode('ascii') - - offset_information = proto.parse(params[6]) - result['offset'] = offset_information.get(5, 0) - - result['is_replies'] = False - if (3 in offset_information) and (2 in proto.parse(offset_information[3])): - result['is_replies'] = True - result['sort'] = None - else: - try: - result['sort'] = proto.parse(offset_information[4])[6] - except KeyError: - result['sort'] = 0 - return result mobile_headers = { @@ -91,7 +73,9 @@ def request_comments(ctoken, replies=False): print("got , retrying") continue break - return content + + polymer_json = json.loads(util.uppercase_escape(content.decode('utf-8'))) + return polymer_json def single_comment_ctoken(video_id, comment_id): @@ -102,77 +86,6 @@ def 
single_comment_ctoken(video_id, comment_id): -def parse_comments_polymer(content): - try: - video_title = '' - content = json.loads(util.uppercase_escape(content.decode('utf-8'))) - url = content[1]['url'] - ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0] - metadata = ctoken_metadata(ctoken) - - try: - comments_raw = content[1]['response']['continuationContents']['commentSectionContinuation']['items'] - except KeyError: - comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents'] - - ctoken = util.default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='') - - comments = [] - for comment_json in comments_raw: - number_of_replies = 0 - try: - comment_thread = comment_json['commentThreadRenderer'] - except KeyError: - comment_renderer = comment_json['commentRenderer'] - else: - if 'commentTargetTitle' in comment_thread: - video_title = comment_thread['commentTargetTitle']['runs'][0]['text'] - - if 'replies' in comment_thread: - view_replies_text = yt_data_extract.get_plain_text(comment_thread['replies']['commentRepliesRenderer']['moreText']) - view_replies_text = view_replies_text.replace(',', '') - match = re.search(r'(\d+)', view_replies_text) - if match is None: - number_of_replies = 1 - else: - number_of_replies = int(match.group(1)) - comment_renderer = comment_thread['comment']['commentRenderer'] - - comment = { - 'author_id': comment_renderer.get('authorId', ''), - 'author_avatar': comment_renderer['authorThumbnail']['thumbnails'][0]['url'], - 'likes': comment_renderer['likeCount'], - 'published': yt_data_extract.get_plain_text(comment_renderer['publishedTimeText']), - 'text': comment_renderer['contentText'].get('runs', ''), - 'number_of_replies': number_of_replies, - 'comment_id': comment_renderer['commentId'], - } - - if 'authorText' in comment_renderer: # deleted channels have no name or channel link - comment['author'] = yt_data_extract.get_plain_text(comment_renderer['authorText']) - comment['author_url'] = comment_renderer['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'] - comment['author_channel_id'] = comment_renderer['authorEndpoint']['browseEndpoint']['browseId'] - else: - comment['author'] = '' - comment['author_url'] = '' - comment['author_channel_id'] = '' - - comments.append(comment) - except Exception as e: - print('Error parsing comments: ' + str(e)) - comments = () - ctoken = '' - - return { - 'ctoken': ctoken, - 'comments': comments, - 'video_title': video_title, - 'video_id': metadata['video_id'], - 'offset': metadata['offset'], - 'is_replies': metadata['is_replies'], - 'sort': metadata['sort'], - } - def post_process_comments_info(comments_info): for comment in comments_info['comments']: comment['author_url'] = util.URL_ORIGIN + comment['author_url'] @@ -207,7 +120,7 @@ def post_process_comments_info(comments_info): comment['likes_text'] = str(comment['likes']) + ' likes' comments_info['include_avatars'] = settings.enable_comment_avatars - if comments_info['ctoken'] != '': + if comments_info['ctoken']: comments_info['more_comments_url'] = util.URL_ORIGIN + '/comments?ctoken=' + comments_info['ctoken'] comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1) @@ -222,7 +135,7 @@ def post_process_comments_info(comments_info): def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''): if settings.comments_mode: - comments_info = 
parse_comments_polymer(request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key))) + comments_info = yt_data_extract.parse_comments_polymer(request_comments(make_comment_ctoken(video_id, sort, offset, lc, secret_key))) post_process_comments_info(comments_info) post_comment_url = util.URL_ORIGIN + "/post_comment?video_id=" + video_id @@ -247,7 +160,7 @@ def get_comments_page(): ctoken = comment_replies_ctoken(video_id, parent_id) replies = True - comments_info = parse_comments_polymer(request_comments(ctoken, replies)) + comments_info = yt_data_extract.parse_comments_polymer(request_comments(ctoken, replies)) post_process_comments_info(comments_info) if not replies: diff --git a/youtube/util.py b/youtube/util.py index a81ae83..5b63e2a 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -277,15 +277,6 @@ def video_id(url): url_parts = urllib.parse.urlparse(url) return urllib.parse.parse_qs(url_parts.query)['v'][0] -def default_multi_get(object, *keys, default): - ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' - try: - for key in keys: - object = object[key] - return object - except (IndexError, KeyError): - return default - # default, sddefault, mqdefault, hqdefault, hq720 def get_thumbnail_url(video_id): diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 440cc0d..551b663 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -1,4 +1,4 @@ -from youtube import util +from youtube import util, proto import html import json @@ -59,10 +59,14 @@ def format_text_runs(runs): return result - - - - +def default_multi_get(object, *keys, default): + ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. 
Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' + try: + for key in keys: + object = object[key] + return object + except (IndexError, KeyError): + return default def get_url(node): @@ -501,3 +505,102 @@ def extract_playlist_info(polymer_json): return info +def ctoken_metadata(ctoken): + result = dict() + params = proto.parse(proto.b64_to_bytes(ctoken)) + result['video_id'] = proto.parse(params[2])[2].decode('ascii') + + offset_information = proto.parse(params[6]) + result['offset'] = offset_information.get(5, 0) + + result['is_replies'] = False + if (3 in offset_information) and (2 in proto.parse(offset_information[3])): + result['is_replies'] = True + result['sort'] = None + else: + try: + result['sort'] = proto.parse(offset_information[4])[6] + except KeyError: + result['sort'] = 0 + return result + +def parse_comments_polymer(polymer_json): + try: + video_title = '' + response, err = get_response(polymer_json) + if err: + raise Exception(err) + + try: + url = polymer_json[1]['url'] + except (TypeError, IndexError, KeyError): + url = polymer_json['url'] + + ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0] + metadata = ctoken_metadata(ctoken) + + try: + comments_raw = response['continuationContents']['commentSectionContinuation']['items'] + except KeyError: + comments_raw = response['continuationContents']['commentRepliesContinuation']['contents'] + + ctoken = default_multi_get(response, 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='') + + comments = [] + for comment_json in comments_raw: + number_of_replies = 0 + try: + comment_thread = comment_json['commentThreadRenderer'] + except KeyError: + comment_renderer = comment_json['commentRenderer'] + else: + if 'commentTargetTitle' in comment_thread: + video_title = comment_thread['commentTargetTitle']['runs'][0]['text'] + + if 'replies' in comment_thread: + view_replies_text = get_plain_text(comment_thread['replies']['commentRepliesRenderer']['moreText']) + view_replies_text = view_replies_text.replace(',', '') + match = re.search(r'(\d+)', view_replies_text) + if match is None: + number_of_replies = 1 + else: + number_of_replies = int(match.group(1)) + comment_renderer = comment_thread['comment']['commentRenderer'] + + comment = { + 'author_id': comment_renderer.get('authorId', ''), + 'author_avatar': comment_renderer['authorThumbnail']['thumbnails'][0]['url'], + 'likes': comment_renderer['likeCount'], + 'published': get_plain_text(comment_renderer['publishedTimeText']), + 'text': comment_renderer['contentText'].get('runs', ''), + 'number_of_replies': number_of_replies, + 'comment_id': comment_renderer['commentId'], + } + + if 'authorText' in comment_renderer: # deleted channels have no name or channel link + comment['author'] = get_plain_text(comment_renderer['authorText']) + comment['author_url'] = comment_renderer['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'] + comment['author_channel_id'] = comment_renderer['authorEndpoint']['browseEndpoint']['browseId'] + else: + comment['author'] = '' + comment['author_url'] = '' + comment['author_channel_id'] = '' + + comments.append(comment) + except Exception as e: + print('Error parsing comments: ' + str(e)) + comments = () + ctoken = '' + + return { + 'ctoken': ctoken, + 'comments': comments, + 'video_title': video_title, + 'video_id': metadata['video_id'], + 'offset': metadata['offset'], + 'is_replies': metadata['is_replies'], + 'sort': 
metadata['sort'], + } + + + -- cgit v1.2.3 From ce8a658a0e56a9dfd3d0145e53f85711c4cbfb11 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 27 Sep 2019 18:03:19 -0700 Subject: Extraction: Move item extraction into a generic, robust function --- youtube/yt_data_extract.py | 239 +++++++++++++++++++++++++++++++++------------ 1 file changed, 176 insertions(+), 63 deletions(-) diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 551b663..892e73e 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -4,6 +4,7 @@ import html import json import re import urllib +import collections from math import ceil # videos (all of type str): @@ -58,15 +59,52 @@ def format_text_runs(runs): result += html.escape(text_run["text"]) return result +def default_get(object, key, default, types=()): + '''Like dict.get(), but returns default if the result doesn't match one of the types. + Also works for indexing lists.''' + try: + result = object[key] + except (TypeError, IndexError, KeyError): + return default + + if not types or isinstance(result, types): + return result + else: + return default + -def default_multi_get(object, *keys, default): - ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' + +def default_multi_get(object, *keys, default, types=()): + '''Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. + Last argument is the default value to use in case of any IndexErrors or KeyErrors. + If types is given and the result doesn't match one of those types, default is returned''' try: for key in keys: object = object[key] - return object - except (IndexError, KeyError): + except (TypeError, IndexError, KeyError): return default + else: + if not types or isinstance(object, types): + return object + else: + return default + +def multi_default_multi_get(object, *key_sequences, default=None, types=()): + '''Like default_multi_get, but can try different key sequences in case one fails. + Return default if all of them fail. 
key_sequences is a list of lists''' + for key_sequence in key_sequences: + _object = object + try: + for key in key_sequence: + _object = _object[key] + except (TypeError, IndexError, KeyError): + pass + else: + if not types or isinstance(_object, types): + return _object + else: + continue + return default def get_url(node): @@ -284,6 +322,7 @@ def parse_info_prepare_for_html(renderer, additional_info={}): return item +# TODO: Type checking def get_response(polymer_json): '''return response, error''' @@ -301,6 +340,123 @@ def get_response(polymer_json): return None, 'Failed to extract response' +list_types = { + 'sectionListRenderer', + 'itemSectionRenderer', + 'gridRenderer', + 'playlistVideoListRenderer', +} + +item_types = { + 'movieRenderer', + 'didYouMeanRenderer', + 'showingResultsForRenderer', + + 'videoRenderer', + 'compactVideoRenderer', + 'gridVideoRenderer', + 'playlistVideoRenderer', + + 'playlistRenderer', + 'compactPlaylistRenderer', + 'gridPlaylistRenderer', + + 'radioRenderer', + 'compactRadioRenderer', + 'gridRadioRenderer', + + 'showRenderer', + 'compactShowRenderer', + 'gridShowRenderer', + + + 'channelRenderer', + 'compactChannelRenderer', + 'gridChannelRenderer', + + 'channelAboutFullMetadataRenderer', +} + +def traverse_browse_renderer(renderer): + for tab in default_get(renderer, 'tabs', (), types=(list, tuple)): + tab_renderer = multi_default_multi_get(tab, ['tabRenderer'], ['expandableTabRenderer'], default=None, types=dict) + if tab_renderer is None: + continue + if tab_renderer.get('selected', False): + return default_get(tab_renderer, 'content', {}, types=(dict)) + print('Could not find tab with content') + return {} + +# these renderers contain one inside them +nested_renderer_dispatch = { + 'singleColumnBrowseResultsRenderer': traverse_browse_renderer, + 'twoColumnBrowseResultsRenderer': traverse_browse_renderer, + 'twoColumnSearchResultsRenderer': lambda renderer: default_get(renderer, 'primaryContents', {}, types=dict), +} + +def extract_items(response): + '''return items, ctoken''' + if 'continuationContents' in response: + # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something + for key, renderer_continuation in default_get(response, 'continuationContents', {}, types=dict).items(): + if key.endswith('Continuation'): # e.g. 
commentSectionContinuation, playlistVideoListContinuation + items = multi_default_multi_get(renderer_continuation, ['contents'], ['items'], default=None, types=(list, tuple)) + ctoken = default_multi_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation', default=None, types=str) + return items, ctoken + return [], None + elif 'contents' in response: + ctoken = None + items = [] + + iter_stack = collections.deque() + current_iter = iter(()) + + renderer = default_get(response, 'contents', {}, types=dict) + + while True: + # mode 1: dig into the current renderer + # Will stay in mode 1 (via continue) if a new renderer is found inside this one + # Otherwise, after finding that it is an item renderer, + # contains a list, or contains nothing, + # falls through into mode 2 to get a new renderer + if len(renderer) != 0: + key, value = list(renderer.items())[0] + + # has a list in it, add it to the iter stack + if key in list_types: + renderer_list = multi_default_multi_get(value, ['contents'], ['items'], default=(), types=(list, tuple)) + if renderer_list: + iter_stack.append(current_iter) + current_iter = iter(renderer_list) + + continuation = default_multi_get(value, 'continuations', 0, 'nextContinuationData', 'continuation', default=None, types=str) + if continuation: + ctoken = continuation + + # new renderer nested inside this one + elif key in nested_renderer_dispatch: + renderer = nested_renderer_dispatch[key](value) + continue # back to mode 1 + + # the renderer is an item + elif key in item_types: + items.append(renderer) + + + # mode 2: get a new renderer by iterating. + # goes up the stack for an iterator if one has been exhausted + while current_iter is not None: + try: + renderer = current_iter.__next__() + break + except StopIteration: + try: + current_iter = iter_stack.pop() # go back up the stack + except IndexError: + return items, ctoken + + else: + return [], None def extract_channel_info(polymer_json, tab): response, err = get_response(polymer_json) @@ -341,54 +497,21 @@ def extract_channel_info(polymer_json, tab): return info - # find the tab with content - # example channel where tabs do not have definite index: https://www.youtube.com/channel/UC4gQ8i3FD7YbhOgqUkeQEJg - # TODO: maybe use the 'selected' attribute for this? 
- if 'continuationContents' not in response: - tab_renderer = None - tab_content = None - for tab_json in response['contents']['twoColumnBrowseResultsRenderer']['tabs']: - try: - tab_renderer = tab_json['tabRenderer'] - except KeyError: - tab_renderer = tab_json['expandableTabRenderer'] + items, _ = extract_items(response) + if tab in ('videos', 'playlists', 'search'): + additional_info = {'author': info['channel_name'], 'author_url': 'https://www.youtube.com/channel/' + channel_id} + info['items'] = [renderer_info(renderer, additional_info) for renderer in items] + + elif tab == 'about': + for item in items: try: - tab_content = tab_renderer['content'] + channel_metadata = item['channelAboutFullMetadataRenderer'] break except KeyError: pass - else: # didn't break - raise Exception("No tabs found with content") - assert tab == tab_renderer['title'].lower() - - - # extract tab-specific info - if tab in ('videos', 'playlists', 'search'): # find the list of items - if 'continuationContents' in response: - try: - items = response['continuationContents']['gridContinuation']['items'] - except KeyError: - items = response['continuationContents']['sectionListContinuation']['contents'] # for search else: - contents = tab_content['sectionListRenderer']['contents'] - if 'itemSectionRenderer' in contents[0]: - item_section = contents[0]['itemSectionRenderer']['contents'][0] - try: - items = item_section['gridRenderer']['items'] - except KeyError: - if "messageRenderer" in item_section: - items = [] - else: - raise Exception('gridRenderer missing but messageRenderer not found') - else: - items = contents # for search - - additional_info = {'author': info['channel_name'], 'author_url': 'https://www.youtube.com/channel/' + channel_id} - info['items'] = [renderer_info(renderer, additional_info) for renderer in items] - - elif tab == 'about': - channel_metadata = tab_content['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'] - + info['error'] = 'Could not find channelAboutFullMetadataRenderer' + return info info['links'] = [] for link_json in channel_metadata.get('primaryLinks', ()): @@ -428,10 +551,9 @@ def extract_search_info(polymer_json): info['estimated_results'] = int(response['estimatedResults']) info['estimated_pages'] = ceil(info['estimated_results']/20) - # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that becomes the first "section" and the search results are in the second. 
So just join all of them for resiliency - results = [] - for section in response['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']: - results += section['itemSectionRenderer']['contents'] + + results, _ = extract_items(response) + info['items'] = [] info['corrections'] = {'type': None} @@ -491,12 +613,8 @@ def extract_playlist_info(polymer_json): if err: return {'error': err} info = {'error': None} - try: # first page - video_list = response['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] - first_page = True - except KeyError: # other pages - video_list = response['continuationContents']['playlistVideoListContinuation']['contents'] - first_page = False + first_page = 'continuationContents' not in response + video_list, _ = extract_items(response) info['items'] = [renderer_info(renderer) for renderer in video_list] @@ -539,12 +657,7 @@ def parse_comments_polymer(polymer_json): ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0] metadata = ctoken_metadata(ctoken) - try: - comments_raw = response['continuationContents']['commentSectionContinuation']['items'] - except KeyError: - comments_raw = response['continuationContents']['commentRepliesContinuation']['contents'] - - ctoken = default_multi_get(response, 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='') + comments_raw, ctoken = extract_items(response) comments = [] for comment_json in comments_raw: -- cgit v1.2.3 From e68ac26b4e2c216dad41e22da91067e2ddc80d00 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 27 Sep 2019 18:07:46 -0700 Subject: Extraction: Rename get_response to extract_response and check that returned type is dict --- youtube/yt_data_extract.py | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 892e73e..13d6ede 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -322,23 +322,14 @@ def parse_info_prepare_for_html(renderer, additional_info={}): return item -# TODO: Type checking -def get_response(polymer_json): +def extract_response(polymer_json): '''return response, error''' + response = multi_default_multi_get(polymer_json, [1, 'response'], ['response'], default=None, types=dict) + if response is None: + return None, 'Failed to extract response' + else: + return response, None - # responses returned for desktop version - try: - return polymer_json[1]['response'], None - except (TypeError, KeyError, IndexError): - pass - - # responses returned for mobile version - try: - return polymer_json['response'], None - except (TypeError, KeyError): - pass - - return None, 'Failed to extract response' list_types = { 'sectionListRenderer', @@ -459,7 +450,7 @@ def extract_items(response): return [], None def extract_channel_info(polymer_json, tab): - response, err = get_response(polymer_json) + response, err = extract_response(polymer_json) if err: return {'error': err} @@ -544,7 +535,7 @@ def extract_channel_info(polymer_json, tab): return info def extract_search_info(polymer_json): - response, err = get_response(polymer_json) + response, err = extract_response(polymer_json) if err: return {'error': err} info = {'error': None} @@ -595,7 +586,7 @@ def extract_search_info(polymer_json): return info def 
extract_playlist_metadata(polymer_json): - response, err = get_response(polymer_json) + response, err = extract_response(polymer_json) if err: return {'error': err} metadata = renderer_info(response['header']) @@ -609,7 +600,7 @@ def extract_playlist_metadata(polymer_json): return metadata def extract_playlist_info(polymer_json): - response, err = get_response(polymer_json) + response, err = extract_response(polymer_json) if err: return {'error': err} info = {'error': None} @@ -645,7 +636,7 @@ def ctoken_metadata(ctoken): def parse_comments_polymer(polymer_json): try: video_title = '' - response, err = get_response(polymer_json) + response, err = extract_response(polymer_json) if err: raise Exception(err) -- cgit v1.2.3 From 9abb83fdbc05294f186daeefff8c85cfda06b7d2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 27 Sep 2019 19:27:19 -0700 Subject: Extraction: Fix did_you_mean and showing_results_for --- youtube/search.py | 11 +++++++++++ youtube/templates/search.html | 6 +++--- youtube/yt_data_extract.py | 15 ++++----------- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/youtube/search.py b/youtube/search.py index ba40f0b..cb66744 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -83,6 +83,17 @@ def get_search_page(): yt_data_extract.prefix_urls(item_info) yt_data_extract.add_extra_html_info(item_info) + corrections = search_info['corrections'] + if corrections['type'] == 'did_you_mean': + corrected_query_string = request.args.to_dict(flat=False) + corrected_query_string['query'] = [corrections['corrected_query']] + corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) + elif corrections['type'] == 'showing_results_for': + no_autocorrect_query_string = request.args.to_dict(flat=False) + no_autocorrect_query_string['autocorrect'] = ['0'] + no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) + corrections['original_query_url'] = no_autocorrect_query_url + return flask.render_template('search.html', header_playlist_names = local_playlist.get_playlist_names(), query = query, diff --git a/youtube/templates/search.html b/youtube/templates/search.html index aef914a..8b803e7 100644 --- a/youtube/templates/search.html +++ b/youtube/templates/search.html @@ -29,10 +29,10 @@
Approximately {{ '{:,}'.format(estimated_results) }} results ({{ '{:,}'.format(estimated_pages) }} pages)
{% if corrections['type'] == 'showing_results_for' %} - - + + {% elif corrections['type'] == 'did_you_mean' %} - + {% endif %}
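For context, the corrected-query URLs are now assembled in the frontend from the data the extractor returns, rather than inside yt_data_extract. A minimal sketch of that flow, restating the search.py hunk in this patch (the function name and the url_origin parameter here are illustrative; the real code lives inline in get_search_page):

    import urllib.parse

    def build_correction_urls(corrections, request_args, url_origin):
        # corrections comes from yt_data_extract.extract_search_info();
        # request_args is the query string as a dict of lists,
        # e.g. {'query': ['pyhton'], 'page': ['1']}
        if corrections['type'] == 'did_you_mean':
            args = dict(request_args)
            args['query'] = [corrections['corrected_query']]
            corrections['corrected_query_url'] = (url_origin + '/search?'
                + urllib.parse.urlencode(args, doseq=True))
        elif corrections['type'] == 'showing_results_for':
            args = dict(request_args)
            args['autocorrect'] = ['0']
            corrections['original_query_url'] = (url_origin + '/search?'
                + urllib.parse.urlencode(args, doseq=True))
        return corrections
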
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 13d6ede..cccd679 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -554,27 +554,20 @@ def extract_search_info(polymer_json): continue if type == 'didYouMeanRenderer': renderer = renderer[type] - corrected_query_string = request.args.to_dict(flat=False) - corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] - corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) info['corrections'] = { 'type': 'did_you_mean', - 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - 'corrected_query_url': corrected_query_url, + 'corrected_query': renderer['correctedQueryEndpoint']['searchEndpoint']['query'], + 'corrected_query_text': renderer['correctedQuery']['runs'], } continue if type == 'showingResultsForRenderer': renderer = renderer[type] - no_autocorrect_query_string = request.args.to_dict(flat=False) - no_autocorrect_query_string['autocorrect'] = ['0'] - no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) info['corrections'] = { 'type': 'showing_results_for', - 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - 'original_query_url': no_autocorrect_query_url, - 'original_query': renderer['originalQuery']['simpleText'], + 'corrected_query_text': renderer['correctedQuery']['runs'], + 'original_query_text': renderer['originalQuery']['simpleText'], } continue -- cgit v1.2.3 From 4c07546e7a5e5882abdda896009b744e947df1c4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 17 Oct 2019 19:58:13 -0700 Subject: Extraction: Replace youtube-dl with custom-built watch page extraction --- youtube/templates/watch.html | 21 +- youtube/util.py | 9 +- youtube/watch.py | 154 +- youtube/yt_data_extract.py | 435 ++- youtube_dl/YoutubeDL.py | 2392 ------------ youtube_dl/__init__.py | 481 --- youtube_dl/__main__.py | 19 - youtube_dl/aes.py | 361 -- youtube_dl/cache.py | 96 - youtube_dl/compat.py | 3016 --------------- youtube_dl/downloader/__init__.py | 61 - youtube_dl/downloader/common.py | 389 -- youtube_dl/downloader/dash.py | 80 - youtube_dl/downloader/external.py | 354 -- youtube_dl/downloader/f4m.py | 438 --- youtube_dl/downloader/fragment.py | 268 -- youtube_dl/downloader/hls.py | 204 - youtube_dl/downloader/http.py | 354 -- youtube_dl/downloader/ism.py | 259 -- youtube_dl/downloader/rtmp.py | 214 -- youtube_dl/downloader/rtsp.py | 47 - youtube_dl/extractor/__init__.py | 46 - youtube_dl/extractor/adobepass.py | 1567 -------- youtube_dl/extractor/common.py | 2862 -------------- youtube_dl/extractor/commonmistakes.py | 50 - youtube_dl/extractor/commonprotocols.py | 60 - youtube_dl/extractor/extractors.py | 31 - youtube_dl/extractor/generic.py | 3335 ---------------- youtube_dl/extractor/openload.py | 379 -- youtube_dl/extractor/youtube.py | 3264 ---------------- .../extractor/youtube_unmodified_reference.py | 3192 ---------------- youtube_dl/jsinterp.py | 262 -- youtube_dl/options.py | 916 ----- youtube_dl/postprocessor/__init__.py | 40 - youtube_dl/postprocessor/common.py | 69 - youtube_dl/postprocessor/embedthumbnail.py | 93 - youtube_dl/postprocessor/execafterdownload.py | 31 - youtube_dl/postprocessor/ffmpeg.py | 613 --- youtube_dl/postprocessor/metadatafromtitle.py | 48 - youtube_dl/postprocessor/xattrpp.py | 79 - youtube_dl/socks.py | 273 -- youtube_dl/swfinterp.py 
| 834 ---- youtube_dl/update.py | 187 - youtube_dl/utils.py | 3990 -------------------- youtube_dl/version.py | 3 - 45 files changed, 523 insertions(+), 31353 deletions(-) delete mode 100644 youtube_dl/YoutubeDL.py delete mode 100644 youtube_dl/__init__.py delete mode 100644 youtube_dl/__main__.py delete mode 100644 youtube_dl/aes.py delete mode 100644 youtube_dl/cache.py delete mode 100644 youtube_dl/compat.py delete mode 100644 youtube_dl/downloader/__init__.py delete mode 100644 youtube_dl/downloader/common.py delete mode 100644 youtube_dl/downloader/dash.py delete mode 100644 youtube_dl/downloader/external.py delete mode 100644 youtube_dl/downloader/f4m.py delete mode 100644 youtube_dl/downloader/fragment.py delete mode 100644 youtube_dl/downloader/hls.py delete mode 100644 youtube_dl/downloader/http.py delete mode 100644 youtube_dl/downloader/ism.py delete mode 100644 youtube_dl/downloader/rtmp.py delete mode 100644 youtube_dl/downloader/rtsp.py delete mode 100644 youtube_dl/extractor/__init__.py delete mode 100644 youtube_dl/extractor/adobepass.py delete mode 100644 youtube_dl/extractor/common.py delete mode 100644 youtube_dl/extractor/commonmistakes.py delete mode 100644 youtube_dl/extractor/commonprotocols.py delete mode 100644 youtube_dl/extractor/extractors.py delete mode 100644 youtube_dl/extractor/generic.py delete mode 100644 youtube_dl/extractor/openload.py delete mode 100644 youtube_dl/extractor/youtube.py delete mode 100644 youtube_dl/extractor/youtube_unmodified_reference.py delete mode 100644 youtube_dl/jsinterp.py delete mode 100644 youtube_dl/options.py delete mode 100644 youtube_dl/postprocessor/__init__.py delete mode 100644 youtube_dl/postprocessor/common.py delete mode 100644 youtube_dl/postprocessor/embedthumbnail.py delete mode 100644 youtube_dl/postprocessor/execafterdownload.py delete mode 100644 youtube_dl/postprocessor/ffmpeg.py delete mode 100644 youtube_dl/postprocessor/metadatafromtitle.py delete mode 100644 youtube_dl/postprocessor/xattrpp.py delete mode 100644 youtube_dl/socks.py delete mode 100644 youtube_dl/swfinterp.py delete mode 100644 youtube_dl/update.py delete mode 100644 youtube_dl/utils.py delete mode 100644 youtube_dl/version.py diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html index 14e953b..e97b638 100644 --- a/youtube/templates/watch.html +++ b/youtube/templates/watch.html @@ -187,8 +187,17 @@ .format-ext{ width: 60px; } - .format-res{ - width:90px; + .format-video-quality{ + width: 140px; + } + .format-audio-quality{ + width: 120px; + } + .format-file-size{ + width: 80px; + } + .format-codecs{ + width: 120px; } {% endblock style %} @@ -227,8 +236,10 @@
   {{ format['ext'] }}
-  {{ format['resolution'] }}
-  {{ format['note'] }}
+  {{ format['video_quality'] }}
+  {{ format['audio_quality'] }}
+  {{ format['file_size'] }}
+  {{ format['codecs'] }}
@@ -238,7 +249,7 @@ - {{ description }} + {{ common_elements.text_runs(description) }}
{% if music_list.__len__() != 0 %}
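The new columns in the formats table above correspond to the per-format dict assembled in youtube/watch.py later in this patch. Roughly, one entry of download_formats looks like the following sketch; every value is invented for illustration and the helper names refer to the functions added further down:

    # Hypothetical download_formats entry; values are made up.
    example_download_format = {
        'url': 'https://example.invalid/videoplayback',  # placeholder, not a real URL
        'ext': 'mp4',
        'video_quality': '1280x720',       # from video_quality_string()
        'audio_quality': '192k 44100 Hz',  # from audio_quality_string()
        'file_size': '25.32MiB',           # from format_bytes()
        'codecs': 'aac, h264',
    }
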
diff --git a/youtube/util.py b/youtube/util.py index 5b63e2a..474e7b5 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -176,7 +176,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja return content, response return content -mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1' +mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36' mobile_ua = (('User-Agent', mobile_user_agent),) desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0' desktop_ua = (('User-Agent', desktop_user_agent),) @@ -312,3 +312,10 @@ def uppercase_escape(s): def prefix_url(url): url = url.lstrip('/') # some urls have // before them, which has a special meaning return '/' + url + +def left_remove(string, substring): + '''removes substring from the start of string, if present''' + if string.startswith(substring): + return string[len(substring):] + return string + diff --git a/youtube/watch.py b/youtube/watch.py index 41c90e4..a5e0759 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -5,49 +5,15 @@ import settings from flask import request import flask -from youtube_dl.YoutubeDL import YoutubeDL -from youtube_dl.extractor.youtube import YoutubeError import json import html import gevent import os +import math +import traceback + -def get_related_items(info): - results = [] - for item in info['related_vids']: - if 'list' in item: # playlist: - result = watch_page_related_playlist_info(item) - else: - result = watch_page_related_video_info(item) - yt_data_extract.prefix_urls(result) - yt_data_extract.add_extra_html_info(result) - results.append(result) - return results - - -# json of related items retrieved directly from the watch page has different names for everything -# converts these to standard names -def watch_page_related_video_info(item): - result = {key: item[key] for key in ('id', 'title', 'author')} - result['duration'] = util.seconds_to_timestamp(item['length_seconds']) - try: - result['views'] = item['short_view_count_text'] - except KeyError: - result['views'] = '' - result['thumbnail'] = util.get_thumbnail_url(item['id']) - result['type'] = 'video' - return result - -def watch_page_related_playlist_info(item): - return { - 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+", - 'title': item['playlist_title'], - 'id': item['list'], - 'first_video_id': item['video_id'], - 'thumbnail': util.get_thumbnail_url(item['video_id']), - 'type': 'playlist', - } def get_video_sources(info): video_sources = [] @@ -55,9 +21,10 @@ def get_video_sources(info): max_resolution = 360 else: max_resolution = settings.default_resolution - for format in info['formats']: - if format['acodec'] != 'none' and format['vcodec'] != 'none' and format['height'] <= max_resolution: + if not all(attr in format for attr in ('height', 'width', 'ext', 'url')): + continue + if 'acodec' in format and 'vcodec' in format and format['height'] <= max_resolution: video_sources.append({ 'src': format['url'], 'type': 'video/' + format['ext'], @@ -134,14 +101,57 @@ def get_ordered_music_list_attributes(music_list): return ordered_attributes +headers = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '2'), + ('X-YouTube-Client-Version', '2.20180830'), +) + util.mobile_ua -def extract_info(downloader, 
*args, **kwargs): +def extract_info(video_id): + polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1', headers=headers, debug_name='watch') try: - return downloader.extract_info(*args, **kwargs) - except YoutubeError as e: - return str(e) - - + polymer_json = json.loads(polymer_json) + except json.decoder.JSONDecodeError: + traceback.print_exc() + return {'error': 'Failed to parse json response'} + return yt_data_extract.extract_watch_info(polymer_json) + +def video_quality_string(format): + if 'vcodec' in format: + result =str(format.get('width', '?')) + 'x' + str(format.get('height', '?')) + if 'fps' in format: + result += ' ' + format['fps'] + 'fps' + return result + elif 'acodec' in format: + return 'audio only' + + return '?' + +def audio_quality_string(format): + if 'acodec' in format: + result = str(format.get('abr', '?')) + 'k' + if 'audio_sample_rate' in format: + result += ' ' + str(format['audio_sample_rate']) + ' Hz' + return result + elif 'vcodec' in format: + return 'video only' + + return '?' + +# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py +def format_bytes(bytes): + if bytes is None: + return 'N/A' + if type(bytes) is str: + bytes = float(bytes) + if bytes == 0.0: + exponent = 0 + else: + exponent = int(math.log(bytes, 1024.0)) + suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent] + converted = float(bytes) / float(1024 ** exponent) + return '%.2f%s' % (converted, suffix) @yt_app.route('/watch') @@ -152,38 +162,26 @@ def get_watch_page(): flask.abort(flask.Response('Incomplete video id (too short): ' + video_id)) lc = request.args.get('lc', '') - if settings.route_tor: - proxy = 'socks5://127.0.0.1:9150/' - else: - proxy = '' - yt_dl_downloader = YoutubeDL(params={'youtube_include_dash_manifest':False, 'proxy':proxy}) tasks = ( gevent.spawn(comments.video_comments, video_id, int(settings.default_comment_sorting), lc=lc ), - gevent.spawn(extract_info, yt_dl_downloader, "https://www.youtube.com/watch?v=" + video_id, download=False) + gevent.spawn(extract_info, video_id) ) gevent.joinall(tasks) comments_info, info = tasks[0].value, tasks[1].value - if isinstance(info, str): # youtube error - return flask.render_template('error.html', error_message = info) + if info['error']: + return flask.render_template('error.html', error_message = info['error']) video_info = { - "duration": util.seconds_to_timestamp(info["duration"]), + "duration": util.seconds_to_timestamp(info["duration"] or 0), "id": info['id'], "title": info['title'], - "author": info['uploader'], + "author": info['author'], } - upload_year = info["upload_date"][0:4] - upload_month = info["upload_date"][4:6] - upload_day = info["upload_date"][6:8] - upload_date = upload_month + "/" + upload_day + "/" + upload_year - - if settings.related_videos_mode: - related_videos = get_related_items(info) - else: - related_videos = [] - + for item in info['related_videos']: + yt_data_extract.prefix_urls(item) + yt_data_extract.add_extra_html_info(item) if settings.gather_googlevideo_domains: with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: @@ -195,23 +193,29 @@ def get_watch_page(): download_formats = [] for format in info['formats']: + if 'acodec' in format and 'vcodec' in format: + codecs_string = format['acodec'] + ', ' + format['vcodec'] + else: + codecs_string = format.get('acodec') or format.get('vcodec') or '?' 
download_formats.append({ 'url': format['url'], - 'ext': format['ext'], - 'resolution': yt_dl_downloader.format_resolution(format), - 'note': yt_dl_downloader._format_note(format), + 'ext': format.get('ext', '?'), + 'audio_quality': audio_quality_string(format), + 'video_quality': video_quality_string(format), + 'file_size': format_bytes(format['file_size']), + 'codecs': codecs_string, }) video_sources = get_video_sources(info) - video_height = video_sources[0]['height'] - + video_height = yt_data_extract.default_multi_get(video_sources, 0, 'height', default=360) + video_width = yt_data_extract.default_multi_get(video_sources, 0, 'width', default=640) # 1 second per pixel, or the actual video width - theater_video_target_width = max(640, info['duration'], video_sources[0]['width']) + theater_video_target_width = max(640, info['duration'] or 0, video_width) return flask.render_template('watch.html', header_playlist_names = local_playlist.get_playlist_names(), - uploader_channel_url = '/' + info['uploader_url'], - upload_date = upload_date, + uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '', + upload_date = info['published_date'], views = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)), likes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)), dislikes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)), @@ -219,7 +223,7 @@ def get_watch_page(): video_info = json.dumps(video_info), video_sources = video_sources, subtitle_sources = get_subtitle_sources(info), - related = related_videos, + related = info['related_videos'], music_list = info['music_list'], music_attributes = get_ordered_music_list_attributes(info['music_list']), comments_info = comments_info, @@ -232,7 +236,7 @@ def get_watch_page(): theater_video_target_width = theater_video_target_width, title = info['title'], - uploader = info['uploader'], + uploader = info['author'], description = info['description'], unlisted = info['unlisted'], ) diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index cccd679..81604fd 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -6,6 +6,7 @@ import re import urllib import collections from math import ceil +import traceback # videos (all of type str): @@ -36,8 +37,112 @@ from math import ceil # size # first_video_id - - +# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py +_formats = { + '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, + '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, + '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, + '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well + '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, + '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 
'h264'}, + '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + + + # 3D videos + '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + + # Apple HTTP Live Streaming + '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264'}, + '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264'}, + '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264'}, + '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264'}, + '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264'}, + '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264'}, + + # DASH mp4 video + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, + + # 
Dash mp4 audio + '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, + '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, + '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, + + # Dash webm + '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + # itag 272 videos are either 3840x2160 (e.g. 
RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + + # Dash webm audio + '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, + '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, + + # Dash webm audio with opus inside + '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, + '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, + '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, + + # RTMP (unnamed) + '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'vcodec': 'av01.0.05M.08'}, + '395': {'vcodec': 'av01.0.05M.08'}, + '396': {'vcodec': 'av01.0.05M.08'}, + '397': {'vcodec': 'av01.0.05M.08'}, +} def get_plain_text(node): @@ -59,7 +164,7 @@ def format_text_runs(runs): result += html.escape(text_run["text"]) return result -def default_get(object, key, default, types=()): +def default_get(object, key, default=None, types=()): '''Like dict.get(), but returns default if the result doesn't match one of the types. Also works for indexing lists.''' try: @@ -74,7 +179,7 @@ def default_get(object, key, default, types=()): -def default_multi_get(object, *keys, default, types=()): +def default_multi_get(object, *keys, default=None, types=()): '''Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors. 
If types is given and the result doesn't match one of those types, default is returned''' @@ -106,6 +211,11 @@ def multi_default_multi_get(object, *key_sequences, default=None, types=()): continue return default +def remove_redirect(url): + if re.fullmatch(r'(((https?:)?//)?(www.)?youtube.com)?/redirect\?.*', url) is not None: # youtube puts these on external links to do tracking + query_string = url[url.find('?')+1: ] + return urllib.parse.parse_qs(query_string)['q'][0] + return url def get_url(node): try: @@ -239,9 +349,9 @@ def renderer_info(renderer, additional_info={}): type = list(renderer.keys())[0] renderer = renderer[type] info = {} - if type == 'itemSectionRenderer': + if type in ('itemSectionRenderer', 'compactAutoplayRenderer'): return renderer_info(renderer['contents'][0], additional_info) - + if type in ('movieRenderer', 'clarificationRenderer'): info['type'] = 'unsupported' return info @@ -345,6 +455,7 @@ item_types = { 'videoRenderer', 'compactVideoRenderer', + 'compactAutoplayRenderer', 'gridVideoRenderer', 'playlistVideoRenderer', @@ -378,6 +489,11 @@ def traverse_browse_renderer(renderer): print('Could not find tab with content') return {} +def traverse_standard_list(renderer): + renderer_list = multi_default_multi_get(renderer, ['contents'], ['items'], default=(), types=(list, tuple)) + continuation = default_multi_get(renderer, 'continuations', 0, 'nextContinuationData', 'continuation') + return renderer_list, continuation + # these renderers contain one inside them nested_renderer_dispatch = { 'singleColumnBrowseResultsRenderer': traverse_browse_renderer, @@ -385,7 +501,16 @@ nested_renderer_dispatch = { 'twoColumnSearchResultsRenderer': lambda renderer: default_get(renderer, 'primaryContents', {}, types=dict), } -def extract_items(response): +# these renderers contain a list of renderers in side them +nested_renderer_list_dispatch = { + 'sectionListRenderer': traverse_standard_list, + 'itemSectionRenderer': traverse_standard_list, + 'gridRenderer': traverse_standard_list, + 'playlistVideoListRenderer': traverse_standard_list, + 'singleColumnWatchNextResults': lambda r: (default_multi_get(r, 'results', 'results', 'contents', default=[], types=(list, tuple)), None), +} + +def extract_items(response, item_types=item_types): '''return items, ctoken''' if 'continuationContents' in response: # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something @@ -414,13 +539,11 @@ def extract_items(response): key, value = list(renderer.items())[0] # has a list in it, add it to the iter stack - if key in list_types: - renderer_list = multi_default_multi_get(value, ['contents'], ['items'], default=(), types=(list, tuple)) + if key in nested_renderer_list_dispatch: + renderer_list, continuation = nested_renderer_list_dispatch[key](value) if renderer_list: iter_stack.append(current_iter) current_iter = iter(renderer_list) - - continuation = default_multi_get(value, 'continuations', 0, 'nextContinuationData', 'continuation', default=None, types=str) if continuation: ctoken = continuation @@ -506,10 +629,7 @@ def extract_channel_info(polymer_json, tab): info['links'] = [] for link_json in channel_metadata.get('primaryLinks', ()): - url = link_json['navigationEndpoint']['urlEndpoint']['url'] - if url.startswith('/redirect'): # youtube puts these on external links to do tracking - query_string = url[url.find('?')+1: ] - url = urllib.parse.parse_qs(query_string)['q'][0] + url = 
remove_redirect(link_json['navigationEndpoint']['urlEndpoint']['url']) text = get_plain_text(link_json['title']) @@ -699,5 +819,290 @@ def parse_comments_polymer(polymer_json): 'sort': metadata['sort'], } +def check_missing_keys(object, *key_sequences): + for key_sequence in key_sequences: + _object = object + try: + for key in key_sequence: + _object = object[key] + except (KeyError, IndexError, TypeError): + return 'Could not find ' + key + + return None + +def extract_plain_text(node, default=None): + if isinstance(node, str): + return node + + try: + return node['simpleText'] + except (KeyError, TypeError): + pass + + try: + return ''.join(text_run['text'] for text_run in node['runs']) + except (KeyError, TypeError): + pass + + return default + +def extract_formatted_text(node): + try: + result = [] + runs = node['runs'] + for run in runs: + url = default_multi_get(run, 'navigationEndpoint', 'urlEndpoint', 'url') + if url is not None: + run['url'] = remove_redirect(url) + run['text'] = run['url'] # youtube truncates the url text, we don't want that nonsense + return runs + except (KeyError, TypeError): + traceback.print_exc() + pass + + try: + return [{'text': node['simpleText']}] + except (KeyError, TypeError): + pass + + return [] + +def extract_integer(string): + if not isinstance(string, str): + return None + match = re.search(r'(\d+)', string.replace(',', '')) + if match is None: + return None + try: + return int(match.group(1)) + except ValueError: + return None + +def extract_metadata_row_info(video_renderer_info): + # extract category and music list + info = { + 'category': None, + 'music_list': [], + } + + current_song = {} + for row in default_multi_get(video_renderer_info, 'metadataRowContainer', 'metadataRowContainerRenderer', 'rows', default=[]): + row_title = extract_plain_text(default_multi_get(row, 'metadataRowRenderer', 'title'), default='') + row_content = extract_plain_text(default_multi_get(row, 'metadataRowRenderer', 'contents', 0)) + if row_title == 'Category': + info['category'] = row_content + elif row_title in ('Song', 'Music'): + if current_song: + info['music_list'].append(current_song) + current_song = {'title': row_content} + elif row_title == 'Artist': + current_song['artist'] = row_content + elif row_title == 'Album': + current_song['album'] = row_content + elif row_title == 'Writers': + current_song['writers'] = row_content + elif row_title.startswith('Licensed'): + current_song['licensor'] = row_content + if current_song: + info['music_list'].append(current_song) + return info + +def extract_watch_info_mobile(top_level): + info = {} + microformat = default_multi_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={}) + + info['allowed_countries'] = microformat.get('availableCountries', []) + info['published_date'] = microformat.get('publishDate') + + response = top_level.get('response', {}) + + # video info from metadata renderers + items, _ = extract_items(response, item_types={'slimVideoMetadataRenderer'}) + if items: + video_info = items[0]['slimVideoMetadataRenderer'] + else: + print('Failed to extract video metadata') + video_info = {} + + info.update(extract_metadata_row_info(video_info)) + #info['description'] = extract_formatted_text(video_info.get('description')) + info['like_count'] = None + info['dislike_count'] = None + for button in video_info.get('buttons', ()): + button_renderer = button.get('slimMetadataToggleButtonRenderer', {}) + + # all the digits can be found in the accessibility data + count = 
extract_integer(default_multi_get(button_renderer, 'button', 'toggleButtonRenderer', 'defaultText', 'accessibility', 'accessibilityData', 'label')) + + # this count doesn't have all the digits, it's like 53K for instance + dumb_count = extract_integer(extract_plain_text(default_multi_get(button_renderer, 'button', 'toggleButtonRenderer', 'defaultText'))) + + # the accessibility text will be "No likes" or "No dislikes" or something like that, but dumb count will be 0 + if dumb_count == 0: + count = 0 + + if 'isLike' in button_renderer: + info['like_count'] = count + elif 'isDislike' in button_renderer: + info['dislike_count'] = count + + # comment section info + items, _ = extract_items(response, item_types={'commentSectionRenderer'}) + if items: + comment_info = items[0]['commentSectionRenderer'] + comment_count_text = extract_plain_text(default_multi_get(comment_info, 'header', 'commentSectionHeaderRenderer', 'countText')) + if comment_count_text == 'Comments': # just this with no number, means 0 comments + info['comment_count'] = 0 + else: + info['comment_count'] = extract_integer(comment_count_text) + info['comments_disabled'] = False + else: # no comment section present means comments are disabled + info['comment_count'] = 0 + info['comments_disabled'] = True + + # related videos + related, _ = extract_items(response) + info['related_videos'] = [renderer_info(renderer) for renderer in related] + + return info + +month_abbreviations = {'jan':'1', 'feb':'2', 'mar':'3', 'apr':'4', 'may':'5', 'jun':'6', 'jul':'7', 'aug':'8', 'sep':'9', 'oct':'10', 'nov':'11', 'dec':'12'} +def extract_watch_info_desktop(top_level): + info = { + 'comment_count': None, + 'comments_disabled': None, + 'allowed_countries': None, + } + + video_info = {} + for renderer in default_multi_get(top_level, 'response', 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', default=()): + if renderer and list(renderer.keys())[0] in ('videoPrimaryInfoRenderer', 'videoSecondaryInfoRenderer'): + video_info.update(list(renderer.values())[0]) + + info.update(extract_metadata_row_info(video_info)) + #info['description'] = extract_formatted_text(video_info.get('description', None)) + info['published_date'] = None + date_text = extract_plain_text(video_info.get('dateText', None)) + if date_text is not None: + date_text = util.left_remove(date_text.lower(), 'published on ').replace(',', '') + parts = date_text.split() + if len(parts) == 3: + month, day, year = date_text.split() + month = month_abbreviations.get(month[0:3]) # slicing in case they start writing out the full month name + if month and (re.fullmatch(r'\d\d?', day) is not None) and (re.fullmatch(r'\d{4}', year) is not None): + info['published_date'] = year + '-' + month + '-' + day + + likes_dislikes = default_multi_get(video_info, 'sentimentBar', 'sentimentBarRenderer', 'tooltip', default='').split('/') + if len(likes_dislikes) == 2: + info['like_count'] = extract_integer(likes_dislikes[0]) + info['dislike_count'] = extract_integer(likes_dislikes[1]) + else: + info['like_count'] = None + info['dislike_count'] = None + + #info['title'] = extract_plain_text(video_info.get('title', None)) + #info['author'] = extract_plain_text(default_multi_get(video_info, 'owner', 'videoOwnerRenderer', 'title')) + #info['author_id'] = default_multi_get(video_info, 'owner', 'videoOwnerRenderer', 'navigationEndpoint', 'browseEndpoint', 'browseId') + #info['view_count'] = extract_integer(extract_plain_text(default_multi_get(video_info, 'viewCount', 
'videoViewCountRenderer', 'viewCount'))) + + related = default_multi_get(top_level, 'response', 'contents', 'twoColumnWatchNextResults', 'secondaryResults', 'secondaryResults', 'results', default=[]) + info['related_videos'] = [renderer_info(renderer) for renderer in related] + + return info + + +def extract_watch_info(polymer_json): + info = {'playability_error': None, 'error': None} + + if isinstance(polymer_json, dict): + top_level = polymer_json + elif isinstance(polymer_json, (list, tuple)): + top_level = {} + for page_part in polymer_json: + if not isinstance(page_part, dict): + return {'error': 'Invalid page part'} + top_level.update(page_part) + else: + return {'error': 'Invalid top level polymer data'} + + error = check_missing_keys(top_level, + ['playerResponse'], + ) + if error: + return {'error': error} + + error = check_missing_keys(top_level, + ['player', 'args'], + ['player', 'assets', 'js'], + ) + if error: + info['playability_error'] = error + + + player_args = default_multi_get(top_level, 'player', 'args', default={}) + parsed_formats = [] + + if 'url_encoded_fmt_stream_map' in player_args: + string_formats = player_args['url_encoded_fmt_stream_map'].split(',') + parsed_formats += [dict(urllib.parse.parse_qsl(fmt_string)) for fmt_string in string_formats if fmt_string] + + if 'adaptive_fmts' in player_args: + string_formats = player_args['adaptive_fmts'].split(',') + parsed_formats += [dict(urllib.parse.parse_qsl(fmt_string)) for fmt_string in string_formats if fmt_string] + + info['formats'] = [] + + for parsed_fmt in parsed_formats: + # start with defaults from the big table at the top + if 'itag' in parsed_fmt: + fmt = _formats.get(parsed_fmt['itag'], {}).copy() + else: + fmt = {} + + # then override them + fmt.update(parsed_fmt) + try: + fmt['width'], fmt['height'] = map(int, fmt['size'].split('x')) + except (KeyError, ValueError, TypeError): + pass + + fmt['file_size'] = None + if 'clen' in fmt: + fmt['file_size'] = int(fmt.get('clen')) + else: + match = re.search(r'&clen=(\d+)', fmt.get('url')) + if match: + fmt['file_size'] = int(match.group(1)) + info['formats'].append(fmt) + + info['base_js'] = default_multi_get(top_level, 'player', 'assets', 'js') + if info['base_js']: + info['base_js'] = normalize_url(info['base_js']) + + mobile = 'singleColumnWatchNextResults' in default_multi_get(top_level, 'response', 'contents', default={}) + if mobile: + info.update(extract_watch_info_mobile(top_level)) + else: + info.update(extract_watch_info_desktop(top_level)) + + # stuff from videoDetails + video_details = default_multi_get(top_level, 'playerResponse', 'videoDetails', default={}) + info['title'] = extract_plain_text(video_details.get('title')) + info['duration'] = extract_integer(video_details.get('lengthSeconds')) + info['view_count'] = extract_integer(video_details.get('viewCount')) + # videos with no description have a blank string + info['description'] = video_details.get('shortDescription') + info['id'] = video_details.get('videoId') + info['author'] = video_details.get('author') + info['author_id'] = video_details.get('channelId') + info['live'] = video_details.get('isLiveContent') + info['unlisted'] = not video_details.get('isCrawlable', True) + info['tags'] = video_details.get('keywords', []) + + # other stuff + info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None + info['subtitles'] = {} # TODO + + return info diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py deleted file mode 100644 index 
38ba43a..0000000 --- a/youtube_dl/YoutubeDL.py +++ /dev/null @@ -1,2392 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from __future__ import absolute_import, unicode_literals - -import collections -import contextlib -import copy -import datetime -import errno -import fileinput -import io -import itertools -import json -import locale -import operator -import os -import platform -import re -import shutil -import subprocess -import socket -import sys -import time -import tokenize -import traceback -import random - -from string import ascii_letters - -from .compat import ( - compat_basestring, - compat_cookiejar, - compat_get_terminal_size, - compat_http_client, - compat_kwargs, - compat_numeric_types, - compat_os_name, - compat_str, - compat_tokenize_tokenize, - compat_urllib_error, - compat_urllib_request, - compat_urllib_request_DataHandler, -) -from .utils import ( - age_restricted, - args_to_str, - ContentTooShortError, - date_from_str, - DateRange, - DEFAULT_OUTTMPL, - determine_ext, - determine_protocol, - DownloadError, - encode_compat_str, - encodeFilename, - error_to_compat_str, - expand_path, - ExtractorError, - format_bytes, - formatSeconds, - GeoRestrictedError, - int_or_none, - ISO3166Utils, - locked_file, - make_HTTPS_handler, - MaxDownloadsReached, - orderedSet, - PagedList, - parse_filesize, - PerRequestProxyHandler, - platform_name, - PostProcessingError, - preferredencoding, - prepend_extension, - register_socks_protocols, - render_table, - replace_extension, - SameFileError, - sanitize_filename, - sanitize_path, - sanitize_url, - sanitized_Request, - std_headers, - subtitles_filename, - UnavailableVideoError, - url_basename, - version_tuple, - write_json_file, - write_string, - YoutubeDLCookieProcessor, - YoutubeDLHandler, -) -from .cache import Cache -from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER -from .extractor.openload import PhantomJSwrapper -from .downloader import get_suitable_downloader -from .downloader.rtmp import rtmpdump_version -from .postprocessor import ( - FFmpegFixupM3u8PP, - FFmpegFixupM4aPP, - FFmpegFixupStretchedPP, - FFmpegMergerPP, - FFmpegPostProcessor, - get_postprocessor, -) -from .version import __version__ - -if compat_os_name == 'nt': - import ctypes - - -class YoutubeDL(object): - """YoutubeDL class. - - YoutubeDL objects are the ones responsible of downloading the - actual video file and writing it to disk if the user has requested - it, among some other tasks. In most cases there should be one per - program. As, given a video URL, the downloader doesn't know how to - extract all the needed information, task that InfoExtractors do, it - has to pass the URL to one of them. - - For this, YoutubeDL objects have a method that allows - InfoExtractors to be registered in a given order. When it is passed - a URL, the YoutubeDL object handles it to the first InfoExtractor it - finds that reports being able to handle it. The InfoExtractor extracts - all the information about the video or videos the URL refers to, and - YoutubeDL process the extracted information, possibly using a File - Downloader to download the video. - - YoutubeDL objects accept a lot of parameters. In order not to saturate - the object constructor with arguments, it receives a dictionary of - options instead. These options are available through the params - attribute for the InfoExtractors to use. The YoutubeDL also - registers itself as the downloader in charge for the InfoExtractors - that are added to it, so this is a "mutual registration". 
- - Available options: - - username: Username for authentication purposes. - password: Password for authentication purposes. - videopassword: Password for accessing a video. - ap_mso: Adobe Pass multiple-system operator identifier. - ap_username: Multiple-system operator account username. - ap_password: Multiple-system operator account password. - usenetrc: Use netrc for authentication instead. - verbose: Print additional info to stdout. - quiet: Do not print messages to stdout. - no_warnings: Do not print out anything for warnings. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forceid: Force printing ID. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - forceduration: Force printing duration. - forcejson: Force printing info_dict as JSON. - dump_single_json: Force printing the info_dict of the whole playlist - (or video) as a single JSON line. - simulate: Do not download the video files. - format: Video format code. See options.py for more information. - outtmpl: Template for output names. - restrictfilenames: Do not allow "&" and spaces in file names - ignoreerrors: Do not stop on download errors. - force_generic_extractor: Force downloader to use the generic extractor - nooverwrites: Prevent overwriting files. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - playlist_items: Specific indices of playlist to download. - playlistreverse: Download playlist items in reverse order. - playlistrandom: Download playlist items in random order. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logger: Log messages to a logging.Logger instance. - logtostderr: Log messages to stderr instead of stdout. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writeannotations: Write the video annotations to a .annotations.xml file - writethumbnail: Write the thumbnail image to a file - write_all_thumbnails: Write all thumbnail formats to files - writesubtitles: Write the video subtitles to a file - writeautomaticsub: Write the automatically generated subtitles to a file - allsubtitles: Downloads all the subtitles of the video - (requires writesubtitles or writeautomaticsub) - listsubtitles: Lists all available subtitles for the video - subtitlesformat: The format code for subtitles - subtitleslangs: List of languages of the subtitles to download - keepvideo: Keep the video file after post-processing - daterange: A DateRange object, download only if the upload_date is in the range. - skip_download: Skip the actual download of the video file - cachedir: Location of the cache files in the filesystem. - False to disable filesystem cache. - noplaylist: Download single video instead of a playlist if in doubt. - age_limit: An integer representing the user's age in years. - Unsuitable videos for the given age are skipped. - min_views: An integer representing the minimum view count the video - must have in order to not be skipped. - Videos without view count information are always - downloaded. None for no limit. - max_views: An integer representing the maximum view count. - Videos that are more popular than that are not - downloaded. - Videos without view count information are always - downloaded. None for no limit. - download_archive: File name of a file where all downloads are recorded. 
- Videos already present in the file are not downloaded - again. - cookiefile: File name where cookies should be read from and dumped to. - nocheckcertificate:Do not verify SSL certificates - prefer_insecure: Use HTTP instead of HTTPS to retrieve information. - At the moment, this is only supported by YouTube. - proxy: URL of the proxy server to use - geo_verification_proxy: URL of the proxy to use for IP address verification - on geo-restricted sites. - socket_timeout: Time to wait for unresponsive hosts, in seconds - bidi_workaround: Work around buggy terminals without bidirectional text - support, using fridibi - debug_printtraffic:Print out sent and received HTTP traffic - include_ads: Download ads as well - default_search: Prepend this string if an input url is not valid. - 'auto' for elaborate guessing - encoding: Use this encoding instead of the system-specified. - extract_flat: Do not resolve URLs, return the immediate result. - Pass in 'in_playlist' to only show this behavior for - playlist items. - postprocessors: A list of dictionaries, each with an entry - * key: The name of the postprocessor. See - youtube_dl/postprocessor/__init__.py for a list. - as well as any further keyword arguments for the - postprocessor. - progress_hooks: A list of functions that get called on download - progress, with a dictionary with the entries - * status: One of "downloading", "error", or "finished". - Check this first and ignore unknown values. - - If status is one of "downloading", or "finished", the - following properties may also be present: - * filename: The final filename (always present) - * tmpfilename: The filename we're currently writing to - * downloaded_bytes: Bytes on disk - * total_bytes: Size of the whole file, None if unknown - * total_bytes_estimate: Guess of the eventual file size, - None if unavailable. - * elapsed: The number of seconds since download started. - * eta: The estimated time in seconds, None if unknown - * speed: The download speed in bytes/second, None if - unknown - * fragment_index: The counter of the currently - downloaded video fragment. - * fragment_count: The number of fragments (= individual - files that will be merged) - - Progress hooks are guaranteed to be called at least once - (with status "finished") if the download is successful. - merge_output_format: Extension to use when merging formats. - fixup: Automatically correct known faults of the file. - One of: - - "never": do nothing - - "warn": only emit a warning - - "detect_or_warn": check whether we can do anything - about it, warn otherwise (default) - source_address: Client-side IP address to bind to. - call_home: Boolean, true iff we are allowed to contact the - youtube-dl servers for debugging. - sleep_interval: Number of seconds to sleep before each download when - used alone or a lower bound of a range for randomized - sleep before each download (minimum possible number - of seconds to sleep) when used along with - max_sleep_interval. - max_sleep_interval:Upper bound of a range for randomized sleep before each - download (maximum possible number of seconds to sleep). - Must only be used along with sleep_interval. - Actual sleep time will be a random float from range - [sleep_interval; max_sleep_interval]. - listformats: Print an overview of available video formats and exit. - list_thumbnails: Print a table of all thumbnails and exit. - match_filter: A function that gets called with the info_dict of - every video. - If it returns a message, the video is ignored. 
- If it returns None, the video is downloaded. - match_filter_func in utils.py is one example for this. - no_color: Do not emit color codes in output. - geo_bypass: Bypass geographic restriction via faking X-Forwarded-For - HTTP header - geo_bypass_country: - Two-letter ISO 3166-2 country code that will be used for - explicit geographic restriction bypassing via faking - X-Forwarded-For HTTP header - geo_bypass_ip_block: - IP range in CIDR notation that will be used similarly to - geo_bypass_country - - The following options determine which downloader is picked: - external_downloader: Executable of the external downloader to call. - None or unset for standard (built-in) downloader. - hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv - if True, otherwise use ffmpeg/avconv if False, otherwise - use downloader suggested by extractor if None. - - The following parameters are not used by YoutubeDL itself, they are used by - the downloader (see youtube_dl/downloader/common.py): - nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, - noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args, hls_use_mpegts, - http_chunk_size. - - The following options are used by the post processors: - prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, - otherwise prefer ffmpeg. - postprocessor_args: A list of additional command-line arguments for the - postprocessor. - - The following options are used by the Youtube extractor: - youtube_include_dash_manifest: If True (default), DASH manifests and related - data will be downloaded and processed by extractor. - You can reduce network I/O by disabling it if you don't - care about DASH. - """ - - _NUMERIC_FIELDS = set(( - 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', - 'timestamp', 'upload_year', 'upload_month', 'upload_day', - 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', - 'average_rating', 'comment_count', 'age_limit', - 'start_time', 'end_time', - 'chapter_number', 'season_number', 'episode_number', - 'track_number', 'disc_number', 'release_year', - 'playlist_index', - )) - - params = None - _ies = [] - _pps = [] - _download_retcode = None - _num_downloads = None - _screen_file = None - - def __init__(self, params=None, auto_init=True): - """Create a FileDownloader object with the given options.""" - if params is None: - params = {} - self._ies = [] - self._ies_instances = {} - self._pps = [] - self._progress_hooks = [] - self._download_retcode = 0 - self._num_downloads = 0 - self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] - self._err_file = sys.stderr - self.params = { - # Default parameters - 'nocheckcertificate': False, - } - self.params.update(params) - self.cache = Cache(self) - - def check_deprecated(param, option, suggestion): - if self.params.get(param) is not None: - self.report_warning( - '%s is deprecated. Use %s instead.' 
% (option, suggestion)) - return True - return False - - if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): - if self.params.get('geo_verification_proxy') is None: - self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] - - check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') - check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') - check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') - - if params.get('bidi_workaround', False): - try: - import pty - master, slave = pty.openpty() - width = compat_get_terminal_size().columns - if width is None: - width_args = [] - else: - width_args = ['-w', str(width)] - sp_kwargs = dict( - stdin=subprocess.PIPE, - stdout=slave, - stderr=self._err_file) - try: - self._output_process = subprocess.Popen( - ['bidiv'] + width_args, **sp_kwargs - ) - except OSError: - self._output_process = subprocess.Popen( - ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) - self._output_channel = os.fdopen(master, 'rb') - except OSError as ose: - if ose.errno == errno.ENOENT: - self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') - else: - raise - - if (sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and - not params.get('restrictfilenames', False)): - # Unicode filesystem API will throw errors (#1474, #13027) - self.report_warning( - 'Assuming --restrict-filenames since file system encoding ' - 'cannot encode all characters. ' - 'Set the LC_ALL environment variable to fix this.') - self.params['restrictfilenames'] = True - - if isinstance(params.get('outtmpl'), bytes): - self.report_warning( - 'Parameter outtmpl is bytes, but should be a unicode string. ' - 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') - - self._setup_opener() - - if auto_init: - self.print_debug_header() - self.add_default_info_extractors() - - for pp_def_raw in self.params.get('postprocessors', []): - pp_class = get_postprocessor(pp_def_raw['key']) - pp_def = dict(pp_def_raw) - del pp_def['key'] - pp = pp_class(self, **compat_kwargs(pp_def)) - self.add_post_processor(pp) - - for ph in self.params.get('progress_hooks', []): - self.add_progress_hook(ph) - - register_socks_protocols() - - def warn_if_short_id(self, argv): - # short YouTube ID starting with dash? - idxs = [ - i for i, a in enumerate(argv) - if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] - if idxs: - correct_argv = ( - ['youtube-dl'] + - [a for i, a in enumerate(argv) if i not in idxs] + - ['--'] + [argv[i] for i in idxs] - ) - self.report_warning( - 'Long argument string detected. ' - 'Use -- to separate parameters and URLs, like this:\n%s\n' % - args_to_str(correct_argv)) - - def add_info_extractor(self, ie): - """Add an InfoExtractor object to the end of the list.""" - self._ies.append(ie) - if not isinstance(ie, type): - self._ies_instances[ie.ie_key()] = ie - ie.set_downloader(self) - - def get_info_extractor(self, ie_key): - """ - Get an instance of an IE with name ie_key, it will try to get one from - the _ies list, if there's no instance it will create a new one and add - it to the extractor list. 
- """ - ie = self._ies_instances.get(ie_key) - if ie is None: - ie = get_info_extractor(ie_key)() - self.add_info_extractor(ie) - return ie - - def add_default_info_extractors(self): - """ - Add the InfoExtractors returned by gen_extractors to the end of the list - """ - for ie in gen_extractor_classes(): - self.add_info_extractor(ie) - - def add_post_processor(self, pp): - """Add a PostProcessor object to the end of the chain.""" - self._pps.append(pp) - pp.set_downloader(self) - - def add_progress_hook(self, ph): - """Add the progress hook (currently only for the file downloader)""" - self._progress_hooks.append(ph) - - def _bidi_workaround(self, message): - if not hasattr(self, '_output_channel'): - return message - - assert hasattr(self, '_output_process') - assert isinstance(message, compat_str) - line_count = message.count('\n') + 1 - self._output_process.stdin.write((message + '\n').encode('utf-8')) - self._output_process.stdin.flush() - res = ''.join(self._output_channel.readline().decode('utf-8') - for _ in range(line_count)) - return res[:-len('\n')] - - def to_screen(self, message, skip_eol=False): - """Print message to stdout if not in quiet mode.""" - return self.to_stdout(message, skip_eol, check_quiet=True) - - def _write_string(self, s, out=None): - write_string(s, out=out, encoding=self.params.get('encoding')) - - def to_stdout(self, message, skip_eol=False, check_quiet=False): - """Print message to stdout if not in quiet mode.""" - if self.params.get('logger'): - self.params['logger'].debug(message) - elif not check_quiet or not self.params.get('quiet', False): - message = self._bidi_workaround(message) - terminator = ['\n', ''][skip_eol] - output = message + terminator - - self._write_string(output, self._screen_file) - - def to_stderr(self, message): - """Print message to stderr.""" - assert isinstance(message, compat_str) - if self.params.get('logger'): - self.params['logger'].error(message) - else: - message = self._bidi_workaround(message) - output = message + '\n' - self._write_string(output, self._err_file) - - def to_console_title(self, message): - if not self.params.get('consoletitle', False): - return - if compat_os_name == 'nt': - if ctypes.windll.kernel32.GetConsoleWindow(): - # c_wchar_p() might not be necessary if `message` is - # already of type unicode() - ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) - elif 'TERM' in os.environ: - self._write_string('\033]0;%s\007' % message, self._screen_file) - - def save_console_title(self): - if not self.params.get('consoletitle', False): - return - if self.params.get('simulate', False): - return - if compat_os_name != 'nt' and 'TERM' in os.environ: - # Save the title on stack - self._write_string('\033[22;0t', self._screen_file) - - def restore_console_title(self): - if not self.params.get('consoletitle', False): - return - if self.params.get('simulate', False): - return - if compat_os_name != 'nt' and 'TERM' in os.environ: - # Restore the title from stack - self._write_string('\033[23;0t', self._screen_file) - - def __enter__(self): - self.save_console_title() - return self - - def __exit__(self, *args): - self.restore_console_title() - - if self.params.get('cookiefile') is not None: - self.cookiejar.save() - - def trouble(self, message=None, tb=None): - """Determine action to take when a download problem appears. 
- - Depending on if the downloader has been configured to ignore - download errors or not, this method may throw an exception or - not when errors are found, after printing the message. - - tb, if given, is additional traceback information. - """ - if message is not None: - self.to_stderr(message) - if self.params.get('verbose'): - if tb is None: - if sys.exc_info()[0]: # if .trouble has been called from an except block - tb = '' - if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: - tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += encode_compat_str(traceback.format_exc()) - else: - tb_data = traceback.format_list(traceback.extract_stack()) - tb = ''.join(tb_data) - self.to_stderr(tb) - if not self.params.get('ignoreerrors', False): - if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: - exc_info = sys.exc_info()[1].exc_info - else: - exc_info = sys.exc_info() - raise DownloadError(message, exc_info) - self._download_retcode = 1 - - def report_warning(self, message): - ''' - Print the message to stderr, it will be prefixed with 'WARNING:' - If stderr is a tty file the 'WARNING:' will be colored - ''' - if self.params.get('logger') is not None: - self.params['logger'].warning(message) - else: - if self.params.get('no_warnings'): - return - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;33mWARNING:\033[0m' - else: - _msg_header = 'WARNING:' - warning_message = '%s %s' % (_msg_header, message) - self.to_stderr(warning_message) - - def report_error(self, message, tb=None): - ''' - Do the same as trouble, but prefixes the message with 'ERROR:', colored - in red if stderr is a tty file. - ''' - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;31mERROR:\033[0m' - else: - _msg_header = 'ERROR:' - error_message = '%s %s' % (_msg_header, message) - self.trouble(error_message, tb) - - def report_file_already_downloaded(self, file_name): - """Report file has already been fully downloaded.""" - try: - self.to_screen('[download] %s has already been downloaded' % file_name) - except UnicodeEncodeError: - self.to_screen('[download] The file has already been downloaded') - - def prepare_filename(self, info_dict): - """Generate the output filename.""" - try: - template_dict = dict(info_dict) - - template_dict['epoch'] = int(time.time()) - autonumber_size = self.params.get('autonumber_size') - if autonumber_size is None: - autonumber_size = 5 - template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads - if template_dict.get('resolution') is None: - if template_dict.get('width') and template_dict.get('height'): - template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) - elif template_dict.get('height'): - template_dict['resolution'] = '%sp' % template_dict['height'] - elif template_dict.get('width'): - template_dict['resolution'] = '%dx?' 
% template_dict['width']
-
-            sanitize = lambda k, v: sanitize_filename(
-                compat_str(v),
-                restricted=self.params.get('restrictfilenames'),
-                is_id=(k == 'id' or k.endswith('_id')))
-            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
-                                 for k, v in template_dict.items()
-                                 if v is not None and not isinstance(v, (list, tuple, dict)))
-            template_dict = collections.defaultdict(lambda: 'NA', template_dict)
-
-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-
-            # For fields playlist_index and autonumber convert all occurrences
-            # of %(field)s to %(field)0Nd for backward compatibility
-            field_size_compat_map = {
-                'playlist_index': len(str(template_dict['n_entries'])),
-                'autonumber': autonumber_size,
-            }
-            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
-            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
-            if mobj:
-                outtmpl = re.sub(
-                    FIELD_SIZE_COMPAT_RE,
-                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
-                    outtmpl)
-
-            # Missing numeric fields used together with integer presentation types
-            # in format specification will break the argument substitution since
-            # string 'NA' is returned for missing fields. We will patch output
-            # template for missing fields to meet string presentation type.
-            for numeric_field in self._NUMERIC_FIELDS:
-                if numeric_field not in template_dict:
-                    # As of [1] format syntax is:
-                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
-                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
-                    FORMAT_RE = r'''(?x)
-                        (? max_views:
-                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-            return 'Skipping "%s" because it is age restricted' % video_title
-        if self.in_download_archive(info_dict):
-            return '%s has already been recorded in archive' % video_title
-
-        if not incomplete:
-            match_filter = self.params.get('match_filter')
-            if match_filter is not None:
-                ret = match_filter(info_dict)
-                if ret is not None:
-                    return ret
-
-        return None
-
-    @staticmethod
-    def add_extra_info(info_dict, extra_info):
-        '''Set the keys from extra_info in info dict if they are missing'''
-        for key, value in extra_info.items():
-            info_dict.setdefault(key, value)
-
-    def extract_info(self, url, download=True, ie_key=None, extra_info={},
-                     process=True, force_generic_extractor=False):
-        '''
-        Returns a list with a dictionary for each video we find.
-        If 'download', also downloads the videos.
- extra_info is a dict containing the extra values to add to each result - ''' - - if not ie_key and force_generic_extractor: - ie_key = 'Generic' - - if ie_key: - ies = [self.get_info_extractor(ie_key)] - else: - ies = self._ies - - for ie in ies: - if not ie.suitable(url): - continue - - ie = self.get_info_extractor(ie.ie_key()) - if not ie.working(): - self.report_warning('The program functionality for this site has been marked as broken, ' - 'and will probably not work.') - - try: - ie_result = ie.extract(url) - if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) - break - if isinstance(ie_result, list): - # Backwards compatibility: old IE result format - ie_result = { - '_type': 'compat_list', - 'entries': ie_result, - } - self.add_default_extra_info(ie_result, ie, url) - if process: - return self.process_ie_result(ie_result, download, extra_info) - else: - return ie_result - except GeoRestrictedError as e: - msg = e.msg - if e.countries: - msg += '\nThis video is available in %s.' % ', '.join( - map(ISO3166Utils.short2full, e.countries)) - msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' - self.report_error(msg) - break - except ExtractorError as e: # An error we somewhat expected - self.report_error(compat_str(e), e.format_traceback()) - break - except MaxDownloadsReached: - raise - except Exception as e: - if self.params.get('ignoreerrors', False): - self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) - break - else: - raise - else: - self.report_error('no suitable InfoExtractor for URL %s' % url) - - def add_default_extra_info(self, ie_result, ie, url): - self.add_extra_info(ie_result, { - 'extractor': ie.IE_NAME, - 'webpage_url': url, - 'webpage_url_basename': url_basename(url), - 'extractor_key': ie.ie_key(), - }) - - def process_ie_result(self, ie_result, download=True, extra_info={}): - """ - Take the result of the ie(may be modified) and resolve all unresolved - references (URLs, playlist items). - - It will also download the videos if 'download'. - Returns the resolved ie_result. 
- """ - result_type = ie_result.get('_type', 'video') - - if result_type in ('url', 'url_transparent'): - ie_result['url'] = sanitize_url(ie_result['url']) - extract_flat = self.params.get('extract_flat', False) - if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or - extract_flat is True): - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(ie_result)) - return ie_result - - if result_type == 'video': - self.add_extra_info(ie_result, extra_info) - return self.process_video_result(ie_result, download=download) - elif result_type == 'url': - # We have to add extra_info to the results because it may be - # contained in a playlist - return self.extract_info(ie_result['url'], - download, - ie_key=ie_result.get('ie_key'), - extra_info=extra_info) - elif result_type == 'url_transparent': - # Use the information from the embedding page - info = self.extract_info( - ie_result['url'], ie_key=ie_result.get('ie_key'), - extra_info=extra_info, download=False, process=False) - - # extract_info may return None when ignoreerrors is enabled and - # extraction failed with an error, don't crash and return early - # in this case - if not info: - return info - - force_properties = dict( - (k, v) for k, v in ie_result.items() if v is not None) - for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'): - if f in force_properties: - del force_properties[f] - new_result = info.copy() - new_result.update(force_properties) - - # Extracted info may not be a video result (i.e. - # info.get('_type', 'video') != video) but rather an url or - # url_transparent. In such cases outer metadata (from ie_result) - # should be propagated to inner one (info). For this to happen - # _type of info should be overridden with url_transparent. This - # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. 
- if new_result.get('_type') == 'url': - new_result['_type'] = 'url_transparent' - - return self.process_ie_result( - new_result, download=download, extra_info=extra_info) - elif result_type in ('playlist', 'multi_video'): - # We process each entry in the playlist - playlist = ie_result.get('title') or ie_result.get('id') - self.to_screen('[download] Downloading playlist: %s' % playlist) - - playlist_results = [] - - playliststart = self.params.get('playliststart', 1) - 1 - playlistend = self.params.get('playlistend') - # For backwards compatibility, interpret -1 as whole list - if playlistend == -1: - playlistend = None - - playlistitems_str = self.params.get('playlist_items') - playlistitems = None - if playlistitems_str is not None: - def iter_playlistitems(format): - for string_segment in format.split(','): - if '-' in string_segment: - start, end = string_segment.split('-') - for item in range(int(start), int(end) + 1): - yield int(item) - else: - yield int(string_segment) - playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) - - ie_entries = ie_result['entries'] - - def make_playlistitems_entries(list_ie_entries): - num_entries = len(list_ie_entries) - return [ - list_ie_entries[i - 1] for i in playlistitems - if -num_entries <= i - 1 < num_entries] - - def report_download(num_entries): - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, num_entries)) - - if isinstance(ie_entries, list): - n_all_entries = len(ie_entries) - if playlistitems: - entries = make_playlistitems_entries(ie_entries) - else: - entries = ie_entries[playliststart:playlistend] - n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % - (ie_result['extractor'], playlist, n_all_entries, n_entries)) - elif isinstance(ie_entries, PagedList): - if playlistitems: - entries = [] - for item in playlistitems: - entries.extend(ie_entries.getslice( - item - 1, item - )) - else: - entries = ie_entries.getslice( - playliststart, playlistend) - n_entries = len(entries) - report_download(n_entries) - else: # iterable - if playlistitems: - entries = make_playlistitems_entries(list(itertools.islice( - ie_entries, 0, max(playlistitems)))) - else: - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) - n_entries = len(entries) - report_download(n_entries) - - if self.params.get('playlistreverse', False): - entries = entries[::-1] - - if self.params.get('playlistrandom', False): - random.shuffle(entries) - - x_forwarded_for = ie_result.get('__x_forwarded_for_ip') - - for i, entry in enumerate(entries, 1): - self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) - # This __x_forwarded_for_ip thing is a bit ugly but requires - # minimal changes - if x_forwarded_for: - entry['__x_forwarded_for_ip'] = x_forwarded_for - extra = { - 'n_entries': n_entries, - 'playlist': playlist, - 'playlist_id': ie_result.get('id'), - 'playlist_title': ie_result.get('title'), - 'playlist_uploader': ie_result.get('uploader'), - 'playlist_uploader_id': ie_result.get('uploader_id'), - 'playlist_index': i + playliststart, - 'extractor': ie_result['extractor'], - 'webpage_url': ie_result['webpage_url'], - 'webpage_url_basename': url_basename(ie_result['webpage_url']), - 'extractor_key': ie_result['extractor_key'], - } - - reason = self._match_entry(entry, incomplete=True) - if reason is not None: - self.to_screen('[download] ' + reason) - continue - - entry_result = self.process_ie_result(entry, - 
                                                      download=download,
-                                                      extra_info=extra)
-                playlist_results.append(entry_result)
-            ie_result['entries'] = playlist_results
-            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
-            return ie_result
-        elif result_type == 'compat_list':
-            self.report_warning(
-                'Extractor %s returned a compat_list result. '
-                'It needs to be updated.' % ie_result.get('extractor'))
-
-            def _fixup(r):
-                self.add_extra_info(
-                    r,
-                    {
-                        'extractor': ie_result['extractor'],
-                        'webpage_url': ie_result['webpage_url'],
-                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
-                        'extractor_key': ie_result['extractor_key'],
-                    }
-                )
-                return r
-            ie_result['entries'] = [
-                self.process_ie_result(_fixup(r), download, extra_info)
-                for r in ie_result['entries']
-            ]
-            return ie_result
-        else:
-            raise Exception('Invalid result type: %s' % result_type)
-
-    def _build_format_filter(self, filter_spec):
-        " Returns a function to filter the formats according to the filter_spec "
-
-        OPERATORS = {
-            '<': operator.lt,
-            '<=': operator.le,
-            '>': operator.gt,
-            '>=': operator.ge,
-            '=': operator.eq,
-            '!=': operator.ne,
-        }
-        operator_rex = re.compile(r'''(?x)\s*
-            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
-            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
-            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
-            $
-            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
-        m = operator_rex.search(filter_spec)
-        if m:
-            try:
-                comparison_value = int(m.group('value'))
-            except ValueError:
-                comparison_value = parse_filesize(m.group('value'))
-                if comparison_value is None:
-                    comparison_value = parse_filesize(m.group('value') + 'B')
-                if comparison_value is None:
-                    raise ValueError(
-                        'Invalid value %r in format specification %r' % (
-                            m.group('value'), filter_spec))
-            op = OPERATORS[m.group('op')]
-
-        if not m:
-            STR_OPERATORS = {
-                '=': operator.eq,
-                '!=': operator.ne,
-                '^=': lambda attr, value: attr.startswith(value),
-                '$=': lambda attr, value: attr.endswith(value),
-                '*=': lambda attr, value: value in attr,
-            }
-            str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
-                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
- \s*(?P[a-zA-Z0-9._-]+) - \s*$ - ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) - m = str_operator_rex.search(filter_spec) - if m: - comparison_value = m.group('value') - op = STR_OPERATORS[m.group('op')] - - if not m: - raise ValueError('Invalid filter specification %r' % filter_spec) - - def _filter(f): - actual_value = f.get(m.group('key')) - if actual_value is None: - return m.group('none_inclusive') - return op(actual_value, comparison_value) - return _filter - - def _default_format_spec(self, info_dict, download=True): - - def can_merge(): - merger = FFmpegMergerPP(self) - return merger.available and merger.can_merge() - - def prefer_best(): - if self.params.get('simulate', False): - return False - if not download: - return False - if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': - return True - if info_dict.get('is_live'): - return True - if not can_merge(): - return True - return False - - req_format_list = ['bestvideo+bestaudio', 'best'] - if prefer_best(): - req_format_list.reverse() - return '/'.join(req_format_list) - - def build_format_selector(self, format_spec): - def syntax_error(note, start): - message = ( - 'Invalid format specification: ' - '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1])) - return SyntaxError(message) - - PICKFIRST = 'PICKFIRST' - MERGE = 'MERGE' - SINGLE = 'SINGLE' - GROUP = 'GROUP' - FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) - - def _parse_filter(tokens): - filter_parts = [] - for type, string, start, _, _ in tokens: - if type == tokenize.OP and string == ']': - return ''.join(filter_parts) - else: - filter_parts.append(string) - - def _remove_unused_ops(tokens): - # Remove operators that we don't use and join them with the surrounding strings - # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' - ALLOWED_OPS = ('/', '+', ',', '(', ')') - last_string, last_start, last_end, last_line = None, None, None, None - for type, string, start, end, line in tokens: - if type == tokenize.OP and string == '[': - if last_string: - yield tokenize.NAME, last_string, last_start, last_end, last_line - last_string = None - yield type, string, start, end, line - # everything inside brackets will be handled by _parse_filter - for type, string, start, end, line in tokens: - yield type, string, start, end, line - if type == tokenize.OP and string == ']': - break - elif type == tokenize.OP and string in ALLOWED_OPS: - if last_string: - yield tokenize.NAME, last_string, last_start, last_end, last_line - last_string = None - yield type, string, start, end, line - elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: - if not last_string: - last_string = string - last_start = start - last_end = end - else: - last_string += string - if last_string: - yield tokenize.NAME, last_string, last_start, last_end, last_line - - def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): - selectors = [] - current_selector = None - for type, string, start, _, _ in tokens: - # ENCODING is only defined in python 3.x - if type == getattr(tokenize, 'ENCODING', None): - continue - elif type in [tokenize.NAME, tokenize.NUMBER]: - current_selector = FormatSelector(SINGLE, string, []) - elif type == tokenize.OP: - if string == ')': - if not inside_group: - # ')' will be handled by the parentheses group - tokens.restore_last_token() - break - elif inside_merge and string in ['/', ',']: - tokens.restore_last_token() - break - elif inside_choice and 
string == ',': - tokens.restore_last_token() - break - elif string == ',': - if not current_selector: - raise syntax_error('"," must follow a format selector', start) - selectors.append(current_selector) - current_selector = None - elif string == '/': - if not current_selector: - raise syntax_error('"/" must follow a format selector', start) - first_choice = current_selector - second_choice = _parse_format_selection(tokens, inside_choice=True) - current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) - elif string == '[': - if not current_selector: - current_selector = FormatSelector(SINGLE, 'best', []) - format_filter = _parse_filter(tokens) - current_selector.filters.append(format_filter) - elif string == '(': - if current_selector: - raise syntax_error('Unexpected "("', start) - group = _parse_format_selection(tokens, inside_group=True) - current_selector = FormatSelector(GROUP, group, []) - elif string == '+': - video_selector = current_selector - audio_selector = _parse_format_selection(tokens, inside_merge=True) - if not video_selector or not audio_selector: - raise syntax_error('"+" must be between two format selectors', start) - current_selector = FormatSelector(MERGE, (video_selector, audio_selector), []) - else: - raise syntax_error('Operator not recognized: "{0}"'.format(string), start) - elif type == tokenize.ENDMARKER: - break - if current_selector: - selectors.append(current_selector) - return selectors - - def _build_selector_function(selector): - if isinstance(selector, list): - fs = [_build_selector_function(s) for s in selector] - - def selector_function(ctx): - for f in fs: - for format in f(ctx): - yield format - return selector_function - elif selector.type == GROUP: - selector_function = _build_selector_function(selector.selector) - elif selector.type == PICKFIRST: - fs = [_build_selector_function(s) for s in selector.selector] - - def selector_function(ctx): - for f in fs: - picked_formats = list(f(ctx)) - if picked_formats: - return picked_formats - return [] - elif selector.type == SINGLE: - format_spec = selector.selector - - def selector_function(ctx): - formats = list(ctx['formats']) - if not formats: - return - if format_spec == 'all': - for f in formats: - yield f - elif format_spec in ['best', 'worst', None]: - format_idx = 0 if format_spec == 'worst' else -1 - audiovideo_formats = [ - f for f in formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - yield audiovideo_formats[format_idx] - # for extractors with incomplete formats (audio only (soundcloud) - # or video only (imgur)) we will fallback to best/worst - # {video,audio}-only format - elif ctx['incomplete_formats']: - yield formats[format_idx] - elif format_spec == 'bestaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[-1] - elif format_spec == 'worstaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[0] - elif format_spec == 'bestvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[-1] - elif format_spec == 'worstvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[0] - else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] - if format_spec in extensions: - filter_f = lambda f: f['ext'] == format_spec - else: - filter_f = 
lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f, formats)) - if matches: - yield matches[-1] - elif selector.type == MERGE: - def _merge(formats_info): - format_1, format_2 = [f['format_id'] for f in formats_info] - # The first format must contain the video and the - # second the audio - if formats_info[0].get('vcodec') == 'none': - self.report_error('The first format must ' - 'contain the video, try using ' - '"-f %s+%s"' % (format_2, format_1)) - return - # Formats must be opposite (video+audio) - if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none': - self.report_error( - 'Both formats %s and %s are video-only, you must specify "-f video+audio"' - % (format_1, format_2)) - return - output_ext = ( - formats_info[0]['ext'] - if self.params.get('merge_output_format') is None - else self.params['merge_output_format']) - return { - 'requested_formats': formats_info, - 'format': '%s+%s' % (formats_info[0].get('format'), - formats_info[1].get('format')), - 'format_id': '%s+%s' % (formats_info[0].get('format_id'), - formats_info[1].get('format_id')), - 'width': formats_info[0].get('width'), - 'height': formats_info[0].get('height'), - 'resolution': formats_info[0].get('resolution'), - 'fps': formats_info[0].get('fps'), - 'vcodec': formats_info[0].get('vcodec'), - 'vbr': formats_info[0].get('vbr'), - 'stretched_ratio': formats_info[0].get('stretched_ratio'), - 'acodec': formats_info[1].get('acodec'), - 'abr': formats_info[1].get('abr'), - 'ext': output_ext, - } - video_selector, audio_selector = map(_build_selector_function, selector.selector) - - def selector_function(ctx): - for pair in itertools.product( - video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): - yield _merge(pair) - - filters = [self._build_format_filter(f) for f in selector.filters] - - def final_selector(ctx): - ctx_copy = copy.deepcopy(ctx) - for _filter in filters: - ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) - return selector_function(ctx_copy) - return final_selector - - stream = io.BytesIO(format_spec.encode('utf-8')) - try: - tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) - except tokenize.TokenError: - raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) - - class TokenIterator(object): - def __init__(self, tokens): - self.tokens = tokens - self.counter = 0 - - def __iter__(self): - return self - - def __next__(self): - if self.counter >= len(self.tokens): - raise StopIteration() - value = self.tokens[self.counter] - self.counter += 1 - return value - - next = __next__ - - def restore_last_token(self): - self.counter -= 1 - - parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) - return _build_selector_function(parsed_selector) - - def _calc_headers(self, info_dict): - res = std_headers.copy() - - add_headers = info_dict.get('http_headers') - if add_headers: - res.update(add_headers) - - cookies = self._calc_cookies(info_dict) - if cookies: - res['Cookie'] = cookies - - if 'X-Forwarded-For' not in res: - x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') - if x_forwarded_for_ip: - res['X-Forwarded-For'] = x_forwarded_for_ip - - return res - - def _calc_cookies(self, info_dict): - pr = sanitized_Request(info_dict['url']) - self.cookiejar.add_cookie_header(pr) - return pr.get_header('Cookie') - - def process_video_result(self, info_dict, download=True): - assert info_dict.get('_type', 'video') == 'video' - - if 'id' not in 
info_dict: - raise ExtractorError('Missing "id" field in extractor result') - if 'title' not in info_dict: - raise ExtractorError('Missing "title" field in extractor result') - - def report_force_conversion(field, field_not, conversion): - self.report_warning( - '"%s" field is not %s - forcing %s conversion, there is an error in extractor' - % (field, field_not, conversion)) - - def sanitize_string_field(info, string_field): - field = info.get(string_field) - if field is None or isinstance(field, compat_str): - return - report_force_conversion(string_field, 'a string', 'string') - info[string_field] = compat_str(field) - - def sanitize_numeric_fields(info): - for numeric_field in self._NUMERIC_FIELDS: - field = info.get(numeric_field) - if field is None or isinstance(field, compat_numeric_types): - continue - report_force_conversion(numeric_field, 'numeric', 'int') - info[numeric_field] = int_or_none(field) - - sanitize_string_field(info_dict, 'id') - sanitize_numeric_fields(info_dict) - - if 'playlist' not in info_dict: - # It isn't part of a playlist - info_dict['playlist'] = None - info_dict['playlist_index'] = None - - thumbnails = info_dict.get('thumbnails') - if thumbnails is None: - thumbnail = info_dict.get('thumbnail') - if thumbnail: - info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] - if thumbnails: - thumbnails.sort(key=lambda t: ( - t.get('preference') if t.get('preference') is not None else -1, - t.get('width') if t.get('width') is not None else -1, - t.get('height') if t.get('height') is not None else -1, - t.get('id') if t.get('id') is not None else '', t.get('url'))) - for i, t in enumerate(thumbnails): - t['url'] = sanitize_url(t['url']) - if t.get('width') and t.get('height'): - t['resolution'] = '%dx%d' % (t['width'], t['height']) - if t.get('id') is None: - t['id'] = '%d' % i - - if self.params.get('list_thumbnails'): - self.list_thumbnails(info_dict) - return - - thumbnail = info_dict.get('thumbnail') - if thumbnail: - info_dict['thumbnail'] = sanitize_url(thumbnail) - elif thumbnails: - info_dict['thumbnail'] = thumbnails[-1]['url'] - - if 'display_id' not in info_dict and 'id' in info_dict: - info_dict['display_id'] = info_dict['id'] - - if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: - # Working around out-of-range timestamp values (e.g. negative ones on Windows, - # see http://bugs.python.org/issue1646728) - try: - upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp']) - info_dict['upload_date'] = upload_date.strftime('%Y%m%d') - except (ValueError, OverflowError, OSError): - pass - - # Auto generate title fields corresponding to the *_number fields when missing - # in order to always have clean titles. This is very common for TV series. 
- for field in ('chapter', 'season', 'episode'): - if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) - - for cc_kind in ('subtitles', 'automatic_captions'): - cc = info_dict.get(cc_kind) - if cc: - for _, subtitle in cc.items(): - for subtitle_format in subtitle: - if subtitle_format.get('url'): - subtitle_format['url'] = sanitize_url(subtitle_format['url']) - if subtitle_format.get('ext') is None: - subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() - - automatic_captions = info_dict.get('automatic_captions') - subtitles = info_dict.get('subtitles') - - if self.params.get('listsubtitles', False): - if 'automatic_captions' in info_dict: - self.list_subtitles( - info_dict['id'], automatic_captions, 'automatic captions') - self.list_subtitles(info_dict['id'], subtitles, 'subtitles') - return - - info_dict['requested_subtitles'] = self.process_subtitles( - info_dict['id'], subtitles, automatic_captions) - - # We now pick which formats have to be downloaded - if info_dict.get('formats') is None: - # There's only one format available - formats = [info_dict] - else: - formats = info_dict['formats'] - - if not formats: - raise ExtractorError('No video formats found!') - - def is_wellformed(f): - url = f.get('url') - if not url: - self.report_warning( - '"url" field is missing or empty - skipping format, ' - 'there is an error in extractor') - return False - if isinstance(url, bytes): - sanitize_string_field(f, 'url') - return True - - # Filter out malformed formats for better extraction robustness - formats = list(filter(is_wellformed, formats)) - - formats_dict = {} - - # We check that all the formats have the format and format_id fields - for i, format in enumerate(formats): - sanitize_string_field(format, 'format_id') - sanitize_numeric_fields(format) - format['url'] = sanitize_url(format['url']) - if not format.get('format_id'): - format['format_id'] = compat_str(i) - else: - # Sanitize format_id from characters used in format selector expression - format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - format_id = format['format_id'] - if format_id not in formats_dict: - formats_dict[format_id] = [] - formats_dict[format_id].append(format) - - # Make sure all formats have unique format_id - for format_id, ambiguous_formats in formats_dict.items(): - if len(ambiguous_formats) > 1: - for i, format in enumerate(ambiguous_formats): - format['format_id'] = '%s-%d' % (format_id, i) - - for i, format in enumerate(formats): - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', - ) - # Automatically determine file extension if missing - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() - # Automatically determine protocol if missing (useful for format - # selection purposes) - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - # Add HTTP headers, so that external programs can use them from the - # json output - full_format_info = info_dict.copy() - full_format_info.update(format) - format['http_headers'] = self._calc_headers(full_format_info) - # Remove private housekeeping stuff - if '__x_forwarded_for_ip' in info_dict: - del info_dict['__x_forwarded_for_ip'] - - # TODO Central sorting goes 
here - - if formats[0] is not info_dict: - # only set the 'formats' fields if the original info_dict list them - # otherwise we end up with a circular reference, the first (and unique) - # element in the 'formats' field in info_dict is info_dict itself, - # which can't be exported to json - info_dict['formats'] = formats - if self.params.get('listformats'): - self.list_formats(info_dict) - return - - req_format = self.params.get('format') - if req_format is None: - req_format = self._default_format_spec(info_dict, download=download) - if self.params.get('verbose'): - self.to_stdout('[debug] Default format spec: %s' % req_format) - - format_selector = self.build_format_selector(req_format) - - # While in format selection we may need to have an access to the original - # format set in order to calculate some metrics or do some processing. - # For now we need to be able to guess whether original formats provided - # by extractor are incomplete or not (i.e. whether extractor provides only - # video-only or audio-only formats) for proper formats selection for - # extractors with such incomplete formats (see - # https://github.com/rg3/youtube-dl/pull/5556). - # Since formats may be filtered during format selection and may not match - # the original formats the results may be incorrect. Thus original formats - # or pre-calculated metrics should be passed to format selection routines - # as well. - # We will pass a context object containing all necessary additional data - # instead of just formats. - # This fixes incorrect format selection issue (see - # https://github.com/rg3/youtube-dl/issues/10083). - incomplete_formats = ( - # All formats are video-only or - all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or - # all formats are audio-only - all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) - - ctx = { - 'formats': formats, - 'incomplete_formats': incomplete_formats, - } - - formats_to_download = list(format_selector(ctx)) - if not formats_to_download: - raise ExtractorError('requested format not available', - expected=True) - - if download: - if len(formats_to_download) > 1: - self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) - for format in formats_to_download: - new_info = dict(info_dict) - new_info.update(format) - self.process_info(new_info) - # We update the info dict with the best quality format (backwards compatibility) - info_dict.update(formats_to_download[-1]) - return info_dict - - def process_subtitles(self, video_id, normal_subtitles, automatic_captions): - """Select the requested subtitles and their format""" - available_subs = {} - if normal_subtitles and self.params.get('writesubtitles'): - available_subs.update(normal_subtitles) - if automatic_captions and self.params.get('writeautomaticsub'): - for lang, cap_info in automatic_captions.items(): - if lang not in available_subs: - available_subs[lang] = cap_info - - if (not self.params.get('writesubtitles') and not - self.params.get('writeautomaticsub') or not - available_subs): - return None - - if self.params.get('allsubtitles', False): - requested_langs = available_subs.keys() - else: - if self.params.get('subtitleslangs', False): - requested_langs = self.params.get('subtitleslangs') - elif 'en' in available_subs: - requested_langs = ['en'] - else: - requested_langs = [list(available_subs.keys())[0]] - - formats_query = self.params.get('subtitlesformat', 'best') - formats_preference = formats_query.split('/') if 
formats_query else [] - subs = {} - for lang in requested_langs: - formats = available_subs.get(lang) - if formats is None: - self.report_warning('%s subtitles not available for %s' % (lang, video_id)) - continue - for ext in formats_preference: - if ext == 'best': - f = formats[-1] - break - matches = list(filter(lambda f: f['ext'] == ext, formats)) - if matches: - f = matches[-1] - break - else: - f = formats[-1] - self.report_warning( - 'No subtitle format found matching "%s" for language %s, ' - 'using %s' % (formats_query, lang, f['ext'])) - subs[lang] = f - return subs - - def process_info(self, info_dict): - """Process a single resolved IE result.""" - - assert info_dict.get('_type', 'video') == 'video' - - max_downloads = self.params.get('max_downloads') - if max_downloads is not None: - if self._num_downloads >= int(max_downloads): - raise MaxDownloadsReached() - - info_dict['fulltitle'] = info_dict['title'] - if len(info_dict['title']) > 200: - info_dict['title'] = info_dict['title'][:197] + '...' - - if 'format' not in info_dict: - info_dict['format'] = info_dict['ext'] - - reason = self._match_entry(info_dict, incomplete=False) - if reason is not None: - self.to_screen('[download] ' + reason) - return - - self._num_downloads += 1 - - info_dict['_filename'] = filename = self.prepare_filename(info_dict) - - # Forced printings - if self.params.get('forcetitle', False): - self.to_stdout(info_dict['fulltitle']) - if self.params.get('forceid', False): - self.to_stdout(info_dict['id']) - if self.params.get('forceurl', False): - if info_dict.get('requested_formats') is not None: - for f in info_dict['requested_formats']: - self.to_stdout(f['url'] + f.get('play_path', '')) - else: - # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) - if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: - self.to_stdout(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and info_dict.get('description') is not None: - self.to_stdout(info_dict['description']) - if self.params.get('forcefilename', False) and filename is not None: - self.to_stdout(filename) - if self.params.get('forceduration', False) and info_dict.get('duration') is not None: - self.to_stdout(formatSeconds(info_dict['duration'])) - if self.params.get('forceformat', False): - self.to_stdout(info_dict['format']) - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(info_dict)) - - # Do nothing else if in simulate mode - if self.params.get('simulate', False): - return - - if filename is None: - return - - def ensure_dir_exists(path): - try: - dn = os.path.dirname(path) - if dn and not os.path.exists(dn): - os.makedirs(dn) - return True - except (OSError, IOError) as err: - self.report_error('unable to create directory ' + error_to_compat_str(err)) - return False - - if not ensure_dir_exists(sanitize_path(encodeFilename(filename))): - return - - if self.params.get('writedescription', False): - descfn = replace_extension(filename, 'description', info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): - self.to_screen('[info] Video description is already present') - elif info_dict.get('description') is None: - self.report_warning('There\'s no description to write.') - else: - try: - self.to_screen('[info] Writing video description to: ' + descfn) - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: - descfile.write(info_dict['description']) 
- except (OSError, IOError): - self.report_error('Cannot write description file ' + descfn) - return - - if self.params.get('writeannotations', False): - annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): - self.to_screen('[info] Video annotations are already present') - else: - try: - self.to_screen('[info] Writing video annotations to: ' + annofn) - with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: - annofile.write(info_dict['annotations']) - except (KeyError, TypeError): - self.report_warning('There are no annotations to write.') - except (OSError, IOError): - self.report_error('Cannot write annotations file: ' + annofn) - return - - subtitles_are_requested = any([self.params.get('writesubtitles', False), - self.params.get('writeautomaticsub')]) - - if subtitles_are_requested and info_dict.get('requested_subtitles'): - # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE - subtitles = info_dict['requested_subtitles'] - ie = self.get_info_extractor(info_dict['extractor_key']) - for sub_lang, sub_info in subtitles.items(): - sub_format = sub_info['ext'] - sub_filename = subtitles_filename(filename, sub_lang, sub_format) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) - else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - if sub_info.get('data') is not None: - try: - # Use newline='' to prevent conversion of newline characters - # See https://github.com/rg3/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: - subfile.write(sub_info['data']) - except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + sub_filename) - return - else: - try: - sub_data = ie._request_webpage( - sub_info['url'], info_dict['id'], note=False).read() - with io.open(encodeFilename(sub_filename), 'wb') as subfile: - subfile.write(sub_data) - except (ExtractorError, IOError, OSError, ValueError) as err: - self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_compat_str(err))) - continue - - if self.params.get('writeinfojson', False): - infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): - self.to_screen('[info] Video description metadata is already present') - else: - self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) - try: - write_json_file(self.filter_requested_info(info_dict), infofn) - except (OSError, IOError): - self.report_error('Cannot write metadata to JSON file ' + infofn) - return - - self._write_thumbnails(info_dict, filename) - - if not self.params.get('skip_download', False): - try: - def dl(name, info): - fd = get_suitable_downloader(info, self.params)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - if self.params.get('verbose'): - self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) - - if info_dict.get('requested_formats') is not None: - downloaded = [] - success = True - merger = FFmpegMergerPP(self) - if not merger.available: - postprocessors = [] - self.report_warning('You have 
requested multiple ' - 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged.') - else: - postprocessors = [merger] - - def compatible_formats(formats): - video, audio = formats - # Check extension - video_ext, audio_ext = video.get('ext'), audio.get('ext') - if video_ext and audio_ext: - COMPATIBLE_EXTS = ( - ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), - ('webm') - ) - for exts in COMPATIBLE_EXTS: - if video_ext in exts and audio_ext in exts: - return True - # TODO: Check acodec/vcodec - return False - - filename_real_ext = os.path.splitext(filename)[1][1:] - filename_wo_ext = ( - os.path.splitext(filename)[0] - if filename_real_ext == info_dict['ext'] - else filename) - requested_formats = info_dict['requested_formats'] - if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): - info_dict['ext'] = 'mkv' - self.report_warning( - 'Requested formats are incompatible for merge and will be merged into mkv.') - # Ensure filename always has a correct extension for successful merge - filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) - if os.path.exists(encodeFilename(filename)): - self.to_screen( - '[download] %s has already been downloaded and ' - 'merged' % filename) - else: - for f in requested_formats: - new_info = dict(info_dict) - new_info.update(f) - fname = prepend_extension( - self.prepare_filename(new_info), - 'f%s' % f['format_id'], new_info['ext']) - if not ensure_dir_exists(fname): - return - downloaded.append(fname) - partial_success = dl(fname, new_info) - success = success and partial_success - info_dict['__postprocessors'] = postprocessors - info_dict['__files_to_merge'] = downloaded - else: - # Just a single file - success = dl(filename, info_dict) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_error('unable to download video data: %s' % error_to_compat_str(err)) - return - except (OSError, IOError) as err: - raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: - self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) - return - - if success and filename != '-': - # Fixup content - fixup_policy = self.params.get('fixup') - if fixup_policy is None: - fixup_policy = 'detect_or_warn' - - INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.' - - stretched_ratio = info_dict.get('stretched_ratio') - if stretched_ratio is not None and stretched_ratio != 1: - if fixup_policy == 'warn': - self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( - info_dict['id'], stretched_ratio)) - elif fixup_policy == 'detect_or_warn': - stretched_pp = FFmpegFixupStretchedPP(self) - if stretched_pp.available: - info_dict.setdefault('__postprocessors', []) - info_dict['__postprocessors'].append(stretched_pp) - else: - self.report_warning( - '%s: Non-uniform pixel ratio (%s). %s' - % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE)) - else: - assert fixup_policy in ('ignore', 'never') - - if (info_dict.get('requested_formats') is None and - info_dict.get('container') == 'm4a_dash'): - if fixup_policy == 'warn': - self.report_warning( - '%s: writing DASH m4a. ' - 'Only some players support this container.' 
- % info_dict['id']) - elif fixup_policy == 'detect_or_warn': - fixup_pp = FFmpegFixupM4aPP(self) - if fixup_pp.available: - info_dict.setdefault('__postprocessors', []) - info_dict['__postprocessors'].append(fixup_pp) - else: - self.report_warning( - '%s: writing DASH m4a. ' - 'Only some players support this container. %s' - % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) - else: - assert fixup_policy in ('ignore', 'never') - - if (info_dict.get('protocol') == 'm3u8_native' or - info_dict.get('protocol') == 'm3u8' and - self.params.get('hls_prefer_native')): - if fixup_policy == 'warn': - self.report_warning('%s: malformed AAC bitstream detected.' % ( - info_dict['id'])) - elif fixup_policy == 'detect_or_warn': - fixup_pp = FFmpegFixupM3u8PP(self) - if fixup_pp.available: - info_dict.setdefault('__postprocessors', []) - info_dict['__postprocessors'].append(fixup_pp) - else: - self.report_warning( - '%s: malformed AAC bitstream detected. %s' - % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) - else: - assert fixup_policy in ('ignore', 'never') - - try: - self.post_process(filename, info_dict) - except (PostProcessingError) as err: - self.report_error('postprocessing: %s' % str(err)) - return - self.record_download_archive(info_dict) - - def download(self, url_list): - """Download a given list of URLs.""" - outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) - if (len(url_list) > 1 and - outtmpl != '-' and - '%' not in outtmpl and - self.params.get('max_downloads') != 1): - raise SameFileError(outtmpl) - - for url in url_list: - try: - # It also downloads the videos - res = self.extract_info( - url, force_generic_extractor=self.params.get('force_generic_extractor', False)) - except UnavailableVideoError: - self.report_error('unable to download video') - except MaxDownloadsReached: - self.to_screen('[info] Maximum number of downloaded files reached.') - raise - else: - if self.params.get('dump_single_json', False): - self.to_stdout(json.dumps(res)) - - return self._download_retcode - - def download_with_info_file(self, info_filename): - with contextlib.closing(fileinput.FileInput( - [info_filename], mode='r', - openhook=fileinput.hook_encoded('utf-8'))) as f: - # FileInput doesn't have a read method, we can't call json.load - info = self.filter_requested_info(json.loads('\n'.join(f))) - try: - self.process_ie_result(info, download=True) - except DownloadError: - webpage_url = info.get('webpage_url') - if webpage_url is not None: - self.report_warning('The info failed to download, trying with "%s"' % webpage_url) - return self.download([webpage_url]) - else: - raise - return self._download_retcode - - @staticmethod - def filter_requested_info(info_dict): - return dict( - (k, v) for k, v in info_dict.items() - if k not in ['requested_formats', 'requested_subtitles']) - - def post_process(self, filename, ie_info): - """Run all the postprocessors on the given file.""" - info = dict(ie_info) - info['filepath'] = filename - pps_chain = [] - if ie_info.get('__postprocessors') is not None: - pps_chain.extend(ie_info['__postprocessors']) - pps_chain.extend(self._pps) - for pp in pps_chain: - files_to_delete = [] - try: - files_to_delete, info = pp.run(info) - except PostProcessingError as e: - self.report_error(e.msg) - if files_to_delete and not self.params.get('keepvideo', False): - for old_filename in files_to_delete: - self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) - try: - os.remove(encodeFilename(old_filename)) - except (IOError, OSError): - self.report_warning('Unable 
to remove downloaded original file') - - def _make_archive_id(self, info_dict): - # Future-proof against any change in case - # and backwards compatibility with prior versions - extractor = info_dict.get('extractor_key') - if extractor is None: - if 'id' in info_dict: - extractor = info_dict.get('ie_key') # key in a playlist - if extractor is None: - return None # Incomplete video information - return extractor.lower() + ' ' + info_dict['id'] - - def in_download_archive(self, info_dict): - fn = self.params.get('download_archive') - if fn is None: - return False - - vid_id = self._make_archive_id(info_dict) - if vid_id is None: - return False # Incomplete video information - - try: - with locked_file(fn, 'r', encoding='utf-8') as archive_file: - for line in archive_file: - if line.strip() == vid_id: - return True - except IOError as ioe: - if ioe.errno != errno.ENOENT: - raise - return False - - def record_download_archive(self, info_dict): - fn = self.params.get('download_archive') - if fn is None: - return - vid_id = self._make_archive_id(info_dict) - assert vid_id - with locked_file(fn, 'a', encoding='utf-8') as archive_file: - archive_file.write(vid_id + '\n') - - @staticmethod - def format_resolution(format, default='unknown'): - if format.get('vcodec') == 'none': - return 'audio only' - if format.get('resolution') is not None: - return format['resolution'] - if format.get('height') is not None: - if format.get('width') is not None: - res = '%sx%s' % (format['width'], format['height']) - else: - res = '%sp' % format['height'] - elif format.get('width') is not None: - res = '%dx?' % format['width'] - else: - res = default - return res - - def _format_note(self, fdict): - res = '' - if fdict.get('ext') in ['f4f', 'f4m']: - res += '(unsupported) ' - if fdict.get('language'): - if res: - res += ' ' - res += '[%s] ' % fdict['language'] - if fdict.get('format_note') is not None: - res += fdict['format_note'] + ' ' - if fdict.get('tbr') is not None: - res += '%4dk ' % fdict['tbr'] - if fdict.get('container') is not None: - if res: - res += ', ' - res += '%s container' % fdict['container'] - if (fdict.get('vcodec') is not None and - fdict.get('vcodec') != 'none'): - if res: - res += ', ' - res += fdict['vcodec'] - if fdict.get('vbr') is not None: - res += '@' - elif fdict.get('vbr') is not None and fdict.get('abr') is not None: - res += 'video@' - if fdict.get('vbr') is not None: - res += '%4dk' % fdict['vbr'] - if fdict.get('fps') is not None: - if res: - res += ', ' - res += '%sfps' % fdict['fps'] - if fdict.get('acodec') is not None: - if res: - res += ', ' - if fdict['acodec'] == 'none': - res += 'video only' - else: - res += '%-5s' % fdict['acodec'] - elif fdict.get('abr') is not None: - if res: - res += ', ' - res += 'audio' - if fdict.get('abr') is not None: - res += '@%3dk' % fdict['abr'] - if fdict.get('asr') is not None: - res += ' (%5dHz)' % fdict['asr'] - if fdict.get('filesize') is not None: - if res: - res += ', ' - res += format_bytes(fdict['filesize']) - elif fdict.get('filesize_approx') is not None: - if res: - res += ', ' - res += '~' + format_bytes(fdict['filesize_approx']) - return res - - def list_formats(self, info_dict): - formats = info_dict.get('formats', [info_dict]) - table = [ - [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] - for f in formats - if f.get('preference') is None or f['preference'] >= -1000] - if len(formats) > 1: - table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - - header_line = ['format code', 'extension', 
'resolution', 'note'] - self.to_screen( - '[info] Available formats for %s:\n%s' % - (info_dict['id'], render_table(header_line, table))) - - def list_thumbnails(self, info_dict): - thumbnails = info_dict.get('thumbnails') - if not thumbnails: - self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) - return - - self.to_screen( - '[info] Thumbnails for %s:' % info_dict['id']) - self.to_screen(render_table( - ['ID', 'width', 'height', 'URL'], - [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) - - def list_subtitles(self, video_id, subtitles, name='subtitles'): - if not subtitles: - self.to_screen('%s has no %s' % (video_id, name)) - return - self.to_screen( - 'Available %s for %s:' % (name, video_id)) - self.to_screen(render_table( - ['Language', 'formats'], - [[lang, ', '.join(f['ext'] for f in reversed(formats))] - for lang, formats in subtitles.items()])) - - def urlopen(self, req): - """ Start an HTTP download """ - if isinstance(req, compat_basestring): - req = sanitized_Request(req) - return self._opener.open(req, timeout=self._socket_timeout) - - def print_debug_header(self): - if not self.params.get('verbose'): - return - - if type('') is not compat_str: - # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) - self.report_warning( - 'Your Python is broken! Update to a newer and supported version') - - stdout_encoding = getattr( - sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__) - encoding_str = ( - '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( - locale.getpreferredencoding(), - sys.getfilesystemencoding(), - stdout_encoding, - self.get_encoding())) - write_string(encoding_str, encoding=None) - - self._write_string('[debug] youtube-dl version ' + __version__ + '\n') - if _LAZY_LOADER: - self._write_string('[debug] Lazy loading extractors enabled' + '\n') - try: - sp = subprocess.Popen( - ['git', 'rev-parse', '--short', 'HEAD'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = sp.communicate() - out = out.decode().strip() - if re.match('[0-9a-f]+', out): - self._write_string('[debug] Git HEAD: ' + out + '\n') - except Exception: - try: - sys.exc_clear() - except Exception: - pass - - def python_implementation(): - impl_name = platform.python_implementation() - if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'): - return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] - return impl_name - - self._write_string('[debug] Python version %s (%s) - %s\n' % ( - platform.python_version(), python_implementation(), - platform_name())) - - exe_versions = FFmpegPostProcessor.get_versions(self) - exe_versions['rtmpdump'] = rtmpdump_version() - exe_versions['phantomjs'] = PhantomJSwrapper._version() - exe_str = ', '.join( - '%s %s' % (exe, v) - for exe, v in sorted(exe_versions.items()) - if v - ) - if not exe_str: - exe_str = 'none' - self._write_string('[debug] exe versions: %s\n' % exe_str) - - proxy_map = {} - for handler in self._opener.handlers: - if hasattr(handler, 'proxies'): - proxy_map.update(handler.proxies) - self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') - - if self.params.get('call_home', False): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - self._write_string('[debug] Public IP address: %s\n' % ipaddr) - latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode('utf-8') - if 
version_tuple(latest_version) > version_tuple(__version__): - self.report_warning( - 'You are using an outdated version (newest version: %s)! ' - 'See https://yt-dl.org/update if you need help updating.' % - latest_version) - - def _setup_opener(self): - timeout_val = self.params.get('socket_timeout') - self._socket_timeout = 600 if timeout_val is None else float(timeout_val) - - opts_cookiefile = self.params.get('cookiefile') - opts_proxy = self.params.get('proxy') - - if opts_cookiefile is None: - self.cookiejar = compat_cookiejar.CookieJar() - else: - opts_cookiefile = expand_path(opts_cookiefile) - self.cookiejar = compat_cookiejar.MozillaCookieJar( - opts_cookiefile) - if os.access(opts_cookiefile, os.R_OK): - self.cookiejar.load() - - cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) - if opts_proxy is not None: - if opts_proxy == '': - proxies = {} - else: - proxies = {'http': opts_proxy, 'https': opts_proxy} - else: - proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) - if 'http' in proxies and 'https' not in proxies: - proxies['https'] = proxies['http'] - proxy_handler = PerRequestProxyHandler(proxies) - - debuglevel = 1 if self.params.get('debug_printtraffic') else 0 - https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) - ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) - data_handler = compat_urllib_request_DataHandler() - - # When passing our own FileHandler instance, build_opener won't add the - # default FileHandler and allows us to disable the file protocol, which - # can be used for malicious purposes (see - # https://github.com/rg3/youtube-dl/issues/8227) - file_handler = compat_urllib_request.FileHandler() - - def file_open(*args, **kwargs): - raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') - file_handler.file_open = file_open - - opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) - - # Delete the default user-agent header, which would otherwise apply in - # cases where our custom HTTP handler doesn't come into play - # (See https://github.com/rg3/youtube-dl/issues/1309 for details) - opener.addheaders = [] - self._opener = opener - - def encode(self, s): - if isinstance(s, bytes): - return s # Already encoded - - try: - return s.encode(self.get_encoding()) - except UnicodeEncodeError as err: - err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' - raise - - def get_encoding(self): - encoding = self.params.get('encoding') - if encoding is None: - encoding = preferredencoding() - return encoding - - def _write_thumbnails(self, info_dict, filename): - if self.params.get('writethumbnail', False): - thumbnails = info_dict.get('thumbnails') - if thumbnails: - thumbnails = [thumbnails[-1]] - elif self.params.get('write_all_thumbnails', False): - thumbnails = info_dict.get('thumbnails') - else: - return - - if not thumbnails: - # No thumbnails present, so return immediately - return - - for t in thumbnails: - thumb_ext = determine_ext(t['url'], 'jpg') - suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' - thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' - t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' 
+ thumb_ext - - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen('[%s] %s: Thumbnail %sis already present' % - (info_dict['extractor'], info_dict['id'], thumb_display_id)) - else: - self.to_screen('[%s] %s: Downloading thumbnail %s...' % - (info_dict['extractor'], info_dict['id'], thumb_display_id)) - try: - uf = self.urlopen(t['url']) - with open(encodeFilename(thumb_filename), 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % - (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning('Unable to download thumbnail "%s": %s' % - (t['url'], error_to_compat_str(err))) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py deleted file mode 100644 index ba435ea..0000000 --- a/youtube_dl/__init__.py +++ /dev/null @@ -1,481 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from __future__ import unicode_literals - -__license__ = 'Public Domain' - -import codecs -import io -import os -import random -import sys - - -from .options import ( - parseOpts, -) -from .compat import ( - compat_getpass, - compat_shlex_split, - workaround_optparse_bug9161, -) -from .utils import ( - DateRange, - decodeOption, - DEFAULT_OUTTMPL, - DownloadError, - expand_path, - match_filter_func, - MaxDownloadsReached, - preferredencoding, - read_batch_urls, - SameFileError, - setproctitle, - std_headers, - write_string, - render_table, -) -from .update import update_self -from .downloader import ( - FileDownloader, -) -from .extractor import gen_extractors, list_extractors -from .extractor.adobepass import MSO_INFO -from .YoutubeDL import YoutubeDL - - -def _real_main(argv=None): - # Compatibility fixes for Windows - if sys.platform == 'win32': - # https://github.com/rg3/youtube-dl/issues/820 - codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) - - workaround_optparse_bug9161() - - setproctitle('youtube-dl') - - parser, opts, args = parseOpts(argv) - - # Set user agent - if opts.user_agent is not None: - std_headers['User-Agent'] = opts.user_agent - - # Set referer - if opts.referer is not None: - std_headers['Referer'] = opts.referer - - # Custom HTTP headers - if opts.headers is not None: - for h in opts.headers: - if ':' not in h: - parser.error('wrong header formatting, it should be key:value, not "%s"' % h) - key, value = h.split(':', 1) - if opts.verbose: - write_string('[debug] Adding header from command line option %s:%s\n' % (key, value)) - std_headers[key] = value - - # Dump user agent - if opts.dump_user_agent: - write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) - sys.exit(0) - - # Batch file verification - batch_urls = [] - if opts.batchfile is not None: - try: - if opts.batchfile == '-': - batchfd = sys.stdin - else: - batchfd = io.open( - expand_path(opts.batchfile), - 'r', encoding='utf-8', errors='ignore') - batch_urls = read_batch_urls(batchfd) - if opts.verbose: - write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') - except IOError: - sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls - _enc = preferredencoding() - all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] - - if opts.list_extractors: - for ie in list_extractors(opts.age_limit): 
- write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout) - matchedUrls = [url for url in all_urls if ie.suitable(url)] - for mu in matchedUrls: - write_string(' ' + mu + '\n', out=sys.stdout) - sys.exit(0) - if opts.list_extractor_descriptions: - for ie in list_extractors(opts.age_limit): - if not ie._WORKING: - continue - desc = getattr(ie, 'IE_DESC', ie.IE_NAME) - if desc is False: - continue - if hasattr(ie, 'SEARCH_KEY'): - _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') - _COUNTS = ('', '5', '10', 'all') - desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) - write_string(desc + '\n', out=sys.stdout) - sys.exit(0) - if opts.ap_list_mso: - table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] - write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) - sys.exit(0) - - # Conflicting, missing and erroneous options - if opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error('using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error('account username missing\n') - if opts.ap_password is not None and opts.ap_username is None: - parser.error('TV Provider account username missing\n') - if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): - parser.error('using output template conflicts with using title, video ID or auto number') - if opts.autonumber_size is not None: - if opts.autonumber_size <= 0: - parser.error('auto number size must be positive') - if opts.autonumber_start is not None: - if opts.autonumber_start < 0: - parser.error('auto number start must be positive or 0') - if opts.usetitle and opts.useid: - parser.error('using title conflicts with using video ID') - if opts.username is not None and opts.password is None: - opts.password = compat_getpass('Type account password and press [Return]: ') - if opts.ap_username is not None and opts.ap_password is None: - opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - parser.error('invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.min_filesize is not None: - numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) - if numeric_limit is None: - parser.error('invalid min_filesize specified') - opts.min_filesize = numeric_limit - if opts.max_filesize is not None: - numeric_limit = FileDownloader.parse_bytes(opts.max_filesize) - if numeric_limit is None: - parser.error('invalid max_filesize specified') - opts.max_filesize = numeric_limit - if opts.sleep_interval is not None: - if opts.sleep_interval < 0: - parser.error('sleep interval must be positive or 0') - if opts.max_sleep_interval is not None: - if opts.max_sleep_interval < 0: - parser.error('max sleep interval must be positive or 0') - if opts.max_sleep_interval < opts.sleep_interval: - parser.error('max sleep interval must be greater than or equal to min sleep interval') - else: - opts.max_sleep_interval = opts.sleep_interval - if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') - - def 
parse_retries(retries): - if retries in ('inf', 'infinite'): - parsed_retries = float('inf') - else: - try: - parsed_retries = int(retries) - except (TypeError, ValueError): - parser.error('invalid retry count specified') - return parsed_retries - if opts.retries is not None: - opts.retries = parse_retries(opts.retries) - if opts.fragment_retries is not None: - opts.fragment_retries = parse_retries(opts.fragment_retries) - if opts.buffersize is not None: - numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) - if numeric_buffersize is None: - parser.error('invalid buffer size specified') - opts.buffersize = numeric_buffersize - if opts.http_chunk_size is not None: - numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size) - if not numeric_chunksize: - parser.error('invalid http chunk size specified') - opts.http_chunk_size = numeric_chunksize - if opts.playliststart <= 0: - raise ValueError('Playlist start must be positive') - if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: - raise ValueError('Playlist end must be greater than playlist start') - if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: - parser.error('invalid audio format specified') - if opts.audioquality: - opts.audioquality = opts.audioquality.strip('k').strip('K') - if not opts.audioquality.isdigit(): - parser.error('invalid audio quality specified') - if opts.recodevideo is not None: - if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']: - parser.error('invalid video recode format specified') - if opts.convertsubtitles is not None: - if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: - parser.error('invalid subtitle format specified') - - if opts.date is not None: - date = DateRange.day(opts.date) - else: - date = DateRange(opts.dateafter, opts.datebefore) - - # Do not download videos when there are audio-only formats - if opts.extractaudio and not opts.keepvideo and opts.format is None: - opts.format = 'bestaudio/best' - - # --all-sub automatically sets --write-sub if --write-auto-sub is not given - # this was the old behaviour if only --all-sub was given. - if opts.allsubtitles and not opts.writeautomaticsub: - opts.writesubtitles = True - - outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or - (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or - (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or - (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or - (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or - (opts.useid and '%(id)s.%(ext)s') or - (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or - DEFAULT_OUTTMPL) - if not os.path.splitext(outtmpl)[1] and opts.extractaudio: - parser.error('Cannot download a video and extract audio into the same' - ' file! 
Use "{0}.%(ext)s" instead of "{0}" as the output' - ' template'.format(outtmpl)) - - any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json - any_printing = opts.print_json - download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive - - # PostProcessors - postprocessors = [] - if opts.metafromtitle: - postprocessors.append({ - 'key': 'MetadataFromTitle', - 'titleformat': opts.metafromtitle - }) - if opts.extractaudio: - postprocessors.append({ - 'key': 'FFmpegExtractAudio', - 'preferredcodec': opts.audioformat, - 'preferredquality': opts.audioquality, - 'nopostoverwrites': opts.nopostoverwrites, - }) - if opts.recodevideo: - postprocessors.append({ - 'key': 'FFmpegVideoConvertor', - 'preferedformat': opts.recodevideo, - }) - # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and - # FFmpegExtractAudioPP as containers before conversion may not support - # metadata (3gp, webm, etc.) - # And this post-processor should be placed before other metadata - # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of - # extra metadata. By default ffmpeg preserves metadata applicable for both - # source and target containers. From this point the container won't change, - # so metadata can be added here. - if opts.addmetadata: - postprocessors.append({'key': 'FFmpegMetadata'}) - if opts.convertsubtitles: - postprocessors.append({ - 'key': 'FFmpegSubtitlesConvertor', - 'format': opts.convertsubtitles, - }) - if opts.embedsubtitles: - postprocessors.append({ - 'key': 'FFmpegEmbedSubtitle', - }) - if opts.embedthumbnail: - already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails - postprocessors.append({ - 'key': 'EmbedThumbnail', - 'already_have_thumbnail': already_have_thumbnail - }) - if not already_have_thumbnail: - opts.writethumbnail = True - # XAttrMetadataPP should be run after post-processors that may change file - # contents - if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) - # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. - # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. 
- if opts.exec_cmd: - postprocessors.append({ - 'key': 'ExecAfterDownload', - 'exec_cmd': opts.exec_cmd, - }) - external_downloader_args = None - if opts.external_downloader_args: - external_downloader_args = compat_shlex_split(opts.external_downloader_args) - postprocessor_args = None - if opts.postprocessor_args: - postprocessor_args = compat_shlex_split(opts.postprocessor_args) - match_filter = ( - None if opts.match_filter is None - else match_filter_func(opts.match_filter)) - - ydl_opts = { - 'usenetrc': opts.usenetrc, - 'username': opts.username, - 'password': opts.password, - 'twofactor': opts.twofactor, - 'videopassword': opts.videopassword, - 'ap_mso': opts.ap_mso, - 'ap_username': opts.ap_username, - 'ap_password': opts.ap_password, - 'quiet': (opts.quiet or any_getting or any_printing), - 'no_warnings': opts.no_warnings, - 'forceurl': opts.geturl, - 'forcetitle': opts.gettitle, - 'forceid': opts.getid, - 'forcethumbnail': opts.getthumbnail, - 'forcedescription': opts.getdescription, - 'forceduration': opts.getduration, - 'forcefilename': opts.getfilename, - 'forceformat': opts.getformat, - 'forcejson': opts.dumpjson or opts.print_json, - 'dump_single_json': opts.dump_single_json, - 'simulate': opts.simulate or any_getting, - 'skip_download': opts.skip_download, - 'format': opts.format, - 'listformats': opts.listformats, - 'outtmpl': outtmpl, - 'autonumber_size': opts.autonumber_size, - 'autonumber_start': opts.autonumber_start, - 'restrictfilenames': opts.restrictfilenames, - 'ignoreerrors': opts.ignoreerrors, - 'force_generic_extractor': opts.force_generic_extractor, - 'ratelimit': opts.ratelimit, - 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, - 'fragment_retries': opts.fragment_retries, - 'skip_unavailable_fragments': opts.skip_unavailable_fragments, - 'keep_fragments': opts.keep_fragments, - 'buffersize': opts.buffersize, - 'noresizebuffer': opts.noresizebuffer, - 'http_chunk_size': opts.http_chunk_size, - 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, - 'progress_with_newline': opts.progress_with_newline, - 'playliststart': opts.playliststart, - 'playlistend': opts.playlistend, - 'playlistreverse': opts.playlist_reverse, - 'playlistrandom': opts.playlist_random, - 'noplaylist': opts.noplaylist, - 'logtostderr': opts.outtmpl == '-', - 'consoletitle': opts.consoletitle, - 'nopart': opts.nopart, - 'updatetime': opts.updatetime, - 'writedescription': opts.writedescription, - 'writeannotations': opts.writeannotations, - 'writeinfojson': opts.writeinfojson, - 'writethumbnail': opts.writethumbnail, - 'write_all_thumbnails': opts.write_all_thumbnails, - 'writesubtitles': opts.writesubtitles, - 'writeautomaticsub': opts.writeautomaticsub, - 'allsubtitles': opts.allsubtitles, - 'listsubtitles': opts.listsubtitles, - 'subtitlesformat': opts.subtitlesformat, - 'subtitleslangs': opts.subtitleslangs, - 'matchtitle': decodeOption(opts.matchtitle), - 'rejecttitle': decodeOption(opts.rejecttitle), - 'max_downloads': opts.max_downloads, - 'prefer_free_formats': opts.prefer_free_formats, - 'verbose': opts.verbose, - 'dump_intermediate_pages': opts.dump_intermediate_pages, - 'write_pages': opts.write_pages, - 'test': opts.test, - 'keepvideo': opts.keepvideo, - 'min_filesize': opts.min_filesize, - 'max_filesize': opts.max_filesize, - 'min_views': opts.min_views, - 'max_views': opts.max_views, - 'daterange': date, - 'cachedir': opts.cachedir, - 'youtube_print_sig_code': opts.youtube_print_sig_code, - 'age_limit': opts.age_limit, - 'download_archive': 
download_archive_fn, - 'cookiefile': opts.cookiefile, - 'nocheckcertificate': opts.no_check_certificate, - 'prefer_insecure': opts.prefer_insecure, - 'proxy': opts.proxy, - 'socket_timeout': opts.socket_timeout, - 'bidi_workaround': opts.bidi_workaround, - 'debug_printtraffic': opts.debug_printtraffic, - 'prefer_ffmpeg': opts.prefer_ffmpeg, - 'include_ads': opts.include_ads, - 'default_search': opts.default_search, - 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, - 'encoding': opts.encoding, - 'extract_flat': opts.extract_flat, - 'mark_watched': opts.mark_watched, - 'merge_output_format': opts.merge_output_format, - 'postprocessors': postprocessors, - 'fixup': opts.fixup, - 'source_address': opts.source_address, - 'call_home': opts.call_home, - 'sleep_interval': opts.sleep_interval, - 'max_sleep_interval': opts.max_sleep_interval, - 'external_downloader': opts.external_downloader, - 'list_thumbnails': opts.list_thumbnails, - 'playlist_items': opts.playlist_items, - 'xattr_set_filesize': opts.xattr_set_filesize, - 'match_filter': match_filter, - 'no_color': opts.no_color, - 'ffmpeg_location': opts.ffmpeg_location, - 'hls_prefer_native': opts.hls_prefer_native, - 'hls_use_mpegts': opts.hls_use_mpegts, - 'external_downloader_args': external_downloader_args, - 'postprocessor_args': postprocessor_args, - 'cn_verification_proxy': opts.cn_verification_proxy, - 'geo_verification_proxy': opts.geo_verification_proxy, - 'config_location': opts.config_location, - 'geo_bypass': opts.geo_bypass, - 'geo_bypass_country': opts.geo_bypass_country, - 'geo_bypass_ip_block': opts.geo_bypass_ip_block, - # just for deprecation check - 'autonumber': opts.autonumber if opts.autonumber is True else None, - 'usetitle': opts.usetitle if opts.usetitle is True else None, - } - - with YoutubeDL(ydl_opts) as ydl: - # Update version - if opts.update_self: - update_self(ydl.to_screen, opts.verbose, ydl._opener) - - # Remove cache dir - if opts.rm_cachedir: - ydl.cache.remove() - - # Maybe do nothing - if (len(all_urls) < 1) and (opts.load_info_filename is None): - if opts.update_self or opts.rm_cachedir: - sys.exit() - - ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) - parser.error( - 'You must provide at least one URL.\n' - 'Type youtube-dl --help to see a list of all options.') - - try: - if opts.load_info_filename is not None: - retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) - else: - retcode = ydl.download(all_urls) - except MaxDownloadsReached: - ydl.to_screen('--max-download limit reached, aborting.') - retcode = 101 - - sys.exit(retcode) - - -def main(argv=None): - try: - _real_main(argv) - except DownloadError: - sys.exit(1) - except SameFileError: - sys.exit('ERROR: fixed output name but more than one file to download') - except KeyboardInterrupt: - sys.exit('\nERROR: Interrupted by user') - - -__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/__main__.py b/youtube_dl/__main__.py deleted file mode 100644 index 138f5fb..0000000 --- a/youtube_dl/__main__.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -from __future__ import unicode_literals - -# Execute with -# $ python youtube_dl/__main__.py (2.6+) -# $ python -m youtube_dl (2.7+) - -import sys - -if __package__ is None and not hasattr(sys, 'frozen'): - # direct call of __main__.py - import os.path - path = os.path.realpath(os.path.abspath(__file__)) - sys.path.insert(0, os.path.dirname(os.path.dirname(path))) - -import youtube_dl - -if __name__ == 
'__main__': - youtube_dl.main() diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py deleted file mode 100644 index 461bb6d..0000000 --- a/youtube_dl/aes.py +++ /dev/null @@ -1,361 +0,0 @@ -from __future__ import unicode_literals - -from math import ceil - -from .compat import compat_b64decode -from .utils import bytes_to_intlist, intlist_to_bytes - -BLOCK_SIZE_BYTES = 16 - - -def aes_ctr_decrypt(data, key, counter): - """ - Decrypt with aes in counter mode - - @param {int[]} data cipher - @param {int[]} key 16/24/32-Byte cipher key - @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block) - returns the next counter block - @returns {int[]} decrypted data - """ - expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) - - decrypted_data = [] - for i in range(block_count): - counter_block = counter.next_value() - block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - block += [0] * (BLOCK_SIZE_BYTES - len(block)) - - cipher_counter_block = aes_encrypt(counter_block, expanded_key) - decrypted_data += xor(block, cipher_counter_block) - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data - - -def aes_cbc_decrypt(data, key, iv): - """ - Decrypt with aes in CBC mode - - @param {int[]} data cipher - @param {int[]} key 16/24/32-Byte cipher key - @param {int[]} iv 16-Byte IV - @returns {int[]} decrypted data - """ - expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) - - decrypted_data = [] - previous_cipher_block = iv - for i in range(block_count): - block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - block += [0] * (BLOCK_SIZE_BYTES - len(block)) - - decrypted_block = aes_decrypt(block, expanded_key) - decrypted_data += xor(decrypted_block, previous_cipher_block) - previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data - - -def aes_cbc_encrypt(data, key, iv): - """ - Encrypt with aes in CBC mode. 
Using PKCS#7 padding - - @param {int[]} data cleartext - @param {int[]} key 16/24/32-Byte cipher key - @param {int[]} iv 16-Byte IV - @returns {int[]} encrypted data - """ - expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) - - encrypted_data = [] - previous_cipher_block = iv - for i in range(block_count): - block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - remaining_length = BLOCK_SIZE_BYTES - len(block) - block += [remaining_length] * remaining_length - mixed_block = xor(block, previous_cipher_block) - - encrypted_block = aes_encrypt(mixed_block, expanded_key) - encrypted_data += encrypted_block - - previous_cipher_block = encrypted_block - - return encrypted_data - - -def key_expansion(data): - """ - Generate key schedule - - @param {int[]} data 16/24/32-Byte cipher key - @returns {int[]} 176/208/240-Byte expanded key - """ - data = data[:] # copy - rcon_iteration = 1 - key_size_bytes = len(data) - expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES - - while len(data) < expanded_key_size_bytes: - temp = data[-4:] - temp = key_schedule_core(temp, rcon_iteration) - rcon_iteration += 1 - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - for _ in range(3): - temp = data[-4:] - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - if key_size_bytes == 32: - temp = data[-4:] - temp = sub_bytes(temp) - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): - temp = data[-4:] - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data - - -def aes_encrypt(data, expanded_key): - """ - Encrypt one block with aes - - @param {int[]} data 16-Byte state - @param {int[]} expanded_key 176/208/240-Byte expanded key - @returns {int[]} 16-Byte cipher - """ - rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 - - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - for i in range(1, rounds + 1): - data = sub_bytes(data) - data = shift_rows(data) - if i != rounds: - data = mix_columns(data) - data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) - - return data - - -def aes_decrypt(data, expanded_key): - """ - Decrypt one block with aes - - @param {int[]} data 16-Byte cipher - @param {int[]} expanded_key 176/208/240-Byte expanded key - @returns {int[]} 16-Byte state - """ - rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 - - for i in range(rounds, 0, -1): - data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) - if i != rounds: - data = mix_columns_inv(data) - data = shift_rows_inv(data) - data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data - - -def aes_decrypt_text(data, password, key_size_bytes): - """ - Decrypt text - - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter - - The cipher key is retrieved by encrypting the first 16 Byte of 'password' - with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) - - Mode of operation is 'counter' - - @param {str} data Base64 encoded string - @param {str,unicode} password Password (will be encoded with utf-8) - @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit - @returns {str} Decrypted data - """ - NONCE_LENGTH_BYTES = 8 - - data = bytes_to_intlist(compat_b64decode(data)) - password = 
bytes_to_intlist(password.encode('utf-8')) - - key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) - key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) - - nonce = data[:NONCE_LENGTH_BYTES] - cipher = data[NONCE_LENGTH_BYTES:] - - class Counter(object): - __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES) - - def next_value(self): - temp = self.__value - self.__value = inc(self.__value) - return temp - - decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) - plaintext = intlist_to_bytes(decrypted_data) - - return plaintext - - -RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) -SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, - 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, - 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, - 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, - 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, - 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, - 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, - 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, - 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, - 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, - 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, - 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, - 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, - 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, - 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, - 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) -SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 
0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) -MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1), - (0x1, 0x2, 0x3, 0x1), - (0x1, 0x1, 0x2, 0x3), - (0x3, 0x1, 0x1, 0x2)) -MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9), - (0x9, 0xE, 0xB, 0xD), - (0xD, 0x9, 0xE, 0xB), - (0xB, 0xD, 0x9, 0xE)) -RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, - 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, - 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, - 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, - 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, - 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, - 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, - 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, - 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, - 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, - 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, - 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, - 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, - 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, - 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, - 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) -RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, - 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, - 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, - 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, - 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, - 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, - 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, - 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, - 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, - 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, - 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, - 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, - 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, - 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, - 0x44, 0x11, 0x92, 0xd9, 0x23, 
0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, - 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) - - -def sub_bytes(data): - return [SBOX[x] for x in data] - - -def sub_bytes_inv(data): - return [SBOX_INV[x] for x in data] - - -def rotate(data): - return data[1:] + [data[0]] - - -def key_schedule_core(data, rcon_iteration): - data = rotate(data) - data = sub_bytes(data) - data[0] = data[0] ^ RCON[rcon_iteration] - - return data - - -def xor(data1, data2): - return [x ^ y for x, y in zip(data1, data2)] - - -def rijndael_mul(a, b): - if(a == 0 or b == 0): - return 0 - return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] - - -def mix_column(data, matrix): - data_mixed = [] - for row in range(4): - mixed = 0 - for column in range(4): - # xor is (+) and (-) - mixed ^= rijndael_mul(data[column], matrix[row][column]) - data_mixed.append(mixed) - return data_mixed - - -def mix_columns(data, matrix=MIX_COLUMN_MATRIX): - data_mixed = [] - for i in range(4): - column = data[i * 4: (i + 1) * 4] - data_mixed += mix_column(column, matrix) - return data_mixed - - -def mix_columns_inv(data): - return mix_columns(data, MIX_COLUMN_MATRIX_INV) - - -def shift_rows(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column + row) & 0b11) * 4 + row]) - return data_shifted - - -def shift_rows_inv(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column - row) & 0b11) * 4 + row]) - return data_shifted - - -def inc(data): - data = data[:] # copy - for i in range(len(data) - 1, -1, -1): - if data[i] == 255: - data[i] = 0 - else: - data[i] = data[i] + 1 - break - return data - - -__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py deleted file mode 100644 index 7bdade1..0000000 --- a/youtube_dl/cache.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import unicode_literals - -import errno -import io -import json -import os -import re -import shutil -import traceback - -from .compat import compat_getenv -from .utils import ( - expand_path, - write_json_file, -) - - -class Cache(object): - def __init__(self, ydl): - self._ydl = ydl - - def _get_root_dir(self): - res = self._ydl.params.get('cachedir') - if res is None: - cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') - res = os.path.join(cache_root, 'youtube-dl') - return expand_path(res) - - def _get_cache_fn(self, section, key, dtype): - assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ - 'invalid section %r' % section - assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key - return os.path.join( - self._get_root_dir(), section, '%s.%s' % (key, dtype)) - - @property - def enabled(self): - return self._ydl.params.get('cachedir') is not False - - def store(self, section, key, data, dtype='json'): - assert dtype in ('json',) - - if not self.enabled: - return - - fn = self._get_cache_fn(section, key, dtype) - try: - try: - os.makedirs(os.path.dirname(fn)) - except OSError as ose: - if ose.errno != errno.EEXIST: - raise - write_json_file(data, fn) - except Exception: - tb = traceback.format_exc() - self._ydl.report_warning( - 'Writing cache to %r failed: %s' % (fn, tb)) - - def load(self, section, key, dtype='json', default=None): - assert dtype in ('json',) - - if not self.enabled: - return default - - cache_fn = self._get_cache_fn(section, key, 
dtype) - try: - try: - with io.open(cache_fn, 'r', encoding='utf-8') as cachef: - return json.load(cachef) - except ValueError: - try: - file_size = os.path.getsize(cache_fn) - except (OSError, IOError) as oe: - file_size = str(oe) - self._ydl.report_warning( - 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) - except IOError: - pass # No cache available - - return default - - def remove(self): - if not self.enabled: - self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') - return - - cachedir = self._get_root_dir() - if not any((term in cachedir) for term in ('cache', 'tmp')): - raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) - - self._ydl.to_screen( - 'Removing cache dir %s .' % cachedir, skip_eol=True) - if os.path.exists(cachedir): - self._ydl.to_screen('.', skip_eol=True) - shutil.rmtree(cachedir) - self._ydl.to_screen('.') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py deleted file mode 100644 index 7b77034..0000000 --- a/youtube_dl/compat.py +++ /dev/null @@ -1,3016 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import base64 -import binascii -import collections -import ctypes -import email -import getpass -import io -import itertools -import optparse -import os -import platform -import re -import shlex -import shutil -import socket -import struct -import subprocess -import sys -import xml.etree.ElementTree - - -try: - import urllib.request as compat_urllib_request -except ImportError: # Python 2 - import urllib2 as compat_urllib_request - -try: - import urllib.error as compat_urllib_error -except ImportError: # Python 2 - import urllib2 as compat_urllib_error - -try: - import urllib.parse as compat_urllib_parse -except ImportError: # Python 2 - import urllib as compat_urllib_parse - -try: - from urllib.parse import urlparse as compat_urllib_parse_urlparse -except ImportError: # Python 2 - from urlparse import urlparse as compat_urllib_parse_urlparse - -try: - import urllib.parse as compat_urlparse -except ImportError: # Python 2 - import urlparse as compat_urlparse - -try: - import urllib.response as compat_urllib_response -except ImportError: # Python 2 - import urllib as compat_urllib_response - -try: - import http.cookiejar as compat_cookiejar -except ImportError: # Python 2 - import cookielib as compat_cookiejar - -try: - import http.cookies as compat_cookies -except ImportError: # Python 2 - import Cookie as compat_cookies - -try: - import html.entities as compat_html_entities -except ImportError: # Python 2 - import htmlentitydefs as compat_html_entities - -try: # Python >= 3.3 - compat_html_entities_html5 = compat_html_entities.html5 -except AttributeError: - # Copied from CPython 3.5.1 html/entities.py - compat_html_entities_html5 = { - 'Aacute': '\xc1', - 'aacute': '\xe1', - 'Aacute;': '\xc1', - 'aacute;': '\xe1', - 'Abreve;': '\u0102', - 'abreve;': '\u0103', - 'ac;': '\u223e', - 'acd;': '\u223f', - 'acE;': '\u223e\u0333', - 'Acirc': '\xc2', - 'acirc': '\xe2', - 'Acirc;': '\xc2', - 'acirc;': '\xe2', - 'acute': '\xb4', - 'acute;': '\xb4', - 'Acy;': '\u0410', - 'acy;': '\u0430', - 'AElig': '\xc6', - 'aelig': '\xe6', - 'AElig;': '\xc6', - 'aelig;': '\xe6', - 'af;': '\u2061', - 'Afr;': '\U0001d504', - 'afr;': '\U0001d51e', - 'Agrave': '\xc0', - 'agrave': '\xe0', - 'Agrave;': '\xc0', - 'agrave;': '\xe0', - 'alefsym;': '\u2135', - 'aleph;': '\u2135', - 'Alpha;': '\u0391', - 'alpha;': '\u03b1', - 'Amacr;': '\u0100', - 'amacr;': '\u0101', - 'amalg;': 
'\u2a3f', - 'AMP': '&', - 'amp': '&', - 'AMP;': '&', - 'amp;': '&', - 'And;': '\u2a53', - 'and;': '\u2227', - 'andand;': '\u2a55', - 'andd;': '\u2a5c', - 'andslope;': '\u2a58', - 'andv;': '\u2a5a', - 'ang;': '\u2220', - 'ange;': '\u29a4', - 'angle;': '\u2220', - 'angmsd;': '\u2221', - 'angmsdaa;': '\u29a8', - 'angmsdab;': '\u29a9', - 'angmsdac;': '\u29aa', - 'angmsdad;': '\u29ab', - 'angmsdae;': '\u29ac', - 'angmsdaf;': '\u29ad', - 'angmsdag;': '\u29ae', - 'angmsdah;': '\u29af', - 'angrt;': '\u221f', - 'angrtvb;': '\u22be', - 'angrtvbd;': '\u299d', - 'angsph;': '\u2222', - 'angst;': '\xc5', - 'angzarr;': '\u237c', - 'Aogon;': '\u0104', - 'aogon;': '\u0105', - 'Aopf;': '\U0001d538', - 'aopf;': '\U0001d552', - 'ap;': '\u2248', - 'apacir;': '\u2a6f', - 'apE;': '\u2a70', - 'ape;': '\u224a', - 'apid;': '\u224b', - 'apos;': "'", - 'ApplyFunction;': '\u2061', - 'approx;': '\u2248', - 'approxeq;': '\u224a', - 'Aring': '\xc5', - 'aring': '\xe5', - 'Aring;': '\xc5', - 'aring;': '\xe5', - 'Ascr;': '\U0001d49c', - 'ascr;': '\U0001d4b6', - 'Assign;': '\u2254', - 'ast;': '*', - 'asymp;': '\u2248', - 'asympeq;': '\u224d', - 'Atilde': '\xc3', - 'atilde': '\xe3', - 'Atilde;': '\xc3', - 'atilde;': '\xe3', - 'Auml': '\xc4', - 'auml': '\xe4', - 'Auml;': '\xc4', - 'auml;': '\xe4', - 'awconint;': '\u2233', - 'awint;': '\u2a11', - 'backcong;': '\u224c', - 'backepsilon;': '\u03f6', - 'backprime;': '\u2035', - 'backsim;': '\u223d', - 'backsimeq;': '\u22cd', - 'Backslash;': '\u2216', - 'Barv;': '\u2ae7', - 'barvee;': '\u22bd', - 'Barwed;': '\u2306', - 'barwed;': '\u2305', - 'barwedge;': '\u2305', - 'bbrk;': '\u23b5', - 'bbrktbrk;': '\u23b6', - 'bcong;': '\u224c', - 'Bcy;': '\u0411', - 'bcy;': '\u0431', - 'bdquo;': '\u201e', - 'becaus;': '\u2235', - 'Because;': '\u2235', - 'because;': '\u2235', - 'bemptyv;': '\u29b0', - 'bepsi;': '\u03f6', - 'bernou;': '\u212c', - 'Bernoullis;': '\u212c', - 'Beta;': '\u0392', - 'beta;': '\u03b2', - 'beth;': '\u2136', - 'between;': '\u226c', - 'Bfr;': '\U0001d505', - 'bfr;': '\U0001d51f', - 'bigcap;': '\u22c2', - 'bigcirc;': '\u25ef', - 'bigcup;': '\u22c3', - 'bigodot;': '\u2a00', - 'bigoplus;': '\u2a01', - 'bigotimes;': '\u2a02', - 'bigsqcup;': '\u2a06', - 'bigstar;': '\u2605', - 'bigtriangledown;': '\u25bd', - 'bigtriangleup;': '\u25b3', - 'biguplus;': '\u2a04', - 'bigvee;': '\u22c1', - 'bigwedge;': '\u22c0', - 'bkarow;': '\u290d', - 'blacklozenge;': '\u29eb', - 'blacksquare;': '\u25aa', - 'blacktriangle;': '\u25b4', - 'blacktriangledown;': '\u25be', - 'blacktriangleleft;': '\u25c2', - 'blacktriangleright;': '\u25b8', - 'blank;': '\u2423', - 'blk12;': '\u2592', - 'blk14;': '\u2591', - 'blk34;': '\u2593', - 'block;': '\u2588', - 'bne;': '=\u20e5', - 'bnequiv;': '\u2261\u20e5', - 'bNot;': '\u2aed', - 'bnot;': '\u2310', - 'Bopf;': '\U0001d539', - 'bopf;': '\U0001d553', - 'bot;': '\u22a5', - 'bottom;': '\u22a5', - 'bowtie;': '\u22c8', - 'boxbox;': '\u29c9', - 'boxDL;': '\u2557', - 'boxDl;': '\u2556', - 'boxdL;': '\u2555', - 'boxdl;': '\u2510', - 'boxDR;': '\u2554', - 'boxDr;': '\u2553', - 'boxdR;': '\u2552', - 'boxdr;': '\u250c', - 'boxH;': '\u2550', - 'boxh;': '\u2500', - 'boxHD;': '\u2566', - 'boxHd;': '\u2564', - 'boxhD;': '\u2565', - 'boxhd;': '\u252c', - 'boxHU;': '\u2569', - 'boxHu;': '\u2567', - 'boxhU;': '\u2568', - 'boxhu;': '\u2534', - 'boxminus;': '\u229f', - 'boxplus;': '\u229e', - 'boxtimes;': '\u22a0', - 'boxUL;': '\u255d', - 'boxUl;': '\u255c', - 'boxuL;': '\u255b', - 'boxul;': '\u2518', - 'boxUR;': '\u255a', - 'boxUr;': '\u2559', - 'boxuR;': '\u2558', - 'boxur;': 
'\u2514', - 'boxV;': '\u2551', - 'boxv;': '\u2502', - 'boxVH;': '\u256c', - 'boxVh;': '\u256b', - 'boxvH;': '\u256a', - 'boxvh;': '\u253c', - 'boxVL;': '\u2563', - 'boxVl;': '\u2562', - 'boxvL;': '\u2561', - 'boxvl;': '\u2524', - 'boxVR;': '\u2560', - 'boxVr;': '\u255f', - 'boxvR;': '\u255e', - 'boxvr;': '\u251c', - 'bprime;': '\u2035', - 'Breve;': '\u02d8', - 'breve;': '\u02d8', - 'brvbar': '\xa6', - 'brvbar;': '\xa6', - 'Bscr;': '\u212c', - 'bscr;': '\U0001d4b7', - 'bsemi;': '\u204f', - 'bsim;': '\u223d', - 'bsime;': '\u22cd', - 'bsol;': '\\', - 'bsolb;': '\u29c5', - 'bsolhsub;': '\u27c8', - 'bull;': '\u2022', - 'bullet;': '\u2022', - 'bump;': '\u224e', - 'bumpE;': '\u2aae', - 'bumpe;': '\u224f', - 'Bumpeq;': '\u224e', - 'bumpeq;': '\u224f', - 'Cacute;': '\u0106', - 'cacute;': '\u0107', - 'Cap;': '\u22d2', - 'cap;': '\u2229', - 'capand;': '\u2a44', - 'capbrcup;': '\u2a49', - 'capcap;': '\u2a4b', - 'capcup;': '\u2a47', - 'capdot;': '\u2a40', - 'CapitalDifferentialD;': '\u2145', - 'caps;': '\u2229\ufe00', - 'caret;': '\u2041', - 'caron;': '\u02c7', - 'Cayleys;': '\u212d', - 'ccaps;': '\u2a4d', - 'Ccaron;': '\u010c', - 'ccaron;': '\u010d', - 'Ccedil': '\xc7', - 'ccedil': '\xe7', - 'Ccedil;': '\xc7', - 'ccedil;': '\xe7', - 'Ccirc;': '\u0108', - 'ccirc;': '\u0109', - 'Cconint;': '\u2230', - 'ccups;': '\u2a4c', - 'ccupssm;': '\u2a50', - 'Cdot;': '\u010a', - 'cdot;': '\u010b', - 'cedil': '\xb8', - 'cedil;': '\xb8', - 'Cedilla;': '\xb8', - 'cemptyv;': '\u29b2', - 'cent': '\xa2', - 'cent;': '\xa2', - 'CenterDot;': '\xb7', - 'centerdot;': '\xb7', - 'Cfr;': '\u212d', - 'cfr;': '\U0001d520', - 'CHcy;': '\u0427', - 'chcy;': '\u0447', - 'check;': '\u2713', - 'checkmark;': '\u2713', - 'Chi;': '\u03a7', - 'chi;': '\u03c7', - 'cir;': '\u25cb', - 'circ;': '\u02c6', - 'circeq;': '\u2257', - 'circlearrowleft;': '\u21ba', - 'circlearrowright;': '\u21bb', - 'circledast;': '\u229b', - 'circledcirc;': '\u229a', - 'circleddash;': '\u229d', - 'CircleDot;': '\u2299', - 'circledR;': '\xae', - 'circledS;': '\u24c8', - 'CircleMinus;': '\u2296', - 'CirclePlus;': '\u2295', - 'CircleTimes;': '\u2297', - 'cirE;': '\u29c3', - 'cire;': '\u2257', - 'cirfnint;': '\u2a10', - 'cirmid;': '\u2aef', - 'cirscir;': '\u29c2', - 'ClockwiseContourIntegral;': '\u2232', - 'CloseCurlyDoubleQuote;': '\u201d', - 'CloseCurlyQuote;': '\u2019', - 'clubs;': '\u2663', - 'clubsuit;': '\u2663', - 'Colon;': '\u2237', - 'colon;': ':', - 'Colone;': '\u2a74', - 'colone;': '\u2254', - 'coloneq;': '\u2254', - 'comma;': ',', - 'commat;': '@', - 'comp;': '\u2201', - 'compfn;': '\u2218', - 'complement;': '\u2201', - 'complexes;': '\u2102', - 'cong;': '\u2245', - 'congdot;': '\u2a6d', - 'Congruent;': '\u2261', - 'Conint;': '\u222f', - 'conint;': '\u222e', - 'ContourIntegral;': '\u222e', - 'Copf;': '\u2102', - 'copf;': '\U0001d554', - 'coprod;': '\u2210', - 'Coproduct;': '\u2210', - 'COPY': '\xa9', - 'copy': '\xa9', - 'COPY;': '\xa9', - 'copy;': '\xa9', - 'copysr;': '\u2117', - 'CounterClockwiseContourIntegral;': '\u2233', - 'crarr;': '\u21b5', - 'Cross;': '\u2a2f', - 'cross;': '\u2717', - 'Cscr;': '\U0001d49e', - 'cscr;': '\U0001d4b8', - 'csub;': '\u2acf', - 'csube;': '\u2ad1', - 'csup;': '\u2ad0', - 'csupe;': '\u2ad2', - 'ctdot;': '\u22ef', - 'cudarrl;': '\u2938', - 'cudarrr;': '\u2935', - 'cuepr;': '\u22de', - 'cuesc;': '\u22df', - 'cularr;': '\u21b6', - 'cularrp;': '\u293d', - 'Cup;': '\u22d3', - 'cup;': '\u222a', - 'cupbrcap;': '\u2a48', - 'CupCap;': '\u224d', - 'cupcap;': '\u2a46', - 'cupcup;': '\u2a4a', - 'cupdot;': '\u228d', - 'cupor;': '\u2a45', - 
'cups;': '\u222a\ufe00', - 'curarr;': '\u21b7', - 'curarrm;': '\u293c', - 'curlyeqprec;': '\u22de', - 'curlyeqsucc;': '\u22df', - 'curlyvee;': '\u22ce', - 'curlywedge;': '\u22cf', - 'curren': '\xa4', - 'curren;': '\xa4', - 'curvearrowleft;': '\u21b6', - 'curvearrowright;': '\u21b7', - 'cuvee;': '\u22ce', - 'cuwed;': '\u22cf', - 'cwconint;': '\u2232', - 'cwint;': '\u2231', - 'cylcty;': '\u232d', - 'Dagger;': '\u2021', - 'dagger;': '\u2020', - 'daleth;': '\u2138', - 'Darr;': '\u21a1', - 'dArr;': '\u21d3', - 'darr;': '\u2193', - 'dash;': '\u2010', - 'Dashv;': '\u2ae4', - 'dashv;': '\u22a3', - 'dbkarow;': '\u290f', - 'dblac;': '\u02dd', - 'Dcaron;': '\u010e', - 'dcaron;': '\u010f', - 'Dcy;': '\u0414', - 'dcy;': '\u0434', - 'DD;': '\u2145', - 'dd;': '\u2146', - 'ddagger;': '\u2021', - 'ddarr;': '\u21ca', - 'DDotrahd;': '\u2911', - 'ddotseq;': '\u2a77', - 'deg': '\xb0', - 'deg;': '\xb0', - 'Del;': '\u2207', - 'Delta;': '\u0394', - 'delta;': '\u03b4', - 'demptyv;': '\u29b1', - 'dfisht;': '\u297f', - 'Dfr;': '\U0001d507', - 'dfr;': '\U0001d521', - 'dHar;': '\u2965', - 'dharl;': '\u21c3', - 'dharr;': '\u21c2', - 'DiacriticalAcute;': '\xb4', - 'DiacriticalDot;': '\u02d9', - 'DiacriticalDoubleAcute;': '\u02dd', - 'DiacriticalGrave;': '`', - 'DiacriticalTilde;': '\u02dc', - 'diam;': '\u22c4', - 'Diamond;': '\u22c4', - 'diamond;': '\u22c4', - 'diamondsuit;': '\u2666', - 'diams;': '\u2666', - 'die;': '\xa8', - 'DifferentialD;': '\u2146', - 'digamma;': '\u03dd', - 'disin;': '\u22f2', - 'div;': '\xf7', - 'divide': '\xf7', - 'divide;': '\xf7', - 'divideontimes;': '\u22c7', - 'divonx;': '\u22c7', - 'DJcy;': '\u0402', - 'djcy;': '\u0452', - 'dlcorn;': '\u231e', - 'dlcrop;': '\u230d', - 'dollar;': '$', - 'Dopf;': '\U0001d53b', - 'dopf;': '\U0001d555', - 'Dot;': '\xa8', - 'dot;': '\u02d9', - 'DotDot;': '\u20dc', - 'doteq;': '\u2250', - 'doteqdot;': '\u2251', - 'DotEqual;': '\u2250', - 'dotminus;': '\u2238', - 'dotplus;': '\u2214', - 'dotsquare;': '\u22a1', - 'doublebarwedge;': '\u2306', - 'DoubleContourIntegral;': '\u222f', - 'DoubleDot;': '\xa8', - 'DoubleDownArrow;': '\u21d3', - 'DoubleLeftArrow;': '\u21d0', - 'DoubleLeftRightArrow;': '\u21d4', - 'DoubleLeftTee;': '\u2ae4', - 'DoubleLongLeftArrow;': '\u27f8', - 'DoubleLongLeftRightArrow;': '\u27fa', - 'DoubleLongRightArrow;': '\u27f9', - 'DoubleRightArrow;': '\u21d2', - 'DoubleRightTee;': '\u22a8', - 'DoubleUpArrow;': '\u21d1', - 'DoubleUpDownArrow;': '\u21d5', - 'DoubleVerticalBar;': '\u2225', - 'DownArrow;': '\u2193', - 'Downarrow;': '\u21d3', - 'downarrow;': '\u2193', - 'DownArrowBar;': '\u2913', - 'DownArrowUpArrow;': '\u21f5', - 'DownBreve;': '\u0311', - 'downdownarrows;': '\u21ca', - 'downharpoonleft;': '\u21c3', - 'downharpoonright;': '\u21c2', - 'DownLeftRightVector;': '\u2950', - 'DownLeftTeeVector;': '\u295e', - 'DownLeftVector;': '\u21bd', - 'DownLeftVectorBar;': '\u2956', - 'DownRightTeeVector;': '\u295f', - 'DownRightVector;': '\u21c1', - 'DownRightVectorBar;': '\u2957', - 'DownTee;': '\u22a4', - 'DownTeeArrow;': '\u21a7', - 'drbkarow;': '\u2910', - 'drcorn;': '\u231f', - 'drcrop;': '\u230c', - 'Dscr;': '\U0001d49f', - 'dscr;': '\U0001d4b9', - 'DScy;': '\u0405', - 'dscy;': '\u0455', - 'dsol;': '\u29f6', - 'Dstrok;': '\u0110', - 'dstrok;': '\u0111', - 'dtdot;': '\u22f1', - 'dtri;': '\u25bf', - 'dtrif;': '\u25be', - 'duarr;': '\u21f5', - 'duhar;': '\u296f', - 'dwangle;': '\u29a6', - 'DZcy;': '\u040f', - 'dzcy;': '\u045f', - 'dzigrarr;': '\u27ff', - 'Eacute': '\xc9', - 'eacute': '\xe9', - 'Eacute;': '\xc9', - 'eacute;': '\xe9', - 'easter;': 
'\u2a6e', - 'Ecaron;': '\u011a', - 'ecaron;': '\u011b', - 'ecir;': '\u2256', - 'Ecirc': '\xca', - 'ecirc': '\xea', - 'Ecirc;': '\xca', - 'ecirc;': '\xea', - 'ecolon;': '\u2255', - 'Ecy;': '\u042d', - 'ecy;': '\u044d', - 'eDDot;': '\u2a77', - 'Edot;': '\u0116', - 'eDot;': '\u2251', - 'edot;': '\u0117', - 'ee;': '\u2147', - 'efDot;': '\u2252', - 'Efr;': '\U0001d508', - 'efr;': '\U0001d522', - 'eg;': '\u2a9a', - 'Egrave': '\xc8', - 'egrave': '\xe8', - 'Egrave;': '\xc8', - 'egrave;': '\xe8', - 'egs;': '\u2a96', - 'egsdot;': '\u2a98', - 'el;': '\u2a99', - 'Element;': '\u2208', - 'elinters;': '\u23e7', - 'ell;': '\u2113', - 'els;': '\u2a95', - 'elsdot;': '\u2a97', - 'Emacr;': '\u0112', - 'emacr;': '\u0113', - 'empty;': '\u2205', - 'emptyset;': '\u2205', - 'EmptySmallSquare;': '\u25fb', - 'emptyv;': '\u2205', - 'EmptyVerySmallSquare;': '\u25ab', - 'emsp13;': '\u2004', - 'emsp14;': '\u2005', - 'emsp;': '\u2003', - 'ENG;': '\u014a', - 'eng;': '\u014b', - 'ensp;': '\u2002', - 'Eogon;': '\u0118', - 'eogon;': '\u0119', - 'Eopf;': '\U0001d53c', - 'eopf;': '\U0001d556', - 'epar;': '\u22d5', - 'eparsl;': '\u29e3', - 'eplus;': '\u2a71', - 'epsi;': '\u03b5', - 'Epsilon;': '\u0395', - 'epsilon;': '\u03b5', - 'epsiv;': '\u03f5', - 'eqcirc;': '\u2256', - 'eqcolon;': '\u2255', - 'eqsim;': '\u2242', - 'eqslantgtr;': '\u2a96', - 'eqslantless;': '\u2a95', - 'Equal;': '\u2a75', - 'equals;': '=', - 'EqualTilde;': '\u2242', - 'equest;': '\u225f', - 'Equilibrium;': '\u21cc', - 'equiv;': '\u2261', - 'equivDD;': '\u2a78', - 'eqvparsl;': '\u29e5', - 'erarr;': '\u2971', - 'erDot;': '\u2253', - 'Escr;': '\u2130', - 'escr;': '\u212f', - 'esdot;': '\u2250', - 'Esim;': '\u2a73', - 'esim;': '\u2242', - 'Eta;': '\u0397', - 'eta;': '\u03b7', - 'ETH': '\xd0', - 'eth': '\xf0', - 'ETH;': '\xd0', - 'eth;': '\xf0', - 'Euml': '\xcb', - 'euml': '\xeb', - 'Euml;': '\xcb', - 'euml;': '\xeb', - 'euro;': '\u20ac', - 'excl;': '!', - 'exist;': '\u2203', - 'Exists;': '\u2203', - 'expectation;': '\u2130', - 'ExponentialE;': '\u2147', - 'exponentiale;': '\u2147', - 'fallingdotseq;': '\u2252', - 'Fcy;': '\u0424', - 'fcy;': '\u0444', - 'female;': '\u2640', - 'ffilig;': '\ufb03', - 'fflig;': '\ufb00', - 'ffllig;': '\ufb04', - 'Ffr;': '\U0001d509', - 'ffr;': '\U0001d523', - 'filig;': '\ufb01', - 'FilledSmallSquare;': '\u25fc', - 'FilledVerySmallSquare;': '\u25aa', - 'fjlig;': 'fj', - 'flat;': '\u266d', - 'fllig;': '\ufb02', - 'fltns;': '\u25b1', - 'fnof;': '\u0192', - 'Fopf;': '\U0001d53d', - 'fopf;': '\U0001d557', - 'ForAll;': '\u2200', - 'forall;': '\u2200', - 'fork;': '\u22d4', - 'forkv;': '\u2ad9', - 'Fouriertrf;': '\u2131', - 'fpartint;': '\u2a0d', - 'frac12': '\xbd', - 'frac12;': '\xbd', - 'frac13;': '\u2153', - 'frac14': '\xbc', - 'frac14;': '\xbc', - 'frac15;': '\u2155', - 'frac16;': '\u2159', - 'frac18;': '\u215b', - 'frac23;': '\u2154', - 'frac25;': '\u2156', - 'frac34': '\xbe', - 'frac34;': '\xbe', - 'frac35;': '\u2157', - 'frac38;': '\u215c', - 'frac45;': '\u2158', - 'frac56;': '\u215a', - 'frac58;': '\u215d', - 'frac78;': '\u215e', - 'frasl;': '\u2044', - 'frown;': '\u2322', - 'Fscr;': '\u2131', - 'fscr;': '\U0001d4bb', - 'gacute;': '\u01f5', - 'Gamma;': '\u0393', - 'gamma;': '\u03b3', - 'Gammad;': '\u03dc', - 'gammad;': '\u03dd', - 'gap;': '\u2a86', - 'Gbreve;': '\u011e', - 'gbreve;': '\u011f', - 'Gcedil;': '\u0122', - 'Gcirc;': '\u011c', - 'gcirc;': '\u011d', - 'Gcy;': '\u0413', - 'gcy;': '\u0433', - 'Gdot;': '\u0120', - 'gdot;': '\u0121', - 'gE;': '\u2267', - 'ge;': '\u2265', - 'gEl;': '\u2a8c', - 'gel;': '\u22db', - 'geq;': 
'\u2265', - 'geqq;': '\u2267', - 'geqslant;': '\u2a7e', - 'ges;': '\u2a7e', - 'gescc;': '\u2aa9', - 'gesdot;': '\u2a80', - 'gesdoto;': '\u2a82', - 'gesdotol;': '\u2a84', - 'gesl;': '\u22db\ufe00', - 'gesles;': '\u2a94', - 'Gfr;': '\U0001d50a', - 'gfr;': '\U0001d524', - 'Gg;': '\u22d9', - 'gg;': '\u226b', - 'ggg;': '\u22d9', - 'gimel;': '\u2137', - 'GJcy;': '\u0403', - 'gjcy;': '\u0453', - 'gl;': '\u2277', - 'gla;': '\u2aa5', - 'glE;': '\u2a92', - 'glj;': '\u2aa4', - 'gnap;': '\u2a8a', - 'gnapprox;': '\u2a8a', - 'gnE;': '\u2269', - 'gne;': '\u2a88', - 'gneq;': '\u2a88', - 'gneqq;': '\u2269', - 'gnsim;': '\u22e7', - 'Gopf;': '\U0001d53e', - 'gopf;': '\U0001d558', - 'grave;': '`', - 'GreaterEqual;': '\u2265', - 'GreaterEqualLess;': '\u22db', - 'GreaterFullEqual;': '\u2267', - 'GreaterGreater;': '\u2aa2', - 'GreaterLess;': '\u2277', - 'GreaterSlantEqual;': '\u2a7e', - 'GreaterTilde;': '\u2273', - 'Gscr;': '\U0001d4a2', - 'gscr;': '\u210a', - 'gsim;': '\u2273', - 'gsime;': '\u2a8e', - 'gsiml;': '\u2a90', - 'GT': '>', - 'gt': '>', - 'GT;': '>', - 'Gt;': '\u226b', - 'gt;': '>', - 'gtcc;': '\u2aa7', - 'gtcir;': '\u2a7a', - 'gtdot;': '\u22d7', - 'gtlPar;': '\u2995', - 'gtquest;': '\u2a7c', - 'gtrapprox;': '\u2a86', - 'gtrarr;': '\u2978', - 'gtrdot;': '\u22d7', - 'gtreqless;': '\u22db', - 'gtreqqless;': '\u2a8c', - 'gtrless;': '\u2277', - 'gtrsim;': '\u2273', - 'gvertneqq;': '\u2269\ufe00', - 'gvnE;': '\u2269\ufe00', - 'Hacek;': '\u02c7', - 'hairsp;': '\u200a', - 'half;': '\xbd', - 'hamilt;': '\u210b', - 'HARDcy;': '\u042a', - 'hardcy;': '\u044a', - 'hArr;': '\u21d4', - 'harr;': '\u2194', - 'harrcir;': '\u2948', - 'harrw;': '\u21ad', - 'Hat;': '^', - 'hbar;': '\u210f', - 'Hcirc;': '\u0124', - 'hcirc;': '\u0125', - 'hearts;': '\u2665', - 'heartsuit;': '\u2665', - 'hellip;': '\u2026', - 'hercon;': '\u22b9', - 'Hfr;': '\u210c', - 'hfr;': '\U0001d525', - 'HilbertSpace;': '\u210b', - 'hksearow;': '\u2925', - 'hkswarow;': '\u2926', - 'hoarr;': '\u21ff', - 'homtht;': '\u223b', - 'hookleftarrow;': '\u21a9', - 'hookrightarrow;': '\u21aa', - 'Hopf;': '\u210d', - 'hopf;': '\U0001d559', - 'horbar;': '\u2015', - 'HorizontalLine;': '\u2500', - 'Hscr;': '\u210b', - 'hscr;': '\U0001d4bd', - 'hslash;': '\u210f', - 'Hstrok;': '\u0126', - 'hstrok;': '\u0127', - 'HumpDownHump;': '\u224e', - 'HumpEqual;': '\u224f', - 'hybull;': '\u2043', - 'hyphen;': '\u2010', - 'Iacute': '\xcd', - 'iacute': '\xed', - 'Iacute;': '\xcd', - 'iacute;': '\xed', - 'ic;': '\u2063', - 'Icirc': '\xce', - 'icirc': '\xee', - 'Icirc;': '\xce', - 'icirc;': '\xee', - 'Icy;': '\u0418', - 'icy;': '\u0438', - 'Idot;': '\u0130', - 'IEcy;': '\u0415', - 'iecy;': '\u0435', - 'iexcl': '\xa1', - 'iexcl;': '\xa1', - 'iff;': '\u21d4', - 'Ifr;': '\u2111', - 'ifr;': '\U0001d526', - 'Igrave': '\xcc', - 'igrave': '\xec', - 'Igrave;': '\xcc', - 'igrave;': '\xec', - 'ii;': '\u2148', - 'iiiint;': '\u2a0c', - 'iiint;': '\u222d', - 'iinfin;': '\u29dc', - 'iiota;': '\u2129', - 'IJlig;': '\u0132', - 'ijlig;': '\u0133', - 'Im;': '\u2111', - 'Imacr;': '\u012a', - 'imacr;': '\u012b', - 'image;': '\u2111', - 'ImaginaryI;': '\u2148', - 'imagline;': '\u2110', - 'imagpart;': '\u2111', - 'imath;': '\u0131', - 'imof;': '\u22b7', - 'imped;': '\u01b5', - 'Implies;': '\u21d2', - 'in;': '\u2208', - 'incare;': '\u2105', - 'infin;': '\u221e', - 'infintie;': '\u29dd', - 'inodot;': '\u0131', - 'Int;': '\u222c', - 'int;': '\u222b', - 'intcal;': '\u22ba', - 'integers;': '\u2124', - 'Integral;': '\u222b', - 'intercal;': '\u22ba', - 'Intersection;': '\u22c2', - 'intlarhk;': '\u2a17', - 
'intprod;': '\u2a3c', - 'InvisibleComma;': '\u2063', - 'InvisibleTimes;': '\u2062', - 'IOcy;': '\u0401', - 'iocy;': '\u0451', - 'Iogon;': '\u012e', - 'iogon;': '\u012f', - 'Iopf;': '\U0001d540', - 'iopf;': '\U0001d55a', - 'Iota;': '\u0399', - 'iota;': '\u03b9', - 'iprod;': '\u2a3c', - 'iquest': '\xbf', - 'iquest;': '\xbf', - 'Iscr;': '\u2110', - 'iscr;': '\U0001d4be', - 'isin;': '\u2208', - 'isindot;': '\u22f5', - 'isinE;': '\u22f9', - 'isins;': '\u22f4', - 'isinsv;': '\u22f3', - 'isinv;': '\u2208', - 'it;': '\u2062', - 'Itilde;': '\u0128', - 'itilde;': '\u0129', - 'Iukcy;': '\u0406', - 'iukcy;': '\u0456', - 'Iuml': '\xcf', - 'iuml': '\xef', - 'Iuml;': '\xcf', - 'iuml;': '\xef', - 'Jcirc;': '\u0134', - 'jcirc;': '\u0135', - 'Jcy;': '\u0419', - 'jcy;': '\u0439', - 'Jfr;': '\U0001d50d', - 'jfr;': '\U0001d527', - 'jmath;': '\u0237', - 'Jopf;': '\U0001d541', - 'jopf;': '\U0001d55b', - 'Jscr;': '\U0001d4a5', - 'jscr;': '\U0001d4bf', - 'Jsercy;': '\u0408', - 'jsercy;': '\u0458', - 'Jukcy;': '\u0404', - 'jukcy;': '\u0454', - 'Kappa;': '\u039a', - 'kappa;': '\u03ba', - 'kappav;': '\u03f0', - 'Kcedil;': '\u0136', - 'kcedil;': '\u0137', - 'Kcy;': '\u041a', - 'kcy;': '\u043a', - 'Kfr;': '\U0001d50e', - 'kfr;': '\U0001d528', - 'kgreen;': '\u0138', - 'KHcy;': '\u0425', - 'khcy;': '\u0445', - 'KJcy;': '\u040c', - 'kjcy;': '\u045c', - 'Kopf;': '\U0001d542', - 'kopf;': '\U0001d55c', - 'Kscr;': '\U0001d4a6', - 'kscr;': '\U0001d4c0', - 'lAarr;': '\u21da', - 'Lacute;': '\u0139', - 'lacute;': '\u013a', - 'laemptyv;': '\u29b4', - 'lagran;': '\u2112', - 'Lambda;': '\u039b', - 'lambda;': '\u03bb', - 'Lang;': '\u27ea', - 'lang;': '\u27e8', - 'langd;': '\u2991', - 'langle;': '\u27e8', - 'lap;': '\u2a85', - 'Laplacetrf;': '\u2112', - 'laquo': '\xab', - 'laquo;': '\xab', - 'Larr;': '\u219e', - 'lArr;': '\u21d0', - 'larr;': '\u2190', - 'larrb;': '\u21e4', - 'larrbfs;': '\u291f', - 'larrfs;': '\u291d', - 'larrhk;': '\u21a9', - 'larrlp;': '\u21ab', - 'larrpl;': '\u2939', - 'larrsim;': '\u2973', - 'larrtl;': '\u21a2', - 'lat;': '\u2aab', - 'lAtail;': '\u291b', - 'latail;': '\u2919', - 'late;': '\u2aad', - 'lates;': '\u2aad\ufe00', - 'lBarr;': '\u290e', - 'lbarr;': '\u290c', - 'lbbrk;': '\u2772', - 'lbrace;': '{', - 'lbrack;': '[', - 'lbrke;': '\u298b', - 'lbrksld;': '\u298f', - 'lbrkslu;': '\u298d', - 'Lcaron;': '\u013d', - 'lcaron;': '\u013e', - 'Lcedil;': '\u013b', - 'lcedil;': '\u013c', - 'lceil;': '\u2308', - 'lcub;': '{', - 'Lcy;': '\u041b', - 'lcy;': '\u043b', - 'ldca;': '\u2936', - 'ldquo;': '\u201c', - 'ldquor;': '\u201e', - 'ldrdhar;': '\u2967', - 'ldrushar;': '\u294b', - 'ldsh;': '\u21b2', - 'lE;': '\u2266', - 'le;': '\u2264', - 'LeftAngleBracket;': '\u27e8', - 'LeftArrow;': '\u2190', - 'Leftarrow;': '\u21d0', - 'leftarrow;': '\u2190', - 'LeftArrowBar;': '\u21e4', - 'LeftArrowRightArrow;': '\u21c6', - 'leftarrowtail;': '\u21a2', - 'LeftCeiling;': '\u2308', - 'LeftDoubleBracket;': '\u27e6', - 'LeftDownTeeVector;': '\u2961', - 'LeftDownVector;': '\u21c3', - 'LeftDownVectorBar;': '\u2959', - 'LeftFloor;': '\u230a', - 'leftharpoondown;': '\u21bd', - 'leftharpoonup;': '\u21bc', - 'leftleftarrows;': '\u21c7', - 'LeftRightArrow;': '\u2194', - 'Leftrightarrow;': '\u21d4', - 'leftrightarrow;': '\u2194', - 'leftrightarrows;': '\u21c6', - 'leftrightharpoons;': '\u21cb', - 'leftrightsquigarrow;': '\u21ad', - 'LeftRightVector;': '\u294e', - 'LeftTee;': '\u22a3', - 'LeftTeeArrow;': '\u21a4', - 'LeftTeeVector;': '\u295a', - 'leftthreetimes;': '\u22cb', - 'LeftTriangle;': '\u22b2', - 'LeftTriangleBar;': '\u29cf', - 
'LeftTriangleEqual;': '\u22b4', - 'LeftUpDownVector;': '\u2951', - 'LeftUpTeeVector;': '\u2960', - 'LeftUpVector;': '\u21bf', - 'LeftUpVectorBar;': '\u2958', - 'LeftVector;': '\u21bc', - 'LeftVectorBar;': '\u2952', - 'lEg;': '\u2a8b', - 'leg;': '\u22da', - 'leq;': '\u2264', - 'leqq;': '\u2266', - 'leqslant;': '\u2a7d', - 'les;': '\u2a7d', - 'lescc;': '\u2aa8', - 'lesdot;': '\u2a7f', - 'lesdoto;': '\u2a81', - 'lesdotor;': '\u2a83', - 'lesg;': '\u22da\ufe00', - 'lesges;': '\u2a93', - 'lessapprox;': '\u2a85', - 'lessdot;': '\u22d6', - 'lesseqgtr;': '\u22da', - 'lesseqqgtr;': '\u2a8b', - 'LessEqualGreater;': '\u22da', - 'LessFullEqual;': '\u2266', - 'LessGreater;': '\u2276', - 'lessgtr;': '\u2276', - 'LessLess;': '\u2aa1', - 'lesssim;': '\u2272', - 'LessSlantEqual;': '\u2a7d', - 'LessTilde;': '\u2272', - 'lfisht;': '\u297c', - 'lfloor;': '\u230a', - 'Lfr;': '\U0001d50f', - 'lfr;': '\U0001d529', - 'lg;': '\u2276', - 'lgE;': '\u2a91', - 'lHar;': '\u2962', - 'lhard;': '\u21bd', - 'lharu;': '\u21bc', - 'lharul;': '\u296a', - 'lhblk;': '\u2584', - 'LJcy;': '\u0409', - 'ljcy;': '\u0459', - 'Ll;': '\u22d8', - 'll;': '\u226a', - 'llarr;': '\u21c7', - 'llcorner;': '\u231e', - 'Lleftarrow;': '\u21da', - 'llhard;': '\u296b', - 'lltri;': '\u25fa', - 'Lmidot;': '\u013f', - 'lmidot;': '\u0140', - 'lmoust;': '\u23b0', - 'lmoustache;': '\u23b0', - 'lnap;': '\u2a89', - 'lnapprox;': '\u2a89', - 'lnE;': '\u2268', - 'lne;': '\u2a87', - 'lneq;': '\u2a87', - 'lneqq;': '\u2268', - 'lnsim;': '\u22e6', - 'loang;': '\u27ec', - 'loarr;': '\u21fd', - 'lobrk;': '\u27e6', - 'LongLeftArrow;': '\u27f5', - 'Longleftarrow;': '\u27f8', - 'longleftarrow;': '\u27f5', - 'LongLeftRightArrow;': '\u27f7', - 'Longleftrightarrow;': '\u27fa', - 'longleftrightarrow;': '\u27f7', - 'longmapsto;': '\u27fc', - 'LongRightArrow;': '\u27f6', - 'Longrightarrow;': '\u27f9', - 'longrightarrow;': '\u27f6', - 'looparrowleft;': '\u21ab', - 'looparrowright;': '\u21ac', - 'lopar;': '\u2985', - 'Lopf;': '\U0001d543', - 'lopf;': '\U0001d55d', - 'loplus;': '\u2a2d', - 'lotimes;': '\u2a34', - 'lowast;': '\u2217', - 'lowbar;': '_', - 'LowerLeftArrow;': '\u2199', - 'LowerRightArrow;': '\u2198', - 'loz;': '\u25ca', - 'lozenge;': '\u25ca', - 'lozf;': '\u29eb', - 'lpar;': '(', - 'lparlt;': '\u2993', - 'lrarr;': '\u21c6', - 'lrcorner;': '\u231f', - 'lrhar;': '\u21cb', - 'lrhard;': '\u296d', - 'lrm;': '\u200e', - 'lrtri;': '\u22bf', - 'lsaquo;': '\u2039', - 'Lscr;': '\u2112', - 'lscr;': '\U0001d4c1', - 'Lsh;': '\u21b0', - 'lsh;': '\u21b0', - 'lsim;': '\u2272', - 'lsime;': '\u2a8d', - 'lsimg;': '\u2a8f', - 'lsqb;': '[', - 'lsquo;': '\u2018', - 'lsquor;': '\u201a', - 'Lstrok;': '\u0141', - 'lstrok;': '\u0142', - 'LT': '<', - 'lt': '<', - 'LT;': '<', - 'Lt;': '\u226a', - 'lt;': '<', - 'ltcc;': '\u2aa6', - 'ltcir;': '\u2a79', - 'ltdot;': '\u22d6', - 'lthree;': '\u22cb', - 'ltimes;': '\u22c9', - 'ltlarr;': '\u2976', - 'ltquest;': '\u2a7b', - 'ltri;': '\u25c3', - 'ltrie;': '\u22b4', - 'ltrif;': '\u25c2', - 'ltrPar;': '\u2996', - 'lurdshar;': '\u294a', - 'luruhar;': '\u2966', - 'lvertneqq;': '\u2268\ufe00', - 'lvnE;': '\u2268\ufe00', - 'macr': '\xaf', - 'macr;': '\xaf', - 'male;': '\u2642', - 'malt;': '\u2720', - 'maltese;': '\u2720', - 'Map;': '\u2905', - 'map;': '\u21a6', - 'mapsto;': '\u21a6', - 'mapstodown;': '\u21a7', - 'mapstoleft;': '\u21a4', - 'mapstoup;': '\u21a5', - 'marker;': '\u25ae', - 'mcomma;': '\u2a29', - 'Mcy;': '\u041c', - 'mcy;': '\u043c', - 'mdash;': '\u2014', - 'mDDot;': '\u223a', - 'measuredangle;': '\u2221', - 'MediumSpace;': '\u205f', - 
'Mellintrf;': '\u2133', - 'Mfr;': '\U0001d510', - 'mfr;': '\U0001d52a', - 'mho;': '\u2127', - 'micro': '\xb5', - 'micro;': '\xb5', - 'mid;': '\u2223', - 'midast;': '*', - 'midcir;': '\u2af0', - 'middot': '\xb7', - 'middot;': '\xb7', - 'minus;': '\u2212', - 'minusb;': '\u229f', - 'minusd;': '\u2238', - 'minusdu;': '\u2a2a', - 'MinusPlus;': '\u2213', - 'mlcp;': '\u2adb', - 'mldr;': '\u2026', - 'mnplus;': '\u2213', - 'models;': '\u22a7', - 'Mopf;': '\U0001d544', - 'mopf;': '\U0001d55e', - 'mp;': '\u2213', - 'Mscr;': '\u2133', - 'mscr;': '\U0001d4c2', - 'mstpos;': '\u223e', - 'Mu;': '\u039c', - 'mu;': '\u03bc', - 'multimap;': '\u22b8', - 'mumap;': '\u22b8', - 'nabla;': '\u2207', - 'Nacute;': '\u0143', - 'nacute;': '\u0144', - 'nang;': '\u2220\u20d2', - 'nap;': '\u2249', - 'napE;': '\u2a70\u0338', - 'napid;': '\u224b\u0338', - 'napos;': '\u0149', - 'napprox;': '\u2249', - 'natur;': '\u266e', - 'natural;': '\u266e', - 'naturals;': '\u2115', - 'nbsp': '\xa0', - 'nbsp;': '\xa0', - 'nbump;': '\u224e\u0338', - 'nbumpe;': '\u224f\u0338', - 'ncap;': '\u2a43', - 'Ncaron;': '\u0147', - 'ncaron;': '\u0148', - 'Ncedil;': '\u0145', - 'ncedil;': '\u0146', - 'ncong;': '\u2247', - 'ncongdot;': '\u2a6d\u0338', - 'ncup;': '\u2a42', - 'Ncy;': '\u041d', - 'ncy;': '\u043d', - 'ndash;': '\u2013', - 'ne;': '\u2260', - 'nearhk;': '\u2924', - 'neArr;': '\u21d7', - 'nearr;': '\u2197', - 'nearrow;': '\u2197', - 'nedot;': '\u2250\u0338', - 'NegativeMediumSpace;': '\u200b', - 'NegativeThickSpace;': '\u200b', - 'NegativeThinSpace;': '\u200b', - 'NegativeVeryThinSpace;': '\u200b', - 'nequiv;': '\u2262', - 'nesear;': '\u2928', - 'nesim;': '\u2242\u0338', - 'NestedGreaterGreater;': '\u226b', - 'NestedLessLess;': '\u226a', - 'NewLine;': '\n', - 'nexist;': '\u2204', - 'nexists;': '\u2204', - 'Nfr;': '\U0001d511', - 'nfr;': '\U0001d52b', - 'ngE;': '\u2267\u0338', - 'nge;': '\u2271', - 'ngeq;': '\u2271', - 'ngeqq;': '\u2267\u0338', - 'ngeqslant;': '\u2a7e\u0338', - 'nges;': '\u2a7e\u0338', - 'nGg;': '\u22d9\u0338', - 'ngsim;': '\u2275', - 'nGt;': '\u226b\u20d2', - 'ngt;': '\u226f', - 'ngtr;': '\u226f', - 'nGtv;': '\u226b\u0338', - 'nhArr;': '\u21ce', - 'nharr;': '\u21ae', - 'nhpar;': '\u2af2', - 'ni;': '\u220b', - 'nis;': '\u22fc', - 'nisd;': '\u22fa', - 'niv;': '\u220b', - 'NJcy;': '\u040a', - 'njcy;': '\u045a', - 'nlArr;': '\u21cd', - 'nlarr;': '\u219a', - 'nldr;': '\u2025', - 'nlE;': '\u2266\u0338', - 'nle;': '\u2270', - 'nLeftarrow;': '\u21cd', - 'nleftarrow;': '\u219a', - 'nLeftrightarrow;': '\u21ce', - 'nleftrightarrow;': '\u21ae', - 'nleq;': '\u2270', - 'nleqq;': '\u2266\u0338', - 'nleqslant;': '\u2a7d\u0338', - 'nles;': '\u2a7d\u0338', - 'nless;': '\u226e', - 'nLl;': '\u22d8\u0338', - 'nlsim;': '\u2274', - 'nLt;': '\u226a\u20d2', - 'nlt;': '\u226e', - 'nltri;': '\u22ea', - 'nltrie;': '\u22ec', - 'nLtv;': '\u226a\u0338', - 'nmid;': '\u2224', - 'NoBreak;': '\u2060', - 'NonBreakingSpace;': '\xa0', - 'Nopf;': '\u2115', - 'nopf;': '\U0001d55f', - 'not': '\xac', - 'Not;': '\u2aec', - 'not;': '\xac', - 'NotCongruent;': '\u2262', - 'NotCupCap;': '\u226d', - 'NotDoubleVerticalBar;': '\u2226', - 'NotElement;': '\u2209', - 'NotEqual;': '\u2260', - 'NotEqualTilde;': '\u2242\u0338', - 'NotExists;': '\u2204', - 'NotGreater;': '\u226f', - 'NotGreaterEqual;': '\u2271', - 'NotGreaterFullEqual;': '\u2267\u0338', - 'NotGreaterGreater;': '\u226b\u0338', - 'NotGreaterLess;': '\u2279', - 'NotGreaterSlantEqual;': '\u2a7e\u0338', - 'NotGreaterTilde;': '\u2275', - 'NotHumpDownHump;': '\u224e\u0338', - 'NotHumpEqual;': '\u224f\u0338', - 'notin;': 
'\u2209', - 'notindot;': '\u22f5\u0338', - 'notinE;': '\u22f9\u0338', - 'notinva;': '\u2209', - 'notinvb;': '\u22f7', - 'notinvc;': '\u22f6', - 'NotLeftTriangle;': '\u22ea', - 'NotLeftTriangleBar;': '\u29cf\u0338', - 'NotLeftTriangleEqual;': '\u22ec', - 'NotLess;': '\u226e', - 'NotLessEqual;': '\u2270', - 'NotLessGreater;': '\u2278', - 'NotLessLess;': '\u226a\u0338', - 'NotLessSlantEqual;': '\u2a7d\u0338', - 'NotLessTilde;': '\u2274', - 'NotNestedGreaterGreater;': '\u2aa2\u0338', - 'NotNestedLessLess;': '\u2aa1\u0338', - 'notni;': '\u220c', - 'notniva;': '\u220c', - 'notnivb;': '\u22fe', - 'notnivc;': '\u22fd', - 'NotPrecedes;': '\u2280', - 'NotPrecedesEqual;': '\u2aaf\u0338', - 'NotPrecedesSlantEqual;': '\u22e0', - 'NotReverseElement;': '\u220c', - 'NotRightTriangle;': '\u22eb', - 'NotRightTriangleBar;': '\u29d0\u0338', - 'NotRightTriangleEqual;': '\u22ed', - 'NotSquareSubset;': '\u228f\u0338', - 'NotSquareSubsetEqual;': '\u22e2', - 'NotSquareSuperset;': '\u2290\u0338', - 'NotSquareSupersetEqual;': '\u22e3', - 'NotSubset;': '\u2282\u20d2', - 'NotSubsetEqual;': '\u2288', - 'NotSucceeds;': '\u2281', - 'NotSucceedsEqual;': '\u2ab0\u0338', - 'NotSucceedsSlantEqual;': '\u22e1', - 'NotSucceedsTilde;': '\u227f\u0338', - 'NotSuperset;': '\u2283\u20d2', - 'NotSupersetEqual;': '\u2289', - 'NotTilde;': '\u2241', - 'NotTildeEqual;': '\u2244', - 'NotTildeFullEqual;': '\u2247', - 'NotTildeTilde;': '\u2249', - 'NotVerticalBar;': '\u2224', - 'npar;': '\u2226', - 'nparallel;': '\u2226', - 'nparsl;': '\u2afd\u20e5', - 'npart;': '\u2202\u0338', - 'npolint;': '\u2a14', - 'npr;': '\u2280', - 'nprcue;': '\u22e0', - 'npre;': '\u2aaf\u0338', - 'nprec;': '\u2280', - 'npreceq;': '\u2aaf\u0338', - 'nrArr;': '\u21cf', - 'nrarr;': '\u219b', - 'nrarrc;': '\u2933\u0338', - 'nrarrw;': '\u219d\u0338', - 'nRightarrow;': '\u21cf', - 'nrightarrow;': '\u219b', - 'nrtri;': '\u22eb', - 'nrtrie;': '\u22ed', - 'nsc;': '\u2281', - 'nsccue;': '\u22e1', - 'nsce;': '\u2ab0\u0338', - 'Nscr;': '\U0001d4a9', - 'nscr;': '\U0001d4c3', - 'nshortmid;': '\u2224', - 'nshortparallel;': '\u2226', - 'nsim;': '\u2241', - 'nsime;': '\u2244', - 'nsimeq;': '\u2244', - 'nsmid;': '\u2224', - 'nspar;': '\u2226', - 'nsqsube;': '\u22e2', - 'nsqsupe;': '\u22e3', - 'nsub;': '\u2284', - 'nsubE;': '\u2ac5\u0338', - 'nsube;': '\u2288', - 'nsubset;': '\u2282\u20d2', - 'nsubseteq;': '\u2288', - 'nsubseteqq;': '\u2ac5\u0338', - 'nsucc;': '\u2281', - 'nsucceq;': '\u2ab0\u0338', - 'nsup;': '\u2285', - 'nsupE;': '\u2ac6\u0338', - 'nsupe;': '\u2289', - 'nsupset;': '\u2283\u20d2', - 'nsupseteq;': '\u2289', - 'nsupseteqq;': '\u2ac6\u0338', - 'ntgl;': '\u2279', - 'Ntilde': '\xd1', - 'ntilde': '\xf1', - 'Ntilde;': '\xd1', - 'ntilde;': '\xf1', - 'ntlg;': '\u2278', - 'ntriangleleft;': '\u22ea', - 'ntrianglelefteq;': '\u22ec', - 'ntriangleright;': '\u22eb', - 'ntrianglerighteq;': '\u22ed', - 'Nu;': '\u039d', - 'nu;': '\u03bd', - 'num;': '#', - 'numero;': '\u2116', - 'numsp;': '\u2007', - 'nvap;': '\u224d\u20d2', - 'nVDash;': '\u22af', - 'nVdash;': '\u22ae', - 'nvDash;': '\u22ad', - 'nvdash;': '\u22ac', - 'nvge;': '\u2265\u20d2', - 'nvgt;': '>\u20d2', - 'nvHarr;': '\u2904', - 'nvinfin;': '\u29de', - 'nvlArr;': '\u2902', - 'nvle;': '\u2264\u20d2', - 'nvlt;': '<\u20d2', - 'nvltrie;': '\u22b4\u20d2', - 'nvrArr;': '\u2903', - 'nvrtrie;': '\u22b5\u20d2', - 'nvsim;': '\u223c\u20d2', - 'nwarhk;': '\u2923', - 'nwArr;': '\u21d6', - 'nwarr;': '\u2196', - 'nwarrow;': '\u2196', - 'nwnear;': '\u2927', - 'Oacute': '\xd3', - 'oacute': '\xf3', - 'Oacute;': '\xd3', - 'oacute;': '\xf3', - 
'oast;': '\u229b', - 'ocir;': '\u229a', - 'Ocirc': '\xd4', - 'ocirc': '\xf4', - 'Ocirc;': '\xd4', - 'ocirc;': '\xf4', - 'Ocy;': '\u041e', - 'ocy;': '\u043e', - 'odash;': '\u229d', - 'Odblac;': '\u0150', - 'odblac;': '\u0151', - 'odiv;': '\u2a38', - 'odot;': '\u2299', - 'odsold;': '\u29bc', - 'OElig;': '\u0152', - 'oelig;': '\u0153', - 'ofcir;': '\u29bf', - 'Ofr;': '\U0001d512', - 'ofr;': '\U0001d52c', - 'ogon;': '\u02db', - 'Ograve': '\xd2', - 'ograve': '\xf2', - 'Ograve;': '\xd2', - 'ograve;': '\xf2', - 'ogt;': '\u29c1', - 'ohbar;': '\u29b5', - 'ohm;': '\u03a9', - 'oint;': '\u222e', - 'olarr;': '\u21ba', - 'olcir;': '\u29be', - 'olcross;': '\u29bb', - 'oline;': '\u203e', - 'olt;': '\u29c0', - 'Omacr;': '\u014c', - 'omacr;': '\u014d', - 'Omega;': '\u03a9', - 'omega;': '\u03c9', - 'Omicron;': '\u039f', - 'omicron;': '\u03bf', - 'omid;': '\u29b6', - 'ominus;': '\u2296', - 'Oopf;': '\U0001d546', - 'oopf;': '\U0001d560', - 'opar;': '\u29b7', - 'OpenCurlyDoubleQuote;': '\u201c', - 'OpenCurlyQuote;': '\u2018', - 'operp;': '\u29b9', - 'oplus;': '\u2295', - 'Or;': '\u2a54', - 'or;': '\u2228', - 'orarr;': '\u21bb', - 'ord;': '\u2a5d', - 'order;': '\u2134', - 'orderof;': '\u2134', - 'ordf': '\xaa', - 'ordf;': '\xaa', - 'ordm': '\xba', - 'ordm;': '\xba', - 'origof;': '\u22b6', - 'oror;': '\u2a56', - 'orslope;': '\u2a57', - 'orv;': '\u2a5b', - 'oS;': '\u24c8', - 'Oscr;': '\U0001d4aa', - 'oscr;': '\u2134', - 'Oslash': '\xd8', - 'oslash': '\xf8', - 'Oslash;': '\xd8', - 'oslash;': '\xf8', - 'osol;': '\u2298', - 'Otilde': '\xd5', - 'otilde': '\xf5', - 'Otilde;': '\xd5', - 'otilde;': '\xf5', - 'Otimes;': '\u2a37', - 'otimes;': '\u2297', - 'otimesas;': '\u2a36', - 'Ouml': '\xd6', - 'ouml': '\xf6', - 'Ouml;': '\xd6', - 'ouml;': '\xf6', - 'ovbar;': '\u233d', - 'OverBar;': '\u203e', - 'OverBrace;': '\u23de', - 'OverBracket;': '\u23b4', - 'OverParenthesis;': '\u23dc', - 'par;': '\u2225', - 'para': '\xb6', - 'para;': '\xb6', - 'parallel;': '\u2225', - 'parsim;': '\u2af3', - 'parsl;': '\u2afd', - 'part;': '\u2202', - 'PartialD;': '\u2202', - 'Pcy;': '\u041f', - 'pcy;': '\u043f', - 'percnt;': '%', - 'period;': '.', - 'permil;': '\u2030', - 'perp;': '\u22a5', - 'pertenk;': '\u2031', - 'Pfr;': '\U0001d513', - 'pfr;': '\U0001d52d', - 'Phi;': '\u03a6', - 'phi;': '\u03c6', - 'phiv;': '\u03d5', - 'phmmat;': '\u2133', - 'phone;': '\u260e', - 'Pi;': '\u03a0', - 'pi;': '\u03c0', - 'pitchfork;': '\u22d4', - 'piv;': '\u03d6', - 'planck;': '\u210f', - 'planckh;': '\u210e', - 'plankv;': '\u210f', - 'plus;': '+', - 'plusacir;': '\u2a23', - 'plusb;': '\u229e', - 'pluscir;': '\u2a22', - 'plusdo;': '\u2214', - 'plusdu;': '\u2a25', - 'pluse;': '\u2a72', - 'PlusMinus;': '\xb1', - 'plusmn': '\xb1', - 'plusmn;': '\xb1', - 'plussim;': '\u2a26', - 'plustwo;': '\u2a27', - 'pm;': '\xb1', - 'Poincareplane;': '\u210c', - 'pointint;': '\u2a15', - 'Popf;': '\u2119', - 'popf;': '\U0001d561', - 'pound': '\xa3', - 'pound;': '\xa3', - 'Pr;': '\u2abb', - 'pr;': '\u227a', - 'prap;': '\u2ab7', - 'prcue;': '\u227c', - 'prE;': '\u2ab3', - 'pre;': '\u2aaf', - 'prec;': '\u227a', - 'precapprox;': '\u2ab7', - 'preccurlyeq;': '\u227c', - 'Precedes;': '\u227a', - 'PrecedesEqual;': '\u2aaf', - 'PrecedesSlantEqual;': '\u227c', - 'PrecedesTilde;': '\u227e', - 'preceq;': '\u2aaf', - 'precnapprox;': '\u2ab9', - 'precneqq;': '\u2ab5', - 'precnsim;': '\u22e8', - 'precsim;': '\u227e', - 'Prime;': '\u2033', - 'prime;': '\u2032', - 'primes;': '\u2119', - 'prnap;': '\u2ab9', - 'prnE;': '\u2ab5', - 'prnsim;': '\u22e8', - 'prod;': '\u220f', - 'Product;': '\u220f', - 
'profalar;': '\u232e', - 'profline;': '\u2312', - 'profsurf;': '\u2313', - 'prop;': '\u221d', - 'Proportion;': '\u2237', - 'Proportional;': '\u221d', - 'propto;': '\u221d', - 'prsim;': '\u227e', - 'prurel;': '\u22b0', - 'Pscr;': '\U0001d4ab', - 'pscr;': '\U0001d4c5', - 'Psi;': '\u03a8', - 'psi;': '\u03c8', - 'puncsp;': '\u2008', - 'Qfr;': '\U0001d514', - 'qfr;': '\U0001d52e', - 'qint;': '\u2a0c', - 'Qopf;': '\u211a', - 'qopf;': '\U0001d562', - 'qprime;': '\u2057', - 'Qscr;': '\U0001d4ac', - 'qscr;': '\U0001d4c6', - 'quaternions;': '\u210d', - 'quatint;': '\u2a16', - 'quest;': '?', - 'questeq;': '\u225f', - 'QUOT': '"', - 'quot': '"', - 'QUOT;': '"', - 'quot;': '"', - 'rAarr;': '\u21db', - 'race;': '\u223d\u0331', - 'Racute;': '\u0154', - 'racute;': '\u0155', - 'radic;': '\u221a', - 'raemptyv;': '\u29b3', - 'Rang;': '\u27eb', - 'rang;': '\u27e9', - 'rangd;': '\u2992', - 'range;': '\u29a5', - 'rangle;': '\u27e9', - 'raquo': '\xbb', - 'raquo;': '\xbb', - 'Rarr;': '\u21a0', - 'rArr;': '\u21d2', - 'rarr;': '\u2192', - 'rarrap;': '\u2975', - 'rarrb;': '\u21e5', - 'rarrbfs;': '\u2920', - 'rarrc;': '\u2933', - 'rarrfs;': '\u291e', - 'rarrhk;': '\u21aa', - 'rarrlp;': '\u21ac', - 'rarrpl;': '\u2945', - 'rarrsim;': '\u2974', - 'Rarrtl;': '\u2916', - 'rarrtl;': '\u21a3', - 'rarrw;': '\u219d', - 'rAtail;': '\u291c', - 'ratail;': '\u291a', - 'ratio;': '\u2236', - 'rationals;': '\u211a', - 'RBarr;': '\u2910', - 'rBarr;': '\u290f', - 'rbarr;': '\u290d', - 'rbbrk;': '\u2773', - 'rbrace;': '}', - 'rbrack;': ']', - 'rbrke;': '\u298c', - 'rbrksld;': '\u298e', - 'rbrkslu;': '\u2990', - 'Rcaron;': '\u0158', - 'rcaron;': '\u0159', - 'Rcedil;': '\u0156', - 'rcedil;': '\u0157', - 'rceil;': '\u2309', - 'rcub;': '}', - 'Rcy;': '\u0420', - 'rcy;': '\u0440', - 'rdca;': '\u2937', - 'rdldhar;': '\u2969', - 'rdquo;': '\u201d', - 'rdquor;': '\u201d', - 'rdsh;': '\u21b3', - 'Re;': '\u211c', - 'real;': '\u211c', - 'realine;': '\u211b', - 'realpart;': '\u211c', - 'reals;': '\u211d', - 'rect;': '\u25ad', - 'REG': '\xae', - 'reg': '\xae', - 'REG;': '\xae', - 'reg;': '\xae', - 'ReverseElement;': '\u220b', - 'ReverseEquilibrium;': '\u21cb', - 'ReverseUpEquilibrium;': '\u296f', - 'rfisht;': '\u297d', - 'rfloor;': '\u230b', - 'Rfr;': '\u211c', - 'rfr;': '\U0001d52f', - 'rHar;': '\u2964', - 'rhard;': '\u21c1', - 'rharu;': '\u21c0', - 'rharul;': '\u296c', - 'Rho;': '\u03a1', - 'rho;': '\u03c1', - 'rhov;': '\u03f1', - 'RightAngleBracket;': '\u27e9', - 'RightArrow;': '\u2192', - 'Rightarrow;': '\u21d2', - 'rightarrow;': '\u2192', - 'RightArrowBar;': '\u21e5', - 'RightArrowLeftArrow;': '\u21c4', - 'rightarrowtail;': '\u21a3', - 'RightCeiling;': '\u2309', - 'RightDoubleBracket;': '\u27e7', - 'RightDownTeeVector;': '\u295d', - 'RightDownVector;': '\u21c2', - 'RightDownVectorBar;': '\u2955', - 'RightFloor;': '\u230b', - 'rightharpoondown;': '\u21c1', - 'rightharpoonup;': '\u21c0', - 'rightleftarrows;': '\u21c4', - 'rightleftharpoons;': '\u21cc', - 'rightrightarrows;': '\u21c9', - 'rightsquigarrow;': '\u219d', - 'RightTee;': '\u22a2', - 'RightTeeArrow;': '\u21a6', - 'RightTeeVector;': '\u295b', - 'rightthreetimes;': '\u22cc', - 'RightTriangle;': '\u22b3', - 'RightTriangleBar;': '\u29d0', - 'RightTriangleEqual;': '\u22b5', - 'RightUpDownVector;': '\u294f', - 'RightUpTeeVector;': '\u295c', - 'RightUpVector;': '\u21be', - 'RightUpVectorBar;': '\u2954', - 'RightVector;': '\u21c0', - 'RightVectorBar;': '\u2953', - 'ring;': '\u02da', - 'risingdotseq;': '\u2253', - 'rlarr;': '\u21c4', - 'rlhar;': '\u21cc', - 'rlm;': '\u200f', - 'rmoust;': 
'\u23b1', - 'rmoustache;': '\u23b1', - 'rnmid;': '\u2aee', - 'roang;': '\u27ed', - 'roarr;': '\u21fe', - 'robrk;': '\u27e7', - 'ropar;': '\u2986', - 'Ropf;': '\u211d', - 'ropf;': '\U0001d563', - 'roplus;': '\u2a2e', - 'rotimes;': '\u2a35', - 'RoundImplies;': '\u2970', - 'rpar;': ')', - 'rpargt;': '\u2994', - 'rppolint;': '\u2a12', - 'rrarr;': '\u21c9', - 'Rrightarrow;': '\u21db', - 'rsaquo;': '\u203a', - 'Rscr;': '\u211b', - 'rscr;': '\U0001d4c7', - 'Rsh;': '\u21b1', - 'rsh;': '\u21b1', - 'rsqb;': ']', - 'rsquo;': '\u2019', - 'rsquor;': '\u2019', - 'rthree;': '\u22cc', - 'rtimes;': '\u22ca', - 'rtri;': '\u25b9', - 'rtrie;': '\u22b5', - 'rtrif;': '\u25b8', - 'rtriltri;': '\u29ce', - 'RuleDelayed;': '\u29f4', - 'ruluhar;': '\u2968', - 'rx;': '\u211e', - 'Sacute;': '\u015a', - 'sacute;': '\u015b', - 'sbquo;': '\u201a', - 'Sc;': '\u2abc', - 'sc;': '\u227b', - 'scap;': '\u2ab8', - 'Scaron;': '\u0160', - 'scaron;': '\u0161', - 'sccue;': '\u227d', - 'scE;': '\u2ab4', - 'sce;': '\u2ab0', - 'Scedil;': '\u015e', - 'scedil;': '\u015f', - 'Scirc;': '\u015c', - 'scirc;': '\u015d', - 'scnap;': '\u2aba', - 'scnE;': '\u2ab6', - 'scnsim;': '\u22e9', - 'scpolint;': '\u2a13', - 'scsim;': '\u227f', - 'Scy;': '\u0421', - 'scy;': '\u0441', - 'sdot;': '\u22c5', - 'sdotb;': '\u22a1', - 'sdote;': '\u2a66', - 'searhk;': '\u2925', - 'seArr;': '\u21d8', - 'searr;': '\u2198', - 'searrow;': '\u2198', - 'sect': '\xa7', - 'sect;': '\xa7', - 'semi;': ';', - 'seswar;': '\u2929', - 'setminus;': '\u2216', - 'setmn;': '\u2216', - 'sext;': '\u2736', - 'Sfr;': '\U0001d516', - 'sfr;': '\U0001d530', - 'sfrown;': '\u2322', - 'sharp;': '\u266f', - 'SHCHcy;': '\u0429', - 'shchcy;': '\u0449', - 'SHcy;': '\u0428', - 'shcy;': '\u0448', - 'ShortDownArrow;': '\u2193', - 'ShortLeftArrow;': '\u2190', - 'shortmid;': '\u2223', - 'shortparallel;': '\u2225', - 'ShortRightArrow;': '\u2192', - 'ShortUpArrow;': '\u2191', - 'shy': '\xad', - 'shy;': '\xad', - 'Sigma;': '\u03a3', - 'sigma;': '\u03c3', - 'sigmaf;': '\u03c2', - 'sigmav;': '\u03c2', - 'sim;': '\u223c', - 'simdot;': '\u2a6a', - 'sime;': '\u2243', - 'simeq;': '\u2243', - 'simg;': '\u2a9e', - 'simgE;': '\u2aa0', - 'siml;': '\u2a9d', - 'simlE;': '\u2a9f', - 'simne;': '\u2246', - 'simplus;': '\u2a24', - 'simrarr;': '\u2972', - 'slarr;': '\u2190', - 'SmallCircle;': '\u2218', - 'smallsetminus;': '\u2216', - 'smashp;': '\u2a33', - 'smeparsl;': '\u29e4', - 'smid;': '\u2223', - 'smile;': '\u2323', - 'smt;': '\u2aaa', - 'smte;': '\u2aac', - 'smtes;': '\u2aac\ufe00', - 'SOFTcy;': '\u042c', - 'softcy;': '\u044c', - 'sol;': '/', - 'solb;': '\u29c4', - 'solbar;': '\u233f', - 'Sopf;': '\U0001d54a', - 'sopf;': '\U0001d564', - 'spades;': '\u2660', - 'spadesuit;': '\u2660', - 'spar;': '\u2225', - 'sqcap;': '\u2293', - 'sqcaps;': '\u2293\ufe00', - 'sqcup;': '\u2294', - 'sqcups;': '\u2294\ufe00', - 'Sqrt;': '\u221a', - 'sqsub;': '\u228f', - 'sqsube;': '\u2291', - 'sqsubset;': '\u228f', - 'sqsubseteq;': '\u2291', - 'sqsup;': '\u2290', - 'sqsupe;': '\u2292', - 'sqsupset;': '\u2290', - 'sqsupseteq;': '\u2292', - 'squ;': '\u25a1', - 'Square;': '\u25a1', - 'square;': '\u25a1', - 'SquareIntersection;': '\u2293', - 'SquareSubset;': '\u228f', - 'SquareSubsetEqual;': '\u2291', - 'SquareSuperset;': '\u2290', - 'SquareSupersetEqual;': '\u2292', - 'SquareUnion;': '\u2294', - 'squarf;': '\u25aa', - 'squf;': '\u25aa', - 'srarr;': '\u2192', - 'Sscr;': '\U0001d4ae', - 'sscr;': '\U0001d4c8', - 'ssetmn;': '\u2216', - 'ssmile;': '\u2323', - 'sstarf;': '\u22c6', - 'Star;': '\u22c6', - 'star;': '\u2606', - 'starf;': '\u2605', 
- 'straightepsilon;': '\u03f5', - 'straightphi;': '\u03d5', - 'strns;': '\xaf', - 'Sub;': '\u22d0', - 'sub;': '\u2282', - 'subdot;': '\u2abd', - 'subE;': '\u2ac5', - 'sube;': '\u2286', - 'subedot;': '\u2ac3', - 'submult;': '\u2ac1', - 'subnE;': '\u2acb', - 'subne;': '\u228a', - 'subplus;': '\u2abf', - 'subrarr;': '\u2979', - 'Subset;': '\u22d0', - 'subset;': '\u2282', - 'subseteq;': '\u2286', - 'subseteqq;': '\u2ac5', - 'SubsetEqual;': '\u2286', - 'subsetneq;': '\u228a', - 'subsetneqq;': '\u2acb', - 'subsim;': '\u2ac7', - 'subsub;': '\u2ad5', - 'subsup;': '\u2ad3', - 'succ;': '\u227b', - 'succapprox;': '\u2ab8', - 'succcurlyeq;': '\u227d', - 'Succeeds;': '\u227b', - 'SucceedsEqual;': '\u2ab0', - 'SucceedsSlantEqual;': '\u227d', - 'SucceedsTilde;': '\u227f', - 'succeq;': '\u2ab0', - 'succnapprox;': '\u2aba', - 'succneqq;': '\u2ab6', - 'succnsim;': '\u22e9', - 'succsim;': '\u227f', - 'SuchThat;': '\u220b', - 'Sum;': '\u2211', - 'sum;': '\u2211', - 'sung;': '\u266a', - 'sup1': '\xb9', - 'sup1;': '\xb9', - 'sup2': '\xb2', - 'sup2;': '\xb2', - 'sup3': '\xb3', - 'sup3;': '\xb3', - 'Sup;': '\u22d1', - 'sup;': '\u2283', - 'supdot;': '\u2abe', - 'supdsub;': '\u2ad8', - 'supE;': '\u2ac6', - 'supe;': '\u2287', - 'supedot;': '\u2ac4', - 'Superset;': '\u2283', - 'SupersetEqual;': '\u2287', - 'suphsol;': '\u27c9', - 'suphsub;': '\u2ad7', - 'suplarr;': '\u297b', - 'supmult;': '\u2ac2', - 'supnE;': '\u2acc', - 'supne;': '\u228b', - 'supplus;': '\u2ac0', - 'Supset;': '\u22d1', - 'supset;': '\u2283', - 'supseteq;': '\u2287', - 'supseteqq;': '\u2ac6', - 'supsetneq;': '\u228b', - 'supsetneqq;': '\u2acc', - 'supsim;': '\u2ac8', - 'supsub;': '\u2ad4', - 'supsup;': '\u2ad6', - 'swarhk;': '\u2926', - 'swArr;': '\u21d9', - 'swarr;': '\u2199', - 'swarrow;': '\u2199', - 'swnwar;': '\u292a', - 'szlig': '\xdf', - 'szlig;': '\xdf', - 'Tab;': '\t', - 'target;': '\u2316', - 'Tau;': '\u03a4', - 'tau;': '\u03c4', - 'tbrk;': '\u23b4', - 'Tcaron;': '\u0164', - 'tcaron;': '\u0165', - 'Tcedil;': '\u0162', - 'tcedil;': '\u0163', - 'Tcy;': '\u0422', - 'tcy;': '\u0442', - 'tdot;': '\u20db', - 'telrec;': '\u2315', - 'Tfr;': '\U0001d517', - 'tfr;': '\U0001d531', - 'there4;': '\u2234', - 'Therefore;': '\u2234', - 'therefore;': '\u2234', - 'Theta;': '\u0398', - 'theta;': '\u03b8', - 'thetasym;': '\u03d1', - 'thetav;': '\u03d1', - 'thickapprox;': '\u2248', - 'thicksim;': '\u223c', - 'ThickSpace;': '\u205f\u200a', - 'thinsp;': '\u2009', - 'ThinSpace;': '\u2009', - 'thkap;': '\u2248', - 'thksim;': '\u223c', - 'THORN': '\xde', - 'thorn': '\xfe', - 'THORN;': '\xde', - 'thorn;': '\xfe', - 'Tilde;': '\u223c', - 'tilde;': '\u02dc', - 'TildeEqual;': '\u2243', - 'TildeFullEqual;': '\u2245', - 'TildeTilde;': '\u2248', - 'times': '\xd7', - 'times;': '\xd7', - 'timesb;': '\u22a0', - 'timesbar;': '\u2a31', - 'timesd;': '\u2a30', - 'tint;': '\u222d', - 'toea;': '\u2928', - 'top;': '\u22a4', - 'topbot;': '\u2336', - 'topcir;': '\u2af1', - 'Topf;': '\U0001d54b', - 'topf;': '\U0001d565', - 'topfork;': '\u2ada', - 'tosa;': '\u2929', - 'tprime;': '\u2034', - 'TRADE;': '\u2122', - 'trade;': '\u2122', - 'triangle;': '\u25b5', - 'triangledown;': '\u25bf', - 'triangleleft;': '\u25c3', - 'trianglelefteq;': '\u22b4', - 'triangleq;': '\u225c', - 'triangleright;': '\u25b9', - 'trianglerighteq;': '\u22b5', - 'tridot;': '\u25ec', - 'trie;': '\u225c', - 'triminus;': '\u2a3a', - 'TripleDot;': '\u20db', - 'triplus;': '\u2a39', - 'trisb;': '\u29cd', - 'tritime;': '\u2a3b', - 'trpezium;': '\u23e2', - 'Tscr;': '\U0001d4af', - 'tscr;': '\U0001d4c9', - 'TScy;': '\u0426', 
- 'tscy;': '\u0446', - 'TSHcy;': '\u040b', - 'tshcy;': '\u045b', - 'Tstrok;': '\u0166', - 'tstrok;': '\u0167', - 'twixt;': '\u226c', - 'twoheadleftarrow;': '\u219e', - 'twoheadrightarrow;': '\u21a0', - 'Uacute': '\xda', - 'uacute': '\xfa', - 'Uacute;': '\xda', - 'uacute;': '\xfa', - 'Uarr;': '\u219f', - 'uArr;': '\u21d1', - 'uarr;': '\u2191', - 'Uarrocir;': '\u2949', - 'Ubrcy;': '\u040e', - 'ubrcy;': '\u045e', - 'Ubreve;': '\u016c', - 'ubreve;': '\u016d', - 'Ucirc': '\xdb', - 'ucirc': '\xfb', - 'Ucirc;': '\xdb', - 'ucirc;': '\xfb', - 'Ucy;': '\u0423', - 'ucy;': '\u0443', - 'udarr;': '\u21c5', - 'Udblac;': '\u0170', - 'udblac;': '\u0171', - 'udhar;': '\u296e', - 'ufisht;': '\u297e', - 'Ufr;': '\U0001d518', - 'ufr;': '\U0001d532', - 'Ugrave': '\xd9', - 'ugrave': '\xf9', - 'Ugrave;': '\xd9', - 'ugrave;': '\xf9', - 'uHar;': '\u2963', - 'uharl;': '\u21bf', - 'uharr;': '\u21be', - 'uhblk;': '\u2580', - 'ulcorn;': '\u231c', - 'ulcorner;': '\u231c', - 'ulcrop;': '\u230f', - 'ultri;': '\u25f8', - 'Umacr;': '\u016a', - 'umacr;': '\u016b', - 'uml': '\xa8', - 'uml;': '\xa8', - 'UnderBar;': '_', - 'UnderBrace;': '\u23df', - 'UnderBracket;': '\u23b5', - 'UnderParenthesis;': '\u23dd', - 'Union;': '\u22c3', - 'UnionPlus;': '\u228e', - 'Uogon;': '\u0172', - 'uogon;': '\u0173', - 'Uopf;': '\U0001d54c', - 'uopf;': '\U0001d566', - 'UpArrow;': '\u2191', - 'Uparrow;': '\u21d1', - 'uparrow;': '\u2191', - 'UpArrowBar;': '\u2912', - 'UpArrowDownArrow;': '\u21c5', - 'UpDownArrow;': '\u2195', - 'Updownarrow;': '\u21d5', - 'updownarrow;': '\u2195', - 'UpEquilibrium;': '\u296e', - 'upharpoonleft;': '\u21bf', - 'upharpoonright;': '\u21be', - 'uplus;': '\u228e', - 'UpperLeftArrow;': '\u2196', - 'UpperRightArrow;': '\u2197', - 'Upsi;': '\u03d2', - 'upsi;': '\u03c5', - 'upsih;': '\u03d2', - 'Upsilon;': '\u03a5', - 'upsilon;': '\u03c5', - 'UpTee;': '\u22a5', - 'UpTeeArrow;': '\u21a5', - 'upuparrows;': '\u21c8', - 'urcorn;': '\u231d', - 'urcorner;': '\u231d', - 'urcrop;': '\u230e', - 'Uring;': '\u016e', - 'uring;': '\u016f', - 'urtri;': '\u25f9', - 'Uscr;': '\U0001d4b0', - 'uscr;': '\U0001d4ca', - 'utdot;': '\u22f0', - 'Utilde;': '\u0168', - 'utilde;': '\u0169', - 'utri;': '\u25b5', - 'utrif;': '\u25b4', - 'uuarr;': '\u21c8', - 'Uuml': '\xdc', - 'uuml': '\xfc', - 'Uuml;': '\xdc', - 'uuml;': '\xfc', - 'uwangle;': '\u29a7', - 'vangrt;': '\u299c', - 'varepsilon;': '\u03f5', - 'varkappa;': '\u03f0', - 'varnothing;': '\u2205', - 'varphi;': '\u03d5', - 'varpi;': '\u03d6', - 'varpropto;': '\u221d', - 'vArr;': '\u21d5', - 'varr;': '\u2195', - 'varrho;': '\u03f1', - 'varsigma;': '\u03c2', - 'varsubsetneq;': '\u228a\ufe00', - 'varsubsetneqq;': '\u2acb\ufe00', - 'varsupsetneq;': '\u228b\ufe00', - 'varsupsetneqq;': '\u2acc\ufe00', - 'vartheta;': '\u03d1', - 'vartriangleleft;': '\u22b2', - 'vartriangleright;': '\u22b3', - 'Vbar;': '\u2aeb', - 'vBar;': '\u2ae8', - 'vBarv;': '\u2ae9', - 'Vcy;': '\u0412', - 'vcy;': '\u0432', - 'VDash;': '\u22ab', - 'Vdash;': '\u22a9', - 'vDash;': '\u22a8', - 'vdash;': '\u22a2', - 'Vdashl;': '\u2ae6', - 'Vee;': '\u22c1', - 'vee;': '\u2228', - 'veebar;': '\u22bb', - 'veeeq;': '\u225a', - 'vellip;': '\u22ee', - 'Verbar;': '\u2016', - 'verbar;': '|', - 'Vert;': '\u2016', - 'vert;': '|', - 'VerticalBar;': '\u2223', - 'VerticalLine;': '|', - 'VerticalSeparator;': '\u2758', - 'VerticalTilde;': '\u2240', - 'VeryThinSpace;': '\u200a', - 'Vfr;': '\U0001d519', - 'vfr;': '\U0001d533', - 'vltri;': '\u22b2', - 'vnsub;': '\u2282\u20d2', - 'vnsup;': '\u2283\u20d2', - 'Vopf;': '\U0001d54d', - 'vopf;': '\U0001d567', - 
'vprop;': '\u221d', - 'vrtri;': '\u22b3', - 'Vscr;': '\U0001d4b1', - 'vscr;': '\U0001d4cb', - 'vsubnE;': '\u2acb\ufe00', - 'vsubne;': '\u228a\ufe00', - 'vsupnE;': '\u2acc\ufe00', - 'vsupne;': '\u228b\ufe00', - 'Vvdash;': '\u22aa', - 'vzigzag;': '\u299a', - 'Wcirc;': '\u0174', - 'wcirc;': '\u0175', - 'wedbar;': '\u2a5f', - 'Wedge;': '\u22c0', - 'wedge;': '\u2227', - 'wedgeq;': '\u2259', - 'weierp;': '\u2118', - 'Wfr;': '\U0001d51a', - 'wfr;': '\U0001d534', - 'Wopf;': '\U0001d54e', - 'wopf;': '\U0001d568', - 'wp;': '\u2118', - 'wr;': '\u2240', - 'wreath;': '\u2240', - 'Wscr;': '\U0001d4b2', - 'wscr;': '\U0001d4cc', - 'xcap;': '\u22c2', - 'xcirc;': '\u25ef', - 'xcup;': '\u22c3', - 'xdtri;': '\u25bd', - 'Xfr;': '\U0001d51b', - 'xfr;': '\U0001d535', - 'xhArr;': '\u27fa', - 'xharr;': '\u27f7', - 'Xi;': '\u039e', - 'xi;': '\u03be', - 'xlArr;': '\u27f8', - 'xlarr;': '\u27f5', - 'xmap;': '\u27fc', - 'xnis;': '\u22fb', - 'xodot;': '\u2a00', - 'Xopf;': '\U0001d54f', - 'xopf;': '\U0001d569', - 'xoplus;': '\u2a01', - 'xotime;': '\u2a02', - 'xrArr;': '\u27f9', - 'xrarr;': '\u27f6', - 'Xscr;': '\U0001d4b3', - 'xscr;': '\U0001d4cd', - 'xsqcup;': '\u2a06', - 'xuplus;': '\u2a04', - 'xutri;': '\u25b3', - 'xvee;': '\u22c1', - 'xwedge;': '\u22c0', - 'Yacute': '\xdd', - 'yacute': '\xfd', - 'Yacute;': '\xdd', - 'yacute;': '\xfd', - 'YAcy;': '\u042f', - 'yacy;': '\u044f', - 'Ycirc;': '\u0176', - 'ycirc;': '\u0177', - 'Ycy;': '\u042b', - 'ycy;': '\u044b', - 'yen': '\xa5', - 'yen;': '\xa5', - 'Yfr;': '\U0001d51c', - 'yfr;': '\U0001d536', - 'YIcy;': '\u0407', - 'yicy;': '\u0457', - 'Yopf;': '\U0001d550', - 'yopf;': '\U0001d56a', - 'Yscr;': '\U0001d4b4', - 'yscr;': '\U0001d4ce', - 'YUcy;': '\u042e', - 'yucy;': '\u044e', - 'yuml': '\xff', - 'Yuml;': '\u0178', - 'yuml;': '\xff', - 'Zacute;': '\u0179', - 'zacute;': '\u017a', - 'Zcaron;': '\u017d', - 'zcaron;': '\u017e', - 'Zcy;': '\u0417', - 'zcy;': '\u0437', - 'Zdot;': '\u017b', - 'zdot;': '\u017c', - 'zeetrf;': '\u2128', - 'ZeroWidthSpace;': '\u200b', - 'Zeta;': '\u0396', - 'zeta;': '\u03b6', - 'Zfr;': '\u2128', - 'zfr;': '\U0001d537', - 'ZHcy;': '\u0416', - 'zhcy;': '\u0436', - 'zigrarr;': '\u21dd', - 'Zopf;': '\u2124', - 'zopf;': '\U0001d56b', - 'Zscr;': '\U0001d4b5', - 'zscr;': '\U0001d4cf', - 'zwj;': '\u200d', - 'zwnj;': '\u200c', - } - -try: - import http.client as compat_http_client -except ImportError: # Python 2 - import httplib as compat_http_client - -try: - from urllib.error import HTTPError as compat_HTTPError -except ImportError: # Python 2 - from urllib2 import HTTPError as compat_HTTPError - -try: - from urllib.request import urlretrieve as compat_urlretrieve -except ImportError: # Python 2 - from urllib import urlretrieve as compat_urlretrieve - -try: - from html.parser import HTMLParser as compat_HTMLParser -except ImportError: # Python 2 - from HTMLParser import HTMLParser as compat_HTMLParser - -try: # Python 2 - from HTMLParser import HTMLParseError as compat_HTMLParseError -except ImportError: # Python <3.4 - try: - from html.parser import HTMLParseError as compat_HTMLParseError - except ImportError: # Python >3.4 - - # HTMLParseError has been deprecated in Python 3.3 and removed in - # Python 3.5. 
Introducing dummy exception for Python >3.5 for compatible - # and uniform cross-version exception handling - class compat_HTMLParseError(Exception): - pass - -try: - from subprocess import DEVNULL - compat_subprocess_get_DEVNULL = lambda: DEVNULL -except ImportError: - compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') - -try: - import http.server as compat_http_server -except ImportError: - import BaseHTTPServer as compat_http_server - -try: - compat_str = unicode # Python 2 -except NameError: - compat_str = str - -try: - from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes - from urllib.parse import unquote as compat_urllib_parse_unquote - from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus -except ImportError: # Python 2 - _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') - else re.compile(r'([\x00-\x7f]+)')) - - # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus - # implementations from cpython 3.4.3's stdlib. Python 2's version - # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244) - - def compat_urllib_parse_unquote_to_bytes(string): - """unquote_to_bytes('abc%20def') -> b'abc def'.""" - # Note: strings are encoded as UTF-8. This is only an issue if it contains - # unescaped non-ASCII characters, which URIs should not. - if not string: - # Is it a string-like object? - string.split - return b'' - if isinstance(string, compat_str): - string = string.encode('utf-8') - bits = string.split(b'%') - if len(bits) == 1: - return string - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(compat_urllib_parse._hextochr[item[:2]]) - append(item[2:]) - except KeyError: - append(b'%') - append(item) - return b''.join(res) - - def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): - """Replace %xx escapes by their single-character equivalent. The optional - encoding and errors parameters specify how to decode percent-encoded - sequences into Unicode characters, as accepted by the bytes.decode() - method. - By default, percent-encoded sequences are decoded with UTF-8, and invalid - sequences are replaced by a placeholder character. - - unquote('abc%20def') -> 'abc def'. - """ - if '%' not in string: - string.split - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'replace' - bits = _asciire.split(string) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors)) - append(bits[i + 1]) - return ''.join(res) - - def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'): - """Like unquote(), but also replace plus signs by spaces, as required for - unquoting HTML form values. - - unquote_plus('%7e/abc+def') -> '~/abc def' - """ - string = string.replace('+', ' ') - return compat_urllib_parse_unquote(string, encoding, errors) - -try: - from urllib.parse import urlencode as compat_urllib_parse_urlencode -except ImportError: # Python 2 - # Python 2 will choke in urlencode on mixture of byte and unicode strings. - # Possible solutions are to either port it from python 3 with all - # the friends or manually ensure input query contains only byte strings. - # We will stick with the latter thus recursively encoding the whole query.
- def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'): - def encode_elem(e): - if isinstance(e, dict): - e = encode_dict(e) - elif isinstance(e, (list, tuple,)): - list_e = encode_list(e) - e = tuple(list_e) if isinstance(e, tuple) else list_e - elif isinstance(e, compat_str): - e = e.encode(encoding) - return e - - def encode_dict(d): - return dict((encode_elem(k), encode_elem(v)) for k, v in d.items()) - - def encode_list(l): - return [encode_elem(e) for e in l] - - return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq) - -try: - from urllib.request import DataHandler as compat_urllib_request_DataHandler -except ImportError: # Python < 3.4 - # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py - class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler): - def data_open(self, req): - # data URLs as specified in RFC 2397. - # - # ignores POSTed data - # - # syntax: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - url = req.get_full_url() - - scheme, data = url.split(':', 1) - mediatype, data = data.split(',', 1) - - # even base64 encoded data URLs might be quoted so unquote in any case: - data = compat_urllib_parse_unquote_to_bytes(data) - if mediatype.endswith(';base64'): - data = binascii.a2b_base64(data) - mediatype = mediatype[:-7] - - if not mediatype: - mediatype = 'text/plain;charset=US-ASCII' - - headers = email.message_from_string( - 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data))) - - return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) - -try: - compat_basestring = basestring # Python 2 -except NameError: - compat_basestring = str - -try: - compat_chr = unichr # Python 2 -except NameError: - compat_chr = chr - -try: - from xml.etree.ElementTree import ParseError as compat_xml_parse_error -except ImportError: # Python 2.6 - from xml.parsers.expat import ExpatError as compat_xml_parse_error - - -etree = xml.etree.ElementTree - - -class _TreeBuilder(etree.TreeBuilder): - def doctype(self, name, pubid, system): - pass - - -if sys.version_info[0] >= 3: - def compat_etree_fromstring(text): - return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) -else: - # python 2.x tries to encode unicode strings with ascii (see the - # XMLParser._fixtext method) - try: - _etree_iter = etree.Element.iter - except AttributeError: # Python <=2.6 - def _etree_iter(root): - for el in root.findall('*'): - yield el - for sub in _etree_iter(el): - yield sub - - # on 2.6 XML doesn't have a parser argument, function copied from CPython - # 2.7 source - def _XML(text, parser=None): - if not parser: - parser = etree.XMLParser(target=_TreeBuilder()) - parser.feed(text) - return parser.close() - - def _element_factory(*args, **kwargs): - el = etree.Element(*args, **kwargs) - for k, v in el.items(): - if isinstance(v, bytes): - el.set(k, v.decode('utf-8')) - return el - - def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) - for el in _etree_iter(doc): - if el.text is not None and isinstance(el.text, bytes): - el.text = el.text.decode('utf-8') - return doc - -if hasattr(etree, 'register_namespace'): - compat_etree_register_namespace = etree.register_namespace -else: - def compat_etree_register_namespace(prefix, uri): - """Register a namespace prefix. 
- The registry is global, and any existing mapping for either the - given prefix or the namespace URI will be removed. - *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and - attributes in this namespace will be serialized with prefix if possible. - ValueError is raised if prefix is reserved or is invalid. - """ - if re.match(r"ns\d+$", prefix): - raise ValueError("Prefix format reserved for internal use") - for k, v in list(etree._namespace_map.items()): - if k == uri or v == prefix: - del etree._namespace_map[k] - etree._namespace_map[uri] = prefix - -if sys.version_info < (2, 7): - # Here comes the crazy part: In 2.6, if the xpath is a unicode, - # .//node does not match if a node is a direct child of . ! - def compat_xpath(xpath): - if isinstance(xpath, compat_str): - xpath = xpath.encode('ascii') - return xpath -else: - compat_xpath = lambda xpath: xpath - -try: - from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 - # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. - # Python 2's version is apparently totally broken - - def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - qs, _coerce_result = qs, compat_str - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError('bad query field: %r' % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = compat_urllib_parse_unquote( - name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = compat_urllib_parse_unquote( - value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - - def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - parsed_result = {} - pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result - - -compat_os_name = os._name if os.name == 'java' else os.name - - -if compat_os_name == 'nt': - def compat_shlex_quote(s): - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') -else: - try: - from shlex import quote as compat_shlex_quote - except ImportError: # Python < 3.3 - def compat_shlex_quote(s): - if re.match(r'^[-_\w./]+$', s): - return s - else: - return "'" + s.replace("'", "'\"'\"'") + "'" - - -try: - args = shlex.split('中文') - assert (isinstance(args, list) and - isinstance(args[0], compat_str) and - args[0] == '中文') - compat_shlex_split = shlex.split -except (AssertionError, UnicodeEncodeError): - # Working around shlex issue with unicode strings on some python 2 - # versions (see http://bugs.python.org/issue1548891) - def compat_shlex_split(s, comments=False, posix=True): - if isinstance(s, compat_str): - s = s.encode('utf-8') - return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) - - -def compat_ord(c): - if type(c) is int: - return c - else: - return ord(c) - - -if sys.version_info >= (3, 0): - compat_getenv = os.getenv - compat_expanduser = 
os.path.expanduser - - def compat_setenv(key, value, env=os.environ): - env[key] = value -else: - # Environment variables should be decoded with filesystem encoding. - # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) - - def compat_getenv(key, default=None): - from .utils import get_filesystem_encoding - env = os.getenv(key, default) - if env: - env = env.decode(get_filesystem_encoding()) - return env - - def compat_setenv(key, value, env=os.environ): - def encode(v): - from .utils import get_filesystem_encoding - return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v - env[encode(key)] = encode(value) - - # HACK: The default implementations of os.path.expanduser from cpython do not decode - # environment variables with filesystem encoding. We will work around this by - # providing adjusted implementations. - # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib - # for different platforms with correct environment variables decoding. - - if compat_os_name == 'posix': - def compat_expanduser(path): - """Expand ~ and ~user constructions. If user or $HOME is unknown, - do nothing.""" - if not path.startswith('~'): - return path - i = path.find('/', 1) - if i < 0: - i = len(path) - if i == 1: - if 'HOME' not in os.environ: - import pwd - userhome = pwd.getpwuid(os.getuid()).pw_dir - else: - userhome = compat_getenv('HOME') - else: - import pwd - try: - pwent = pwd.getpwnam(path[1:i]) - except KeyError: - return path - userhome = pwent.pw_dir - userhome = userhome.rstrip('/') - return (userhome + path[i:]) or '/' - elif compat_os_name in ('nt', 'ce'): - def compat_expanduser(path): - """Expand ~ and ~user constructs. - - If user or $HOME is unknown, do nothing.""" - if path[:1] != '~': - return path - i, n = 1, len(path) - while i < n and path[i] not in '/\\': - i = i + 1 - - if 'HOME' in os.environ: - userhome = compat_getenv('HOME') - elif 'USERPROFILE' in os.environ: - userhome = compat_getenv('USERPROFILE') - elif 'HOMEPATH' not in os.environ: - return path - else: - try: - drive = compat_getenv('HOMEDRIVE') - except KeyError: - drive = '' - userhome = os.path.join(drive, compat_getenv('HOMEPATH')) - - if i != 1: # ~user - userhome = os.path.join(os.path.dirname(userhome), path[1:i]) - - return userhome + path[i:] - else: - compat_expanduser = os.path.expanduser - - -if sys.version_info < (3, 0): - def compat_print(s): - from .utils import preferredencoding - print(s.encode(preferredencoding(), 'xmlcharrefreplace')) -else: - def compat_print(s): - assert isinstance(s, compat_str) - print(s) - - -if sys.version_info < (3, 0) and sys.platform == 'win32': - def compat_getpass(prompt, *args, **kwargs): - if isinstance(prompt, compat_str): - from .utils import preferredencoding - prompt = prompt.encode(preferredencoding()) - return getpass.getpass(prompt, *args, **kwargs) -else: - compat_getpass = getpass.getpass - -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - -# Python < 2.6.5 require kwargs to be bytes -try: - def _testfunc(x): - pass - _testfunc(**{'x': 0}) -except TypeError: - def compat_kwargs(kwargs): - return dict((bytes(k), v) for k, v in kwargs.items()) -else: - compat_kwargs = lambda kwargs: kwargs - - -try: - compat_numeric_types = (int, float, long, complex) -except NameError: # Python 3 - compat_numeric_types = (int, float, complex) - - -try: - compat_integer_types = (int, long) -except NameError: # Python 3 - compat_integer_types = (int, ) - - -if 
sys.version_info < (2, 7): - def compat_socket_create_connection(address, timeout, source_address=None): - host, port = address - err = None - for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket.socket(af, socktype, proto) - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - except socket.error as _: - err = _ - if sock is not None: - sock.close() - if err is not None: - raise err - else: - raise socket.error('getaddrinfo returns an empty list') -else: - compat_socket_create_connection = socket.create_connection - - -# Fix https://github.com/rg3/youtube-dl/issues/4223 -# See http://bugs.python.org/issue9161 for what is broken -def workaround_optparse_bug9161(): - op = optparse.OptionParser() - og = optparse.OptionGroup(op, 'foo') - try: - og.add_option('-t') - except TypeError: - real_add_option = optparse.OptionGroup.add_option - - def _compat_add_option(self, *args, **kwargs): - enc = lambda v: ( - v.encode('ascii', 'replace') if isinstance(v, compat_str) - else v) - bargs = [enc(a) for a in args] - bkwargs = dict( - (k, enc(v)) for k, v in kwargs.items()) - return real_add_option(self, *bargs, **bkwargs) - optparse.OptionGroup.add_option = _compat_add_option - - -if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 - compat_get_terminal_size = shutil.get_terminal_size -else: - _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) - - def compat_get_terminal_size(fallback=(80, 24)): - columns = compat_getenv('COLUMNS') - if columns: - columns = int(columns) - else: - columns = None - lines = compat_getenv('LINES') - if lines: - lines = int(lines) - else: - lines = None - - if columns is None or lines is None or columns <= 0 or lines <= 0: - try: - sp = subprocess.Popen( - ['stty', 'size'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = sp.communicate() - _lines, _columns = map(int, out.split()) - except Exception: - _columns, _lines = _terminal_size(*fallback) - - if columns is None or columns <= 0: - columns = _columns - if lines is None or lines <= 0: - lines = _lines - return _terminal_size(columns, lines) - -try: - itertools.count(start=0, step=1) - compat_itertools_count = itertools.count -except TypeError: # Python 2.6 - def compat_itertools_count(start=0, step=1): - n = start - while True: - yield n - n += step - -if sys.version_info >= (3, 0): - from tokenize import tokenize as compat_tokenize_tokenize -else: - from tokenize import generate_tokens as compat_tokenize_tokenize - - -try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def compat_struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) - - def compat_struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) - - class compat_Struct(struct.Struct): - def __init__(self, fmt): - if isinstance(fmt, compat_str): - fmt = fmt.encode('ascii') - super(compat_Struct, self).__init__(fmt) -else: - compat_struct_pack = struct.pack - compat_struct_unpack = struct.unpack - if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8): - class compat_Struct(struct.Struct): - def unpack(self, string): - if not isinstance(string, buffer): # noqa: F821 - string = buffer(string) 
# noqa: F821 - return super(compat_Struct, self).unpack(string) - else: - compat_Struct = struct.Struct - - -try: - from future_builtins import zip as compat_zip -except ImportError: # not 2.6+ or is 3.x - try: - from itertools import izip as compat_zip # < 2.5 or 3.x - except ImportError: - compat_zip = zip - - -if sys.version_info < (3, 3): - def compat_b64decode(s, *args, **kwargs): - if isinstance(s, compat_str): - s = s.encode('ascii') - return base64.b64decode(s, *args, **kwargs) -else: - compat_b64decode = base64.b64decode - - -if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): - # PyPy2 prior to version 5.4.0 expects byte strings as Windows function - # names, see the original PyPy issue [1] and the youtube-dl one [2]. - # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name - # 2. https://github.com/rg3/youtube-dl/pull/4392 - def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - real = ctypes.WINFUNCTYPE(*args, **kwargs) - - def resf(tpl, *args, **kwargs): - funcname, dll = tpl - return real((str(funcname), dll), *args, **kwargs) - - return resf -else: - def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - return ctypes.WINFUNCTYPE(*args, **kwargs) - - -__all__ = [ - 'compat_HTMLParseError', - 'compat_HTMLParser', - 'compat_HTTPError', - 'compat_Struct', - 'compat_b64decode', - 'compat_basestring', - 'compat_chr', - 'compat_cookiejar', - 'compat_cookies', - 'compat_ctypes_WINFUNCTYPE', - 'compat_etree_fromstring', - 'compat_etree_register_namespace', - 'compat_expanduser', - 'compat_get_terminal_size', - 'compat_getenv', - 'compat_getpass', - 'compat_html_entities', - 'compat_html_entities_html5', - 'compat_http_client', - 'compat_http_server', - 'compat_input', - 'compat_integer_types', - 'compat_itertools_count', - 'compat_kwargs', - 'compat_numeric_types', - 'compat_ord', - 'compat_os_name', - 'compat_parse_qs', - 'compat_print', - 'compat_setenv', - 'compat_shlex_quote', - 'compat_shlex_split', - 'compat_socket_create_connection', - 'compat_str', - 'compat_struct_pack', - 'compat_struct_unpack', - 'compat_subprocess_get_DEVNULL', - 'compat_tokenize_tokenize', - 'compat_urllib_error', - 'compat_urllib_parse', - 'compat_urllib_parse_unquote', - 'compat_urllib_parse_unquote_plus', - 'compat_urllib_parse_unquote_to_bytes', - 'compat_urllib_parse_urlencode', - 'compat_urllib_parse_urlparse', - 'compat_urllib_request', - 'compat_urllib_request_DataHandler', - 'compat_urllib_response', - 'compat_urlparse', - 'compat_urlretrieve', - 'compat_xml_parse_error', - 'compat_xpath', - 'compat_zip', - 'workaround_optparse_bug9161', -] diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py deleted file mode 100644 index 2e485df..0000000 --- a/youtube_dl/downloader/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import unicode_literals - -from .common import FileDownloader -from .f4m import F4mFD -from .hls import HlsFD -from .http import HttpFD -from .rtmp import RtmpFD -from .dash import DashSegmentsFD -from .rtsp import RtspFD -from .ism import IsmFD -from .external import ( - get_external_downloader, - FFmpegFD, -) - -from ..utils import ( - determine_protocol, -) - -PROTOCOL_MAP = { - 'rtmp': RtmpFD, - 'm3u8_native': HlsFD, - 'm3u8': FFmpegFD, - 'mms': RtspFD, - 'rtsp': RtspFD, - 'f4m': F4mFD, - 'http_dash_segments': DashSegmentsFD, - 'ism': IsmFD, -} - - -def get_suitable_downloader(info_dict, params={}): - """Get the downloader class that can handle the info dict.""" - protocol = 
determine_protocol(info_dict) - info_dict['protocol'] = protocol - - # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): - # return FFmpegFD - - external_downloader = params.get('external_downloader') - if external_downloader is not None: - ed = get_external_downloader(external_downloader) - if ed.can_download(info_dict): - return ed - - if protocol.startswith('m3u8') and info_dict.get('is_live'): - return FFmpegFD - - if protocol == 'm3u8' and params.get('hls_prefer_native') is True: - return HlsFD - - if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False: - return FFmpegFD - - return PROTOCOL_MAP.get(protocol, HttpFD) - - -__all__ = [ - 'get_suitable_downloader', - 'FileDownloader', -] diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py deleted file mode 100644 index 5979833..0000000 --- a/youtube_dl/downloader/common.py +++ /dev/null @@ -1,389 +0,0 @@ -from __future__ import division, unicode_literals - -import os -import re -import sys -import time -import random - -from ..compat import compat_os_name -from ..utils import ( - decodeArgument, - encodeFilename, - error_to_compat_str, - format_bytes, - shell_quote, - timeconvert, -) - - -class FileDownloader(object): - """File Downloader class. - - File downloader objects are the ones responsible for downloading the - actual video file and writing it to disk. - - File downloaders accept a lot of parameters. In order not to saturate - the object constructor with arguments, it receives a dictionary of - options instead. - - Available options: - - verbose: Print additional info to stdout. - quiet: Do not print messages to stdout. - ratelimit: Download speed limit, in bytes/sec. - retries: Number of times to retry for HTTP error 5xx - buffersize: Size of download buffer in bytes. - noresizebuffer: Do not automatically resize the download buffer. - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - test: Download only first bytes to test the downloader. - min_filesize: Skip files smaller than this size - max_filesize: Skip files larger than this size - xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. - external_downloader_args: A list of additional command-line arguments for the - external downloader. - hls_use_mpegts: Use the mpegts container for HLS videos. - http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be - useful for bypassing bandwidth throttling imposed by - a webserver (experimental) - - Subclasses of this one must re-define the real_download method.
- """ - - _TEST_FILE_SIZE = 10241 - params = None - - def __init__(self, ydl, params): - """Create a FileDownloader object with the given options.""" - self.ydl = ydl - self._progress_hooks = [] - self.params = params - self.add_progress_hook(self.report_progress) - - @staticmethod - def format_seconds(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - if hours > 99: - return '--:--:--' - if hours == 0: - return '%02d:%02d' % (mins, secs) - else: - return '%02d:%02d:%02d' % (hours, mins, secs) - - @staticmethod - def calc_percent(byte_counter, data_len): - if data_len is None: - return None - return float(byte_counter) / float(data_len) * 100.0 - - @staticmethod - def format_percent(percent): - if percent is None: - return '---.-%' - return '%6s' % ('%3.1f%%' % percent) - - @staticmethod - def calc_eta(start, now, total, current): - if total is None: - return None - if now is None: - now = time.time() - dif = now - start - if current == 0 or dif < 0.001: # One millisecond - return None - rate = float(current) / dif - return int((float(total) - float(current)) / rate) - - @staticmethod - def format_eta(eta): - if eta is None: - return '--:--' - return FileDownloader.format_seconds(eta) - - @staticmethod - def calc_speed(start, now, bytes): - dif = now - start - if bytes == 0 or dif < 0.001: # One millisecond - return None - return float(bytes) / dif - - @staticmethod - def format_speed(speed): - if speed is None: - return '%10s' % '---b/s' - return '%10s' % ('%s/s' % format_bytes(speed)) - - @staticmethod - def format_retries(retries): - return 'inf' if retries == float('inf') else '%.0f' % retries - - @staticmethod - def best_block_size(elapsed_time, bytes): - new_min = max(bytes / 2.0, 1.0) - new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB - if elapsed_time < 0.001: - return int(new_max) - rate = bytes / elapsed_time - if rate > new_max: - return int(new_max) - if rate < new_min: - return int(new_min) - return int(rate) - - @staticmethod - def parse_bytes(bytestr): - """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) - if matchobj is None: - return None - number = float(matchobj.group(1)) - multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return int(round(number * multiplier)) - - def to_screen(self, *args, **kargs): - self.ydl.to_screen(*args, **kargs) - - def to_stderr(self, message): - self.ydl.to_screen(message) - - def to_console_title(self, message): - self.ydl.to_console_title(message) - - def trouble(self, *args, **kargs): - self.ydl.trouble(*args, **kargs) - - def report_warning(self, *args, **kargs): - self.ydl.report_warning(*args, **kargs) - - def report_error(self, *args, **kargs): - self.ydl.report_error(*args, **kargs) - - def slow_down(self, start_time, now, byte_counter): - """Sleep if the download speed is over the rate limit.""" - rate_limit = self.params.get('ratelimit') - if rate_limit is None or byte_counter == 0: - return - if now is None: - now = time.time() - elapsed = now - start_time - if elapsed <= 0.0: - return - speed = float(byte_counter) / elapsed - if speed > rate_limit: - time.sleep(max((byte_counter // rate_limit) - elapsed, 0)) - - def temp_name(self, filename): - """Returns a temporary filename for the given filename.""" - if self.params.get('nopart', False) or filename == '-' or \ - (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): - return filename - 
return filename + '.part' - - def undo_temp_name(self, filename): - if filename.endswith('.part'): - return filename[:-len('.part')] - return filename - - def ytdl_filename(self, filename): - return filename + '.ytdl' - - def try_rename(self, old_filename, new_filename): - try: - if old_filename == new_filename: - return - os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) - except (IOError, OSError) as err: - self.report_error('unable to rename file: %s' % error_to_compat_str(err)) - - def try_utime(self, filename, last_modified_hdr): - """Try to set the last-modified time of the given file.""" - if last_modified_hdr is None: - return - if not os.path.isfile(encodeFilename(filename)): - return - timestr = last_modified_hdr - if timestr is None: - return - filetime = timeconvert(timestr) - if filetime is None: - return filetime - # Ignore obviously invalid dates - if filetime == 0: - return - try: - os.utime(filename, (time.time(), filetime)) - except Exception: - pass - return filetime - - def report_destination(self, filename): - """Report destination filename.""" - self.to_screen('[download] Destination: ' + filename) - - def _report_progress_status(self, msg, is_last_line=False): - fullmsg = '[download] ' + msg - if self.params.get('progress_with_newline', False): - self.to_screen(fullmsg) - else: - if compat_os_name == 'nt': - prev_len = getattr(self, '_report_progress_prev_line_length', - 0) - if prev_len > len(fullmsg): - fullmsg += ' ' * (prev_len - len(fullmsg)) - self._report_progress_prev_line_length = len(fullmsg) - clear_line = '\r' - else: - clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r') - self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) - self.to_console_title('youtube-dl ' + msg) - - def report_progress(self, s): - if s['status'] == 'finished': - if self.params.get('noprogress', False): - self.to_screen('[download] Download completed') - else: - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - self._report_progress_status( - msg_template % s, is_last_line=True) - - if self.params.get('noprogress'): - return - - if s['status'] != 'downloading': - return - - if s.get('eta') is not None: - s['_eta_str'] = self.format_eta(s['eta']) - else: - s['_eta_str'] = 'Unknown ETA' - - if s.get('total_bytes') and s.get('downloaded_bytes') is not None: - s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) - elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: - s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) - else: - if s.get('downloaded_bytes') == 0: - s['_percent_str'] = self.format_percent(0) - else: - s['_percent_str'] = 'Unknown %' - - if s.get('speed') is not None: - s['_speed_str'] = self.format_speed(s['speed']) - else: - s['_speed_str'] = 'Unknown speed' - - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' - elif s.get('total_bytes_estimate') is not None: - s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) - msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' - else: - if 
s.get('downloaded_bytes') is not None: - s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) - if s.get('elapsed'): - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' - else: - msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' - else: - msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - - self._report_progress_status(msg_template % s) - - def report_resuming_byte(self, resume_len): - """Report attempt to resume at given byte.""" - self.to_screen('[download] Resuming download at byte %s' % resume_len) - - def report_retry(self, err, count, retries): - """Report retry in case of HTTP error 5xx""" - self.to_screen( - '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' - % (error_to_compat_str(err), count, self.format_retries(retries))) - - def report_file_already_downloaded(self, file_name): - """Report file has already been fully downloaded.""" - try: - self.to_screen('[download] %s has already been downloaded' % file_name) - except UnicodeEncodeError: - self.to_screen('[download] The file has already been downloaded') - - def report_unable_to_resume(self): - """Report it was impossible to resume download.""" - self.to_screen('[download] Unable to resume') - - def download(self, filename, info_dict): - """Download to a filename using the info from info_dict - Return True on success and False otherwise - """ - - nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) and - os.path.exists(encodeFilename(filename)) - ) - - if not hasattr(filename, 'write'): - continuedl_and_exists = ( - self.params.get('continuedl', True) and - os.path.isfile(encodeFilename(filename)) and - not self.params.get('nopart', False) - ) - - # Check file already present - if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): - self.report_file_already_downloaded(filename) - self._hook_progress({ - 'filename': filename, - 'status': 'finished', - 'total_bytes': os.path.getsize(encodeFilename(filename)), - }) - return True - - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) - - return self.real_download(filename, info_dict) - - def real_download(self, filename, info_dict): - """Real download process. 
Redefine in subclasses.""" - raise NotImplementedError('This method must be implemented by subclasses') - - def _hook_progress(self, status): - for ph in self._progress_hooks: - ph(status) - - def add_progress_hook(self, ph): - # See YoutubeDl.py (search for progress_hooks) for a description of - # this interface - self._progress_hooks.append(ph) - - def _debug_cmd(self, args, exe=None): - if not self.params.get('verbose', False): - return - - str_args = [decodeArgument(a) for a in args] - - if exe is None: - exe = os.path.basename(str_args[0]) - - self.to_screen('[debug] %s command line: %s' % ( - exe, shell_quote(str_args))) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py deleted file mode 100644 index eaa7adf..0000000 --- a/youtube_dl/downloader/dash.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import unicode_literals - -from .fragment import FragmentFD -from ..compat import compat_urllib_error -from ..utils import ( - DownloadError, - urljoin, -) - - -class DashSegmentsFD(FragmentFD): - """ - Download segments in a DASH manifest - """ - - FD_NAME = 'dashsegments' - - def real_download(self, filename, info_dict): - fragment_base_url = info_dict.get('fragment_base_url') - fragments = info_dict['fragments'][:1] if self.params.get( - 'test', False) else info_dict['fragments'] - - ctx = { - 'filename': filename, - 'total_frags': len(fragments), - } - - self._prepare_and_start_frag_download(ctx) - - fragment_retries = self.params.get('fragment_retries', 0) - skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - - frag_index = 0 - for i, fragment in enumerate(fragments): - frag_index += 1 - if frag_index <= ctx['fragment_index']: - continue - # In DASH, the first segment contains necessary headers to - # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - count = 0 - while count <= fragment_retries: - try: - fragment_url = fragment.get('url') - if not fragment_url: - assert fragment_base_url - fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) - if not success: - return False - self._append_fragment(ctx, frag_content) - break - except compat_urllib_error.HTTPError as err: - # YouTube may often return 404 HTTP error for a fragment causing the - # whole download to fail. However if the same fragment is immediately - # retried with the same request data this usually succeeds (1-2 attemps - # is usually enough) thus allowing to download the whole file successfully. - # To be future-proof we will retry all fragments that fail with any - # HTTP error. 
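# The enclosing 'while count <= fragment_retries' loop re-requests the same fragment; only once the retry budget is exhausted does the code below choose between skipping the fragment and aborting the download.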
- count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - except DownloadError: - # Don't retry fragment if error occurred during HTTP downloading - # itself since it has own retry settings - if not fatal: - self.report_skip_fragment(frag_index) - break - raise - - if count > fragment_retries: - if not fatal: - self.report_skip_fragment(frag_index) - continue - self.report_error('giving up after %s fragment retries' % fragment_retries) - return False - - self._finish_frag_download(ctx) - - return True diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py deleted file mode 100644 index 958d00a..0000000 --- a/youtube_dl/downloader/external.py +++ /dev/null @@ -1,354 +0,0 @@ -from __future__ import unicode_literals - -import os.path -import re -import subprocess -import sys -import time - -from .common import FileDownloader -from ..compat import ( - compat_setenv, - compat_str, -) -from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS -from ..utils import ( - cli_option, - cli_valueless_option, - cli_bool_option, - cli_configuration_args, - encodeFilename, - encodeArgument, - handle_youtubedl_headers, - check_executable, - is_outdated_version, -) - - -class ExternalFD(FileDownloader): - def real_download(self, filename, info_dict): - self.report_destination(filename) - tmpfilename = self.temp_name(filename) - - try: - started = time.time() - retval = self._call_downloader(tmpfilename, info_dict) - except KeyboardInterrupt: - if not info_dict.get('is_live'): - raise - # Live stream downloading cancellation should be considered as - # correct and expected termination thus all postprocessing - # should take place - retval = 0 - self.to_screen('[%s] Interrupted by user' % self.get_basename()) - - if retval == 0: - status = { - 'filename': filename, - 'status': 'finished', - 'elapsed': time.time() - started, - } - if filename != '-': - fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) - self.try_rename(tmpfilename, filename) - status.update({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, - }) - self._hook_progress(status) - return True - else: - self.to_stderr('\n') - self.report_error('%s exited with code %d' % ( - self.get_basename(), retval)) - return False - - @classmethod - def get_basename(cls): - return cls.__name__[:-2].lower() - - @property - def exe(self): - return self.params.get('external_downloader') - - @classmethod - def available(cls): - return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT]) - - @classmethod - def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') - - @classmethod - def can_download(cls, info_dict): - return cls.available() and cls.supports(info_dict) - - def _option(self, command_option, param): - return cli_option(self.params, command_option, param) - - def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): - return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) - - def _valueless_option(self, command_option, param, expected_value=True): - return cli_valueless_option(self.params, command_option, param, expected_value) - - def _configuration_args(self, default=[]): - return cli_configuration_args(self.params, 'external_downloader_args', default) - - def _call_downloader(self, tmpfilename, info_dict): - """ Either overwrite this or 
implement _make_cmd """ - cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] - - self._debug_cmd(cmd) - - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = p.communicate() - if p.returncode != 0: - self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode - - -class CurlFD(ExternalFD): - AVAILABLE_OPT = '-V' - - def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '--location', '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') - cmd += self._valueless_option('--silent', 'noprogress') - cmd += self._valueless_option('--verbose', 'verbose') - cmd += self._option('--limit-rate', 'ratelimit') - cmd += self._option('--retry', 'retries') - cmd += self._option('--max-filesize', 'max_filesize') - cmd += self._option('--interface', 'source_address') - cmd += self._option('--proxy', 'proxy') - cmd += self._valueless_option('--insecure', 'nocheckcertificate') - cmd += self._configuration_args() - cmd += ['--', info_dict['url']] - return cmd - - def _call_downloader(self, tmpfilename, info_dict): - cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] - - self._debug_cmd(cmd) - - # curl writes the progress to stderr so don't capture it. - p = subprocess.Popen(cmd) - p.communicate() - return p.returncode - - -class AxelFD(ExternalFD): - AVAILABLE_OPT = '-V' - - def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['-H', '%s: %s' % (key, val)] - cmd += self._configuration_args() - cmd += ['--', info_dict['url']] - return cmd - - -class WgetFD(ExternalFD): - AVAILABLE_OPT = '--version' - - def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._option('--bind-address', 'source_address') - cmd += self._option('--proxy', 'proxy') - cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') - cmd += self._configuration_args() - cmd += ['--', info_dict['url']] - return cmd - - -class Aria2cFD(ExternalFD): - AVAILABLE_OPT = '-v' - - def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c'] - cmd += self._configuration_args([ - '--min-split-size', '1M', '--max-connection-per-server', '4']) - dn = os.path.dirname(tmpfilename) - if dn: - cmd += ['--dir', dn] - cmd += ['--out', os.path.basename(tmpfilename)] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._option('--interface', 'source_address') - cmd += self._option('--all-proxy', 'proxy') - cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') - cmd += ['--', info_dict['url']] - return cmd - - -class HttpieFD(ExternalFD): - @classmethod - def available(cls): - return check_executable('http', ['--version']) - - def _make_cmd(self, tmpfilename, info_dict): - cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] - for key, val in info_dict['http_headers'].items(): - cmd += ['%s:%s' % (key, val)] - return cmd - - -class FFmpegFD(ExternalFD): - @classmethod - def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') - - @classmethod - def available(cls): - return 
FFmpegPostProcessor().available - - def _call_downloader(self, tmpfilename, info_dict): - url = info_dict['url'] - ffpp = FFmpegPostProcessor(downloader=self) - if not ffpp.available: - self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') - return False - ffpp.check_version() - - args = [ffpp.executable, '-y'] - - for log_level in ('quiet', 'verbose'): - if self.params.get(log_level, False): - args += ['-loglevel', log_level] - break - - seekable = info_dict.get('_seekable') - if seekable is not None: - # setting -seekable prevents ffmpeg from guessing if the server - # supports seeking(by adding the header `Range: bytes=0-`), which - # can cause problems in some cases - # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 - # http://trac.ffmpeg.org/ticket/6125#comment:10 - args += ['-seekable', '1' if seekable else '0'] - - args += self._configuration_args() - - # start_time = info_dict.get('start_time') or 0 - # if start_time: - # args += ['-ss', compat_str(start_time)] - # end_time = info_dict.get('end_time') - # if end_time: - # args += ['-t', compat_str(end_time - start_time)] - - if info_dict['http_headers'] and re.match(r'^https?://', url): - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - headers = handle_youtubedl_headers(info_dict['http_headers']) - args += [ - '-headers', - ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] - - env = None - proxy = self.params.get('proxy') - if proxy: - if not re.match(r'^[\da-zA-Z]+://', proxy): - proxy = 'http://%s' % proxy - - if proxy.startswith('socks'): - self.report_warning( - '%s does not support SOCKS proxies. Downloading is likely to fail. ' - 'Consider adding --hls-prefer-native to your command.' 
% self.get_basename()) - - # Since December 2015 ffmpeg supports -http_proxy option (see - # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) - # We could switch to the following code if we are able to detect version properly - # args += ['-http_proxy', proxy] - env = os.environ.copy() - compat_setenv('HTTP_PROXY', proxy, env=env) - compat_setenv('http_proxy', proxy, env=env) - - protocol = info_dict.get('protocol') - - if protocol == 'rtmp': - player_url = info_dict.get('player_url') - page_url = info_dict.get('page_url') - app = info_dict.get('app') - play_path = info_dict.get('play_path') - tc_url = info_dict.get('tc_url') - flash_version = info_dict.get('flash_version') - live = info_dict.get('rtmp_live', False) - if player_url is not None: - args += ['-rtmp_swfverify', player_url] - if page_url is not None: - args += ['-rtmp_pageurl', page_url] - if app is not None: - args += ['-rtmp_app', app] - if play_path is not None: - args += ['-rtmp_playpath', play_path] - if tc_url is not None: - args += ['-rtmp_tcurl', tc_url] - if flash_version is not None: - args += ['-rtmp_flashver', flash_version] - if live: - args += ['-rtmp_live', 'live'] - - args += ['-i', url, '-c', 'copy'] - - if self.params.get('test', False): - args += ['-fs', compat_str(self._TEST_FILE_SIZE)] - - if protocol in ('m3u8', 'm3u8_native'): - if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': - args += ['-f', 'mpegts'] - else: - args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): - args += ['-bsf:a', 'aac_adtstoasc'] - elif protocol == 'rtmp': - args += ['-f', 'flv'] - else: - args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])] - - args = [encodeArgument(opt) for opt in args] - args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) - - self._debug_cmd(args) - - proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) - try: - retval = proc.wait() - except KeyboardInterrupt: - # subprocces.run would send the SIGKILL signal to ffmpeg and the - # mp4 file couldn't be played, but if we ask ffmpeg to quit it - # produces a file that is playable (this is mostly useful for live - # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/rg3/youtube-dl/issues/8300). - if sys.platform != 'win32': - proc.communicate(b'q') - raise - return retval - - -class AVconvFD(FFmpegFD): - pass - - -_BY_NAME = dict( - (klass.get_basename(), klass) - for name, klass in globals().items() - if name.endswith('FD') and name != 'ExternalFD' -) - - -def list_external_downloaders(): - return sorted(_BY_NAME.keys()) - - -def get_external_downloader(external_downloader): - """ Given the name of the executable, see whether we support the given - downloader . 
""" - # Drop .exe extension on Windows - bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME[bn] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py deleted file mode 100644 index 15e71be..0000000 --- a/youtube_dl/downloader/f4m.py +++ /dev/null @@ -1,438 +0,0 @@ -from __future__ import division, unicode_literals - -import io -import itertools -import time - -from .fragment import FragmentFD -from ..compat import ( - compat_b64decode, - compat_etree_fromstring, - compat_urlparse, - compat_urllib_error, - compat_urllib_parse_urlparse, - compat_struct_pack, - compat_struct_unpack, -) -from ..utils import ( - fix_xml_ampersands, - xpath_text, -) - - -class DataTruncatedError(Exception): - pass - - -class FlvReader(io.BytesIO): - """ - Reader for Flv files - The file format is documented in https://www.adobe.com/devnet/f4v.html - """ - - def read_bytes(self, n): - data = self.read(n) - if len(data) < n: - raise DataTruncatedError( - 'FlvReader error: need %d bytes while only %d bytes got' % ( - n, len(data))) - return data - - # Utility functions for reading numbers and strings - def read_unsigned_long_long(self): - return compat_struct_unpack('!Q', self.read_bytes(8))[0] - - def read_unsigned_int(self): - return compat_struct_unpack('!I', self.read_bytes(4))[0] - - def read_unsigned_char(self): - return compat_struct_unpack('!B', self.read_bytes(1))[0] - - def read_string(self): - res = b'' - while True: - char = self.read_bytes(1) - if char == b'\x00': - break - res += char - return res - - def read_box_info(self): - """ - Read a box and return the info as a tuple: (box_size, box_type, box_data) - """ - real_size = size = self.read_unsigned_int() - box_type = self.read_bytes(4) - header_end = 8 - if size == 1: - real_size = self.read_unsigned_long_long() - header_end = 16 - return real_size, box_type, self.read_bytes(real_size - header_end) - - def read_asrt(self): - # version - self.read_unsigned_char() - # flags - self.read_bytes(3) - quality_entry_count = self.read_unsigned_char() - # QualityEntryCount - for i in range(quality_entry_count): - self.read_string() - - segment_run_count = self.read_unsigned_int() - segments = [] - for i in range(segment_run_count): - first_segment = self.read_unsigned_int() - fragments_per_segment = self.read_unsigned_int() - segments.append((first_segment, fragments_per_segment)) - - return { - 'segment_run': segments, - } - - def read_afrt(self): - # version - self.read_unsigned_char() - # flags - self.read_bytes(3) - # time scale - self.read_unsigned_int() - - quality_entry_count = self.read_unsigned_char() - # QualitySegmentUrlModifiers - for i in range(quality_entry_count): - self.read_string() - - fragments_count = self.read_unsigned_int() - fragments = [] - for i in range(fragments_count): - first = self.read_unsigned_int() - first_ts = self.read_unsigned_long_long() - duration = self.read_unsigned_int() - if duration == 0: - discontinuity_indicator = self.read_unsigned_char() - else: - discontinuity_indicator = None - fragments.append({ - 'first': first, - 'ts': first_ts, - 'duration': duration, - 'discontinuity_indicator': discontinuity_indicator, - }) - - return { - 'fragments': fragments, - } - - def read_abst(self): - # version - self.read_unsigned_char() - # flags - self.read_bytes(3) - - self.read_unsigned_int() # BootstrapinfoVersion - # Profile,Live,Update,Reserved - flags = self.read_unsigned_char() - live = flags & 0x20 != 0 - # time scale - self.read_unsigned_int() - # CurrentMediaTime - 
self.read_unsigned_long_long() - # SmpteTimeCodeOffset - self.read_unsigned_long_long() - - self.read_string() # MovieIdentifier - server_count = self.read_unsigned_char() - # ServerEntryTable - for i in range(server_count): - self.read_string() - quality_count = self.read_unsigned_char() - # QualityEntryTable - for i in range(quality_count): - self.read_string() - # DrmData - self.read_string() - # MetaData - self.read_string() - - segments_count = self.read_unsigned_char() - segments = [] - for i in range(segments_count): - box_size, box_type, box_data = self.read_box_info() - assert box_type == b'asrt' - segment = FlvReader(box_data).read_asrt() - segments.append(segment) - fragments_run_count = self.read_unsigned_char() - fragments = [] - for i in range(fragments_run_count): - box_size, box_type, box_data = self.read_box_info() - assert box_type == b'afrt' - fragments.append(FlvReader(box_data).read_afrt()) - - return { - 'segments': segments, - 'fragments': fragments, - 'live': live, - } - - def read_bootstrap_info(self): - total_size, box_type, box_data = self.read_box_info() - assert box_type == b'abst' - return FlvReader(box_data).read_abst() - - -def read_bootstrap_info(bootstrap_bytes): - return FlvReader(bootstrap_bytes).read_bootstrap_info() - - -def build_fragments_list(boot_info): - """ Return a list of (segment, fragment) for each fragment in the video """ - res = [] - segment_run_table = boot_info['segments'][0] - fragment_run_entry_table = boot_info['fragments'][0]['fragments'] - first_frag_number = fragment_run_entry_table[0]['first'] - fragments_counter = itertools.count(first_frag_number) - for segment, fragments_count in segment_run_table['segment_run']: - # In some live HDS streams (for example Rai), `fragments_count` is - # abnormal and causing out-of-memory errors. It's OK to change the - # number of fragments for live streams as they are updated periodically - if fragments_count == 4294967295 and boot_info['live']: - fragments_count = 2 - for _ in range(fragments_count): - res.append((segment, next(fragments_counter))) - - if boot_info['live']: - res = res[-2:] - - return res - - -def write_unsigned_int(stream, val): - stream.write(compat_struct_pack('!I', val)) - - -def write_unsigned_int_24(stream, val): - stream.write(compat_struct_pack('!I', val)[1:]) - - -def write_flv_header(stream): - """Writes the FLV header to stream""" - # FLV header - stream.write(b'FLV\x01') - stream.write(b'\x05') - stream.write(b'\x00\x00\x00\x09') - stream.write(b'\x00\x00\x00\x00') - - -def write_metadata_tag(stream, metadata): - """Writes optional metadata tag to stream""" - SCRIPT_TAG = b'\x12' - FLV_TAG_HEADER_LEN = 11 - - if metadata: - stream.write(SCRIPT_TAG) - write_unsigned_int_24(stream, len(metadata)) - stream.write(b'\x00\x00\x00\x00\x00\x00\x00') - stream.write(metadata) - write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) - - -def remove_encrypted_media(media): - return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and - 'drmAdditionalHeaderSetId' not in e.attrib, - media)) - - -def _add_ns(prop, ver=1): - return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) - - -def get_base_url(manifest): - base_url = xpath_text( - manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], - 'base URL', default=None) - if base_url: - base_url = base_url.strip() - return base_url - - -class F4mFD(FragmentFD): - """ - A downloader for f4m manifests or AdobeHDS. 
- """ - - FD_NAME = 'f4m' - - def _get_unencrypted_media(self, doc): - media = doc.findall(_add_ns('media')) - if not media: - self.report_error('No media found') - for e in (doc.findall(_add_ns('drmAdditionalHeader')) + - doc.findall(_add_ns('drmAdditionalHeaderSet'))): - # If id attribute is missing it's valid for all media nodes - # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute - if 'id' not in e.attrib: - self.report_error('Missing ID in f4m DRM') - media = remove_encrypted_media(media) - if not media: - self.report_error('Unsupported DRM') - return media - - def _get_bootstrap_from_url(self, bootstrap_url): - bootstrap = self.ydl.urlopen(bootstrap_url).read() - return read_bootstrap_info(bootstrap) - - def _update_live_fragments(self, bootstrap_url, latest_fragment): - fragments_list = [] - retries = 30 - while (not fragments_list) and (retries > 0): - boot_info = self._get_bootstrap_from_url(bootstrap_url) - fragments_list = build_fragments_list(boot_info) - fragments_list = [f for f in fragments_list if f[1] > latest_fragment] - if not fragments_list: - # Retry after a while - time.sleep(5.0) - retries -= 1 - - if not fragments_list: - self.report_error('Failed to update fragments') - - return fragments_list - - def _parse_bootstrap_node(self, node, base_url): - # Sometimes non empty inline bootstrap info can be specified along - # with bootstrap url attribute (e.g. dummy inline bootstrap info - # contains whitespace characters in [1]). We will prefer bootstrap - # url over inline bootstrap info when present. - # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m - bootstrap_url = node.get('url') - if bootstrap_url: - bootstrap_url = compat_urlparse.urljoin( - base_url, bootstrap_url) - boot_info = self._get_bootstrap_from_url(bootstrap_url) - else: - bootstrap_url = None - bootstrap = compat_b64decode(node.text) - boot_info = read_bootstrap_info(bootstrap) - return boot_info, bootstrap_url - - def real_download(self, filename, info_dict): - man_url = info_dict['url'] - requested_bitrate = info_dict.get('tbr') - self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) - - urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.geturl() - # Some manifests may be malformed, e.g. prosiebensat1 generated manifests - # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 - # and https://github.com/rg3/youtube-dl/issues/7823) - manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() - - doc = compat_etree_fromstring(manifest) - formats = [(int(f.attrib.get('bitrate', -1)), f) - for f in self._get_unencrypted_media(doc)] - if requested_bitrate is None or len(formats) == 1: - # get the best format - formats = sorted(formats, key=lambda f: f[0]) - rate, media = formats[-1] - else: - rate, media = list(filter( - lambda f: int(f[0]) == requested_bitrate, formats))[0] - - # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. 
- man_base_url = get_base_url(doc) or man_url - - base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) - bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - boot_info, bootstrap_url = self._parse_bootstrap_node( - bootstrap_node, man_base_url) - live = boot_info['live'] - metadata_node = media.find(_add_ns('metadata')) - if metadata_node is not None: - metadata = compat_b64decode(metadata_node.text) - else: - metadata = None - - fragments_list = build_fragments_list(boot_info) - test = self.params.get('test', False) - if test: - # We only download the first fragment - fragments_list = fragments_list[:1] - total_frags = len(fragments_list) - # For some akamai manifests we'll need to add a query to the fragment url - akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) - - ctx = { - 'filename': filename, - 'total_frags': total_frags, - 'live': live, - } - - self._prepare_frag_download(ctx) - - dest_stream = ctx['dest_stream'] - - if ctx['complete_frags_downloaded_bytes'] == 0: - write_flv_header(dest_stream) - if not live: - write_metadata_tag(dest_stream, metadata) - - base_url_parsed = compat_urllib_parse_urlparse(base_url) - - self._start_frag_download(ctx) - - frag_index = 0 - while fragments_list: - seg_i, frag_i = fragments_list.pop(0) - frag_index += 1 - if frag_index <= ctx['fragment_index']: - continue - name = 'Seg%d-Frag%d' % (seg_i, frag_i) - query = [] - if base_url_parsed.query: - query.append(base_url_parsed.query) - if akamai_pv: - query.append(akamai_pv.strip(';')) - if info_dict.get('extra_param_to_segment_url'): - query.append(info_dict['extra_param_to_segment_url']) - url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) - try: - success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict) - if not success: - return False - reader = FlvReader(down_data) - while True: - try: - _, box_type, box_data = reader.read_box_info() - except DataTruncatedError: - if test: - # In tests, segments may be truncated, and thus - # FlvReader may not be able to parse the whole - # chunk. If so, write the segment as is - # See https://github.com/rg3/youtube-dl/issues/9214 - dest_stream.write(down_data) - break - raise - if box_type == b'mdat': - self._append_fragment(ctx, box_data) - break - except (compat_urllib_error.HTTPError, ) as err: - if live and (err.code == 404 or err.code == 410): - # We didn't keep up with the live window. Continue - # with the next available fragment. 
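# Emptying fragments_list triggers the refresh at the end of this loop iteration: _update_live_fragments() re-reads the bootstrap info so the download can resume from the newest fragments in the live window.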
- msg = 'Fragment %d unavailable' % frag_i - self.report_warning(msg) - fragments_list = [] - else: - raise - - if not fragments_list and not test and live and bootstrap_url: - fragments_list = self._update_live_fragments(bootstrap_url, frag_i) - total_frags += len(fragments_list) - if fragments_list and (fragments_list[0][1] > frag_i + 1): - msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) - self.report_warning(msg) - - self._finish_frag_download(ctx) - - return True diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py deleted file mode 100644 index 917f6dc..0000000 --- a/youtube_dl/downloader/fragment.py +++ /dev/null @@ -1,268 +0,0 @@ -from __future__ import division, unicode_literals - -import os -import time -import json - -from .common import FileDownloader -from .http import HttpFD -from ..utils import ( - error_to_compat_str, - encodeFilename, - sanitize_open, - sanitized_Request, -) - - -class HttpQuietDownloader(HttpFD): - def to_screen(self, *args, **kargs): - pass - - -class FragmentFD(FileDownloader): - """ - A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). - - Available options: - - fragment_retries: Number of times to retry a fragment for HTTP error (DASH - and hlsnative only) - skip_unavailable_fragments: - Skip unavailable fragments (DASH and hlsnative only) - keep_fragments: Keep downloaded fragments on disk after downloading is - finished - - For each incomplete fragment download youtube-dl keeps on disk a special - bookkeeping file with download state and metadata (in future such files will - be used for any incomplete download handled by youtube-dl). This file is - used to properly handle resuming, check download file consistency and detect - potential errors. The file has a .ytdl extension and represents a standard - JSON file of the following format: - - extractor: - Dictionary of extractor related data. TBD. - - downloader: - Dictionary of downloader related data. May contain following data: - current_fragment: - Dictionary with current (being downloaded) fragment data: - index: 0-based index of current fragment among all fragments - fragment_count: - Total count of fragments - - This feature is experimental and file format may change in future. - """ - - def report_retry_fragment(self, err, frag_index, count, retries): - self.to_screen( - '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' - % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) - - def report_skip_fragment(self, frag_index): - self.to_screen('[download] Skipping fragment %d...' 
% frag_index) - - def _prepare_url(self, info_dict, url): - headers = info_dict.get('http_headers') - return sanitized_Request(url, None, headers) if headers else url - - def _prepare_and_start_frag_download(self, ctx): - self._prepare_frag_download(ctx) - self._start_frag_download(ctx) - - @staticmethod - def __do_ytdl_file(ctx): - return not ctx['live'] and not ctx['tmpfilename'] == '-' - - def _read_ytdl_file(self, ctx): - assert 'ytdl_corrupt' not in ctx - stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') - try: - ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] - except Exception: - ctx['ytdl_corrupt'] = True - finally: - stream.close() - - def _write_ytdl_file(self, ctx): - frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') - downloader = { - 'current_fragment': { - 'index': ctx['fragment_index'], - }, - } - if ctx.get('fragment_count') is not None: - downloader['fragment_count'] = ctx['fragment_count'] - frag_index_stream.write(json.dumps({'downloader': downloader})) - frag_index_stream.close() - - def _download_fragment(self, ctx, frag_url, info_dict, headers=None): - fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) - success = ctx['dl'].download(fragment_filename, { - 'url': frag_url, - 'http_headers': headers or info_dict.get('http_headers'), - }) - if not success: - return False, None - down, frag_sanitized = sanitize_open(fragment_filename, 'rb') - ctx['fragment_filename_sanitized'] = frag_sanitized - frag_content = down.read() - down.close() - return True, frag_content - - def _append_fragment(self, ctx, frag_content): - try: - ctx['dest_stream'].write(frag_content) - ctx['dest_stream'].flush() - finally: - if self.__do_ytdl_file(ctx): - self._write_ytdl_file(ctx) - if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) - del ctx['fragment_filename_sanitized'] - - def _prepare_frag_download(self, ctx): - if 'live' not in ctx: - ctx['live'] = False - if not ctx['live']: - total_frags_str = '%d' % ctx['total_frags'] - ad_frags = ctx.get('ad_frags', 0) - if ad_frags: - total_frags_str += ' (not including %d ad)' % ad_frags - else: - total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) - self.report_destination(ctx['filename']) - dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': True, - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit'), - 'retries': self.params.get('retries', 0), - 'nopart': self.params.get('nopart', False), - 'test': self.params.get('test', False), - } - ) - tmpfilename = self.temp_name(ctx['filename']) - open_mode = 'wb' - resume_len = 0 - - # Establish possible resume length - if os.path.isfile(encodeFilename(tmpfilename)): - open_mode = 'ab' - resume_len = os.path.getsize(encodeFilename(tmpfilename)) - - # Should be initialized before ytdl file check - ctx.update({ - 'tmpfilename': tmpfilename, - 'fragment_index': 0, - }) - - if self.__do_ytdl_file(ctx): - if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): - self._read_ytdl_file(ctx) - is_corrupt = ctx.get('ytdl_corrupt') is True - is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 - if is_corrupt or is_inconsistent: - message = ( - '.ytdl file is corrupt' if is_corrupt else - 'Inconsistent state of incomplete fragment download') - self.report_warning( - '%s. Restarting from the beginning...' 
% message) - ctx['fragment_index'] = resume_len = 0 - if 'ytdl_corrupt' in ctx: - del ctx['ytdl_corrupt'] - self._write_ytdl_file(ctx) - else: - self._write_ytdl_file(ctx) - assert ctx['fragment_index'] == 0 - - dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) - - ctx.update({ - 'dl': dl, - 'dest_stream': dest_stream, - 'tmpfilename': tmpfilename, - # Total complete fragments downloaded so far in bytes - 'complete_frags_downloaded_bytes': resume_len, - }) - - def _start_frag_download(self, ctx): - total_frags = ctx['total_frags'] - # This dict stores the download progress, it's updated by the progress - # hook - state = { - 'status': 'downloading', - 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], - 'fragment_index': ctx['fragment_index'], - 'fragment_count': total_frags, - 'filename': ctx['filename'], - 'tmpfilename': ctx['tmpfilename'], - } - - start = time.time() - ctx.update({ - 'started': start, - # Amount of fragment's bytes downloaded by the time of the previous - # frag progress hook invocation - 'prev_frag_downloaded_bytes': 0, - }) - - def frag_progress_hook(s): - if s['status'] not in ('downloading', 'finished'): - return - - time_now = time.time() - state['elapsed'] = time_now - start - frag_total_bytes = s.get('total_bytes') or 0 - if not ctx['live']: - estimated_size = ( - (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['fragment_index'] + 1) * total_frags) - state['total_bytes_estimate'] = estimated_size - - if s['status'] == 'finished': - state['fragment_index'] += 1 - ctx['fragment_index'] = state['fragment_index'] - state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] - ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] - ctx['prev_frag_downloaded_bytes'] = 0 - else: - frag_downloaded_bytes = s['downloaded_bytes'] - state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] - if not ctx['live']: - state['eta'] = self.calc_eta( - start, time_now, estimated_size, - state['downloaded_bytes']) - state['speed'] = s.get('speed') or ctx.get('speed') - ctx['speed'] = state['speed'] - ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes - self._hook_progress(state) - - ctx['dl'].add_progress_hook(frag_progress_hook) - - return start - - def _finish_frag_download(self, ctx): - ctx['dest_stream'].close() - if self.__do_ytdl_file(ctx): - ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) - if os.path.isfile(ytdl_filename): - os.remove(ytdl_filename) - elapsed = time.time() - ctx['started'] - - if ctx['tmpfilename'] == '-': - downloaded_bytes = ctx['complete_frags_downloaded_bytes'] - else: - self.try_rename(ctx['tmpfilename'], ctx['filename']) - downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) - - self._hook_progress({ - 'downloaded_bytes': downloaded_bytes, - 'total_bytes': downloaded_bytes, - 'filename': ctx['filename'], - 'status': 'finished', - 'elapsed': elapsed, - }) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py deleted file mode 100644 index fd30452..0000000 --- a/youtube_dl/downloader/hls.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import unicode_literals - -import re -import binascii -try: - from Crypto.Cipher import AES - can_decrypt_frag = True -except ImportError: - can_decrypt_frag = False - -from .fragment import FragmentFD -from .external import FFmpegFD - -from ..compat import ( - compat_urllib_error, - compat_urlparse, - compat_struct_pack, -) -from ..utils import ( - 
parse_m3u8_attributes, - update_url_query, -) - - -class HlsFD(FragmentFD): - """ A limited implementation that does not require ffmpeg """ - - FD_NAME = 'hlsnative' - - @staticmethod - def can_download(manifest, info_dict): - UNSUPPORTED_FEATURES = ( - r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] - # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] - - # Live streams heuristic does not always work (e.g. geo restricted to Germany - # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) - # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] - - # This heuristic also is not correct since segments may not be appended as well. - # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite - # no segments will definitely be appended to the end of the playlist. - # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # # event media playlists [4] - - # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 - # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 - # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 - # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 - ) - check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] - is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest - check_results.append(can_decrypt_frag or not is_aes128_enc) - check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) - check_results.append(not info_dict.get('is_live')) - return all(check_results) - - def real_download(self, filename, info_dict): - man_url = info_dict['url'] - self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - - urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.geturl() - s = urlh.read().decode('utf-8', 'ignore') - - if not self.can_download(s, info_dict): - if info_dict.get('extra_param_to_segment_url'): - self.report_error('pycrypto not found. 
Please install it.') - return False - self.report_warning( - 'hlsnative has detected features it does not support, ' - 'extraction will be delegated to ffmpeg') - fd = FFmpegFD(self.ydl, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - return fd.real_download(filename, info_dict) - - def is_ad_fragment(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or - s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) - - media_frags = 0 - ad_frags = 0 - ad_frag_next = False - for line in s.splitlines(): - line = line.strip() - if not line: - continue - if line.startswith('#'): - if is_ad_fragment(line): - ad_frags += 1 - ad_frag_next = True - continue - if ad_frag_next: - ad_frag_next = False - continue - media_frags += 1 - - ctx = { - 'filename': filename, - 'total_frags': media_frags, - 'ad_frags': ad_frags, - } - - self._prepare_and_start_frag_download(ctx) - - fragment_retries = self.params.get('fragment_retries', 0) - skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - test = self.params.get('test', False) - - extra_query = None - extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') - if extra_param_to_segment_url: - extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) - i = 0 - media_sequence = 0 - decrypt_info = {'METHOD': 'NONE'} - byte_range = {} - frag_index = 0 - ad_frag_next = False - for line in s.splitlines(): - line = line.strip() - if line: - if not line.startswith('#'): - if ad_frag_next: - ad_frag_next = False - continue - frag_index += 1 - if frag_index <= ctx['fragment_index']: - continue - frag_url = ( - line - if re.match(r'^https?://', line) - else compat_urlparse.urljoin(man_url, line)) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) - count = 0 - headers = info_dict.get('http_headers', {}) - if byte_range: - headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end']) - while count <= fragment_retries: - try: - success, frag_content = self._download_fragment( - ctx, frag_url, info_dict, headers) - if not success: - return False - break - except compat_urllib_error.HTTPError as err: - # Unavailable (possibly temporary) fragments may be served. - # First we try to retry then either skip or abort. - # See https://github.com/rg3/youtube-dl/issues/10165, - # https://github.com/rg3/youtube-dl/issues/10448). 
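# With skip_unavailable_fragments (on by default) a fragment that still fails after all retries is skipped and the stream continues with a gap; otherwise the whole download is aborted.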
- count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - if count > fragment_retries: - if skip_unavailable_fragments: - i += 1 - media_sequence += 1 - self.report_skip_fragment(frag_index) - continue - self.report_error( - 'giving up after %s fragment retries' % fragment_retries) - return False - if decrypt_info['METHOD'] == 'AES-128': - iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) - decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( - self._prepare_url(info_dict, decrypt_info['URI'])).read() - frag_content = AES.new( - decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) - self._append_fragment(ctx, frag_content) - # We only download the first fragment during the test - if test: - break - i += 1 - media_sequence += 1 - elif line.startswith('#EXT-X-KEY'): - decrypt_url = decrypt_info.get('URI') - decrypt_info = parse_m3u8_attributes(line[11:]) - if decrypt_info['METHOD'] == 'AES-128': - if 'IV' in decrypt_info: - decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) - if not re.match(r'^https?://', decrypt_info['URI']): - decrypt_info['URI'] = compat_urlparse.urljoin( - man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) - if decrypt_url != decrypt_info['URI']: - decrypt_info['KEY'] = None - elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): - media_sequence = int(line[22:]) - elif line.startswith('#EXT-X-BYTERANGE'): - splitted_byte_range = line[17:].split('@') - sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] - byte_range = { - 'start': sub_range_start, - 'end': sub_range_start + int(splitted_byte_range[0]), - } - elif is_ad_fragment(line): - ad_frag_next = True - - self._finish_frag_download(ctx) - - return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py deleted file mode 100644 index 5b1e960..0000000 --- a/youtube_dl/downloader/http.py +++ /dev/null @@ -1,354 +0,0 @@ -from __future__ import unicode_literals - -import errno -import os -import socket -import time -import random -import re - -from .common import FileDownloader -from ..compat import ( - compat_str, - compat_urllib_error, -) -from ..utils import ( - ContentTooShortError, - encodeFilename, - int_or_none, - sanitize_open, - sanitized_Request, - write_xattr, - XAttrMetadataError, - XAttrUnavailableError, -) - - -class HttpFD(FileDownloader): - def real_download(self, filename, info_dict): - url = info_dict['url'] - - class DownloadContext(dict): - __getattr__ = dict.get - __setattr__ = dict.__setitem__ - __delattr__ = dict.__delitem__ - - ctx = DownloadContext() - ctx.filename = filename - ctx.tmpfilename = self.temp_name(filename) - ctx.stream = None - - # Do not include the Accept-Encoding header - headers = {'Youtubedl-no-compression': 'True'} - add_headers = info_dict.get('http_headers') - if add_headers: - headers.update(add_headers) - - is_test = self.params.get('test', False) - chunk_size = self._TEST_FILE_SIZE if is_test else ( - info_dict.get('downloader_options', {}).get('http_chunk_size') or - self.params.get('http_chunk_size') or 0) - - ctx.open_mode = 'wb' - ctx.resume_len = 0 - ctx.data_len = None - ctx.block_size = self.params.get('buffersize', 1024) - ctx.start_time = time.time() - ctx.chunk_size = None - - if self.params.get('continuedl', True): - # Establish possible resume length - if 
os.path.isfile(encodeFilename(ctx.tmpfilename)): - ctx.resume_len = os.path.getsize( - encodeFilename(ctx.tmpfilename)) - - ctx.is_resume = ctx.resume_len > 0 - - count = 0 - retries = self.params.get('retries', 0) - - class SucceedDownload(Exception): - pass - - class RetryDownload(Exception): - def __init__(self, source_error): - self.source_error = source_error - - class NextFragment(Exception): - pass - - def set_range(req, start, end): - range_header = 'bytes=%d-' % start - if end: - range_header += compat_str(end) - req.add_header('Range', range_header) - - def establish_connection(): - ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) - if not is_test and chunk_size else chunk_size) - if ctx.resume_len > 0: - range_start = ctx.resume_len - if ctx.is_resume: - self.report_resuming_byte(ctx.resume_len) - ctx.open_mode = 'ab' - elif ctx.chunk_size > 0: - range_start = 0 - else: - range_start = None - ctx.is_resume = False - range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None - if range_end and ctx.data_len is not None and range_end >= ctx.data_len: - range_end = ctx.data_len - 1 - has_range = range_start is not None - ctx.has_range = has_range - request = sanitized_Request(url, None, headers) - if has_range: - set_range(request, range_start, range_end) - # Establish connection - try: - ctx.data = self.ydl.urlopen(request) - # When trying to resume, Content-Range HTTP header of response has to be checked - # to match the value of requested Range HTTP header. This is due to a webservers - # that don't support resuming and serve a whole file with no Content-Range - # set in response despite of requested Range (see - # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) - if has_range: - content_range = ctx.data.headers.get('Content-Range') - if content_range: - content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) - # Content-Range is present and matches requested Range, resume is possible - if content_range_m: - if range_start == int(content_range_m.group(1)): - content_range_end = int_or_none(content_range_m.group(2)) - content_len = int_or_none(content_range_m.group(3)) - accept_content_len = ( - # Non-chunked download - not ctx.chunk_size or - # Chunked download and requested piece or - # its part is promised to be served - content_range_end == range_end or - content_len < range_end) - if accept_content_len: - ctx.data_len = content_len - return - # Content-Range is either not present or invalid. Assuming remote webserver is - # trying to send the whole file, resume is not possible, so wiping the local file - # and performing entire redownload - self.report_unable_to_resume() - ctx.resume_len = 0 - ctx.open_mode = 'wb' - ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) - return - except (compat_urllib_error.HTTPError, ) as err: - if err.code == 416: - # Unable to resume (requested range not satisfiable) - try: - # Open the connection again without the range header - ctx.data = self.ydl.urlopen( - sanitized_Request(url, None, headers)) - content_length = ctx.data.info()['Content-Length'] - except (compat_urllib_error.HTTPError, ) as err: - if err.code < 500 or err.code >= 600: - raise - else: - # Examine the reported length - if (content_length is not None and - (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): - # The file had already been fully downloaded. 
- # Explanation to the above condition: in issue #175 it was revealed that - # YouTube sometimes adds or removes a few bytes from the end of the file, - # changing the file size slightly and causing problems for some users. So - # I decided to implement a suggested change and consider the file - # completely downloaded if the file size differs less than 100 bytes from - # the one in the hard drive. - self.report_file_already_downloaded(ctx.filename) - self.try_rename(ctx.tmpfilename, ctx.filename) - self._hook_progress({ - 'filename': ctx.filename, - 'status': 'finished', - 'downloaded_bytes': ctx.resume_len, - 'total_bytes': ctx.resume_len, - }) - raise SucceedDownload() - else: - # The length does not match, we start the download over - self.report_unable_to_resume() - ctx.resume_len = 0 - ctx.open_mode = 'wb' - return - elif err.code < 500 or err.code >= 600: - # Unexpected HTTP error - raise - raise RetryDownload(err) - except socket.error as err: - if err.errno != errno.ECONNRESET: - # Connection reset is no problem, just retry - raise - raise RetryDownload(err) - - def download(): - data_len = ctx.data.info().get('Content-length', None) - - # Range HTTP header may be ignored/unsupported by a webserver - # (e.g. extractor/scivee.py, extractor/bambuser.py). - # However, for a test we still would like to download just a piece of a file. - # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control - # block size when downloading a file. - if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): - data_len = self._TEST_FILE_SIZE - - if data_len is not None: - data_len = int(data_len) + ctx.resume_len - min_data_len = self.params.get('min_filesize') - max_data_len = self.params.get('max_filesize') - if min_data_len is not None and data_len < min_data_len: - self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) - return False - if max_data_len is not None and data_len > max_data_len: - self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' 
% (data_len, max_data_len)) - return False - - byte_counter = 0 + ctx.resume_len - block_size = ctx.block_size - start = time.time() - - # measure time over whole while-loop, so slow_down() and best_block_size() work together properly - now = None # needed for slow_down() in the first loop run - before = start # start measuring - - def retry(e): - to_stdout = ctx.tmpfilename == '-' - if not to_stdout: - ctx.stream.close() - ctx.stream = None - ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) - raise RetryDownload(e) - - while True: - try: - # Download and write - data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - # socket.timeout is a subclass of socket.error but may not have - # errno set - except socket.timeout as e: - retry(e) - except socket.error as e: - if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): - raise - retry(e) - - byte_counter += len(data_block) - - # exit loop when download is finished - if len(data_block) == 0: - break - - # Open destination file just in time - if ctx.stream is None: - try: - ctx.stream, ctx.tmpfilename = sanitize_open( - ctx.tmpfilename, ctx.open_mode) - assert ctx.stream is not None - ctx.filename = self.undo_temp_name(ctx.tmpfilename) - self.report_destination(ctx.filename) - except (OSError, IOError) as err: - self.report_error('unable to open for writing: %s' % str(err)) - return False - - if self.params.get('xattr_set_filesize', False) and data_len is not None: - try: - write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) - except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) - - try: - ctx.stream.write(data_block) - except (IOError, OSError) as err: - self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) - return False - - # Apply rate limit - self.slow_down(start, now, byte_counter - ctx.resume_len) - - # end measuring of one loop run - now = time.time() - after = now - - # Adjust block size - if not self.params.get('noresizebuffer', False): - block_size = self.best_block_size(after - before, len(data_block)) - - before = after - - # Progress message - speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) - if ctx.data_len is None: - eta = None - else: - eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) - - self._hook_progress({ - 'status': 'downloading', - 'downloaded_bytes': byte_counter, - 'total_bytes': ctx.data_len, - 'tmpfilename': ctx.tmpfilename, - 'filename': ctx.filename, - 'eta': eta, - 'speed': speed, - 'elapsed': now - ctx.start_time, - }) - - if is_test and byte_counter == data_len: - break - - if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: - ctx.resume_len = byte_counter - # ctx.block_size = block_size - raise NextFragment() - - if ctx.stream is None: - self.to_stderr('\n') - self.report_error('Did not get any data blocks') - return False - if ctx.tmpfilename != '-': - ctx.stream.close() - - if data_len is not None and byte_counter != data_len: - err = ContentTooShortError(byte_counter, int(data_len)) - if count <= retries: - retry(err) - raise err - - self.try_rename(ctx.tmpfilename, ctx.filename) - - # Update file modification time - if self.params.get('updatetime', True): - info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) - - 
self._hook_progress({ - 'downloaded_bytes': byte_counter, - 'total_bytes': byte_counter, - 'filename': ctx.filename, - 'status': 'finished', - 'elapsed': time.time() - ctx.start_time, - }) - - return True - - while count <= retries: - try: - establish_connection() - return download() - except RetryDownload as e: - count += 1 - if count <= retries: - self.report_retry(e.source_error, count, retries) - continue - except NextFragment: - continue - except SucceedDownload: - return True - - self.report_error('giving up after %s retries' % retries) - return False diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py deleted file mode 100644 index 063fcf4..0000000 --- a/youtube_dl/downloader/ism.py +++ /dev/null @@ -1,259 +0,0 @@ -from __future__ import unicode_literals - -import time -import binascii -import io - -from .fragment import FragmentFD -from ..compat import ( - compat_Struct, - compat_urllib_error, -) - - -u8 = compat_Struct('>B') -u88 = compat_Struct('>Bx') -u16 = compat_Struct('>H') -u1616 = compat_Struct('>Hxx') -u32 = compat_Struct('>I') -u64 = compat_Struct('>Q') - -s88 = compat_Struct('>bx') -s16 = compat_Struct('>h') -s1616 = compat_Struct('>hxx') -s32 = compat_Struct('>i') - -unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) - -TRACK_ENABLED = 0x1 -TRACK_IN_MOVIE = 0x2 -TRACK_IN_PREVIEW = 0x4 - -SELF_CONTAINED = 0x1 - - -def box(box_type, payload): - return u32.pack(8 + len(payload)) + box_type + payload - - -def full_box(box_type, version, flags, payload): - return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) - - -def write_piff_header(stream, params): - track_id = params['track_id'] - fourcc = params['fourcc'] - duration = params['duration'] - timescale = params.get('timescale', 10000000) - language = params.get('language', 'und') - height = params.get('height', 0) - width = params.get('width', 0) - is_audio = width == 0 and height == 0 - creation_time = modification_time = int(time.time()) - - ftyp_payload = b'isml' # major brand - ftyp_payload += u32.pack(1) # minor version - ftyp_payload += b'piff' + b'iso2' # compatible brands - stream.write(box(b'ftyp', ftyp_payload)) # File Type Box - - mvhd_payload = u64.pack(creation_time) - mvhd_payload += u64.pack(modification_time) - mvhd_payload += u32.pack(timescale) - mvhd_payload += u64.pack(duration) - mvhd_payload += s1616.pack(1) # rate - mvhd_payload += s88.pack(1) # volume - mvhd_payload += u16.pack(0) # reserved - mvhd_payload += u32.pack(0) * 2 # reserved - mvhd_payload += unity_matrix - mvhd_payload += u32.pack(0) * 6 # pre defined - mvhd_payload += u32.pack(0xffffffff) # next track id - moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box - - tkhd_payload = u64.pack(creation_time) - tkhd_payload += u64.pack(modification_time) - tkhd_payload += u32.pack(track_id) # track id - tkhd_payload += u32.pack(0) # reserved - tkhd_payload += u64.pack(duration) - tkhd_payload += u32.pack(0) * 2 # reserved - tkhd_payload += s16.pack(0) # layer - tkhd_payload += s16.pack(0) # alternate group - tkhd_payload += s88.pack(1 if is_audio else 0) # volume - tkhd_payload += u16.pack(0) # reserved - tkhd_payload += unity_matrix - tkhd_payload += u1616.pack(width) - tkhd_payload += u1616.pack(height) - trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box - - mdhd_payload = u64.pack(creation_time) - mdhd_payload += u64.pack(modification_time) - mdhd_payload += u32.pack(timescale) - 
-    mdhd_payload += u64.pack(duration)
-    mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
-    mdhd_payload += u16.pack(0)  # pre defined
-    mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
-
-    hdlr_payload = u32.pack(0)  # pre defined
-    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
-    hdlr_payload += u32.pack(0) * 3  # reserved
-    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
-    mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
-
-    if is_audio:
-        smhd_payload = s88.pack(0)  # balance
-        smhd_payload += u16.pack(0)  # reserved
-        media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
-    else:
-        vmhd_payload = u16.pack(0)  # graphics mode
-        vmhd_payload += u16.pack(0) * 3  # opcolor
-        media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
-    minf_payload = media_header_box
-
-    dref_payload = u32.pack(1)  # entry count
-    dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'')  # Data Entry URL Box
-    dinf_payload = full_box(b'dref', 0, 0, dref_payload)  # Data Reference Box
-    minf_payload += box(b'dinf', dinf_payload)  # Data Information Box
-
-    stsd_payload = u32.pack(1)  # entry count
-
-    sample_entry_payload = u8.pack(0) * 6  # reserved
-    sample_entry_payload += u16.pack(1)  # data reference index
-    if is_audio:
-        sample_entry_payload += u32.pack(0) * 2  # reserved
-        sample_entry_payload += u16.pack(params.get('channels', 2))
-        sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
-        sample_entry_payload += u16.pack(0)  # pre defined
-        sample_entry_payload += u16.pack(0)  # reserved
-        sample_entry_payload += u1616.pack(params['sampling_rate'])
-
-        if fourcc == 'AACL':
-            sample_entry_box = box(b'mp4a', sample_entry_payload)
-    else:
-        sample_entry_payload += u16.pack(0)  # pre defined
-        sample_entry_payload += u16.pack(0)  # reserved
-        sample_entry_payload += u32.pack(0) * 3  # pre defined
-        sample_entry_payload += u16.pack(width)
-        sample_entry_payload += u16.pack(height)
-        sample_entry_payload += u1616.pack(0x48)  # horiz resolution 72 dpi
-        sample_entry_payload += u1616.pack(0x48)  # vert resolution 72 dpi
-        sample_entry_payload += u32.pack(0)  # reserved
-        sample_entry_payload += u16.pack(1)  # frame count
-        sample_entry_payload += u8.pack(0) * 32  # compressor name
-        sample_entry_payload += u16.pack(0x18)  # depth
-        sample_entry_payload += s16.pack(-1)  # pre defined
-
-        codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
-        if fourcc in ('H264', 'AVC1'):
-            sps, pps = codec_private_data.split(u32.pack(1))[1:]
-            avcc_payload = u8.pack(1)  # configuration version
-            avcc_payload += sps[1:4]  # avc profile indication + profile compatibility + avc level indication
-            avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1))  # complete representation (1) + reserved (11111) + length size minus one
-            avcc_payload += u8.pack(1)  # reserved (0) + number of sps (0000001)
-            avcc_payload += u16.pack(len(sps))
-            avcc_payload += sps
-            avcc_payload += u8.pack(1)  # number of pps
-            avcc_payload += u16.pack(len(pps))
-            avcc_payload += pps
-            sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
-            sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
-    stsd_payload += sample_entry_box
-
-    stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
-
-    stts_payload = u32.pack(0)  # entry count
-    stbl_payload += full_box(b'stts', 0, 0, stts_payload)  # Decoding Time to Sample Box
-
-    stsc_payload = u32.pack(0)  # entry count
-    stbl_payload += full_box(b'stsc', 0, 0, stsc_payload)  # Sample To Chunk Box
-
-    stco_payload = u32.pack(0)  # entry count
-    stbl_payload += full_box(b'stco', 0, 0, stco_payload)  # Chunk Offset Box
-
-    minf_payload += box(b'stbl', stbl_payload)  # Sample Table Box
-
-    mdia_payload += box(b'minf', minf_payload)  # Media Information Box
-
-    trak_payload += box(b'mdia', mdia_payload)  # Media Box
-
-    moov_payload += box(b'trak', trak_payload)  # Track Box
-
-    mehd_payload = u64.pack(duration)
-    mvex_payload = full_box(b'mehd', 1, 0, mehd_payload)  # Movie Extends Header Box
-
-    trex_payload = u32.pack(track_id)  # track id
-    trex_payload += u32.pack(1)  # default sample description index
-    trex_payload += u32.pack(0)  # default sample duration
-    trex_payload += u32.pack(0)  # default sample size
-    trex_payload += u32.pack(0)  # default sample flags
-    mvex_payload += full_box(b'trex', 0, 0, trex_payload)  # Track Extends Box
-
-    moov_payload += box(b'mvex', mvex_payload)  # Movie Extends Box
-    stream.write(box(b'moov', moov_payload))  # Movie Box
-
-
-def extract_box_data(data, box_sequence):
-    data_reader = io.BytesIO(data)
-    while True:
-        box_size = u32.unpack(data_reader.read(4))[0]
-        box_type = data_reader.read(4)
-        if box_type == box_sequence[0]:
-            box_data = data_reader.read(box_size - 8)
-            if len(box_sequence) == 1:
-                return box_data
-            return extract_box_data(box_data, box_sequence[1:])
-        data_reader.seek(box_size - 8, 1)
-
-
-class IsmFD(FragmentFD):
-    """
-    Download segments in an ISM manifest
-    """
-
-    FD_NAME = 'ism'
-
-    def real_download(self, filename, info_dict):
-        segments = info_dict['fragments'][:1] if self.params.get(
-            'test', False) else info_dict['fragments']
-
-        ctx = {
-            'filename': filename,
-            'total_frags': len(segments),
-        }
-
-        self._prepare_and_start_frag_download(ctx)
-
-        fragment_retries = self.params.get('fragment_retries', 0)
-        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
-        track_written = False
-        frag_index = 0
-        for i, segment in enumerate(segments):
-            frag_index += 1
-            if frag_index <= ctx['fragment_index']:
-                continue
-            count = 0
-            while count <= fragment_retries:
-                try:
-                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
-                    if not success:
-                        return False
-                    if not track_written:
-                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
-                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
-                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
-                        track_written = True
-                    self._append_fragment(ctx, frag_content)
-                    break
-                except compat_urllib_error.HTTPError as err:
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
-            if count > fragment_retries:
-                if skip_unavailable_fragments:
-                    self.report_skip_fragment(frag_index)
-                    continue
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False
-
-        self._finish_frag_download(ctx)
-
-        return True
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
deleted file mode 100644
index fbb7f51..0000000
--- a/youtube_dl/downloader/rtmp.py
+++ /dev/null
@@ -1,214 +0,0 @@
-from __future__ import unicode_literals
-
-import os
-import re
-import subprocess
-import time
-
-from .common import FileDownloader
-from ..compat import compat_str
-from ..utils import (
-    check_executable,
encodeFilename, - encodeArgument, - get_exe_version, -) - - -def rtmpdump_version(): - return get_exe_version( - 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') - - -class RtmpFD(FileDownloader): - def real_download(self, filename, info_dict): - def run_rtmpdump(args): - start = time.time() - resume_percent = None - resume_downloaded_data_len = None - proc = subprocess.Popen(args, stderr=subprocess.PIPE) - cursor_in_new_line = True - proc_stderr_closed = False - try: - while not proc_stderr_closed: - # read line from stderr - line = '' - while True: - char = proc.stderr.read(1) - if not char: - proc_stderr_closed = True - break - if char in [b'\r', b'\n']: - break - line += char.decode('ascii', 'replace') - if not line: - # proc_stderr_closed is True - continue - mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) - if mobj: - downloaded_data_len = int(float(mobj.group(1)) * 1024) - percent = float(mobj.group(2)) - if not resume_percent: - resume_percent = percent - resume_downloaded_data_len = downloaded_data_len - time_now = time.time() - eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) - speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) - data_len = None - if percent > 0: - data_len = int(downloaded_data_len * 100 / percent) - self._hook_progress({ - 'status': 'downloading', - 'downloaded_bytes': downloaded_data_len, - 'total_bytes_estimate': data_len, - 'tmpfilename': tmpfilename, - 'filename': filename, - 'eta': eta, - 'elapsed': time_now - start, - 'speed': speed, - }) - cursor_in_new_line = False - else: - # no percent for live streams - mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) - if mobj: - downloaded_data_len = int(float(mobj.group(1)) * 1024) - time_now = time.time() - speed = self.calc_speed(start, time_now, downloaded_data_len) - self._hook_progress({ - 'downloaded_bytes': downloaded_data_len, - 'tmpfilename': tmpfilename, - 'filename': filename, - 'status': 'downloading', - 'elapsed': time_now - start, - 'speed': speed, - }) - cursor_in_new_line = False - elif self.params.get('verbose', False): - if not cursor_in_new_line: - self.to_screen('') - cursor_in_new_line = True - self.to_screen('[rtmpdump] ' + line) - finally: - proc.wait() - if not cursor_in_new_line: - self.to_screen('') - return proc.returncode - - url = info_dict['url'] - player_url = info_dict.get('player_url') - page_url = info_dict.get('page_url') - app = info_dict.get('app') - play_path = info_dict.get('play_path') - tc_url = info_dict.get('tc_url') - flash_version = info_dict.get('flash_version') - live = info_dict.get('rtmp_live', False) - conn = info_dict.get('rtmp_conn') - protocol = info_dict.get('rtmp_protocol') - real_time = info_dict.get('rtmp_real_time', False) - no_resume = info_dict.get('no_resume', False) - continue_dl = self.params.get('continuedl', True) - - self.report_destination(filename) - tmpfilename = self.temp_name(filename) - test = self.params.get('test', False) - - # Check for rtmpdump first - if not check_executable('rtmpdump', ['-h']): - self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') - return False - - # Download using rtmpdump. rtmpdump returns exit code 2 when - # the connection was interrupted and resuming appears to be - # possible. This is part of rtmpdump's normal usage, AFAIK. 
-        basic_args = [
-            'rtmpdump', '--verbose', '-r', url,
-            '-o', tmpfilename]
-        if player_url is not None:
-            basic_args += ['--swfVfy', player_url]
-        if page_url is not None:
-            basic_args += ['--pageUrl', page_url]
-        if app is not None:
-            basic_args += ['--app', app]
-        if play_path is not None:
-            basic_args += ['--playpath', play_path]
-        if tc_url is not None:
-            basic_args += ['--tcUrl', tc_url]
-        if test:
-            basic_args += ['--stop', '1']
-        if flash_version is not None:
-            basic_args += ['--flashVer', flash_version]
-        if live:
-            basic_args += ['--live']
-        if isinstance(conn, list):
-            for entry in conn:
-                basic_args += ['--conn', entry]
-        elif isinstance(conn, compat_str):
-            basic_args += ['--conn', conn]
-        if protocol is not None:
-            basic_args += ['--protocol', protocol]
-        if real_time:
-            basic_args += ['--realtime']
-
-        args = basic_args
-        if not no_resume and continue_dl and not live:
-            args += ['--resume']
-        if not live and continue_dl:
-            args += ['--skip', '1']
-
-        args = [encodeArgument(a) for a in args]
-
-        self._debug_cmd(args, exe='rtmpdump')
-
-        RD_SUCCESS = 0
-        RD_FAILED = 1
-        RD_INCOMPLETE = 2
-        RD_NO_CONNECT = 3
-
-        started = time.time()
-
-        try:
-            retval = run_rtmpdump(args)
-        except KeyboardInterrupt:
-            if not info_dict.get('is_live'):
-                raise
-            retval = RD_SUCCESS
-            self.to_screen('\n[rtmpdump] Interrupted by user')
-
-        if retval == RD_NO_CONNECT:
-            self.report_error('[rtmpdump] Could not connect to RTMP server.')
-            return False
-
-        while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
-            prevsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
-            time.sleep(5.0)  # This seems to be needed
-            args = basic_args + ['--resume']
-            if retval == RD_FAILED:
-                args += ['--skip', '1']
-            args = [encodeArgument(a) for a in args]
-            retval = run_rtmpdump(args)
-            cursize = os.path.getsize(encodeFilename(tmpfilename))
-            if prevsize == cursize and retval == RD_FAILED:
-                break
-            # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
-            if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
-                self.to_screen('[rtmpdump] Could not download the whole video. 
This can happen for some advertisements.') - retval = RD_SUCCESS - break - if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): - fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) - self.try_rename(tmpfilename, filename) - self._hook_progress({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, - 'filename': filename, - 'status': 'finished', - 'elapsed': time.time() - started, - }) - return True - else: - self.to_stderr('\n') - self.report_error('rtmpdump exited with code %d' % retval) - return False diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py deleted file mode 100644 index 939358b..0000000 --- a/youtube_dl/downloader/rtsp.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import unicode_literals - -import os -import subprocess - -from .common import FileDownloader -from ..utils import ( - check_executable, - encodeFilename, -) - - -class RtspFD(FileDownloader): - def real_download(self, filename, info_dict): - url = info_dict['url'] - self.report_destination(filename) - tmpfilename = self.temp_name(filename) - - if check_executable('mplayer', ['-h']): - args = [ - 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', - '-dumpstream', '-dumpfile', tmpfilename, url] - elif check_executable('mpv', ['-h']): - args = [ - 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] - else: - self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') - return False - - self._debug_cmd(args) - - retval = subprocess.call(args) - if retval == 0: - fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) - self.try_rename(tmpfilename, filename) - self._hook_progress({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, - 'filename': filename, - 'status': 'finished', - }) - return True - else: - self.to_stderr('\n') - self.report_error('%s exited with code %d' % (args[0], retval)) - return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py deleted file mode 100644 index d5a4418..0000000 --- a/youtube_dl/extractor/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import unicode_literals - -try: - from .lazy_extractors import * - from .lazy_extractors import _ALL_CLASSES - _LAZY_LOADER = True -except ImportError: - _LAZY_LOADER = False - from .extractors import * - - _ALL_CLASSES = [ - klass - for name, klass in globals().items() - if name.endswith('IE') and name != 'GenericIE' - ] - #_ALL_CLASSES.append(GenericIE) - - -def gen_extractor_classes(): - """ Return a list of supported extractors. - The order does matter; the first extractor matched is the one handling the URL. - """ - return _ALL_CLASSES - - -def gen_extractors(): - """ Return a list of an instance of every supported extractor. - The order does matter; the first extractor matched is the one handling the URL. - """ - return [klass() for klass in gen_extractor_classes()] - - -def list_extractors(age_limit): - """ - Return a list of extractors that are suitable for the given age, - sorted by extractor ID. 
- """ - - return sorted( - filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), - key=lambda ie: ie.IE_NAME.lower()) - - -def get_info_extractor(ie_name): - """Returns the info extractor class with the given ie_name""" - return globals()[ie_name + 'IE'] diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py deleted file mode 100644 index b83b51e..0000000 --- a/youtube_dl/extractor/adobepass.py +++ /dev/null @@ -1,1567 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import time -import xml.etree.ElementTree as etree - -from .common import InfoExtractor -from ..compat import ( - compat_kwargs, - compat_urlparse, -) -from ..utils import ( - unescapeHTML, - urlencode_postdata, - unified_timestamp, - ExtractorError, - NO_DEFAULT, -) - - -MSO_INFO = { - 'DTV': { - 'name': 'DIRECTV', - 'username_field': 'username', - 'password_field': 'password', - }, - 'ATTOTT': { - 'name': 'DIRECTV NOW', - 'username_field': 'email', - 'password_field': 'loginpassword', - }, - 'Rogers': { - 'name': 'Rogers', - 'username_field': 'UserName', - 'password_field': 'UserPassword', - }, - 'Comcast_SSO': { - 'name': 'Comcast XFINITY', - 'username_field': 'user', - 'password_field': 'passwd', - }, - 'TWC': { - 'name': 'Time Warner Cable | Spectrum', - 'username_field': 'Ecom_User_ID', - 'password_field': 'Ecom_Password', - }, - 'Brighthouse': { - 'name': 'Bright House Networks | Spectrum', - 'username_field': 'j_username', - 'password_field': 'j_password', - }, - 'Charter_Direct': { - 'name': 'Charter Spectrum', - 'username_field': 'IDToken1', - 'password_field': 'IDToken2', - }, - 'Verizon': { - 'name': 'Verizon FiOS', - 'username_field': 'IDToken1', - 'password_field': 'IDToken2', - }, - 'thr030': { - 'name': '3 Rivers Communications' - }, - 'com140': { - 'name': 'Access Montana' - }, - 'acecommunications': { - 'name': 'AcenTek' - }, - 'acm010': { - 'name': 'Acme Communications' - }, - 'ada020': { - 'name': 'Adams Cable Service' - }, - 'alb020': { - 'name': 'Albany Mutual Telephone' - }, - 'algona': { - 'name': 'Algona Municipal Utilities' - }, - 'allwest': { - 'name': 'All West Communications' - }, - 'all025': { - 'name': 'Allen\'s Communications' - }, - 'spl010': { - 'name': 'Alliance Communications' - }, - 'all070': { - 'name': 'ALLO Communications' - }, - 'alpine': { - 'name': 'Alpine Communications' - }, - 'hun015': { - 'name': 'American Broadband' - }, - 'nwc010': { - 'name': 'American Broadband Missouri' - }, - 'com130-02': { - 'name': 'American Community Networks' - }, - 'com130-01': { - 'name': 'American Warrior Networks' - }, - 'tom020': { - 'name': 'Amherst Telephone/Tomorrow Valley' - }, - 'tvc020': { - 'name': 'Andycable' - }, - 'arkwest': { - 'name': 'Arkwest Communications' - }, - 'art030': { - 'name': 'Arthur Mutual Telephone Company' - }, - 'arvig': { - 'name': 'Arvig' - }, - 'nttcash010': { - 'name': 'Ashland Home Net' - }, - 'astound': { - 'name': 'Astound (now Wave)' - }, - 'dix030': { - 'name': 'ATC Broadband' - }, - 'ara010': { - 'name': 'ATC Communications' - }, - 'she030-02': { - 'name': 'Ayersville Communications' - }, - 'baldwin': { - 'name': 'Baldwin Lightstream' - }, - 'bal040': { - 'name': 'Ballard TV' - }, - 'cit025': { - 'name': 'Bardstown Cable TV' - }, - 'bay030': { - 'name': 'Bay Country Communications' - }, - 'tel095': { - 'name': 'Beaver Creek Cooperative Telephone' - }, - 'bea020': { - 'name': 'Beaver Valley Cable' - }, - 'bee010': { - 'name': 'Bee Line Cable' - }, - 'wir030': { - 'name': 'Beehive Broadband' - }, 
- 'bra020': { - 'name': 'BELD' - }, - 'bel020': { - 'name': 'Bellevue Municipal Cable' - }, - 'vol040-01': { - 'name': 'Ben Lomand Connect / BLTV' - }, - 'bev010': { - 'name': 'BEVCOMM' - }, - 'big020': { - 'name': 'Big Sandy Broadband' - }, - 'ble020': { - 'name': 'Bledsoe Telephone Cooperative' - }, - 'bvt010': { - 'name': 'Blue Valley Tele-Communications' - }, - 'bra050': { - 'name': 'Brandenburg Telephone Co.' - }, - 'bte010': { - 'name': 'Bristol Tennessee Essential Services' - }, - 'annearundel': { - 'name': 'Broadstripe' - }, - 'btc010': { - 'name': 'BTC Communications' - }, - 'btc040': { - 'name': 'BTC Vision - Nahunta' - }, - 'bul010': { - 'name': 'Bulloch Telephone Cooperative' - }, - 'but010': { - 'name': 'Butler-Bremer Communications' - }, - 'tel160-csp': { - 'name': 'C Spire SNAP' - }, - 'csicable': { - 'name': 'Cable Services Inc.' - }, - 'cableamerica': { - 'name': 'CableAmerica' - }, - 'cab038': { - 'name': 'CableSouth Media 3' - }, - 'weh010-camtel': { - 'name': 'Cam-Tel Company' - }, - 'car030': { - 'name': 'Cameron Communications' - }, - 'canbytel': { - 'name': 'Canby Telcom' - }, - 'crt020': { - 'name': 'CapRock Tv' - }, - 'car050': { - 'name': 'Carnegie Cable' - }, - 'cas': { - 'name': 'CAS Cable' - }, - 'casscomm': { - 'name': 'CASSCOMM' - }, - 'mid180-02': { - 'name': 'Catalina Broadband Solutions' - }, - 'cccomm': { - 'name': 'CC Communications' - }, - 'nttccde010': { - 'name': 'CDE Lightband' - }, - 'cfunet': { - 'name': 'Cedar Falls Utilities' - }, - 'dem010-01': { - 'name': 'Celect-Bloomer Telephone Area' - }, - 'dem010-02': { - 'name': 'Celect-Bruce Telephone Area' - }, - 'dem010-03': { - 'name': 'Celect-Citizens Connected Area' - }, - 'dem010-04': { - 'name': 'Celect-Elmwood/Spring Valley Area' - }, - 'dem010-06': { - 'name': 'Celect-Mosaic Telecom' - }, - 'dem010-05': { - 'name': 'Celect-West WI Telephone Area' - }, - 'net010-02': { - 'name': 'Cellcom/Nsight Telservices' - }, - 'cen100': { - 'name': 'CentraCom' - }, - 'nttccst010': { - 'name': 'Central Scott / CSTV' - }, - 'cha035': { - 'name': 'Chaparral CableVision' - }, - 'cha050': { - 'name': 'Chariton Valley Communication Corporation, Inc.' - }, - 'cha060': { - 'name': 'Chatmoss Cablevision' - }, - 'nttcche010': { - 'name': 'Cherokee Communications' - }, - 'che050': { - 'name': 'Chesapeake Bay Communications' - }, - 'cimtel': { - 'name': 'Cim-Tel Cable, LLC.' - }, - 'cit180': { - 'name': 'Citizens Cablevision - Floyd, VA' - }, - 'cit210': { - 'name': 'Citizens Cablevision, Inc.' - }, - 'cit040': { - 'name': 'Citizens Fiber' - }, - 'cit250': { - 'name': 'Citizens Mutual' - }, - 'war040': { - 'name': 'Citizens Telephone Corporation' - }, - 'wat025': { - 'name': 'City Of Monroe' - }, - 'wadsworth': { - 'name': 'CityLink' - }, - 'nor100': { - 'name': 'CL Tel' - }, - 'cla010': { - 'name': 'Clarence Telephone and Cedar Communications' - }, - 'ser060': { - 'name': 'Clear Choice Communications' - }, - 'tac020': { - 'name': 'Click! Cable TV' - }, - 'war020': { - 'name': 'CLICK1.NET' - }, - 'cml010': { - 'name': 'CML Telephone Cooperative Association' - }, - 'cns': { - 'name': 'CNS' - }, - 'com160': { - 'name': 'Co-Mo Connect' - }, - 'coa020': { - 'name': 'Coast Communications' - }, - 'coa030': { - 'name': 'Coaxial Cable TV' - }, - 'mid055': { - 'name': 'Cobalt TV (Mid-State Community TV)' - }, - 'col070': { - 'name': 'Columbia Power & Water Systems' - }, - 'col080': { - 'name': 'Columbus Telephone' - }, - 'nor105': { - 'name': 'Communications 1 Cablevision, Inc.' 
- }, - 'com150': { - 'name': 'Community Cable & Broadband' - }, - 'com020': { - 'name': 'Community Communications Company' - }, - 'coy010': { - 'name': 'commZoom' - }, - 'com025': { - 'name': 'Complete Communication Services' - }, - 'cat020': { - 'name': 'Comporium' - }, - 'com071': { - 'name': 'ComSouth Telesys' - }, - 'consolidatedcable': { - 'name': 'Consolidated' - }, - 'conwaycorp': { - 'name': 'Conway Corporation' - }, - 'coo050': { - 'name': 'Coon Valley Telecommunications Inc' - }, - 'coo080': { - 'name': 'Cooperative Telephone Company' - }, - 'cpt010': { - 'name': 'CP-TEL' - }, - 'cra010': { - 'name': 'Craw-Kan Telephone' - }, - 'crestview': { - 'name': 'Crestview Cable Communications' - }, - 'cross': { - 'name': 'Cross TV' - }, - 'cro030': { - 'name': 'Crosslake Communications' - }, - 'ctc040': { - 'name': 'CTC - Brainerd MN' - }, - 'phe030': { - 'name': 'CTV-Beam - East Alabama' - }, - 'cun010': { - 'name': 'Cunningham Telephone & Cable' - }, - 'dpc010': { - 'name': 'D & P Communications' - }, - 'dak030': { - 'name': 'Dakota Central Telecommunications' - }, - 'nttcdel010': { - 'name': 'Delcambre Telephone LLC' - }, - 'tel160-del': { - 'name': 'Delta Telephone Company' - }, - 'sal040': { - 'name': 'DiamondNet' - }, - 'ind060-dc': { - 'name': 'Direct Communications' - }, - 'doy010': { - 'name': 'Doylestown Cable TV' - }, - 'dic010': { - 'name': 'DRN' - }, - 'dtc020': { - 'name': 'DTC' - }, - 'dtc010': { - 'name': 'DTC Cable (Delhi)' - }, - 'dum010': { - 'name': 'Dumont Telephone Company' - }, - 'dun010': { - 'name': 'Dunkerton Telephone Cooperative' - }, - 'cci010': { - 'name': 'Duo County Telecom' - }, - 'eagle': { - 'name': 'Eagle Communications' - }, - 'weh010-east': { - 'name': 'East Arkansas Cable TV' - }, - 'eatel': { - 'name': 'EATEL Video, LLC' - }, - 'ell010': { - 'name': 'ECTA' - }, - 'emerytelcom': { - 'name': 'Emery Telcom Video LLC' - }, - 'nor200': { - 'name': 'Empire Access' - }, - 'endeavor': { - 'name': 'Endeavor Communications' - }, - 'sun045': { - 'name': 'Enhanced Telecommunications Corporation' - }, - 'mid030': { - 'name': 'enTouch' - }, - 'epb020': { - 'name': 'EPB Smartnet' - }, - 'jea010': { - 'name': 'EPlus Broadband' - }, - 'com065': { - 'name': 'ETC' - }, - 'ete010': { - 'name': 'Etex Communications' - }, - 'fbc-tele': { - 'name': 'F&B Communications' - }, - 'fal010': { - 'name': 'Falcon Broadband' - }, - 'fam010': { - 'name': 'FamilyView CableVision' - }, - 'far020': { - 'name': 'Farmers Mutual Telephone Company' - }, - 'fay010': { - 'name': 'Fayetteville Public Utilities' - }, - 'sal060': { - 'name': 'fibrant' - }, - 'fid010': { - 'name': 'Fidelity Communications' - }, - 'for030': { - 'name': 'FJ Communications' - }, - 'fli020': { - 'name': 'Flint River Communications' - }, - 'far030': { - 'name': 'FMT - Jesup' - }, - 'foo010': { - 'name': 'Foothills Communications' - }, - 'for080': { - 'name': 'Forsyth CableNet' - }, - 'fbcomm': { - 'name': 'Frankfort Plant Board' - }, - 'tel160-fra': { - 'name': 'Franklin Telephone Company' - }, - 'nttcftc010': { - 'name': 'FTC' - }, - 'fullchannel': { - 'name': 'Full Channel, Inc.' - }, - 'gar040': { - 'name': 'Gardonville Cooperative Telephone Association' - }, - 'gbt010': { - 'name': 'GBT Communications, Inc.' - }, - 'tec010': { - 'name': 'Genuine Telecom' - }, - 'clr010': { - 'name': 'Giant Communications' - }, - 'gla010': { - 'name': 'Glasgow EPB' - }, - 'gle010': { - 'name': 'Glenwood Telecommunications' - }, - 'gra060': { - 'name': 'GLW Broadband Inc.' 
- }, - 'goldenwest': { - 'name': 'Golden West Cablevision' - }, - 'vis030': { - 'name': 'Grantsburg Telcom' - }, - 'gpcom': { - 'name': 'Great Plains Communications' - }, - 'gri010': { - 'name': 'Gridley Cable Inc' - }, - 'hbc010': { - 'name': 'H&B Cable Services' - }, - 'hae010': { - 'name': 'Haefele TV Inc.' - }, - 'htc010': { - 'name': 'Halstad Telephone Company' - }, - 'har005': { - 'name': 'Harlan Municipal Utilities' - }, - 'har020': { - 'name': 'Hart Communications' - }, - 'ced010': { - 'name': 'Hartelco TV' - }, - 'hea040': { - 'name': 'Heart of Iowa Communications Cooperative' - }, - 'htc020': { - 'name': 'Hickory Telephone Company' - }, - 'nttchig010': { - 'name': 'Highland Communication Services' - }, - 'hig030': { - 'name': 'Highland Media' - }, - 'spc010': { - 'name': 'Hilliary Communications' - }, - 'hin020': { - 'name': 'Hinton CATV Co.' - }, - 'hometel': { - 'name': 'HomeTel Entertainment, Inc.' - }, - 'hoodcanal': { - 'name': 'Hood Canal Communications' - }, - 'weh010-hope': { - 'name': 'Hope - Prescott Cable TV' - }, - 'horizoncable': { - 'name': 'Horizon Cable TV, Inc.' - }, - 'hor040': { - 'name': 'Horizon Chillicothe Telephone' - }, - 'htc030': { - 'name': 'HTC Communications Co. - IL' - }, - 'htccomm': { - 'name': 'HTC Communications, Inc. - IA' - }, - 'wal005': { - 'name': 'Huxley Communications' - }, - 'imon': { - 'name': 'ImOn Communications' - }, - 'ind040': { - 'name': 'Independence Telecommunications' - }, - 'rrc010': { - 'name': 'Inland Networks' - }, - 'stc020': { - 'name': 'Innovative Cable TV St Croix' - }, - 'car100': { - 'name': 'Innovative Cable TV St Thomas-St John' - }, - 'icc010': { - 'name': 'Inside Connect Cable' - }, - 'int100': { - 'name': 'Integra Telecom' - }, - 'int050': { - 'name': 'Interstate Telecommunications Coop' - }, - 'irv010': { - 'name': 'Irvine Cable' - }, - 'k2c010': { - 'name': 'K2 Communications' - }, - 'kal010': { - 'name': 'Kalida Telephone Company, Inc.' - }, - 'kal030': { - 'name': 'Kalona Cooperative Telephone Company' - }, - 'kmt010': { - 'name': 'KMTelecom' - }, - 'kpu010': { - 'name': 'KPU Telecommunications' - }, - 'kuh010': { - 'name': 'Kuhn Communications, Inc.' - }, - 'lak130': { - 'name': 'Lakeland Communications' - }, - 'lan010': { - 'name': 'Langco' - }, - 'lau020': { - 'name': 'Laurel Highland Total Communications, Inc.' - }, - 'leh010': { - 'name': 'Lehigh Valley Cooperative Telephone' - }, - 'bra010': { - 'name': 'Limestone Cable/Bracken Cable' - }, - 'loc020': { - 'name': 'LISCO' - }, - 'lit020': { - 'name': 'Litestream' - }, - 'tel140': { - 'name': 'LivCom' - }, - 'loc010': { - 'name': 'LocalTel Communications' - }, - 'weh010-longview': { - 'name': 'Longview - Kilgore Cable TV' - }, - 'lon030': { - 'name': 'Lonsdale Video Ventures, LLC' - }, - 'lns010': { - 'name': 'Lost Nation-Elwood Telephone Co.' - }, - 'nttclpc010': { - 'name': 'LPC Connect' - }, - 'lumos': { - 'name': 'Lumos Networks' - }, - 'madison': { - 'name': 'Madison Communications' - }, - 'mad030': { - 'name': 'Madison County Cable Inc.' - }, - 'nttcmah010': { - 'name': 'Mahaska Communication Group' - }, - 'mar010': { - 'name': 'Marne & Elk Horn Telephone Company' - }, - 'mcc040': { - 'name': 'McClure Telephone Co.' - }, - 'mctv': { - 'name': 'MCTV' - }, - 'merrimac': { - 'name': 'Merrimac Communications Ltd.' 
- }, - 'metronet': { - 'name': 'Metronet' - }, - 'mhtc': { - 'name': 'MHTC' - }, - 'midhudson': { - 'name': 'Mid-Hudson Cable' - }, - 'midrivers': { - 'name': 'Mid-Rivers Communications' - }, - 'mid045': { - 'name': 'Midstate Communications' - }, - 'mil080': { - 'name': 'Milford Communications' - }, - 'min030': { - 'name': 'MINET' - }, - 'nttcmin010': { - 'name': 'Minford TV' - }, - 'san040-02': { - 'name': 'Mitchell Telecom' - }, - 'mlg010': { - 'name': 'MLGC' - }, - 'mon060': { - 'name': 'Mon-Cre TVE' - }, - 'mou110': { - 'name': 'Mountain Telephone' - }, - 'mou050': { - 'name': 'Mountain Village Cable' - }, - 'mtacomm': { - 'name': 'MTA Communications, LLC' - }, - 'mtc010': { - 'name': 'MTC Cable' - }, - 'med040': { - 'name': 'MTC Technologies' - }, - 'man060': { - 'name': 'MTCC' - }, - 'mtc030': { - 'name': 'MTCO Communications' - }, - 'mul050': { - 'name': 'Mulberry Telecommunications' - }, - 'mur010': { - 'name': 'Murray Electric System' - }, - 'musfiber': { - 'name': 'MUS FiberNET' - }, - 'mpw': { - 'name': 'Muscatine Power & Water' - }, - 'nttcsli010': { - 'name': 'myEVTV.com' - }, - 'nor115': { - 'name': 'NCC' - }, - 'nor260': { - 'name': 'NDTC' - }, - 'nctc': { - 'name': 'Nebraska Central Telecom, Inc.' - }, - 'nel020': { - 'name': 'Nelsonville TV Cable' - }, - 'nem010': { - 'name': 'Nemont' - }, - 'new075': { - 'name': 'New Hope Telephone Cooperative' - }, - 'nor240': { - 'name': 'NICP' - }, - 'cic010': { - 'name': 'NineStar Connect' - }, - 'nktelco': { - 'name': 'NKTelco' - }, - 'nortex': { - 'name': 'Nortex Communications' - }, - 'nor140': { - 'name': 'North Central Telephone Cooperative' - }, - 'nor030': { - 'name': 'Northland Communications' - }, - 'nor075': { - 'name': 'Northwest Communications' - }, - 'nor125': { - 'name': 'Norwood Light Broadband' - }, - 'net010': { - 'name': 'Nsight Telservices' - }, - 'dur010': { - 'name': 'Ntec' - }, - 'nts010': { - 'name': 'NTS Communications' - }, - 'new045': { - 'name': 'NU-Telecom' - }, - 'nulink': { - 'name': 'NuLink' - }, - 'jam030': { - 'name': 'NVC' - }, - 'far035': { - 'name': 'OmniTel Communications' - }, - 'onesource': { - 'name': 'OneSource Communications' - }, - 'cit230': { - 'name': 'Opelika Power Services' - }, - 'daltonutilities': { - 'name': 'OptiLink' - }, - 'mid140': { - 'name': 'OPTURA' - }, - 'ote010': { - 'name': 'OTEC Communication Company' - }, - 'cci020': { - 'name': 'Packerland Broadband' - }, - 'pan010': { - 'name': 'Panora Telco/Guthrie Center Communications' - }, - 'otter': { - 'name': 'Park Region Telephone & Otter Tail Telcom' - }, - 'mid050': { - 'name': 'Partner Communications Cooperative' - }, - 'fib010': { - 'name': 'Pathway' - }, - 'paulbunyan': { - 'name': 'Paul Bunyan Communications' - }, - 'pem020': { - 'name': 'Pembroke Telephone Company' - }, - 'mck010': { - 'name': 'Peoples Rural Telephone Cooperative' - }, - 'pul010': { - 'name': 'PES Energize' - }, - 'phi010': { - 'name': 'Philippi Communications System' - }, - 'phonoscope': { - 'name': 'Phonoscope Cable' - }, - 'pin070': { - 'name': 'Pine Belt Communications, Inc.' - }, - 'weh010-pine': { - 'name': 'Pine Bluff Cable TV' - }, - 'pin060': { - 'name': 'Pineland Telephone Cooperative' - }, - 'cam010': { - 'name': 'Pinpoint Communications' - }, - 'pio060': { - 'name': 'Pioneer Broadband' - }, - 'pioncomm': { - 'name': 'Pioneer Communications' - }, - 'pioneer': { - 'name': 'Pioneer DTV' - }, - 'pla020': { - 'name': 'Plant TiftNet, Inc.' 
- }, - 'par010': { - 'name': 'PLWC' - }, - 'pro035': { - 'name': 'PMT' - }, - 'vik011': { - 'name': 'Polar Cablevision' - }, - 'pottawatomie': { - 'name': 'Pottawatomie Telephone Co.' - }, - 'premiercomm': { - 'name': 'Premier Communications' - }, - 'psc010': { - 'name': 'PSC' - }, - 'pan020': { - 'name': 'PTCI' - }, - 'qco010': { - 'name': 'QCOL' - }, - 'qua010': { - 'name': 'Quality Cablevision' - }, - 'rad010': { - 'name': 'Radcliffe Telephone Company' - }, - 'car040': { - 'name': 'Rainbow Communications' - }, - 'rai030': { - 'name': 'Rainier Connect' - }, - 'ral010': { - 'name': 'Ralls Technologies' - }, - 'rct010': { - 'name': 'RC Technologies' - }, - 'red040': { - 'name': 'Red River Communications' - }, - 'ree010': { - 'name': 'Reedsburg Utility Commission' - }, - 'mol010': { - 'name': 'Reliance Connects- Oregon' - }, - 'res020': { - 'name': 'Reserve Telecommunications' - }, - 'weh010-resort': { - 'name': 'Resort TV Cable' - }, - 'rld010': { - 'name': 'Richland Grant Telephone Cooperative, Inc.' - }, - 'riv030': { - 'name': 'River Valley Telecommunications Coop' - }, - 'rockportcable': { - 'name': 'Rock Port Cablevision' - }, - 'rsf010': { - 'name': 'RS Fiber' - }, - 'rtc': { - 'name': 'RTC Communication Corp' - }, - 'res040': { - 'name': 'RTC-Reservation Telephone Coop.' - }, - 'rte010': { - 'name': 'RTEC Communications' - }, - 'stc010': { - 'name': 'S&T' - }, - 'san020': { - 'name': 'San Bruno Cable TV' - }, - 'san040-01': { - 'name': 'Santel' - }, - 'sav010': { - 'name': 'SCI Broadband-Savage Communications Inc.' - }, - 'sco050': { - 'name': 'Scottsboro Electric Power Board' - }, - 'scr010': { - 'name': 'Scranton Telephone Company' - }, - 'selco': { - 'name': 'SELCO' - }, - 'she010': { - 'name': 'Shentel' - }, - 'she030': { - 'name': 'Sherwood Mutual Telephone Association, Inc.' - }, - 'ind060-ssc': { - 'name': 'Silver Star Communications' - }, - 'sjoberg': { - 'name': 'Sjoberg\'s Inc.' - }, - 'sou025': { - 'name': 'SKT' - }, - 'sky050': { - 'name': 'SkyBest TV' - }, - 'nttcsmi010': { - 'name': 'Smithville Communications' - }, - 'woo010': { - 'name': 'Solarus' - }, - 'sou075': { - 'name': 'South Central Rural Telephone Cooperative' - }, - 'sou065': { - 'name': 'South Holt Cablevision, Inc.' - }, - 'sou035': { - 'name': 'South Slope Cooperative Communications' - }, - 'spa020': { - 'name': 'Spanish Fork Community Network' - }, - 'spe010': { - 'name': 'Spencer Municipal Utilities' - }, - 'spi005': { - 'name': 'Spillway Communications, Inc.' - }, - 'srt010': { - 'name': 'SRT' - }, - 'cccsmc010': { - 'name': 'St. Maarten Cable TV' - }, - 'sta025': { - 'name': 'Star Communications' - }, - 'sco020': { - 'name': 'STE' - }, - 'uin010': { - 'name': 'STRATA Networks' - }, - 'sum010': { - 'name': 'Sumner Cable TV' - }, - 'pie010': { - 'name': 'Surry TV/PCSI TV' - }, - 'swa010': { - 'name': 'Swayzee Communications' - }, - 'sweetwater': { - 'name': 'Sweetwater Cable Television Co' - }, - 'weh010-talequah': { - 'name': 'Tahlequah Cable TV' - }, - 'tct': { - 'name': 'TCT' - }, - 'tel050': { - 'name': 'Tele-Media Company' - }, - 'com050': { - 'name': 'The Community Agency' - }, - 'thr020': { - 'name': 'Three River' - }, - 'cab140': { - 'name': 'Town & Country Technologies' - }, - 'tra010': { - 'name': 'Trans-Video' - }, - 'tre010': { - 'name': 'Trenton TV Cable Company' - }, - 'tcc': { - 'name': 'Tri County Communications Cooperative' - }, - 'tri025': { - 'name': 'TriCounty Telecom' - }, - 'tri110': { - 'name': 'TrioTel Communications, Inc.' - }, - 'tro010': { - 'name': 'Troy Cablevision, Inc.' 
- }, - 'tsc': { - 'name': 'TSC' - }, - 'cit220': { - 'name': 'Tullahoma Utilities Board' - }, - 'tvc030': { - 'name': 'TV Cable of Rensselaer' - }, - 'tvc015': { - 'name': 'TVC Cable' - }, - 'cab180': { - 'name': 'TVision' - }, - 'twi040': { - 'name': 'Twin Lakes' - }, - 'tvtinc': { - 'name': 'Twin Valley' - }, - 'uis010': { - 'name': 'Union Telephone Company' - }, - 'uni110': { - 'name': 'United Communications - TN' - }, - 'uni120': { - 'name': 'United Services' - }, - 'uss020': { - 'name': 'US Sonet' - }, - 'cab060': { - 'name': 'USA Communications' - }, - 'she005': { - 'name': 'USA Communications/Shellsburg, IA' - }, - 'val040': { - 'name': 'Valley TeleCom Group' - }, - 'val025': { - 'name': 'Valley Telecommunications' - }, - 'val030': { - 'name': 'Valparaiso Broadband' - }, - 'cla050': { - 'name': 'Vast Broadband' - }, - 'sul015': { - 'name': 'Venture Communications Cooperative, Inc.' - }, - 'ver025': { - 'name': 'Vernon Communications Co-op' - }, - 'weh010-vicksburg': { - 'name': 'Vicksburg Video' - }, - 'vis070': { - 'name': 'Vision Communications' - }, - 'volcanotel': { - 'name': 'Volcano Vision, Inc.' - }, - 'vol040-02': { - 'name': 'VolFirst / BLTV' - }, - 'ver070': { - 'name': 'VTel' - }, - 'nttcvtx010': { - 'name': 'VTX1' - }, - 'bci010-02': { - 'name': 'Vyve Broadband' - }, - 'wab020': { - 'name': 'Wabash Mutual Telephone' - }, - 'waitsfield': { - 'name': 'Waitsfield Cable' - }, - 'wal010': { - 'name': 'Walnut Communications' - }, - 'wavebroadband': { - 'name': 'Wave' - }, - 'wav030': { - 'name': 'Waverly Communications Utility' - }, - 'wbi010': { - 'name': 'WBI' - }, - 'web020': { - 'name': 'Webster-Calhoun Cooperative Telephone Association' - }, - 'wes005': { - 'name': 'West Alabama TV Cable' - }, - 'carolinata': { - 'name': 'West Carolina Communications' - }, - 'wct010': { - 'name': 'West Central Telephone Association' - }, - 'wes110': { - 'name': 'West River Cooperative Telephone Company' - }, - 'ani030': { - 'name': 'WesTel Systems' - }, - 'westianet': { - 'name': 'Western Iowa Networks' - }, - 'nttcwhi010': { - 'name': 'Whidbey Telecom' - }, - 'weh010-white': { - 'name': 'White County Cable TV' - }, - 'wes130': { - 'name': 'Wiatel' - }, - 'wik010': { - 'name': 'Wiktel' - }, - 'wil070': { - 'name': 'Wilkes Communications, Inc./RiverStreet Networks' - }, - 'wil015': { - 'name': 'Wilson Communications' - }, - 'win010': { - 'name': 'Windomnet/SMBS' - }, - 'win090': { - 'name': 'Windstream Cable TV' - }, - 'wcta': { - 'name': 'Winnebago Cooperative Telecom Association' - }, - 'wtc010': { - 'name': 'WTC' - }, - 'wil040': { - 'name': 'WTC Communications, Inc.' 
- }, - 'wya010': { - 'name': 'Wyandotte Cable' - }, - 'hin020-02': { - 'name': 'X-Stream Services' - }, - 'xit010': { - 'name': 'XIT Communications' - }, - 'yel010': { - 'name': 'Yelcot Communications' - }, - 'mid180-01': { - 'name': 'yondoo' - }, - 'cou060': { - 'name': 'Zito Media' - }, -} - - -class AdobePassIE(InfoExtractor): - _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' - _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' - _MVPD_CACHE = 'ap-mvpd' - - _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' - - def _download_webpage_handle(self, *args, **kwargs): - headers = kwargs.get('headers', {}) - headers.update(self.geo_verification_headers()) - kwargs['headers'] = headers - return super(AdobePassIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) - - @staticmethod - def _get_mvpd_resource(provider_id, title, guid, rating): - channel = etree.Element('channel') - channel_title = etree.SubElement(channel, 'title') - channel_title.text = provider_id - item = etree.SubElement(channel, 'item') - resource_title = etree.SubElement(item, 'title') - resource_title.text = title - resource_guid = etree.SubElement(item, 'guid') - resource_guid.text = guid - resource_rating = etree.SubElement(item, 'media:rating') - resource_rating.attrib = {'scheme': 'urn:v-chip'} - resource_rating.text = rating - return '' + etree.tostring(channel).decode() + '' - - def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): - def xml_text(xml_str, tag): - return self._search_regex( - '<%s>(.+?)' % (tag, tag), xml_str, tag) - - def is_expired(token, date_ele): - token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) - return token_expires and token_expires <= int(time.time()) - - def post_form(form_page_res, note, data={}): - form_page, urlh = form_page_res - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) - form_data = self._hidden_inputs(form_page) - form_data.update(data) - return self._download_webpage_handle( - post_url, video_id, note, data=urlencode_postdata(form_data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - - def raise_mvpd_required(): - raise ExtractorError( - 'This video is only available for users of participating TV providers. ' - 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' - 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) - - def extract_redirect_url(html, url=None, fatal=False): - # TODO: eliminate code duplication with generic extractor and move - # redirection code into _download_webpage_handle - REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' - redirect_url = self._search_regex( - r'(?i)Resume' in mvpd_confirm_page: - post_form(mvpd_confirm_page_res, 'Confirming Login') - elif mso_id == 'Verizon': - # In general, if you're connecting from a Verizon-assigned IP, - # you will not actually pass your credentials. - provider_redirect_page, urlh = provider_redirect_page_res - if 'Please wait ...' 
in provider_redirect_page: - saml_redirect_url = self._html_search_regex( - r'self\.parent\.location=(["\'])(?P.+?)\1', - provider_redirect_page, - 'SAML Redirect URL', group='url') - saml_login_page = self._download_webpage( - saml_redirect_url, video_id, - 'Downloading SAML Login Page') - else: - saml_login_page_res = post_form( - provider_redirect_page_res, 'Logging in', { - mso_info['username_field']: username, - mso_info['password_field']: password, - }) - saml_login_page, urlh = saml_login_page_res - if 'Please try again.' in saml_login_page: - raise ExtractorError( - 'We\'re sorry, but either the User ID or Password entered is not correct.') - saml_login_url = self._search_regex( - r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P.+?)\1', - saml_login_page, 'SAML Login URL', group='url') - saml_response_json = self._download_json( - saml_login_url, video_id, 'Downloading SAML Response', - headers={'Content-Type': 'text/xml'}) - self._download_webpage( - saml_response_json['targetValue'], video_id, - 'Confirming Login', data=urlencode_postdata({ - 'SAMLResponse': saml_response_json['SAMLResponse'], - 'RelayState': saml_response_json['RelayState'] - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded' - }) - else: - # Some providers (e.g. DIRECTV NOW) have another meta refresh - # based redirect that should be followed. - provider_redirect_page, urlh = provider_redirect_page_res - provider_refresh_redirect_url = extract_redirect_url( - provider_redirect_page, url=urlh.geturl()) - if provider_refresh_redirect_url: - provider_redirect_page_res = self._download_webpage_handle( - provider_refresh_redirect_url, video_id, - 'Downloading Provider Redirect Page (meta refresh)') - provider_login_page_res = post_form( - provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE) - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { - mso_info.get('username_field', 'username'): username, - mso_info.get('password_field', 'password'): password, - }) - if mso_id != 'Rogers': - post_form(mvpd_confirm_page_res, 'Confirming Login') - - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', - 'requestor_id': requestor_id, - }), headers=mvpd_headers) - if ']+charset=[\'"]?([^\'")]+)[ /\'">]', - webpage_bytes[:1024]) - if m: - encoding = m.group(1).decode('ascii') - elif webpage_bytes.startswith(b'\xff\xfe'): - encoding = 'utf-16' - else: - encoding = 'utf-8' - - return encoding - - def __check_blocked(self, content): - first_block = content[:512] - if ('Access to this site is blocked' in content and - 'Websense' in first_block): - msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' - blocked_iframe = self._html_search_regex( - r'