diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-10-17 19:58:13 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-10-17 19:58:13 -0700 |
commit | 4c07546e7a5e5882abdda896009b744e947df1c4 (patch) | |
tree | 25870ecb94999df109895840810609e1d2167d96 /youtube/watch.py | |
parent | 9abb83fdbc05294f186daeefff8c85cfda06b7d2 (diff) | |
download | yt-local-4c07546e7a5e5882abdda896009b744e947df1c4.tar.lz yt-local-4c07546e7a5e5882abdda896009b744e947df1c4.tar.xz yt-local-4c07546e7a5e5882abdda896009b744e947df1c4.zip |
Extraction: Replace youtube-dl with custom-built watch page extraction
Diffstat (limited to 'youtube/watch.py')
-rw-r--r-- | youtube/watch.py | 154 |
1 files changed, 79 insertions, 75 deletions
diff --git a/youtube/watch.py b/youtube/watch.py index 41c90e4..a5e0759 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -5,49 +5,15 @@ import settings from flask import request import flask -from youtube_dl.YoutubeDL import YoutubeDL -from youtube_dl.extractor.youtube import YoutubeError import json import html import gevent import os +import math +import traceback + -def get_related_items(info): - results = [] - for item in info['related_vids']: - if 'list' in item: # playlist: - result = watch_page_related_playlist_info(item) - else: - result = watch_page_related_video_info(item) - yt_data_extract.prefix_urls(result) - yt_data_extract.add_extra_html_info(result) - results.append(result) - return results - - -# json of related items retrieved directly from the watch page has different names for everything -# converts these to standard names -def watch_page_related_video_info(item): - result = {key: item[key] for key in ('id', 'title', 'author')} - result['duration'] = util.seconds_to_timestamp(item['length_seconds']) - try: - result['views'] = item['short_view_count_text'] - except KeyError: - result['views'] = '' - result['thumbnail'] = util.get_thumbnail_url(item['id']) - result['type'] = 'video' - return result - -def watch_page_related_playlist_info(item): - return { - 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+", - 'title': item['playlist_title'], - 'id': item['list'], - 'first_video_id': item['video_id'], - 'thumbnail': util.get_thumbnail_url(item['video_id']), - 'type': 'playlist', - } def get_video_sources(info): video_sources = [] @@ -55,9 +21,10 @@ def get_video_sources(info): max_resolution = 360 else: max_resolution = settings.default_resolution - for format in info['formats']: - if format['acodec'] != 'none' and format['vcodec'] != 'none' and format['height'] <= max_resolution: + if not all(attr in format for attr in ('height', 'width', 'ext', 'url')): + continue + if 'acodec' in format and 'vcodec' in format and format['height'] <= max_resolution: video_sources.append({ 'src': format['url'], 'type': 'video/' + format['ext'], @@ -134,14 +101,57 @@ def get_ordered_music_list_attributes(music_list): return ordered_attributes +headers = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '2'), + ('X-YouTube-Client-Version', '2.20180830'), +) + util.mobile_ua -def extract_info(downloader, *args, **kwargs): +def extract_info(video_id): + polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1', headers=headers, debug_name='watch') try: - return downloader.extract_info(*args, **kwargs) - except YoutubeError as e: - return str(e) - - + polymer_json = json.loads(polymer_json) + except json.decoder.JSONDecodeError: + traceback.print_exc() + return {'error': 'Failed to parse json response'} + return yt_data_extract.extract_watch_info(polymer_json) + +def video_quality_string(format): + if 'vcodec' in format: + result =str(format.get('width', '?')) + 'x' + str(format.get('height', '?')) + if 'fps' in format: + result += ' ' + format['fps'] + 'fps' + return result + elif 'acodec' in format: + return 'audio only' + + return '?' + +def audio_quality_string(format): + if 'acodec' in format: + result = str(format.get('abr', '?')) + 'k' + if 'audio_sample_rate' in format: + result += ' ' + str(format['audio_sample_rate']) + ' Hz' + return result + elif 'vcodec' in format: + return 'video only' + + return '?' + +# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py +def format_bytes(bytes): + if bytes is None: + return 'N/A' + if type(bytes) is str: + bytes = float(bytes) + if bytes == 0.0: + exponent = 0 + else: + exponent = int(math.log(bytes, 1024.0)) + suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent] + converted = float(bytes) / float(1024 ** exponent) + return '%.2f%s' % (converted, suffix) @yt_app.route('/watch') @@ -152,38 +162,26 @@ def get_watch_page(): flask.abort(flask.Response('Incomplete video id (too short): ' + video_id)) lc = request.args.get('lc', '') - if settings.route_tor: - proxy = 'socks5://127.0.0.1:9150/' - else: - proxy = '' - yt_dl_downloader = YoutubeDL(params={'youtube_include_dash_manifest':False, 'proxy':proxy}) tasks = ( gevent.spawn(comments.video_comments, video_id, int(settings.default_comment_sorting), lc=lc ), - gevent.spawn(extract_info, yt_dl_downloader, "https://www.youtube.com/watch?v=" + video_id, download=False) + gevent.spawn(extract_info, video_id) ) gevent.joinall(tasks) comments_info, info = tasks[0].value, tasks[1].value - if isinstance(info, str): # youtube error - return flask.render_template('error.html', error_message = info) + if info['error']: + return flask.render_template('error.html', error_message = info['error']) video_info = { - "duration": util.seconds_to_timestamp(info["duration"]), + "duration": util.seconds_to_timestamp(info["duration"] or 0), "id": info['id'], "title": info['title'], - "author": info['uploader'], + "author": info['author'], } - upload_year = info["upload_date"][0:4] - upload_month = info["upload_date"][4:6] - upload_day = info["upload_date"][6:8] - upload_date = upload_month + "/" + upload_day + "/" + upload_year - - if settings.related_videos_mode: - related_videos = get_related_items(info) - else: - related_videos = [] - + for item in info['related_videos']: + yt_data_extract.prefix_urls(item) + yt_data_extract.add_extra_html_info(item) if settings.gather_googlevideo_domains: with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: @@ -195,23 +193,29 @@ def get_watch_page(): download_formats = [] for format in info['formats']: + if 'acodec' in format and 'vcodec' in format: + codecs_string = format['acodec'] + ', ' + format['vcodec'] + else: + codecs_string = format.get('acodec') or format.get('vcodec') or '?' download_formats.append({ 'url': format['url'], - 'ext': format['ext'], - 'resolution': yt_dl_downloader.format_resolution(format), - 'note': yt_dl_downloader._format_note(format), + 'ext': format.get('ext', '?'), + 'audio_quality': audio_quality_string(format), + 'video_quality': video_quality_string(format), + 'file_size': format_bytes(format['file_size']), + 'codecs': codecs_string, }) video_sources = get_video_sources(info) - video_height = video_sources[0]['height'] - + video_height = yt_data_extract.default_multi_get(video_sources, 0, 'height', default=360) + video_width = yt_data_extract.default_multi_get(video_sources, 0, 'width', default=640) # 1 second per pixel, or the actual video width - theater_video_target_width = max(640, info['duration'], video_sources[0]['width']) + theater_video_target_width = max(640, info['duration'] or 0, video_width) return flask.render_template('watch.html', header_playlist_names = local_playlist.get_playlist_names(), - uploader_channel_url = '/' + info['uploader_url'], - upload_date = upload_date, + uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '', + upload_date = info['published_date'], views = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)), likes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)), dislikes = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)), @@ -219,7 +223,7 @@ def get_watch_page(): video_info = json.dumps(video_info), video_sources = video_sources, subtitle_sources = get_subtitle_sources(info), - related = related_videos, + related = info['related_videos'], music_list = info['music_list'], music_attributes = get_ordered_music_list_attributes(info['music_list']), comments_info = comments_info, @@ -232,7 +236,7 @@ def get_watch_page(): theater_video_target_width = theater_video_target_width, title = info['title'], - uploader = info['uploader'], + uploader = info['author'], description = info['description'], unlisted = info['unlisted'], ) |