From 98157bf1bf1223ffa7556d2d21cfac6f07675f9d Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 30 Jun 2018 23:34:46 -0700 Subject: initial commit --- youtube/watch.py | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 youtube/watch.py (limited to 'youtube/watch.py') diff --git a/youtube/watch.py b/youtube/watch.py new file mode 100644 index 0000000..b8aa17d --- /dev/null +++ b/youtube/watch.py @@ -0,0 +1,294 @@ +from youtube_dl.YoutubeDL import YoutubeDL +import json +import urllib +from string import Template +import html +import youtube.common as common +from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN +import youtube.comments as comments +import gevent + +video_height_priority = (360, 480, 240, 720, 1080) + + +_formats = { + '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, + '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, + '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, + '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well + '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, + '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + + + # 3D videos + '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, + '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + + # Apple HTTP Live Streaming + '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, + + # DASH mp4 video + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, + + # Dash mp4 audio + '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, + '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, + '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, + + # Dash webm + '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + + # Dash webm audio + '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, + '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, + + # Dash webm audio with opus inside + '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, + '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, + '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, + + # RTMP (unnamed) + '_rtmp': {'protocol': 'rtmp'}, +} + + + + +source_tag_template = Template(''' +''') + +with open("yt_watch_template.html", "r") as file: + yt_watch_template = Template(file.read()) + + + +# example: +#https://www.youtube.com/related_ajax?ctoken=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&continuation=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&itct=CCkQybcCIhMIg8PShInX2gIVgdvBCh15WA0ZKPgd +def get_bloated_more_related_videos(video_url, related_videos_token, id_token): + related_videos_token = urllib.parse.quote(related_videos_token) + url = "https://www.youtube.com/related_ajax?ctoken=" + related_videos_token + "&continuation=" + related_videos_token + headers = { + 'Host': 'www.youtube.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)', + 'Accept': '*/*', + 'Accept-Language': 'en-US,en;q=0.5', + 'Referer': video_url, + 'X-YouTube-Client-Name': '1', + 'X-YouTube-Client-Version': '2.20180418', + 'X-Youtube-Identity-Token': id_token, + + } + #print(url) + req = urllib.request.Request(url, headers=headers) + response = urllib.request.urlopen(req, timeout = 5) + content = response.read() + info = json.loads(content) + return info + +def get_more_related_videos_info(video_url, related_videos_token, id_token): + results = [] + info = get_bloated_more_related_videos(video_url, related_videos_token, id_token) + bloated_results = info[1]['response']['continuationContents']['watchNextSecondaryResultsContinuation']['results'] + for bloated_result in bloated_results: + bloated_result = bloated_result['compactVideoRenderer'] + results.append({ + "title": bloated_result['title']['simpleText'], + "video_id": bloated_result['videoId'], + "views_text": bloated_result['viewCountText']['simpleText'], + "length_text": default_multi_get(bloated_result, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "length_text": bloated_result['lengthText']['simpleText'], + "uploader_name": bloated_result['longBylineText']['runs'][0]['text'], + "uploader_url": bloated_result['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + }) + return results + +def more_related_videos_html(video_info): + related_videos = get_related_videos(url, 1, video_info['related_videos_token'], video_info['id_token']) + + related_videos_html = "" + for video in related_videos: + related_videos_html += Template(video_related_template).substitute( + video_title=html.escape(video["title"]), + views=video["views_text"], + uploader=html.escape(video["uploader_name"]), + uploader_channel_url=video["uploader_url"], + length=video["length_text"], + video_url = "/youtube.com/watch?v=" + video["video_id"], + thumbnail_url= get_thumbnail_url(video['video_id']), + ) + return related_videos_html + + + +def get_related_items_html(info): + result = "" + for item in info['related_vids']: + if 'list' in item: # playlist: + result += common.small_playlist_item_html(watch_page_related_playlist_info(item)) + else: + result += common.small_video_item_html(watch_page_related_video_info(item)) + return result + + +# json of related items retrieved directly from the watch page has different names for everything +# converts these to standard names +def watch_page_related_video_info(item): + result = {key: item[key] for key in ('id', 'title', 'author')} + result['duration'] = common.seconds_to_timestamp(item['length_seconds']) + try: + result['views'] = item['short_view_count_text'] + except KeyError: + result['views'] = '' + return result + +def watch_page_related_playlist_info(item): + return { + 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+", + 'title': item['playlist_title'], + 'id': item['list'], + 'first_video_id': item['video_id'], + } + + +def sort_formats(info): + info['formats'].sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0)) + for index, format in enumerate(info['formats']): + if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360: + break + info['formats'] = info['formats'][index:] + info['formats'][0:index] + info['formats'] = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none'] + +def formats_html(info): + result = '' + for format in info['formats']: + result += source_tag_template.substitute( + src=format['url'], + type='audio/' + format['ext'] if format['vcodec'] == "none" else 'video/' + format['ext'], + ) + return result + +def choose_format(info): + suitable_formats = [] + with open('teste.txt', 'w', encoding='utf-8') as f: + f.write(json.dumps(info['formats'])) + for format in info['formats']: + if (format["ext"] in ("mp4", "webm") + and format["acodec"] != "none" + and format["vcodec"] != "none" + and format.get("height","none") in video_height_priority): + suitable_formats.append(format) + + current_best = (suitable_formats[0],video_height_priority.index(suitable_formats[0]["height"])) + for format in suitable_formats: + video_priority_index = video_height_priority.index(format["height"]) + if video_priority_index < current_best[1]: + current_best = (format, video_priority_index) + return current_best[0] + +more_comments_template = Template('''More comments''') +def get_watch_page(query_string): + id = urllib.parse.parse_qs(query_string)['v'][0] + tasks = ( + gevent.spawn(comments.video_comments, id ), + gevent.spawn(YoutubeDL(params={'youtube_include_dash_manifest':False}).extract_info, "https://www.youtube.com/watch?v=" + id, download=False) + ) + gevent.joinall(tasks) + comments_info, info = tasks[0].value, tasks[1].value + comments_html, ctoken = comments_info + + if ctoken == '': + more_comments_button = '' + else: + more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken) + #comments_html = comments.comments_html(video_id(url)) + #info = YoutubeDL().extract_info(url, download=False) + + #chosen_format = choose_format(info) + sort_formats(info) + + + + upload_year = info["upload_date"][0:4] + upload_month = info["upload_date"][4:6] + upload_day = info["upload_date"][6:8] + upload_date = upload_month + "/" + upload_day + "/" + upload_year + + related_videos_html = get_related_items_html(info) + + page = yt_watch_template.substitute( + video_title=html.escape(info["title"]), + page_title=html.escape(info["title"]), + uploader=html.escape(info["uploader"]), + uploader_channel_url='/' + info["uploader_url"], + #upload_date=datetime.datetime.fromtimestamp(info["timestamp"]).strftime("%d %b %Y %H:%M:%S"), + upload_date = upload_date, + views='{:,}'.format(info["view_count"]), + likes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["like_count"]), + dislikes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["dislike_count"]), + description=html.escape(info["description"]), + video_sources=formats_html(info), + related = related_videos_html, + comments=comments_html, + more_comments_button = more_comments_button, + ) + return page \ No newline at end of file -- cgit v1.2.3