diff options
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/playlist.py | 4 | ||||
-rw-r--r-- | youtube/templates/common_elements.html | 8 | ||||
-rw-r--r-- | youtube/templates/watch.html | 229 | ||||
-rw-r--r-- | youtube/watch.py | 28 | ||||
-rw-r--r-- | youtube/yt_data_extract/common.py | 26 | ||||
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 34 |
6 files changed, 299 insertions, 30 deletions
diff --git a/youtube/playlist.py b/youtube/playlist.py index 91c8d1d..b7167f6 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -105,6 +105,10 @@ def get_playlist_page(): if 'id' in item: item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg' + item['url'] += '&list=' + playlist_id + if item['index']: + item['url'] += '&index=' + str(item['index']) + video_count = yt_data_extract.deep_get(info, 'metadata', 'video_count') if video_count is None: video_count = 40 diff --git a/youtube/templates/common_elements.html b/youtube/templates/common_elements.html index 58580a3..0587ce3 100644 --- a/youtube/templates/common_elements.html +++ b/youtube/templates/common_elements.html @@ -14,14 +14,18 @@ {%- endif -%} {% endmacro %} -{% macro item(info, description=false, horizontal=true, include_author=true, include_badges=true) %} +{% macro item(info, description=false, horizontal=true, include_author=true, include_badges=true, lazy_load=false) %} <div class="item-box {{ info['type'] + '-item-box' }} {{'horizontal-item-box' if horizontal else 'vertical-item-box'}} {{'has-description' if description else 'no-description'}}"> {% if info['error'] %} {{ info['error'] }} {% else %} <div class="item {{ info['type'] + '-item' }}"> <a class="thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> - <img class="thumbnail-img" src="{{ info['thumbnail'] }}"> + {% if lazy_load %} + <img class="thumbnail-img lazy" data-src="{{ info['thumbnail'] }}"> + {% else %} + <img class="thumbnail-img" src="{{ info['thumbnail'] }}"> + {% endif %} {% if info['type'] != 'channel' %} <div class="thumbnail-info"> <span>{{ (info['video_count']|commatize + ' videos') if info['type'] == 'playlist' else info['duration'] }}</span> diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html index 27e1986..f47c337 100644 --- a/youtube/templates/watch.html +++ b/youtube/templates/watch.html @@ -37,7 +37,7 @@ margin-bottom: 10px; background-color: var(--video-background-color); } - .related-videos-outer{ + .side-videos{ grid-row: 2 /span 3; width: 400px; } @@ -50,7 +50,7 @@ width: 640px; grid-column: 2; } - .related-videos-outer{ + .side-videos{ grid-row: 1 /span 4; } {% endif %} @@ -183,20 +183,54 @@ .comment{ width:640px; } - .related-videos-outer{ + + .side-videos{ grid-column: 4; max-width: 640px; } - .related-videos-inner{ - padding-top: 10px; - display: grid; - grid-auto-rows: 90px; - grid-row-gap: 10px; - } - .thumbnail-box{ /* overides rule in shared.css */ - height: 90px !important; - width: 120px !important; + .playlist{ + border-style: solid; + border-width: 2px; + border-color: lightgray; + margin-bottom: 10px; + } + .playlist-header{ + background-color: var(--interface-color); + padding: 3px; + border-bottom-style: solid; + border-bottom-width: 2px; + border-bottom-color: lightgray; + } + .playlist-header h3{ + margin: 2px; + } + .playlist-metadata{ + list-style: none; + padding: 0px; + margin: 0px; + } + .playlist-metadata li{ + display: inline; + margin: 2px; + } + .playlist-videos{ + height: 300px; + overflow-y: scroll; + display: grid; + grid-auto-rows: 90px; + grid-row-gap: 10px; + padding-top: 10px; + } + .related-videos-inner{ + padding-top: 10px; + display: grid; + grid-auto-rows: 90px; + grid-row-gap: 10px; } + .thumbnail-box{ /* overides rule in shared.css */ + height: 90px !important; + width: 120px !important; + } /* Put related vids below videos when window is too small */ /* 1100px instead of 1080 because W3C is full of idiots who include scrollbar width */ @@ -204,7 +238,7 @@ main{ grid-template-columns: 1fr 640px 40px 1fr; } - .related-videos-outer{ + .side-videos{ margin-top: 10px; grid-column: 2; grid-row: 3; @@ -345,16 +379,165 @@ </details> </div> - {% if related_videos_mode != 0 %} - <details class="related-videos-outer" {{'open' if related_videos_mode == 1 else ''}}> - <summary>Related Videos</summary> - <nav class="related-videos-inner"> - {% for info in related %} - {{ common_elements.item(info, include_badges=false) }} - {% endfor %} - </nav> - </details> - {% endif %} + <div class="side-videos"> + {% if playlist %} + <div class="playlist"> + <div class="playlist-header"> + <a href="{{ playlist['url'] }}" title="{{ playlist['title'] }}"><h3>{{ playlist['title'] }}</h3></a> + <ul class="playlist-metadata"> + <li>Autoplay: <input type="checkbox" id="autoplay-toggle"></li> + {% if playlist['current_index'] is none %} + <li>[Error!]/{{ playlist['video_count'] }}</li> + {% else %} + <li>{{ playlist['current_index']+1 }}/{{ playlist['video_count'] }}</li> + {% endif %} + <li><a href="{{ playlist['author_url'] }}" title="{{ playlist['author'] }}">{{ playlist['author'] }}</a></li> + </ul> + </div> + <nav class="playlist-videos"> + {% for info in playlist['items'] %} + {# non-lazy load for 5 videos surrounding current video #} + {# for non-js browsers or old such that IntersectionObserver doesn't work #} + {# -10 is sentinel to not load anything if there's no current_index for some reason #} + {% if (playlist.get('current_index', -10) - loop.index0)|abs is lt(5) %} + {{ common_elements.item(info, include_badges=false, lazy_load=false) }} + {% else %} + {{ common_elements.item(info, include_badges=false, lazy_load=true) }} + {% endif %} + {% endfor %} + </nav> + {% if playlist['current_index'] is not none %} + <script> + // from https://stackoverflow.com/a/6969486 + function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string + } + var playability_error = {{ 'true' if playability_error else 'false' }}; + var playlist_id = {{ playlist['id']|tojson }}; + playlist_id = escapeRegExp(playlist_id); + + // read cookies on whether to autoplay thru playlist + // pain in the ass: + // https://developer.mozilla.org/en-US/docs/Web/API/Document/cookie + var cookieValue = document.cookie.replace(new RegExp( + '(?:(?:^|.*;\\s*)autoplay_' + playlist_id + '\\s*\\=\\s*([^;]*).*$)|^.*$'), '$1'); + var autoplayEnabled = 0; + if(cookieValue.length === 0){ + autoplayEnabled = 0; + } else { + autoplayEnabled = Number(cookieValue); + } + + // check the checkbox if autoplay is on + var checkbox = document.querySelector('#autoplay-toggle'); + if(autoplayEnabled){ + checkbox.checked = true; + } + + // listen for checkbox to turn autoplay on and off + checkbox.addEventListener( 'change', function() { + if(this.checked) { + autoplayEnabled = 1; + document.cookie = 'autoplay_' + playlist_id + '=1'; + } else { + autoplayEnabled = 0; + document.cookie = 'autoplay_' + playlist_id + '=0'; + } + }); + + if(!playability_error){ + // play the video if autoplay is on + var vid = document.querySelector('video'); + if(autoplayEnabled){ + vid.play(); + } + } + + var currentIndex = {{ playlist['current_index']|tojson }}; + {% if playlist['current_index']+1 == playlist['items']|length %} + var nextVideoUrl = null; + {% else %} + var nextVideoUrl = {{ (playlist['items'][playlist['current_index']+1]['url'])|tojson }}; + {% endif %} + var nextVideoDelay = 1000; + + // scroll playlist to proper position + var pl = document.querySelector('.playlist-videos'); + // item height + gap == 100 + pl.scrollTop = 100*currentIndex; + + // go to next video when video ends + // https://stackoverflow.com/a/2880950 + if(nextVideoUrl){ + if(playability_error){ + videoEnded(); + } else { + vid.addEventListener('ended', videoEnded, false); + } + function nextVideo(){ + if(autoplayEnabled){ + window.location.href = nextVideoUrl; + } + } + function videoEnded(e) { + window.setTimeout(nextVideo, nextVideoDelay); + } + } + </script> + {% endif %} + {% if playlist['id'] is not none %} + <script> + // lazy load playlist images + // copied almost verbatim from + // https://css-tricks.com/tips-for-rolling-your-own-lazy-loading/ + // IntersectionObserver isn't supported in pre-quantum + // firefox versions, but the alternative of making it + // manually is a performance drain, so oh well + var observer = new IntersectionObserver(lazyLoad, { + + // where in relation to the edge of the viewport, we are observing + rootMargin: "100px", + + // how much of the element needs to have intersected + // in order to fire our loading function + threshold: 1.0 + + }); + + function lazyLoad(elements) { + elements.forEach(item => { + if (item.intersectionRatio > 0) { + + // set the src attribute to trigger a load + item.target.src = item.target.dataset.src; + + // stop observing this element. Our work here is done! + observer.unobserve(item.target); + }; + }); + }; + + // Tell our observer to observe all img elements with a "lazy" class + var lazyImages = document.querySelectorAll('img.lazy'); + lazyImages.forEach(img => { + observer.observe(img); + }); + </script> + {% endif %} + </div> + {% endif %} + + {% if related_videos_mode != 0 %} + <details class="related-videos-outer" {{'open' if related_videos_mode == 1 else ''}}> + <summary>Related Videos</summary> + <nav class="related-videos-inner"> + {% for info in related %} + {{ common_elements.item(info, include_badges=false) }} + {% endfor %} + </nav> + </details> + {% endif %} + </div> {% if comments_mode != 0 %} {% if comments_disabled %} diff --git a/youtube/watch.py b/youtube/watch.py index f80229b..2440729 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -207,10 +207,16 @@ headers = ( ('X-YouTube-Client-Version', '2.20180830'), ) + util.mobile_ua -def extract_info(video_id): +def extract_info(video_id, playlist_id=None, index=None): # bpctr=9999999999 will bypass are-you-sure dialogs for controversial # videos - polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999', headers=headers, debug_name='watch').decode('utf-8') + url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999' + if playlist_id: + url += '&list=' + playlist_id + if index: + url += '&index=' + index + polymer_json = util.fetch_url(url, headers=headers, debug_name='watch') + polymer_json = polymer_json.decode('utf-8') # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info try: polymer_json = json.loads(polymer_json) @@ -337,9 +343,12 @@ def get_watch_page(video_id=None): return flask.render_template('error.html', error_message='Incomplete video id (too short): ' + video_id), 404 lc = request.args.get('lc', '') + playlist_id = request.args.get('list') + index = request.args.get('index') tasks = ( gevent.spawn(comments.video_comments, video_id, int(settings.default_comment_sorting), lc=lc ), - gevent.spawn(extract_info, video_id) + gevent.spawn(extract_info, video_id, playlist_id=playlist_id, + index=index) ) gevent.joinall(tasks) util.check_gevent_exceptions(tasks[1]) @@ -359,6 +368,18 @@ def get_watch_page(video_id=None): util.prefix_urls(item) util.add_extra_html_info(item) + if info['playlist']: + playlist_id = info['playlist']['id'] + for item in info['playlist']['items']: + util.prefix_urls(item) + util.add_extra_html_info(item) + if playlist_id: + item['url'] += '&list=' + playlist_id + if item['index']: + item['url'] += '&index=' + str(item['index']) + info['playlist']['author_url'] = util.prefix_url( + info['playlist']['author_url']) + if settings.gather_googlevideo_domains: with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: url = info['formats'][0]['url'] @@ -400,6 +421,7 @@ def get_watch_page(video_id=None): video_sources = video_sources, subtitle_sources = get_subtitle_sources(info), related = info['related_videos'], + playlist = info['playlist'], music_list = info['music_list'], music_attributes = get_ordered_music_list_attributes(info['music_list']), comments_info = comments_info, diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 877444e..974d981 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -73,6 +73,15 @@ def conservative_update(obj, key, value): if obj.get(key) is None: obj[key] = value +def concat_or_none(*strings): + '''Concatenates strings. Returns None if any of the arguments are None''' + result = '' + for string in strings: + if string is None: + return None + result += string + return result + def remove_redirect(url): if url is None: return None @@ -268,6 +277,23 @@ def extract_item_info(item, additional_info={}): info['approx_view_count'] = '0' info['duration'] = extract_str(item.get('lengthText')) + + # if it's an item in a playlist, get its index + if 'index' in item: # url has wrong index on playlist page + info['index'] = extract_int(item.get('index')) + elif 'indexText' in item: + # Current item in playlist has ▶ instead of the actual index, must + # dig into url + match = re.search(r'index=(\d+)', deep_get(item, + 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', + 'url', default='')) + if match is None: # worth a try then + info['index'] = extract_int(item.get('indexText')) + else: + info['index'] = int(match.group(1)) + else: + info['index'] = None + elif primary_type in ('playlist', 'radio'): info['id'] = item.get('playlistId') info['video_count'] = extract_int(item.get('videoCount')) diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py index bc02313..0b30c91 100644 --- a/youtube/yt_data_extract/watch_extraction.py +++ b/youtube/yt_data_extract/watch_extraction.py @@ -2,7 +2,7 @@ from .common import (get, multi_get, deep_get, multi_deep_get, liberal_update, conservative_update, remove_redirect, normalize_url, extract_str, extract_formatted_text, extract_int, extract_approx_int, extract_date, check_missing_keys, extract_item_info, extract_items, - extract_response) + extract_response, concat_or_none) import json import urllib.parse @@ -160,7 +160,37 @@ def _extract_watch_info_mobile(top_level): response = top_level.get('response', {}) - # video info from metadata renderers + # this renderer has the stuff visible on the page + # check for playlist + items, _ = extract_items(response, + item_types={'singleColumnWatchNextResults'}) + if items: + watch_next_results = items[0]['singleColumnWatchNextResults'] + playlist = deep_get(watch_next_results, 'playlist', 'playlist') + if playlist is None: + info['playlist'] = None + else: + info['playlist'] = {} + info['playlist']['title'] = playlist.get('title') + info['playlist']['author'] = extract_str(multi_get(playlist, + 'ownerName', 'longBylineText', 'shortBylineText', 'ownerText')) + author_id = deep_get(playlist, 'longBylineText', 'runs', 0, + 'navigationEndpoint', 'browseEndpoint', 'browseId') + info['playlist']['author_id'] = author_id + if author_id: + info['playlist']['author_url'] = concat_or_none( + 'https://www.youtube.com/channel/', author_id) + info['playlist']['id'] = playlist.get('playlistId') + info['playlist']['url'] = concat_or_none( + 'https://www.youtube.com/playlist?list=', + info['playlist']['id']) + info['playlist']['video_count'] = playlist.get('totalVideos') + info['playlist']['current_index'] = playlist.get('currentIndex') + info['playlist']['items'] = [ + extract_item_info(i) for i in playlist.get('contents', ())] + + # Holds the visible video info. It is inside singleColumnWatchNextResults + # but use our convenience function instead items, _ = extract_items(response, item_types={'slimVideoMetadataRenderer'}) if items: video_info = items[0]['slimVideoMetadataRenderer'] |