Partially fix age-restricted videos

This does not yet work for videos that require decryption, because decryption is currently failing (giving 403 responses) for an unknown reason. Related Invidious issue about decryption not working: https://github.com/iv-org/invidious/issues/3245. Partial fix for #146.
author: Jesus E <heckyel@riseup.net> 2023-05-28 21:30:51 -0400
committer: Jesus E <heckyel@riseup.net> 2023-05-28 21:30:51 -0400
commit: e54596f3e9bac1b5ef328b25e44604aa11479f83 (patch)
tree: 5af65503f7b47037a0c5fbf7d70ffadd6c9c04bb /youtube
parent: c6e1b366b5cf4e1d7e52631e4dedc935ea021154 (diff)
download: yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.tar.lz
yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.tar.xz
yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.zip
3 files changed, 119 insertions, 69 deletions
diff --git a/youtube/watch.py b/youtube/watch.py
index c18347e..c38a3f5 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -19,6 +19,46 @@ from urllib.parse import parse_qs, urlencode
 from types import SimpleNamespace
 from math import ceil
 
+# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72
+INNERTUBE_CLIENTS = {
+    'android': {
+        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+        'INNERTUBE_CONTEXT': {
+            'client': {
+                'clientName': 'ANDROID',
+                'clientVersion': '17.31.35',
+                'androidSdkVersion': 31,
+                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+            },
+            # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+            'thirdParty': {
+                'embedUrl': 'https://google.com',  # Can be any valid URL
+            }
+        },
+        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+        'REQUIRE_JS_PLAYER': False,
+    },
+
+    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
+    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
+    'tv_embedded': {
+        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+        'INNERTUBE_CONTEXT': {
+            'client': {
+                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
+                'clientVersion': '2.0',
+            },
+            # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+            'thirdParty': {
+                'embedUrl': 'https://google.com',  # Can be any valid URL
+            }
+
+        },
+        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
+        'REQUIRE_JS_PLAYER': True,
+    },
+}
+
 try:
     with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
         decrypt_cache = json.loads(f.read())['decrypt_cache']
@@ -49,6 +89,8 @@ def get_video_sources(info, target_resolution):
     video_only_sources = {}
     uni_sources = []
     pair_sources = []
+
+
     for fmt in info['formats']:
         if not all(fmt[attr] for attr in ('ext', 'url', 'itag')):
             continue
@@ -74,7 +116,6 @@ def get_video_sources(info, target_resolution):
                 fmt['audio_bitrate'] = int(fmt['bitrate']/1000)
             source = {
                 'type': 'audio/' + fmt['ext'],
-                'bitrate': fmt['audio_bitrate'],
                 'quality_string': audio_quality_string(fmt),
             }
             source.update(fmt)
@@ -308,14 +349,6 @@ def save_decrypt_cache():
     f.close()
 
 
-watch_headers = (
-    ('Accept', '*/*'),
-    ('Accept-Language', 'en-US,en;q=0.5'),
-    ('X-YouTube-Client-Name', '2'),
-    ('X-YouTube-Client-Version', '2.20180830'),
-) + util.mobile_ua
-
-
 def decrypt_signatures(info, video_id):
     '''return error string, or False if no errors'''
     if not yt_data_extract.requires_decryption(info):
@@ -345,8 +378,28 @@ def _add_to_error(info, key, additional_message):
     else:
         info[key] = additional_message
 
+def fetch_player_response(client, video_id):
+    client_params = INNERTUBE_CLIENTS[client]
+    context = client_params['INNERTUBE_CONTEXT']
+    key = client_params['INNERTUBE_API_KEY']
+    host = client_params.get('INNERTUBE_HOST') or 'youtubei.googleapis.com'
+    user_agent = context['client'].get('userAgent') or util.mobile_user_agent
 
-def extract_info(video_id, use_invidious, playlist_id=None, index=None):
+    url = 'https://' + host + '/youtubei/v1/player?key=' + key
+    data = {
+        'videoId': video_id,
+        'context': context,
+    }
+    data = json.dumps(data)
+    headers = (('Content-Type', 'application/json'),('User-Agent', user_agent))
+    player_response = util.fetch_url(
+        url, data=data, headers=headers,
+        debug_name='youtubei_player_' + client,
+        report_text='Fetched ' + client + ' youtubei player'
+    ).decode('utf-8')
+    return player_response
+
+def fetch_watch_page_info(video_id, playlist_id, index):
     # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
     # videos
     url = 'https://m.youtube.com/embed/' + video_id + '?bpctr=9999999999'
@@ -354,52 +407,46 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
         url += '&list=' + playlist_id
     if index:
         url += '&index=' + index
-    watch_page = util.fetch_url(url, headers=watch_headers,
+
+    headers = (
+        ('Accept', '*/*'),
+        ('Accept-Language', 'en-US,en;q=0.5'),
+        ('X-YouTube-Client-Name', '2'),
+        ('X-YouTube-Client-Version', '2.20180830'),
+    ) + util.mobile_ua
+
+    watch_page = util.fetch_url(url, headers=headers,
                                 debug_name='watch')
     watch_page = watch_page.decode('utf-8')
-    info = yt_data_extract.extract_watch_info_from_html(watch_page)
-
-    context = {
-        'client': {
-            'clientName': 'ANDROID',
-            'clientVersion': '17.29.35',
-            'androidSdkVersion': '31',
-            'gl': 'US',
-            'hl': 'en',
-        },
-        # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
-        'thirdParty': {
-            'embedUrl': 'https://google.com',  # Can be any valid URL
-        }
-    }
+    return yt_data_extract.extract_watch_info_from_html(watch_page)
+
+def extract_info(video_id, use_invidious, playlist_id=None, index=None):
+    tasks = (
+        # Get video metadata from here
+        gevent.spawn(fetch_watch_page_info, video_id, playlist_id, index),
+
+        # Get video URLs by spoofing as android client because its urls don't
+        # require decryption
+        # The URLs returned with WEB for videos requiring decryption
+        # couldn't be decrypted with the base.js from the web page for some
+        # reason
+        # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136
+        gevent.spawn(fetch_player_response, 'android', video_id)
+    )
+    gevent.joinall(tasks)
+    util.check_gevent_exceptions(*tasks)
+    info, player_response = tasks[0].value, tasks[1].value
+
+    yt_data_extract.update_with_new_urls(info, player_response)
+
+    # Age restricted video, retry
     if info['age_restricted'] or info['player_urls_missing']:
         if info['age_restricted']:
-            print('Age restricted video. Fetching /youtubei/v1/player page')
+            print('Age restricted video, retrying')
         else:
-            print('Missing player. Fetching /youtubei/v1/player page')
-        context['client']['clientScreen'] = 'EMBED'
-    else:
-        print('Fetching /youtubei/v1/player page')
-
-    # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136
-    # ANDROID is used instead because its urls don't require decryption
-    # The URLs returned with WEB for videos requiring decryption
-    # couldn't be decrypted with the base.js from the web page for some
-    # reason
-    url ='https://youtubei.googleapis.com/youtubei/v1/player'
-    url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
-    data = {
-        'videoId': video_id,
-        'context': context,
-    }
-    data = json.dumps(data)
-    content_header = (('Content-Type', 'application/json'),)
-    player_response = util.fetch_url(
-        url, data=data, headers=util.mobile_ua + content_header,
-        debug_name='youtubei_player',
-        report_text='Fetched youtubei player page').decode('utf-8')
-
-    yt_data_extract.update_with_age_restricted_info(info, player_response)
+            print('Player urls missing, retrying')
+        player_response = fetch_player_response('tv_embedded', video_id)
+        yt_data_extract.update_with_new_urls(info, player_response)
 
     # signature decryption
     decryption_error = decrypt_signatures(info, video_id)
@@ -422,8 +469,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     if (info['hls_manifest_url']
         and (info['live'] or not info['formats'] or not info['urls_ready'])
     ):
-        manifest = util.fetch_url(
-            info['hls_manifest_url'],
+        manifest = util.fetch_url(info['hls_manifest_url'],
             debug_name='hls_manifest.m3u8',
             report_text='Fetched hls manifest'
         ).decode('utf-8')
@@ -439,6 +485,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     # check for 403. Unnecessary for tor video routing b/c ip address is same
     info['invidious_used'] = False
     info['invidious_reload_button'] = False
+    info['tor_bypass_used'] = False
     if (settings.route_tor == 1
             and info['formats'] and info['formats'][0]['url']):
         try:
@@ -452,6 +499,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
         if response.status == 403:
             print('Access denied (403) for video urls.')
             print('Routing video through Tor')
+            info['tor_bypass_used'] = True
             for fmt in info['formats']:
                 fmt['url'] += '&use_tor=1'
         elif 300 <= response.status < 400:
@@ -682,20 +730,20 @@ def get_watch_page(video_id=None):
             'codecs': codecs_string,
         })
 
-    target_resolution = settings.default_resolution
+    if (settings.route_tor == 2) or info['tor_bypass_used']:
+        target_resolution = 240
+    else:
+        target_resolution = settings.default_resolution
+
     source_info = get_video_sources(info, target_resolution)
     uni_sources = source_info['uni_sources']
     pair_sources = source_info['pair_sources']
     uni_idx, pair_idx = source_info['uni_idx'], source_info['pair_idx']
-    video_height = yt_data_extract.deep_get(source_info, 'uni_sources',
-                                            uni_idx, 'height',
-                                            default=360)
-    video_width = yt_data_extract.deep_get(source_info, 'uni_sources',
-                                           uni_idx, 'width',
-                                           default=640)
+
 
     pair_quality = yt_data_extract.deep_get(pair_sources, pair_idx, 'quality')
     uni_quality = yt_data_extract.deep_get(uni_sources, uni_idx, 'quality')
+
     pair_error = abs((pair_quality or 360) - target_resolution)
     uni_error = abs((uni_quality or 360) - target_resolution)
     if uni_error == pair_error:
@@ -705,6 +753,7 @@ def get_watch_page(video_id=None):
         closer_to_target = 'uni'
     else:
         closer_to_target = 'pair'
+
     using_pair_sources = (
         bool(pair_sources) and (not uni_sources or closer_to_target == 'pair')
     )
@@ -719,6 +768,8 @@ def get_watch_page(video_id=None):
             uni_sources, uni_idx, 'width', default=640
         )
 
+
+
     # 1 second per pixel, or the actual video width
     theater_video_target_width = max(640, info['duration'] or 0, video_width)
 
@@ -751,14 +802,13 @@ def get_watch_page(video_id=None):
         template_name = 'embed.html'
     else:
         template_name = 'watch.html'
-    return flask.render_template(
-        template_name,
-        header_playlist_names = local_playlist.get_playlist_names(),
-        uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '',
-        time_published = info['time_published'],
-        time_published_utc=time_utc_isoformat(info['time_published']),
+    return flask.render_template(template_name,
+        header_playlist_names   = local_playlist.get_playlist_names(),
+        uploader_channel_url    = ('/' + info['author_url']) if info['author_url'] else '',
+        time_published             = info['time_published'],
         view_count    = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)),
         like_count    = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)),
+        dislike_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)),
         download_formats        = download_formats,
         other_downloads         = other_downloads,
         video_info              = json.dumps(video_info),
@@ -807,7 +857,7 @@ def get_watch_page(video_id=None):
             'related': info['related_videos'],
             'playability_error': info['playability_error'],
         },
-        font_family=youtube.font_choices[settings.font],
+        font_family=youtube.font_choices[settings.font], # for embed page
         **source_info,
         using_pair_sources = using_pair_sources,
     )
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index 9016810..de1812d 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -7,7 +7,7 @@ from .everything_else import (extract_channel_info, extract_search_info,
     extract_playlist_metadata, extract_playlist_info, extract_comments_info)
 
 from .watch_extraction import (extract_watch_info, get_caption_url,
-    update_with_age_restricted_info, requires_decryption,
+    update_with_new_urls, requires_decryption,
     extract_decryption_function, decrypt_signatures, _formats,
     update_format_with_type_info, extract_hls_formats,
     extract_watch_info_from_html, captions_available)
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 31127e3..4f9ec30 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -791,7 +791,7 @@ def get_caption_url(info, language, format, automatic=False, translation_languag
         url += '&tlang=' + translation_language
     return url
 
-def update_with_age_restricted_info(info, player_response):
+def update_with_new_urls(info, player_response):
     '''Inserts urls from player_response json'''
     ERROR_PREFIX = 'Error getting missing player or bypassing age-restriction: '
author	Jesus E <heckyel@riseup.net>	2023-05-28 21:30:51 -0400
committer	Jesus E <heckyel@riseup.net>	2023-05-28 21:30:51 -0400
commit	e54596f3e9bac1b5ef328b25e44604aa11479f83 (patch)
tree	5af65503f7b47037a0c5fbf7d70ffadd6c9c04bb /youtube
parent	c6e1b366b5cf4e1d7e52631e4dedc935ea021154 (diff)
download	yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.tar.lz yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.tar.xz yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.zip