author    James Taylor <user234683@users.noreply.github.com>  2019-12-12 22:13:17 -0800
committer James Taylor <user234683@users.noreply.github.com>  2019-12-12 22:13:17 -0800
commit    26f37521babbb2fc4b86ad59354e8c69da1f3897 (patch)
tree      7fd6eb51bc09ce84b18a9193ab99f23cb481af55
parent    205ad29cb0763dd263a5940cdcb3059d189bbfe7 (diff)
Extraction: Bypass age-restriction
-rw-r--r--  youtube/watch.py            26
-rw-r--r--  youtube/yt_data_extract.py  99
2 files changed, 90 insertions(+), 35 deletions(-)
diff --git a/youtube/watch.py b/youtube/watch.py
index fa697ba..4575c1e 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -275,17 +275,32 @@ headers = (
) + util.mobile_ua
def extract_info(video_id):
- polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1', headers=headers, debug_name='watch')
+ polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1', headers=headers, debug_name='watch').decode('utf-8')
+ # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
try:
polymer_json = json.loads(polymer_json)
except json.decoder.JSONDecodeError:
traceback.print_exc()
return {'error': 'Failed to parse json response'}
info = yt_data_extract.extract_watch_info(polymer_json)
- error = decrypt_signatures(info)
- if error:
- print('Error decrypting url signatures: ' + error)
- info['playability_error'] = error
+
+ # age restriction bypass
+ if info['age_restricted']:
+ print('Fetching age restriction bypass page')
+ data = {
+ 'video_id': video_id,
+ 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+ }
+ url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data)
+ video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8')
+ yt_data_extract.update_with_age_restricted_info(info, video_info_page)
+
+ # signature decryption
+ decryption_error = decrypt_signatures(info)
+ if decryption_error:
+ decryption_error = 'Error decrypting url signatures: ' + decryption_error
+ info['playability_error'] = decryption_error
+
return info
def video_quality_string(format):
@@ -410,6 +425,7 @@ def get_watch_page():
uploader = info['author'],
description = info['description'],
unlisted = info['unlisted'],
+ age_restricted = info['age_restricted'],
playability_error = info['playability_error'],
)
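Note: the snippet below is not part of the commit; it is a minimal standalone sketch of the bypass flow the watch.py change adds, using urllib directly instead of util.fetch_url. The get_video_info endpoint, the eurl parameter, and the player_response field are taken from the diff; the function name and request handling are illustrative assumptions.

import json
import urllib.parse
import urllib.request

def fetch_age_restricted_player_response(video_id):
    # Same request the patch builds: get_video_info with an eurl parameter,
    # which asks for the embedded-player variant of the video info and so
    # sidesteps the age gate on the normal watch page.
    params = urllib.parse.urlencode({
        'video_id': video_id,
        'eurl': 'https://youtube.googleapis.com/v/' + video_id,
    })
    url = 'https://www.youtube.com/get_video_info?' + params
    with urllib.request.urlopen(url) as response:
        video_info_page = response.read().decode('utf-8')

    # The response body is a URL-encoded query string; player_response is a
    # JSON blob inside it, parsed the same way by
    # yt_data_extract.update_with_age_restricted_info below.
    video_info = urllib.parse.parse_qs(video_info_page)
    player_response = video_info.get('player_response', [None])[0]
    return json.loads(player_response) if player_response else None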
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 7c2b717..8c5c63d 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -943,6 +943,11 @@ def extract_watch_info_mobile(top_level):
info = {}
microformat = default_multi_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={})
+ family_safe = microformat.get('isFamilySafe')
+ if family_safe is None:
+ info['age_restricted'] = None
+ else:
+ info['age_restricted'] = not family_safe
info['allowed_countries'] = microformat.get('availableCountries', [])
info['published_date'] = microformat.get('publishDate')
@@ -1055,6 +1060,34 @@ def get_caption_url(info, language, format, automatic=False, translation_languag
url += '&tlang=' + translation_language
return url
+def extract_formats(info, player_response):
+ streaming_data = player_response.get('streamingData', {})
+ yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])
+
+ info['formats'] = []
+
+ for yt_fmt in yt_formats:
+ fmt = {}
+ fmt['ext'] = None
+ fmt['audio_bitrate'] = None
+ fmt['acodec'] = None
+ fmt['vcodec'] = None
+ fmt['width'] = yt_fmt.get('width')
+ fmt['height'] = yt_fmt.get('height')
+ fmt['file_size'] = yt_fmt.get('contentLength')
+ fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate')
+ fmt['fps'] = yt_fmt.get('fps')
+ cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', '')))
+ if cipher:
+ fmt['url'] = cipher.get('url')
+ else:
+ fmt['url'] = yt_fmt.get('url')
+ fmt['s'] = cipher.get('s')
+ fmt['sp'] = cipher.get('sp')
+ fmt.update(_formats.get(str(yt_fmt.get('itag')), {}))
+
+ info['formats'].append(fmt)
+
SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
def extract_watch_info(polymer_json):
info = {'playability_error': None, 'error': None}
@@ -1080,10 +1113,6 @@ def extract_watch_info(polymer_json):
player_args = default_multi_get(top_level, 'player', 'args', default={})
player_response = json.loads(player_args['player_response']) if 'player_response' in player_args else {}
- playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None)
- playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default='Unknown error')
- if playability_status not in (None, 'OK'):
- info['playability_error'] = playability_reason
# captions
info['automatic_caption_languages'] = []
@@ -1106,35 +1135,19 @@ def extract_watch_info(polymer_json):
print('WARNING: Found non-translatable caption language')
# formats
- streaming_data = player_response.get('streamingData', {})
- yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])
-
- info['formats'] = []
-
- for yt_fmt in yt_formats:
- fmt = {}
- fmt['ext'] = None
- fmt['audio_bitrate'] = None
- fmt['acodec'] = None
- fmt['vcodec'] = None
- fmt['width'] = yt_fmt.get('width')
- fmt['height'] = yt_fmt.get('height')
- fmt['file_size'] = yt_fmt.get('contentLength')
- fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate')
- fmt['fps'] = yt_fmt.get('fps')
- cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', '')))
- if cipher:
- fmt['url'] = cipher.get('url')
+ extract_formats(info, player_response)
+ playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None)
+ playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default='Could not find playability error')
+ if not info['formats']:
+ if playability_status not in (None, 'OK'):
+ info['playability_error'] = playability_reason
else:
- fmt['url'] = yt_fmt.get('url')
- fmt['s'] = cipher.get('s')
- fmt['sp'] = cipher.get('sp')
- fmt.update(_formats.get(str(yt_fmt.get('itag')), {}))
+ info['playability_error'] = 'Unknown playability error'
- info['formats'].append(fmt)
- if info['formats']:
- info['playability_error'] = None # in case they lie
+ # check age-restriction
+ info['age_restricted'] = (playability_status == 'LOGIN_REQUIRED' and playability_reason and ' age' in playability_reason)
+ # base_js (for decryption of signatures)
info['base_js'] = default_multi_get(top_level, 'player', 'assets', 'js')
if info['base_js']:
info['base_js'] = normalize_url(info['base_js'])
@@ -1162,3 +1175,29 @@ def extract_watch_info(polymer_json):
# other stuff
info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None
return info
+
+def update_with_age_restricted_info(info, video_info_page):
+ ERROR_PREFIX = 'Error bypassing age-restriction: '
+
+ video_info = urllib.parse.parse_qs(video_info_page)
+ player_response = default_multi_get(video_info, 'player_response', 0)
+ if player_response is None:
+ info['playability_error'] = ERROR_PREFIX + 'Could not find player_response in video_info_page'
+ return
+ try:
+ player_response = json.loads(player_response)
+ except json.decoder.JSONDecodeError:
+ traceback.print_exc()
+ info['playability_error'] = ERROR_PREFIX + 'Failed to parse json response'
+ return
+
+ extract_formats(info, player_response)
+ if info['formats']:
+ info['playability_error'] = None
+ else:
+ playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None)
+ playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default=ERROR_PREFIX + 'Could not find playability error')
+ if playability_status not in (None, 'OK'):
+ info['playability_error'] = ERROR_PREFIX + playability_reason
+ else:
+ info['playability_error'] = ERROR_PREFIX + 'Unknown playability error'
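Note: the snippet below is not part of the commit; it sketches how the cipher field that extract_formats reads decomposes into the pieces stored as fmt['url'], fmt['s'], and fmt['sp']. The cipher value here is made up for illustration; decrypt_signatures in watch.py is what later turns 's' into a usable signature.

import urllib.parse

# Hypothetical cipher value as it appears in a streamingData format entry.
cipher = 's=ABCDEF&sp=sig&url=https%3A%2F%2Fexample.googlevideo.com%2Fvideoplayback%3Fitag%3D22'

fields = dict(urllib.parse.parse_qsl(cipher))
url = fields.get('url')             # videoplayback URL without its signature
s = fields.get('s')                 # scrambled signature, input to decrypt_signatures
sp = fields.get('sp', 'signature')  # query parameter the decrypted signature goes under

# Once decrypted, the playable URL is typically url + '&' + sp + '=' + <decrypted signature>.
print(url, s, sp)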