Diffstat (limited to 'youtube')
-rw-r--r--  youtube/accounts.py      | 15
-rw-r--r--  youtube/channel.py       | 24
-rw-r--r--  youtube/comments.py      |  4
-rw-r--r--  youtube/playlist.py      |  8
-rw-r--r--  youtube/post_comment.py  |  8
-rw-r--r--  youtube/search.py        |  2
-rw-r--r--  youtube/util.py          | 13
7 files changed, 29 insertions, 45 deletions
diff --git a/youtube/accounts.py b/youtube/accounts.py
index c35b6cc..d2e8a41 100644
--- a/youtube/accounts.py
+++ b/youtube/accounts.py
@@ -162,10 +162,8 @@ def _login(username, password, cookiejar, use_tor):
         Taken from youtube-dl
     """
 
-    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
-    '''with open('debug/login_page', 'w', encoding='utf-8') as f:
-        f.write(login_page)'''
-    #print(cookiejar.as_lwp_str())
+    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='login_page').decode('utf-8')
+
     if login_page is False:
         return
@@ -189,10 +187,7 @@ def _login(username, password, cookiejar, use_tor):
             'Google-Accounts-XSRF': 1,
         }
         headers.update(yt_dl_headers)
-        result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
-        #print(cookiejar.as_lwp_str())
-        '''with open('debug/' + note, 'w', encoding='utf-8') as f:
-            f.write(result)'''
+        result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name=note).decode('utf-8')
         result = re.sub(r'^[^\[]*', '', result)
         return json.loads(result)
@@ -321,12 +316,10 @@ def _login(username, password, cookiejar, use_tor):
         return False
 
     try:
-        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
+        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='check_cookie_results').decode('utf-8')
     except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err:
         return False
 
-    '''with open('debug/check_cookie_results', 'w', encoding='utf-8') as f:
-        f.write(check_cookie_results)'''
     if 'https://myaccount.google.com/' not in check_cookie_results:
         warn('Unable to log in')
diff --git a/youtube/channel.py b/youtube/channel.py
index 04f698b..daf4791 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -88,11 +88,9 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
     url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
 
     print("Sending channel tab ajax request")
-    content = util.fetch_url(url, util.desktop_ua + headers_1)
+    content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab')
     print("Finished recieving channel tab response")
 
-    '''with open('debug/channel_debug', 'wb') as f:
-        f.write(content)'''
     return content
 
 def get_number_of_videos(channel_id):
@@ -103,15 +101,13 @@ def get_number_of_videos(channel_id):
 
     # Sometimes retrieving playlist info fails with 403 for no discernable reason
     try:
-        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
+        response = util.fetch_url(url, util.mobile_ua + headers_pbj, debug_name='number_of_videos')
     except urllib.error.HTTPError as e:
         if e.code != 403:
             raise
         print("Couldn't retrieve number of videos")
         return 1000
 
-    '''with open('debug/playlist_debug_metadata', 'wb') as f:
-        f.write(response)'''
     response = response.decode('utf-8')
 
     print("Got response for number of videos")
@@ -135,9 +131,7 @@ def get_channel_search_json(channel_id, query, page):
     ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
     ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
 
-    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1)
-    '''with open('debug/channel_search_debug', 'wb') as f:
-        f.write(polymer_json)'''
+    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1, debug_name='channel_search')
 
     return polymer_json
@@ -293,9 +287,9 @@ def get_channel_page(channel_id, tab='videos'):
 
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'about':
-        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='channel_about')
     elif tab == 'playlists':
-        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='channel_playlists')
     elif tab == 'search':
         tasks = (
             gevent.spawn(get_number_of_videos, channel_id ),
@@ -336,13 +330,11 @@ def get_channel_page_general_url(base_url, tab, request):
     query = request.args.get('query', '')
 
     if tab == 'videos':
-        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
-        with open('debug/channel_debug', 'wb') as f:
-            f.write(polymer_json)
+        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
     elif tab == 'about':
-        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
     elif tab == 'playlists':
-        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1, debug_name='gen_channel_playlists')
     elif tab == 'search':
         raise NotImplementedError()
     else:
diff --git a/youtube/comments.py b/youtube/comments.py
index ba82154..4485ad2 100644
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -83,7 +83,7 @@ def request_comments(ctoken, replies=False):
     url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
 
     for i in range(0,8):    # don't retry more than 8 times
-        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
+        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments", debug_name='request_comments')
         if content[0:4] == b")]}'":           # random closing characters included at beginning of response for some reason
             content = content[4:]
         elif content[0:10] == b'\n<!DOCTYPE':   # occasionally returns html instead of json for no reason
@@ -91,8 +91,6 @@ def request_comments(ctoken, replies=False):
             print("got <!DOCTYPE>, retrying")
             continue
         break
-    '''with open('debug/comments_debug', 'wb') as f:
-        f.write(content)'''
 
     return content
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 18ddf49..5df7074 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -47,9 +47,7 @@ headers_1 = (
 
 def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
     url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text)
-    '''with open('debug/playlist_debug', 'wb') as f:
-        f.write(content)'''
+    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
     content = json.loads(util.uppercase_escape(content.decode('utf-8')))
 
     return content
@@ -67,9 +65,7 @@ def get_videos(playlist_id, page):
         'X-YouTube-Client-Version': '2.20180508',
     }
 
-    content = util.fetch_url(url, headers, report_text="Retrieved playlist")
-    '''with open('debug/playlist_debug', 'wb') as f:
-        f.write(content)'''
+    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')
     info = json.loads(util.uppercase_escape(content.decode('utf-8')))
 
     return info
diff --git a/youtube/post_comment.py b/youtube/post_comment.py
index c4ffb9d..25d0e3a 100644
--- a/youtube/post_comment.py
+++ b/youtube/post_comment.py
@@ -35,13 +35,11 @@ def _post_comment(text, video_id, session_token, cookiejar):
 
     data = urllib.parse.urlencode(data_dict).encode()
 
-    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_comment')
     code = json.loads(content)['code']
     print("Comment posting code: " + code)
     return code
-    '''with open('debug/post_comment_response', 'wb') as f:
-        f.write(content)'''
 
 
 def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar):
@@ -66,13 +64,11 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar):
     }
     data = urllib.parse.urlencode(data_dict).encode()
 
-    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_reply')
     code = json.loads(content)['code']
     print("Comment posting code: " + code)
     return code
-    '''with open('debug/post_comment_response', 'wb') as f:
-        f.write(content)'''
 
 def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar):
     headers = {
diff --git a/youtube/search.py b/youtube/search.py
index 39a80bf..e35d0cb 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -53,7 +53,7 @@ def get_search_json(query, page, autocorrect, sort, filters):
         'X-YouTube-Client-Version': '2.20180418',
     }
     url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
-    content = util.fetch_url(url, headers=headers, report_text="Got search results")
+    content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
     info = json.loads(content)
     return info
diff --git a/youtube/util.py b/youtube/util.py
index 9950815..2f80f11 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -5,6 +5,7 @@ import brotli
 import urllib.parse
 import re
 import time
+import os
 
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
@@ -103,7 +104,7 @@ def decode_content(content, encoding_header):
         content = gzip.decompress(content)
     return content
 
-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False):
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
     '''
     When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
@@ -160,6 +161,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False):
         print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
 
+    if settings.debugging_save_responses and debug_name is not None:
+        save_dir = os.path.join(settings.data_dir, 'debug')
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+
+        with open(os.path.join(save_dir, debug_name), 'wb') as f:
+            f.write(content)
+
     if return_response:
         return content, response
     return content
@@ -226,4 +235,4 @@ def update_query_string(query_string, items):
 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
-        lambda m: chr(int(m.group(1), base=16)), s)
\ No newline at end of file
+        lambda m: chr(int(m.group(1), base=16)), s)
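
Net effect of the commit: the scattered, mostly commented-out debug dumps at each call site are replaced by a single opt-in save path inside fetch_url. A minimal sketch of a call site after this change — the URL and playlist ID below are placeholders for illustration, not values from the commit; debug_name, util.mobile_ua, headers_1, settings.debugging_save_responses, and settings.data_dir are all taken from the diff above:

    # Illustrative caller (hypothetical URL; everything else is from the diff):
    content = util.fetch_url(
        'https://m.youtube.com/playlist?list=PLexample&pbj=1',  # placeholder URL
        util.mobile_ua + headers_1,
        report_text='Retrieved playlist',
        debug_name='playlist_first_page',
    )
    # Only when settings.debugging_save_responses is enabled does fetch_url also
    # write the decoded response body to
    # os.path.join(settings.data_dir, 'debug', 'playlist_first_page');
    # with the setting off, the call behaves exactly as before.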