about | summary | refs | log | tree | commit | diff | stats
path: root/youtube
diff options
context:
space:
mode:
Diffstat (limited to 'youtube')
-rw-r--r-- youtube/comments.py 39
-rw-r--r-- youtube/util.py 13
-rw-r--r-- youtube/yt_data_extract/common.py 39
-rw-r--r-- youtube/yt_data_extract/everything_else.py 6
4 files changed, 66 insertions, 31 deletions
diff --git a/youtube/comments.py b/youtube/comments.py
index b23c079..54baf61 100644
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -47,25 +47,23 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
return base64.urlsafe_b64encode(result).decode('ascii')
-mobile_headers = {
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '2',
- 'X-YouTube-Client-Version': '2.20180823',
-}
-
-
def request_comments(ctoken, replies=False):
- base_url = 'https://m.youtube.com/watch_comment?'
- if replies:
- base_url += 'action_get_comment_replies=1&ctoken='
- else:
- base_url += 'action_get_comments=1&ctoken='
- url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
+ url = 'https://m.youtube.com/youtubei/v1/next'
+ url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+ data = json.dumps({
+ 'context': {
+ 'client': {
+ 'hl': 'en',
+ 'gl': 'US',
+ 'clientName': 'MWEB',
+ 'clientVersion': '2.20210804.02.00',
+ },
+ },
+ 'continuation': ctoken.replace('=', '%3D'),
+ })
content = util.fetch_url(
- url, headers=mobile_headers,
+ url, headers=util.mobile_xhr_headers + util.json_header, data=data,
report_text='Retrieved comments', debug_name='request_comments')
content = content.decode('utf-8')
@@ -178,10 +176,9 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
('Direct link', this_sort_url)
]
+ ctoken = make_comment_ctoken(video_id, sort, offset, lc)
comments_info.update(yt_data_extract.extract_comments_info(
- request_comments(
- make_comment_ctoken(video_id, sort, offset, lc, secret_key)
- )
+ request_comments(ctoken), ctoken=ctoken
))
post_process_comments_info(comments_info)
@@ -212,7 +209,9 @@ def get_comments_page():
ctoken = request.args.get('ctoken', '')
replies = request.args.get('replies', '0') == '1'
- comments_info = yt_data_extract.extract_comments_info(request_comments(ctoken, replies))
+ comments_info = yt_data_extract.extract_comments_info(
+ request_comments(ctoken, replies), ctoken=ctoken
+ )
post_process_comments_info(comments_info)
if not replies:
diff --git a/youtube/util.py b/youtube/util.py
index daec2df..462d371 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -387,6 +387,19 @@ mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M)
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
desktop_ua = (('User-Agent', desktop_user_agent),)
+json_header = (('Content-Type', 'application/json'),)
+desktop_xhr_headers = (
+ ('Accept', '*/*'),
+ ('Accept-Language', 'en-US,en;q=0.5'),
+ ('X-YouTube-Client-Name', '1'),
+ ('X-YouTube-Client-Version', '2.20180830'),
+) + desktop_ua
+mobile_xhr_headers = (
+ ('Accept', '*/*'),
+ ('Accept-Language', 'en-US,en;q=0.5'),
+ ('X-YouTube-Client-Name', '2'),
+ ('X-YouTube-Client-Version', '2.20180830'),
+) + mobile_ua
class RateLimitedQueue(gevent.queue.Queue):
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index e87808b..74ac1d6 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -478,6 +478,22 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
renderer = None
+
+def extract_items_from_renderer_list(renderers, item_types=_item_types):
+ '''Same as extract_items_from_renderer, but provide a list of renderers'''
+ items = []
+ ctoken = None
+ for renderer in renderers:
+ new_items, new_ctoken = extract_items_from_renderer(
+ renderer,
+ item_types=item_types)
+ items += new_items
+ # prioritize ctoken associated with items
+ if (not ctoken) or (new_ctoken and new_items):
+ ctoken = new_ctoken
+ return items, ctoken
+
+
def extract_items(response, item_types=_item_types,
search_engagement_panels=False):
'''return items, ctoken'''
@@ -495,6 +511,15 @@ def extract_items(response, item_types=_item_types,
item_types=item_types)
if items:
break
+ elif 'onResponseReceivedEndpoints' in response:
+ for endpoint in response.get('onResponseReceivedEndpoints', []):
+ items, ctoken = extract_items_from_renderer_list(
+ deep_get(endpoint, 'appendContinuationItemsAction',
+ 'continuationItems', default=[]),
+ item_types=item_types,
+ )
+ if items:
+ break
elif 'contents' in response:
renderer = get(response, 'contents', {})
items, ctoken = extract_items_from_renderer(
@@ -502,11 +527,11 @@ def extract_items(response, item_types=_item_types,
item_types=item_types)
if search_engagement_panels and 'engagementPanels' in response:
- for engagement_renderer in response['engagementPanels']:
- additional_items, cont = extract_items_from_renderer(
- engagement_renderer,
- item_types=item_types)
- items += additional_items
- if cont and not ctoken:
- ctoken = cont
+ new_items, new_ctoken = extract_items_from_renderer_list(
+ response['engagementPanels'], item_types=item_types
+ )
+ items += new_items
+ if (not ctoken) or (new_ctoken and new_items):
+ ctoken = new_ctoken
+
return items, ctoken
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 7275975..ba3cd77 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -222,15 +222,13 @@ def _ctoken_metadata(ctoken):
result['sort'] = 0
return result
-def extract_comments_info(polymer_json):
+def extract_comments_info(polymer_json, ctoken=None):
response, err = extract_response(polymer_json)
if err:
return {'error': err}
info = {'error': None}
- url = multi_deep_get(polymer_json, [1, 'url'], ['url'])
- if url:
- ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
+ if ctoken:
metadata = _ctoken_metadata(ctoken)
else:
metadata = {}