From b32330be4f15dd044e6212f526e52375f0a0f6c2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 21 Feb 2019 21:32:31 -0800 Subject: refactor common.py into 3 files --- youtube/comments.py | 59 +++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'youtube/comments.py') diff --git a/youtube/comments.py b/youtube/comments.py index 10209e7..4087b47 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -1,13 +1,14 @@ +from youtube import proto, util, html_common, yt_data_extract, accounts +import settings + import json -from youtube import proto, common, accounts import base64 -from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url from string import Template import urllib.request import urllib import html -import settings import re + comment_area_template = Template('''
$video-metadata @@ -130,7 +131,7 @@ def request_comments(ctoken, replies=False): url = base_url + ctoken.replace("=", "%3D") + "&pbj=1" for i in range(0,8): # don't retry more than 8 times - content = fetch_url(url, headers=mobile_headers, report_text="Retrieved comments") + content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments") if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason content = content[4:] elif content[0:10] == b'\nPost comment''' - other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) + other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) other_sort_name = 'newest' if sort == 0 else 'top' other_sort_link = '''Sort by ''' + other_sort_name + '''''' @@ -314,7 +315,7 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''): if ctoken == '': more_comments_button = '' else: - more_comments_button = more_comments_template.substitute(url = common.URL_ORIGIN + '/comments?ctoken=' + ctoken) + more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken) result = '''
\n''' result += comment_links + '\n' @@ -350,7 +351,7 @@ comment_box_template = Template(''' - Add account + Add account $video_id_input @@ -359,7 +360,7 @@ $options def get_comments_page(env, start_response): start_response('200 OK', [('Content-type','text/html'),] ) parameters = env['parameters'] - ctoken = default_multi_get(parameters, 'ctoken', 0, default='') + ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='') replies = False if not ctoken: video_id = parameters['video_id'][0] @@ -384,17 +385,17 @@ def get_comments_page(env, start_response): page_number = page_number, sort = 'top' if metadata['sort'] == 0 else 'newest', title = html.escape(comment_info['video_title']), - url = common.URL_ORIGIN + '/watch?v=' + metadata['video_id'], + url = util.URL_ORIGIN + '/watch?v=' + metadata['video_id'], thumbnail = '/i.ytimg.com/vi/'+ metadata['video_id'] + '/mqdefault.jpg', ) comment_box = comment_box_template.substitute( - form_action= common.URL_ORIGIN + '/post_comment', + form_action= util.URL_ORIGIN + '/post_comment', video_id_input='''''', post_text='Post comment', options=comment_box_account_options(), ) - other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort']) + other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort']) other_sort_name = 'newest' if metadata['sort'] == 0 else 'top' other_sort_link = '''Sort by ''' + other_sort_name + '''''' @@ -408,7 +409,7 @@ def get_comments_page(env, start_response): if ctoken == '': more_comments_button = '' else: - more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken) + more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken) comments_area = '
\n' comments_area += video_metadata + comment_box + comment_links + '\n' comments_area += '
\n' @@ -417,7 +418,7 @@ def get_comments_page(env, start_response): comments_area += more_comments_button + '\n' comments_area += '
\n' return yt_comments_template.substitute( - header = common.get_header(), + header = html_common.get_header(), comments_area = comments_area, page_title = page_title, ).encode('utf-8') -- cgit v1.2.3 From 9f93b9429c77e631972186049fbc7518e2cf5d4b Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 9 May 2019 23:07:43 -0700 Subject: Fix comment parsing error due to comments from deleted channels --- youtube/comments.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'youtube/comments.py') diff --git a/youtube/comments.py b/youtube/comments.py index 4087b47..94b086e 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -234,10 +234,7 @@ def parse_comments_polymer(content, replies=False): comment_raw = comment_raw['commentRenderer'] comment = { - 'author': yt_data_extract.get_plain_text(comment_raw['authorText']), - 'author_url': comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'], - 'author_channel_id': comment_raw['authorEndpoint']['browseEndpoint']['browseId'], - 'author_id': comment_raw['authorId'], + 'author_id': comment_raw.get('authorId', ''), 'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'], 'likes': comment_raw['likeCount'], 'published': yt_data_extract.get_plain_text(comment_raw['publishedTimeText']), @@ -247,6 +244,16 @@ def parse_comments_polymer(content, replies=False): 'video_id': video_id, 'comment_id': comment_raw['commentId'], } + + if 'authorText' in comment_raw: # deleted channels have no name or channel link + comment['author'] = yt_data_extract.get_plain_text(comment_raw['authorText']) + comment['author_url'] = comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'] + comment['author_channel_id'] = comment_raw['authorEndpoint']['browseEndpoint']['browseId'] + else: + comment['author'] = '' + comment['author_url'] = '' + comment['author_channel_id'] = '' + comments.append(comment) except Exception as e: print('Error parsing comments: ' + str(e)) -- cgit v1.2.3