aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r--yt_dlp/extractor/bilibili.py15
-rw-r--r--yt_dlp/extractor/common.py8
-rw-r--r--yt_dlp/extractor/youtube.py16
3 files changed, 27 insertions, 12 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index be117a2bb..764ac4d3c 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -255,10 +255,6 @@ class BiliBiliIE(InfoExtractor):
info['uploader'] = self._html_search_meta(
'author', webpage, 'uploader', default=None)
- comments = None
- if self._downloader.params.get('getcomments', False):
- comments = self._get_all_comment_pages(video_id)
-
raw_danmaku = self._get_raw_danmaku(video_id, cid)
raw_tags = self._get_tags(video_id)
@@ -266,11 +262,18 @@ class BiliBiliIE(InfoExtractor):
top_level_info = {
'raw_danmaku': raw_danmaku,
- 'comments': comments,
- 'comment_count': len(comments) if comments is not None else None,
'tags': tags,
'raw_tags': raw_tags,
}
+ if self._downloader.params.get('getcomments', False):
+ def get_comments():
+ comments = self._get_all_comment_pages(video_id)
+ return {
+ 'comments': comments,
+ 'comment_count': len(comments)
+ }
+
+ top_level_info['__post_extractor'] = get_comments
'''
# Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 47b91a00a..3326d436b 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -294,6 +294,14 @@ class InfoExtractor(object):
players on other sites. Can be True (=always allowed),
False (=never allowed), None (=unknown), or a string
specifying the criteria for embedability (Eg: 'whitelist').
+ __post_extractor: A function to be called just before the metadata is
+ written to either disk, logger or console. The function
+ must return a dict which will be added to the info_dict.
+ This is useful for additional information that is
+ time-consuming to extract. Note that the fields thus
+ extracted will not be available to output template and
+ match_filter. So, only "comments" and "comment_count" are
+ currently allowed to be extracted via this method.
The following fields should only be used when the video belongs to some logical
chapter or section:
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 41b894776..804186b85 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2012,9 +2012,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get comments
# TODO: Refactor and move to seperate function
- if get_comments:
+ def extract_comments():
expected_video_comment_count = 0
video_comments = []
+ comment_xsrf = xsrf_token
def find_value(html, key, num_chars=2, separator='"'):
pos_begin = html.find(key) + len(key) + num_chars
@@ -2083,7 +2084,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.to_screen('Downloading comments')
while continuations:
continuation = continuations.pop()
- comment_response = get_continuation(continuation, xsrf_token)
+ comment_response = get_continuation(continuation, comment_xsrf)
if not comment_response:
continue
if list(search_dict(comment_response, 'externalErrorMessage')):
@@ -2094,7 +2095,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
# not sure if this actually helps
if 'xsrf_token' in comment_response:
- xsrf_token = comment_response['xsrf_token']
+ comment_xsrf = comment_response['xsrf_token']
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
if first_continuation:
@@ -2123,7 +2124,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
while reply_continuations:
time.sleep(1)
continuation = reply_continuations.pop()
- replies_data = get_continuation(continuation, xsrf_token, True)
+ replies_data = get_continuation(continuation, comment_xsrf, True)
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
continue
@@ -2152,10 +2153,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
time.sleep(1)
self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
- info.update({
+ return {
'comments': video_comments,
'comment_count': expected_video_comment_count
- })
+ }
+
+ if get_comments:
+ info['__post_extractor'] = extract_comments
self.mark_watched(video_id, player_response)