diff options
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- | yt_dlp/extractor/bilibili.py | 15 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 16 |
3 files changed, 27 insertions, 12 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index be117a2bb..764ac4d3c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -255,10 +255,6 @@ class BiliBiliIE(InfoExtractor): info['uploader'] = self._html_search_meta( 'author', webpage, 'uploader', default=None) - comments = None - if self._downloader.params.get('getcomments', False): - comments = self._get_all_comment_pages(video_id) - raw_danmaku = self._get_raw_danmaku(video_id, cid) raw_tags = self._get_tags(video_id) @@ -266,11 +262,18 @@ class BiliBiliIE(InfoExtractor): top_level_info = { 'raw_danmaku': raw_danmaku, - 'comments': comments, - 'comment_count': len(comments) if comments is not None else None, 'tags': tags, 'raw_tags': raw_tags, } + if self._downloader.params.get('getcomments', False): + def get_comments(): + comments = self._get_all_comment_pages(video_id) + return { + 'comments': comments, + 'comment_count': len(comments) + } + + top_level_info['__post_extractor'] = get_comments ''' # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3 diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 47b91a00a..3326d436b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -294,6 +294,14 @@ class InfoExtractor(object): players on other sites. Can be True (=always allowed), False (=never allowed), None (=unknown), or a string specifying the criteria for embedability (Eg: 'whitelist'). + __post_extractor: A function to be called just before the metadata is + written to either disk, logger or console. The function + must return a dict which will be added to the info_dict. + This is useful for additional information that is + time-consuming to extract. Note that the fields thus + extracted will not be available to output template and + match_filter. So, only "comments" and "comment_count" are + currently allowed to be extracted via this method. 
The following fields should only be used when the video belongs to some logical chapter or section: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 41b894776..804186b85 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2012,9 +2012,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Get comments # TODO: Refactor and move to seperate function - if get_comments: + def extract_comments(): expected_video_comment_count = 0 video_comments = [] + comment_xsrf = xsrf_token def find_value(html, key, num_chars=2, separator='"'): pos_begin = html.find(key) + len(key) + num_chars @@ -2083,7 +2084,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen('Downloading comments') while continuations: continuation = continuations.pop() - comment_response = get_continuation(continuation, xsrf_token) + comment_response = get_continuation(continuation, comment_xsrf) if not comment_response: continue if list(search_dict(comment_response, 'externalErrorMessage')): @@ -2094,7 +2095,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue # not sure if this actually helps if 'xsrf_token' in comment_response: - xsrf_token = comment_response['xsrf_token'] + comment_xsrf = comment_response['xsrf_token'] item_section = comment_response['response']['continuationContents']['itemSectionContinuation'] if first_continuation: @@ -2123,7 +2124,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): while reply_continuations: time.sleep(1) continuation = reply_continuations.pop() - replies_data = get_continuation(continuation, xsrf_token, True) + replies_data = get_continuation(continuation, comment_xsrf, True) if not replies_data or 'continuationContents' not in replies_data[1]['response']: continue @@ -2152,10 +2153,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): time.sleep(1) self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count)) - info.update({ + return { 'comments': video_comments, 
'comment_count': expected_video_comment_count - }) + } + + if get_comments: + info['__post_extractor'] = extract_comments self.mark_watched(video_id, player_response) |