diff options
Diffstat (limited to 'yt_dlp')
| -rw-r--r-- | yt_dlp/downloader/__init__.py | 5 |
| -rw-r--r-- | yt_dlp/downloader/youtube_live_chat.py | 136 |
| -rw-r--r-- | yt_dlp/extractor/youtube.py | 23 |
3 files changed, 123 insertions, 41 deletions
| diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index e469b512d..6769cf8e6 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -25,7 +25,7 @@ from .ism import IsmFD  from .mhtml import MhtmlFD  from .niconico import NiconicoDmcFD  from .websocket import WebSocketFragmentFD -from .youtube_live_chat import YoutubeLiveChatReplayFD +from .youtube_live_chat import YoutubeLiveChatFD  from .external import (      get_external_downloader,      FFmpegFD, @@ -44,7 +44,8 @@ PROTOCOL_MAP = {      'mhtml': MhtmlFD,      'niconico_dmc': NiconicoDmcFD,      'websocket_frag': WebSocketFragmentFD, -    'youtube_live_chat_replay': YoutubeLiveChatReplayFD, +    'youtube_live_chat': YoutubeLiveChatFD, +    'youtube_live_chat_replay': YoutubeLiveChatFD,  } diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index a6c13335e..f30dcb6bf 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -1,20 +1,23 @@  from __future__ import division, unicode_literals  import json +import time  from .fragment import FragmentFD  from ..compat import compat_urllib_error  from ..utils import (      try_get, +    dict_get, +    int_or_none,      RegexNotFoundError,  )  from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE -class YoutubeLiveChatReplayFD(FragmentFD): -    """ Downloads YouTube live chat replays fragment by fragment """ +class YoutubeLiveChatFD(FragmentFD): +    """ Downloads YouTube live chats fragment by fragment """ -    FD_NAME = 'youtube_live_chat_replay' +    FD_NAME = 'youtube_live_chat'      def real_download(self, filename, info_dict):          video_id = info_dict['video_id'] @@ -31,6 +34,8 @@ class YoutubeLiveChatReplayFD(FragmentFD):          ie = YT_BaseIE(self.ydl) +        start_time = int(time.time() * 1000) +          def dl_fragment(url, data=None, headers=None):              http_headers = 
info_dict.get('http_headers', {})              if headers: @@ -38,36 +43,70 @@ class YoutubeLiveChatReplayFD(FragmentFD):                  http_headers.update(headers)              return self._download_fragment(ctx, url, info_dict, http_headers, data) -        def download_and_parse_fragment(url, frag_index, request_data): +        def parse_actions_replay(live_chat_continuation): +            offset = continuation_id = None +            processed_fragment = bytearray() +            for action in live_chat_continuation.get('actions', []): +                if 'replayChatItemAction' in action: +                    replay_chat_item_action = action['replayChatItemAction'] +                    offset = int(replay_chat_item_action['videoOffsetTimeMsec']) +                processed_fragment.extend( +                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') +            if offset is not None: +                continuation_id = try_get( +                    live_chat_continuation, +                    lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation']) +            self._append_fragment(ctx, processed_fragment) +            return continuation_id, offset + +        live_offset = 0 + +        def parse_actions_live(live_chat_continuation): +            nonlocal live_offset +            continuation_id = None +            processed_fragment = bytearray() +            for action in live_chat_continuation.get('actions', []): +                timestamp = self.parse_live_timestamp(action) +                if timestamp is not None: +                    live_offset = timestamp - start_time +                # compatibility with replay format +                pseudo_action = { +                    'replayChatItemAction': {'actions': [action]}, +                    'videoOffsetTimeMsec': str(live_offset), +                    'isLive': True, +                } +                processed_fragment.extend( +                    
json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') +            continuation_data_getters = [ +                lambda x: x['continuations'][0]['invalidationContinuationData'], +                lambda x: x['continuations'][0]['timedContinuationData'], +            ] +            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict) +            if continuation_data: +                continuation_id = continuation_data.get('continuation') +                timeout_ms = int_or_none(continuation_data.get('timeoutMs')) +                if timeout_ms is not None: +                    time.sleep(timeout_ms / 1000) +            self._append_fragment(ctx, processed_fragment) +            return continuation_id, live_offset + +        if info_dict['protocol'] == 'youtube_live_chat_replay': +            parse_actions = parse_actions_replay +        elif info_dict['protocol'] == 'youtube_live_chat': +            parse_actions = parse_actions_live + +        def download_and_parse_fragment(url, frag_index, request_data, headers):              count = 0              while count <= fragment_retries:                  try: -                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'}) +                    success, raw_fragment = dl_fragment(url, request_data, headers)                      if not success:                          return False, None, None -                    try: -                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) -                    except RegexNotFoundError: -                        data = None -                    if not data: -                        data = json.loads(raw_fragment) +                    data = json.loads(raw_fragment)                      live_chat_continuation = try_get(                          data,                          lambda x: x['continuationContents']['liveChatContinuation'], 
dict) or {} -                    offset = continuation_id = None -                    processed_fragment = bytearray() -                    for action in live_chat_continuation.get('actions', []): -                        if 'replayChatItemAction' in action: -                            replay_chat_item_action = action['replayChatItemAction'] -                            offset = int(replay_chat_item_action['videoOffsetTimeMsec']) -                        processed_fragment.extend( -                            json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') -                    if offset is not None: -                        continuation_id = try_get( -                            live_chat_continuation, -                            lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation']) -                    self._append_fragment(ctx, processed_fragment) - +                    continuation_id, offset = parse_actions(live_chat_continuation)                      return True, continuation_id, offset                  except compat_urllib_error.HTTPError as err:                      count += 1 @@ -100,7 +139,11 @@ class YoutubeLiveChatReplayFD(FragmentFD):          innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])          if not api_key or not innertube_context:              return False -        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key +        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) +        if info_dict['protocol'] == 'youtube_live_chat_replay': +            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key +        elif info_dict['protocol'] == 'youtube_live_chat': +            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key          frag_index = offset = 0          while continuation_id is not None: @@ -111,8 +154,11 @@ class 
YoutubeLiveChatReplayFD(FragmentFD):              }              if frag_index > 1:                  request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} +            headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data) +            headers.update({'content-type': 'application/json'}) +            fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'              success, continuation_id, offset = download_and_parse_fragment( -                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n') +                url, frag_index, fragment_request_data, headers)              if not success:                  return False              if test: @@ -120,3 +166,39 @@ class YoutubeLiveChatReplayFD(FragmentFD):          self._finish_frag_download(ctx)          return True + +    @staticmethod +    def parse_live_timestamp(action): +        action_content = dict_get( +            action, +            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) +        if not isinstance(action_content, dict): +            return None +        item = dict_get(action_content, ['item', 'bannerRenderer']) +        if not isinstance(item, dict): +            return None +        renderer = dict_get(item, [ +            # text +            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', +            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', +            # ticker +            'liveChatTickerPaidMessageItemRenderer', +            'liveChatTickerSponsorItemRenderer', +            # banner +            'liveChatBannerRenderer', +        ]) +        if not isinstance(renderer, dict): +            return None +        parent_item_getters = [ +            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], +            lambda x: x['contents'], +        ] +        parent_item = 
try_get(renderer, parent_item_getters, dict) +        if parent_item: +            renderer = dict_get(parent_item, [ +                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', +                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', +            ]) +            if not isinstance(renderer, dict): +                return None +        return int_or_none(renderer.get('timestampUsec'), 1000) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 375fc1909..ad2cdb052 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2339,18 +2339,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              initial_data = self._call_api(                  'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg)) -        if not is_live: -            try: -                # This will error if there is no livechat -                initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] -                info['subtitles']['live_chat'] = [{ -                    'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies -                    'video_id': video_id, -                    'ext': 'json', -                    'protocol': 'youtube_live_chat_replay', -                }] -            except (KeyError, IndexError, TypeError): -                pass +        try: +            # This will error if there is no livechat +            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] +            info['subtitles']['live_chat'] = [{ +                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies +                'video_id': video_id, +                'ext': 'json', +                'protocol': 
'youtube_live_chat' if is_live else 'youtube_live_chat_replay', +            }] +        except (KeyError, IndexError, TypeError): +            pass          if initial_data:              chapters = self._extract_chapters_from_json( | 
