diff options
| author | Lesmiscore <nao20010128@gmail.com> | 2023-05-29 18:35:10 +0900 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-29 18:35:10 +0900 | 
| commit | f8f9250fe280d37f0988646cd5cc0072f4d33a6d (patch) | |
| tree | d0f9c9f42f14f4b6c1d9f368a8b6702f679564bc | |
| parent | 3459d3c5af3b2572ed51e8ecfda6c11022a838c6 (diff) | |
| download | hypervideo-pre-f8f9250fe280d37f0988646cd5cc0072f4d33a6d.tar.lz hypervideo-pre-f8f9250fe280d37f0988646cd5cc0072f4d33a6d.tar.xz hypervideo-pre-f8f9250fe280d37f0988646cd5cc0072f4d33a6d.zip | |
[extractor/niconico:live] Add extractor (#5764)
Authored by: Lesmiscore
| -rw-r--r-- | yt_dlp/downloader/__init__.py | 3 | ||||
| -rw-r--r-- | yt_dlp/downloader/niconico.py | 101 | ||||
| -rw-r--r-- | yt_dlp/extractor/_extractors.py | 1 | ||||
| -rw-r--r-- | yt_dlp/extractor/niconico.py | 163 | 
4 files changed, 266 insertions, 2 deletions
| diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index c34dbcea9..51a9f28f0 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -30,7 +30,7 @@ from .hls import HlsFD  from .http import HttpFD  from .ism import IsmFD  from .mhtml import MhtmlFD -from .niconico import NiconicoDmcFD +from .niconico import NiconicoDmcFD, NiconicoLiveFD  from .rtmp import RtmpFD  from .rtsp import RtspFD  from .websocket import WebSocketFragmentFD @@ -50,6 +50,7 @@ PROTOCOL_MAP = {      'ism': IsmFD,      'mhtml': MhtmlFD,      'niconico_dmc': NiconicoDmcFD, +    'niconico_live': NiconicoLiveFD,      'fc2_live': FC2LiveFD,      'websocket_frag': WebSocketFragmentFD,      'youtube_live_chat': YoutubeLiveChatFD, diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 77ed39e5b..cfe739784 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -1,8 +1,17 @@ +import json  import threading +import time  from . import get_suitable_downloader  from .common import FileDownloader -from ..utils import sanitized_Request +from .external import FFmpegFD +from ..utils import ( +    DownloadError, +    str_or_none, +    sanitized_Request, +    WebSocketsWrapper, +    try_get, +)  class NiconicoDmcFD(FileDownloader): @@ -50,3 +59,93 @@ class NiconicoDmcFD(FileDownloader):                      timer[0].cancel()                      download_complete = True          return success + + +class NiconicoLiveFD(FileDownloader): +    """ Downloads niconico live without being stopped """ + +    def real_download(self, filename, info_dict): +        video_id = info_dict['video_id'] +        ws_url = info_dict['url'] +        ws_extractor = info_dict['ws'] +        ws_origin_host = info_dict['origin'] +        cookies = info_dict.get('cookies') +        live_quality = info_dict.get('live_quality', 'high') +        live_latency = info_dict.get('live_latency', 'high') +        dl = FFmpegFD(self.ydl, self.params or {}) + +        new_info_dict = info_dict.copy() +        new_info_dict.update({ +            'protocol': 'm3u8', +        }) + +        def communicate_ws(reconnect): +            if reconnect: +                ws = WebSocketsWrapper(ws_url, { +                    'Cookies': str_or_none(cookies) or '', +                    'Origin': f'https://{ws_origin_host}', +                    'Accept': '*/*', +                    'User-Agent': self.params['http_headers']['User-Agent'], +                }) +                if self.ydl.params.get('verbose', False): +                    self.to_screen('[debug] Sending startWatching request') +                ws.send(json.dumps({ +                    'type': 'startWatching', +                    'data': { +                        'stream': { +                            'quality': live_quality, +                            'protocol': 'hls+fmp4', +                            'latency': live_latency, +                            'chasePlay': False +                        }, +                        'room': { +                            'protocol': 'webSocket', +                            'commentable': True +                        }, +                        'reconnect': True, +                    } +                })) +            else: +                ws = ws_extractor +            with ws: +                while True: +                    recv = ws.recv() +                    if not recv: +                        continue +                    data = json.loads(recv) +                    if not data or not isinstance(data, dict): +                        continue +                    if data.get('type') == 'ping': +                        # pong back +                        ws.send(r'{"type":"pong"}') +                        ws.send(r'{"type":"keepSeat"}') +                    elif data.get('type') == 'disconnect': +                        self.write_debug(data) +                        return True +                    elif data.get('type') == 'error': +                        self.write_debug(data) +                        message = try_get(data, lambda x: x['body']['code'], str) or recv +                        return DownloadError(message) +                    elif self.ydl.params.get('verbose', False): +                        if len(recv) > 100: +                            recv = recv[:100] + '...' +                        self.to_screen('[debug] Server said: %s' % recv) + +        def ws_main(): +            reconnect = False +            while True: +                try: +                    ret = communicate_ws(reconnect) +                    if ret is True: +                        return +                except BaseException as e: +                    self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) +                    time.sleep(10) +                    continue +                finally: +                    reconnect = True + +        thread = threading.Thread(target=ws_main, daemon=True) +        thread.start() + +        return dl.download(filename, new_info_dict) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index d560ed91c..07249bba6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1275,6 +1275,7 @@ from .niconico import (      NicovideoSearchIE,      NicovideoSearchURLIE,      NicovideoTagURLIE, +    NiconicoLiveIE,  )  from .ninecninemedia import (      NineCNineMediaIE, diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 30b4d7216..89e8e6093 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -5,13 +5,17 @@ import json  import re  import time +from urllib.parse import urlparse +  from .common import InfoExtractor, SearchInfoExtractor  from ..compat import (      compat_HTTPError,  ) +from ..dependencies import websockets  from ..utils import (      ExtractorError,      OnDemandPagedList, +    WebSocketsWrapper,      bug_reports_message,      clean_html,      float_or_none, @@ -895,3 +899,162 @@ class NiconicoUserIE(InfoExtractor):      def _real_extract(self, url):          list_id = self._match_id(url)          return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key()) + + +class NiconicoLiveIE(InfoExtractor): +    IE_NAME = 'niconico:live' +    IE_DESC = 'ニコニコ生放送' +    _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)' +    _TESTS = [{ +        'note': 'this test case includes invisible characters for title, pasting them as-is', +        'url': 'https://live.nicovideo.jp/watch/lv339533123', +        'info_dict': { +            'id': 'lv339533123', +            'title': '激辛ペヤング食べます( ;ᯅ; )(歌枠オーディション参加中)', +            'view_count': 1526, +            'comment_count': 1772, +            'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます', +            'uploader': 'もか', +            'channel': 'ゲストさんのコミュニティ', +            'channel_id': 'co5776900', +            'channel_url': 'https://com.nicovideo.jp/community/co5776900', +            'timestamp': 1670677328, +            'is_live': True, +        }, +        'skip': 'livestream', +    }, { +        'url': 'https://live2.nicovideo.jp/watch/lv339533123', +        'only_matching': True, +    }, { +        'url': 'https://sp.live.nicovideo.jp/watch/lv339533123', +        'only_matching': True, +    }, { +        'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123', +        'only_matching': True, +    }] + +    _KNOWN_LATENCY = ('high', 'low') + +    def _real_extract(self, url): +        if not websockets: +            raise ExtractorError('websockets library is not available. Please install it.', expected=True) +        video_id = self._match_id(url) +        webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id) + +        embedded_data = self._parse_json(unescapeHTML(self._search_regex( +            r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id) + +        ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl')) +        if not ws_url: +            raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True) +        ws_url = update_url_query(ws_url, { +            'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9', +        }) + +        hostname = remove_start(urlparse(urlh.geturl()).hostname, 'sp.') +        cookies = try_get(urlh.geturl(), self._downloader._calc_cookies) +        latency = try_get(self._configuration_arg('latency'), lambda x: x[0]) +        if latency not in self._KNOWN_LATENCY: +            latency = 'high' + +        ws = WebSocketsWrapper(ws_url, { +            'Cookies': str_or_none(cookies) or '', +            'Origin': f'https://{hostname}', +            'Accept': '*/*', +            'User-Agent': self.get_param('http_headers')['User-Agent'], +        }) + +        self.write_debug('[debug] Sending HLS server request') +        ws.send(json.dumps({ +            'type': 'startWatching', +            'data': { +                'stream': { +                    'quality': 'abr', +                    'protocol': 'hls+fmp4', +                    'latency': latency, +                    'chasePlay': False +                }, +                'room': { +                    'protocol': 'webSocket', +                    'commentable': True +                }, +                'reconnect': False, +            } +        })) + +        while True: +            recv = ws.recv() +            if not recv: +                continue +            data = json.loads(recv) +            if not isinstance(data, dict): +                continue +            if data.get('type') == 'stream': +                m3u8_url = data['data']['uri'] +                qualities = data['data']['availableQualities'] +                break +            elif data.get('type') == 'disconnect': +                self.write_debug(recv) +                raise ExtractorError('Disconnected at middle of extraction') +            elif data.get('type') == 'error': +                self.write_debug(recv) +                message = traverse_obj(data, ('body', 'code')) or recv +                raise ExtractorError(message) +            elif self.get_param('verbose', False): +                if len(recv) > 100: +                    recv = recv[:100] + '...' +                self.write_debug('Server said: %s' % recv) + +        title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta( +            ('og:title', 'twitter:title'), webpage, 'live title', fatal=False) + +        raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {} +        thumbnails = [] +        for name, value in raw_thumbs.items(): +            if not isinstance(value, dict): +                thumbnails.append({ +                    'id': name, +                    'url': value, +                    **parse_resolution(value, lenient=True), +                }) +                continue + +            for k, img_url in value.items(): +                res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True) +                width, height = res.get('width'), res.get('height') + +                thumbnails.append({ +                    'id': f'{name}_{width}x{height}', +                    'url': img_url, +                    **res, +                }) + +        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) +        for fmt, q in zip(formats, reversed(qualities[1:])): +            fmt.update({ +                'format_id': q, +                'protocol': 'niconico_live', +                'ws': ws, +                'video_id': video_id, +                'cookies': cookies, +                'live_latency': latency, +                'origin': hostname, +            }) + +        return { +            'id': video_id, +            'title': title, +            **traverse_obj(embedded_data, { +                'view_count': ('program', 'statistics', 'watchCount'), +                'comment_count': ('program', 'statistics', 'commentCount'), +                'uploader': ('program', 'supplier', 'name'), +                'channel': ('socialGroup', 'name'), +                'channel_id': ('socialGroup', 'id'), +                'channel_url': ('socialGroup', 'socialGroupPageUrl'), +            }), +            'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))), +            'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))), +            'is_live': True, +            'thumbnails': thumbnails, +            'formats': formats, +        } | 
