diff options
| author | hasezoey <hasezoey@gmail.com> | 2023-05-29 12:52:01 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-29 10:52:01 +0000 | 
| commit | 489f51279d00318018478fd7461eddbe3b45297e (patch) | |
| tree | 9d2fb0234582a777749e5f8d277333b0266597c0 | |
| parent | 2d306c03d6f2697fcbabb7da35aa62cc078359d3 (diff) | |
| download | hypervideo-pre-489f51279d00318018478fd7461eddbe3b45297e.tar.lz hypervideo-pre-489f51279d00318018478fd7461eddbe3b45297e.tar.xz hypervideo-pre-489f51279d00318018478fd7461eddbe3b45297e.zip | |
[extractor/nekohacker] Add extractor (#7003)
Authored by: hasezoey
| -rw-r--r-- | yt_dlp/extractor/_extractors.py | 1 | ||||
| -rw-r--r-- | yt_dlp/extractor/nekohacker.py | 217 | 
2 files changed, 218 insertions, 0 deletions
```diff
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 31af6bd3f..f9fa84c43 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1206,6 +1206,7 @@ from .nebula import (
     NebulaSubscriptionsIE,
     NebulaChannelIE,
 )
+from .nekohacker import NekoHackerIE
 from .nerdcubed import NerdCubedFeedIE
 from .netzkino import NetzkinoIE
 from .neteasemusic import (
diff --git a/yt_dlp/extractor/nekohacker.py b/yt_dlp/extractor/nekohacker.py
new file mode 100644
index 000000000..e10ffe925
--- /dev/null
+++ b/yt_dlp/extractor/nekohacker.py
@@ -0,0 +1,217 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    extract_attributes,
+    get_element_by_class,
+    get_element_text_and_html_by_tag,
+    parse_duration,
+    traverse_obj,
+    try_call,
+    url_or_none,
+)
+
+
+class NekoHackerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)'
+    _TESTS = [{
+        'url': 'https://nekohacker.com/nekoverse/',
+        'info_dict': {
+            'id': 'nekoverse',
+            'title': 'Nekoverse',
+        },
+        'playlist': [
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3',
+                'md5': '44223701ebedba0467ebda4cc07fb3aa',
+                'info_dict': {
+                    'id': '1712',
+                    'ext': 'mp3',
+                    'title': 'Spaceship',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Spaceship',
+                    'track_number': 1,
+                    'duration': 195.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3',
+                'md5': '8f853c71719389d32bbbd3f1a87b3f08',
+                'info_dict': {
+                    'id': '1713',
+                    'ext': 'mp3',
+                    'title': 'City Runner',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'City Runner',
+                    'track_number': 2,
+                    'duration': 148.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3',
+                'md5': '5a8a8ae852720cee4c0ac95c7d1a7450',
+                'info_dict': {
+                    'id': '1714',
+                    'ext': 'mp3',
+                    'title': 'Nature Talk',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Nature Talk',
+                    'track_number': 3,
+                    'duration': 174.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3',
+                'md5': 'd8e59a48061764e50d92386a294abd50',
+                'info_dict': {
+                    'id': '1715',
+                    'ext': 'mp3',
+                    'title': 'Crystal World',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Crystal World',
+                    'track_number': 4,
+                    'duration': 199.0
+                }
+            }
+        ]
+    }, {
+        'url': 'https://nekohacker.com/susume/',
+        'info_dict': {
+            'id': 'susume',
+            'title': '進め!むじなカンパニー',
+        },
+        'playlist': [
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3',
+                'md5': 'fb13f008aa81f26ba48f91fd2d6186ce',
+                'info_dict': {
+                    'id': '711',
+                    'ext': 'mp3',
+                    'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+                    'track_number': 1,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3',
+                'md5': '028803f70241df512b7764e73396fdd1',
+                'info_dict': {
+                    'id': '709',
+                    'ext': 'mp3',
+                    'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+                    'track_number': 2,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3',
+                'md5': 'adde9e9a16e1da5e602b579c247d0fb9',
+                'info_dict': {
+                    'id': '710',
+                    'ext': 'mp3',
+                    'title': '進め!むじなカンパニー (instrumental)',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': '進め!むじなカンパニー (instrumental)',
+                    'track_number': 3,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3',
+                'md5': 'ebb0443039cf5f9ff7fd557ed9b23599',
+                'info_dict': {
+                    'id': '712',
+                    'ext': 'mp3',
+                    'title': 'むじな de なじむ (instrumental)',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'むじな de なじむ (instrumental)',
+                    'track_number': 4,
+                    'duration': None
+                }
+            }
+        ]
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+        playlist = get_element_by_class('playlist', webpage)
+
+        if not playlist:
+            iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
+            iframe_src = url_or_none(extract_attributes(iframe).get('src'))
+            if not iframe_src:
+                raise ExtractorError('No playlist or embed found in webpage')
+            elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
+                raise ExtractorError('Spotify embeds are not supported', expected=True)
+            return self.url_result(url, 'Generic')
+
+        entries = []
+        for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
+            entry = traverse_obj(extract_attributes(track), {
+                'url': ('data-audiopath', {url_or_none}),
+                'ext': ('data-audiopath', {determine_ext}),
+                'id': 'data-trackid',
+                'title': 'data-tracktitle',
+                'track': 'data-tracktitle',
+                'album': 'data-albumtitle',
+                'duration': ('data-tracktime', {parse_duration}),
+                'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
+                'thumbnail': ('data-albumart', {url_or_none}),
+            })
+            entries.append({
+                **entry,
+                'track_number': track_number,
+                'artist': 'Neko Hacker',
+                'vcodec': 'none',
+                'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
+            })
+
+        return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album')))
```
