diff options
Diffstat (limited to 'hypervideo_dl/extractor/eroprofile.py')
-rw-r--r-- | hypervideo_dl/extractor/eroprofile.py | 39 |
1 files changed, 39 insertions, 0 deletions
class EroProfileAlbumIE(InfoExtractor):
    """Extract every video of an EroProfile album as a playlist.

    The album URL is fetched once; video links are scraped from it and from
    each further page referenced by the ``?pnum=N`` pagination links found
    on that first page.
    """

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2'
        },
        'playlist_mincount': 486,
    },
    ]

    def _extract_from_page(self, page):
        """Yield one URL result per ``/m/videos/view/...`` link found in *page*."""
        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield the entries of the first page, then of every following page.

        The number of pages is the highest ``pnum`` value linked from
        *first_page*; an album without pagination links is treated as a
        single page.
        """
        yield from self._extract_from_page(first_page)

        # The album id comes straight from the URL, so escape it before
        # embedding it in a pattern.
        page_numbers = re.findall(
            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"', first_page)
        # default=1: a single-page album has no pagination links, and max()
        # of an empty sequence would otherwise raise ValueError.
        max_page = max(map(int, page_numbers), default=1)

        for n in range(2, max_page + 1):
            url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
            yield from self._extract_from_page(
                self._download_webpage(url, playlist_id,
                                       note=f'Downloading playlist page {n - 1}'))

    def _real_extract(self, url):
        """Download the album page and return a playlist result for it."""
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)