about | summary | refs | log | tree | commit | diff | stats
path: root/hypervideo_dl/extractor/eroprofile.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/eroprofile.py')
-rw-r--r-- hypervideo_dl/extractor/eroprofile.py 39
1 files changed, 39 insertions, 0 deletions
diff --git a/hypervideo_dl/extractor/eroprofile.py b/hypervideo_dl/extractor/eroprofile.py
index c460dc7..a8396f1 100644
--- a/hypervideo_dl/extractor/eroprofile.py
+++ b/hypervideo_dl/extractor/eroprofile.py
@@ -90,3 +90,42 @@ class EroProfileIE(InfoExtractor):
'title': title,
'age_limit': 18,
})
+
+
+class EroProfileAlbumIE(InfoExtractor):
+    """Extractor for EroProfile video albums, returned as a playlist.
+
+    The first album page is downloaded, every further page advertised by
+    its ``?pnum=N`` pagination links is fetched in turn, and each
+    ``/m/videos/view/...`` link found is handed over to ``EroProfileIE``.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
+    IE_NAME = 'EroProfile:album'
+
+    _TESTS = [{
+        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
+        'info_dict': {
+            'id': 'BBW-2-893',
+            'title': 'BBW 2',
+        },
+        'playlist_mincount': 486,
+    }]
+
+    def _extract_from_page(self, page):
+        """Yield one ``url_result`` per video link found in *page*.
+
+        *page* is the HTML text of an album page. Links are emitted in
+        document order and are not de-duplicated.
+        """
+        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
+            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
+
+    def _entries(self, playlist_id, first_page):
+        """Yield the entries of every page of the album, lazily.
+
+        *first_page* is the already downloaded HTML of the first page; the
+        remaining pages (2 up to the highest ``pnum`` linked from it) are
+        downloaded only as the generator is consumed.
+        """
+        yield from self._extract_from_page(first_page)
+
+        # The id comes straight from the URL, so escape it before embedding
+        # it in a pattern; otherwise regex metacharacters in the id would be
+        # interpreted instead of matched literally.
+        page_numbers = re.findall(
+            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"', first_page)
+        # A page without any pagination links yields no matches; default to
+        # 1 so that max() does not raise ValueError on the empty sequence.
+        max_page = max(map(int, page_numbers), default=1)
+
+        for n in range(2, max_page + 1):
+            url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
+            # NOTE(review): the progress label is one less than the pnum
+            # being requested; kept exactly as the original emitted it.
+            yield from self._extract_from_page(
+                self._download_webpage(url, playlist_id,
+                                       note=f'Downloading playlist page {n - 1}'))
+
+    def _real_extract(self, url):
+        """Download the album page at *url* and return a playlist result.
+
+        The playlist id is the ``id`` group of ``_VALID_URL`` and the title
+        is taken from the page's ``<title>`` element.
+        """
+        playlist_id = self._match_id(url)
+        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
+        playlist_title = self._search_regex(
+            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
+
+        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)