diff options
Diffstat (limited to 'hypervideo_dl/extractor/pornhub.py')
-rw-r--r-- | hypervideo_dl/extractor/pornhub.py | 47 |
1 files changed, 23 insertions, 24 deletions
diff --git a/hypervideo_dl/extractor/pornhub.py b/hypervideo_dl/extractor/pornhub.py index 17c8c91..5d8d7c1 100644 --- a/hypervideo_dl/extractor/pornhub.py +++ b/hypervideo_dl/extractor/pornhub.py @@ -1,33 +1,28 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import itertools import math import operator import re +import urllib.request from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_request, -) from .openload import PhantomJSwrapper +from ..compat import compat_HTTPError, compat_str from ..utils import ( + NO_DEFAULT, + ExtractorError, clean_html, determine_ext, - ExtractorError, format_field, int_or_none, merge_dicts, - NO_DEFAULT, orderedSet, remove_quotes, + remove_start, str_to_int, update_url_query, - urlencode_postdata, url_or_none, + urlencode_postdata, ) @@ -52,7 +47,7 @@ class PornHubBaseIE(InfoExtractor): r'document\.location\.reload\(true\)')): url_or_request = args[0] url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) + if isinstance(url_or_request, urllib.request.Request) else url_or_request) phantom = PhantomJSwrapper(self, required_version='2.0') phantom.get(url, html=webpage) @@ -133,6 +128,7 @@ class PornHubIE(PornHubBaseIE): ) (?P<id>[\da-z]+) ''' % PornHubBaseIE._PORNHUB_HOST_RE + _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)'] _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': 'a6391306d050e4547f62b3f485dd9ba9', @@ -202,6 +198,16 @@ class PornHubIE(PornHubBaseIE): }, 'skip': 'This video has been disabled', }, { + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a', + 'info_dict': { + 'id': 'ph601dc30bae19a', + 'uploader': 'Projekt Melody', + 'uploader_id': 'projekt-melody', + 'upload_date': '20210205', + 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)', + 'thumbnail': r're:https?://.+', + }, + }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, }, { @@ -252,12 +258,6 @@ class PornHubIE(PornHubBaseIE): 'only_matching': True, }] - @staticmethod - def _extract_urls(webpage): - return re.findall( - r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)', - webpage) - def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None)) @@ -432,7 +432,7 @@ class PornHubIE(PornHubBaseIE): default=None)) formats.append({ 'url': format_url, - 'format_id': format_field(height, template='%dp'), + 'format_id': format_field(height, None, '%dp'), 'height': height, }) @@ -456,13 +456,11 @@ class PornHubIE(PornHubBaseIE): continue add_format(video_url) - # field_preference is unnecessary here, but kept for code-similarity with youtube-dl - self._sort_formats( - formats, field_preference=('height', 'width', 'fps', 'format_id')) - + model_profile = self._search_json( + r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False) video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', default=None) + webpage, 'uploader', default=None) or model_profile.get('username') def extract_vote_count(kind, name): return self._extract_count( @@ -491,6 +489,7 @@ class PornHubIE(PornHubBaseIE): return merge_dicts({ 'id': video_id, 'uploader': video_uploader, + 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'), 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, |