diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-06-13 21:36:47 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-06-13 21:38:08 +0530 |
commit | d0fb4bd16f191445ab577ae23be57fc55242a108 (patch) | |
tree | faf2e09bf630a3840aba73e564fe03eb11330a7a | |
parent | 3fd4c2a543934501253dc5ffaab30ed3b89282a0 (diff) | |
download | hypervideo-pre-d0fb4bd16f191445ab577ae23be57fc55242a108.tar.lz hypervideo-pre-d0fb4bd16f191445ab577ae23be57fc55242a108.tar.xz hypervideo-pre-d0fb4bd16f191445ab577ae23be57fc55242a108.zip |
[pornhub] Extract `cast`
Closes #406, https://github.com/ytdl-org/youtube-dl/pull/27384
-rw-r--r-- | yt_dlp/extractor/common.py | 1 | ||||
-rw-r--r-- | yt_dlp/extractor/pornhub.py | 5 |
2 files changed, 5 insertions, 1 deletion
```diff
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1524fcb15..b14cf0fc9 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -290,6 +290,7 @@ class InfoExtractor(object):
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
     tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+    cast:           A list of the video cast
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
     was_live:       True, False, or None (=unknown). Whether this video was
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 031454600..cf407a813 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -14,6 +14,7 @@ from ..compat import (
 )
 from .openload import PhantomJSwrapper
 from ..utils import (
+    clean_html,
     determine_ext,
     ExtractorError,
     int_or_none,
@@ -145,6 +146,7 @@ class PornHubIE(PornHubBaseIE):
             'age_limit': 18,
             'tags': list,
             'categories': list,
+            'cast': list,
         },
     }, {
         # non-ASCII title
@@ -464,7 +466,7 @@ class PornHubIE(PornHubBaseIE):
                 r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
                 % meta_key, webpage, meta_key, default=None)
             if div:
-                return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
+                return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
 
         info = self._search_json_ld(webpage, video_id, default={})
         # description provided in JSON-LD is irrelevant
@@ -485,6 +487,7 @@ class PornHubIE(PornHubBaseIE):
             'age_limit': 18,
             'tags': extract_list('tags'),
             'categories': extract_list('categories'),
+            'cast': extract_list('pornstars'),
             'subtitles': subtitles,
         }, info)
 
```