about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> 2023-02-17 12:19:24 +0900
committer GitHub <noreply@github.com> 2023-02-17 08:49:24 +0530
commit 361630015535026712bdb67f804a15b65ff9ee7e (patch)
tree 2dcd799abe43a9b15005ec503cdb0b37cb5935e3
parent e4a8b1769e19755acba6d8f212208359905a3159 (diff)
download hypervideo-pre-361630015535026712bdb67f804a15b65ff9ee7e.tar.lz
hypervideo-pre-361630015535026712bdb67f804a15b65ff9ee7e.tar.xz
hypervideo-pre-361630015535026712bdb67f804a15b65ff9ee7e.zip
[extractor/yappy] Add extractor (#6111)
Authored by: HobbyistDev Closes #3522
-rw-r--r-- yt_dlp/extractor/_extractors.py 1
-rw-r--r-- yt_dlp/extractor/yappy.py 99
2 files changed, 100 insertions, 0 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 0a36e98de..4aab6ea78 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2329,6 +2329,7 @@ from .yandexvideo import (
ZenYandexChannelIE,
)
from .yapfiles import YapFilesIE
+from .yappy import YappyIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
diff --git a/yt_dlp/extractor/yappy.py b/yt_dlp/extractor/yappy.py
new file mode 100644
index 000000000..f168bdbf9
--- /dev/null
+++ b/yt_dlp/extractor/yappy.py
@@ -0,0 +1,99 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none
+)
+
+
+class YappyIE(InfoExtractor):
+ _VALID_URL = r'https?://yappy\.media/video/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://yappy.media/video/47fea6d8586f48d1a0cf96a7342aabd2',
+ 'info_dict': {
+ 'id': '47fea6d8586f48d1a0cf96a7342aabd2',
+ 'ext': 'mp4',
+ 'title': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
+ 'timestamp': 1661893200,
+ 'description': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
+ 'thumbnail': 'https://cdn-st.ritm.media/static/pic/thumbnails/0c7c4d73388f47848acaf540d2e2bb8c-thumbnail.jpg',
+ 'upload_date': '20220830',
+ 'view_count': int,
+ 'like_count': int,
+ 'uploader_id': '59a0c8c485e5410b9c43474bf4c6a373',
+ 'categories': ['Образование и наука', 'Лайфхак', 'Технологии', 'Арт/искусство'],
+ 'repost_count': int,
+ 'uploader': 'YAPPY',
+ }
+ }, {
+ 'url': 'https://yappy.media/video/3862451954ad4bd58ae2ccefddb0bd33',
+ 'info_dict': {
+ 'id': '3862451954ad4bd58ae2ccefddb0bd33',
+ 'ext': 'mp4',
+ 'title': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
+ 'timestamp': 1674726985,
+ 'like_count': int,
+ 'description': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
+ 'uploader_id': '6793ee3581974a3586fc01e157de6c99',
+ 'view_count': int,
+ 'repost_count': int,
+ 'uploader': 'LENA SHTURMAN',
+ 'upload_date': '20230126',
+ 'thumbnail': 'https://cdn-st.ritm.media/static/pic/user_thumbnails/6e76bb4bbad640b6/9ec84c115b2b1967/1674716171.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_ld = self._search_json_ld(webpage, video_id)
+ nextjs_data = self._search_nextjs_data(webpage, video_id)
+
+ media_data = (
+ traverse_obj(
+ nextjs_data, ('props', 'pageProps', ('data', 'OpenGraphParameters')), get_all=False)
+ or self._download_json(f'https://yappy.media/api/video/{video_id}', video_id))
+
+ media_url = traverse_obj(media_data, ('link', {url_or_none})) or ''
+ has_watermark = media_url.endswith('-wm.mp4')
+
+ formats = [{
+ 'url': media_url,
+ 'ext': 'mp4',
+ 'format_note': 'Watermarked' if has_watermark else None,
+ 'preference': -10 if has_watermark else None
+ }] if media_url else []
+
+ if has_watermark:
+ formats.append({
+ 'url': media_url.replace('-wm.mp4', '.mp4'),
+ 'ext': 'mp4'
+ })
+
+ audio_link = traverse_obj(media_data, ('audio', 'link'))
+ if audio_link:
+ formats.append({
+ 'url': audio_link,
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none'
+ })
+
+ return {
+ 'id': video_id,
+ 'title': (json_ld.get('description') or self._html_search_meta(['og:title'], webpage)
+ or self._html_extract_title(webpage)),
+ 'formats': formats,
+ 'thumbnail': (media_data.get('thumbnail')
+ or self._html_search_meta(['og:image', 'og:image:secure_url'], webpage)),
+ 'description': (media_data.get('description') or json_ld.get('description')
+ or self._html_search_meta(['description', 'og:description'], webpage)),
+ 'timestamp': unified_timestamp(media_data.get('publishedAt') or json_ld.get('timestamp')),
+ 'view_count': int_or_none(media_data.get('viewsCount') or json_ld.get('view_count')),
+ 'like_count': int_or_none(media_data.get('likesCount')),
+ 'uploader': traverse_obj(media_data, ('creator', 'firstName')),
+ 'uploader_id': traverse_obj(media_data, ('creator', ('uuid', 'nickname')), get_all=False),
+ 'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
+ 'repost_count': int_or_none(media_data.get('sharingCount'))
+ }