about summary refs log tree commit diff stats
path: root/hypervideo_dl/extractor/odnoklassniki.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/odnoklassniki.py')
-rw-r--r-- hypervideo_dl/extractor/odnoklassniki.py 107
1 files changed, 75 insertions, 32 deletions
diff --git a/hypervideo_dl/extractor/odnoklassniki.py b/hypervideo_dl/extractor/odnoklassniki.py
index 293f1aa..4f325f0 100644
--- a/hypervideo_dl/extractor/odnoklassniki.py
+++ b/hypervideo_dl/extractor/odnoklassniki.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
@@ -13,10 +8,12 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
- unified_strdate,
int_or_none,
qualities,
+ smuggle_url,
unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
urlencode_postdata,
)
@@ -27,13 +24,14 @@ class OdnoklassnikiIE(InfoExtractor):
(?:(?:www|m|mobile)\.)?
(?:odnoklassniki|ok)\.ru/
(?:
- video(?:embed)?/|
+ video(?P<embed>embed)?/|
web-api/video/moviePlayer/|
live/|
dk\?.*?st\.mvId=
)
(?P<id>[\d-]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
_TESTS = [{
'note': 'Coub embedded',
'url': 'http://ok.ru/video/1484130554189',
@@ -42,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1545580896,
'view_count': int,
- 'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
+ 'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
'title': 'Народная забава',
'uploader': 'Nevata',
'upload_date': '20181223',
@@ -69,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
- 'md5': '0b62089b479e06681abaaca9d204f152',
+ 'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'thumbnail': str,
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
@@ -84,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
- 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+ 'md5': 'f8c951122516af72e6e6ffdd3c41103b',
'info_dict': {
'id': '63567059965189-0',
'ext': 'mp4',
'title': 'Девушка без комплексов ...',
+ 'thumbnail': str,
'duration': 191,
'upload_date': '20150518',
'uploader_id': '534380003155',
@@ -99,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor):
},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
- 'url': 'http://ok.ru/video/64211978996595-1',
- 'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+ 'url': 'https://ok.ru/video/3952212382174',
+ 'md5': '91749d0bd20763a28d083fa335bbd37a',
'info_dict': {
- 'id': 'V_VztHT5BzY',
+ 'id': '5axVgHHDBvU',
'ext': 'mp4',
- 'title': 'Космическая среда от 26 августа 2015',
- 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
- 'duration': 440,
- 'upload_date': '20150826',
- 'uploader_id': 'tvroscosmos',
- 'uploader': 'Телестудия Роскосмоса',
+ 'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
+ 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
+ 'uploader': 'Lod Mer',
+ 'uploader_id': '575186401502',
+ 'duration': 1529,
'age_limit': 0,
+ 'upload_date': '20210405',
+ 'comment_count': int,
+ 'live_status': 'not_live',
+ 'view_count': int,
+ 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
+ 'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
+ 'channel_follower_count': int,
+ 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
+ 'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
+ 'categories': ['Education'],
+ 'playable_in_embed': True,
+ 'channel': 'BornToReact',
},
}, {
# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
@@ -130,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor):
},
'skip': 'Video has not been found',
}, {
+ # TODO: HTTP Error 400: Bad Request; it only works if there are no cookies when downloading
'note': 'Only available in mobile webpage',
'url': 'https://m.ok.ru/video/2361249957145',
'info_dict': {
'id': '2361249957145',
+ 'ext': 'mp4',
'title': 'Быковское крещение',
'duration': 3038.181,
},
@@ -162,14 +178,36 @@ class OdnoklassnikiIE(InfoExtractor):
# Paid video
'url': 'https://ok.ru/video/954886983203',
'only_matching': True,
+ }, {
+ 'url': 'https://ok.ru/videoembed/2932705602075',
+ 'info_dict': {
+ 'id': '2932705602075',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
+ 'title': 'Boosty для тебя!',
+ 'uploader_id': '597811038747',
+ 'like_count': 0,
+ 'duration': 35,
+ },
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
+ 'info_dict': {
+ 'id': '3950343629563',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
+ 'title': 'Заяц Бусти.mp4',
+ 'uploader_id': '571368965883',
+ 'like_count': 0,
+ 'duration': 10444,
+ },
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):  # generator: re-yields every result of the parent class's _extract_embed_urls(url, webpage)
+ for x in super()._extract_embed_urls(url, webpage):
+ yield smuggle_url(x, {'referrer': url})  # attach `url` under 'referrer'; the extractor later unsmuggles it and sends it as the Referer header
def _real_extract(self, url):
try:
@@ -185,16 +223,23 @@ class OdnoklassnikiIE(InfoExtractor):
start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
- video_id = self._match_id(url)
+ url, smuggled = unsmuggle_url(url, {})
+ video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
+ mode = 'videoembed' if is_embed else 'video'
webpage = self._download_webpage(
- 'http://ok.ru/video/%s' % video_id, video_id,
- note='Downloading desktop webpage')
+ f'https://ok.ru/{mode}/{video_id}', video_id,
+ note='Downloading desktop webpage',
+ headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
error = self._search_regex(
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
webpage, 'error', default=None)
- if error:
+ # Direct link from boosty
+ if (error == 'The author of this video has not been found or is blocked'
+ and not smuggled.get('referrer') and mode == 'videoembed'):
+ return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
+ elif error:
raise ExtractorError(error, expected=True)
player = self._parse_json(
@@ -281,7 +326,7 @@ class OdnoklassnikiIE(InfoExtractor):
if provider == 'LIVE_TV_APP':
info['title'] = title
- quality = qualities(('4', '0', '1', '2', '3', '5'))
+ quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
formats = [{
'url': f['url'],
@@ -325,8 +370,6 @@ class OdnoklassnikiIE(InfoExtractor):
if payment_info:
self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
- self._sort_formats(formats)
-
info['formats'] = formats
return info