aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpukkandan <pukkandan@gmail.com>2021-02-04 13:26:01 +0530
committerpukkandan <pukkandan@gmail.com>2021-02-04 13:26:22 +0530
commit2181983a0c97c6fd3bb681c86d64699051061c1c (patch)
tree3b352353ae60cf8a889cccf89cfffc60d4db5f0d
parente29663c644a65846125f5792be52dc27feb68297 (diff)
downloadhypervideo-pre-2181983a0c97c6fd3bb681c86d64699051061c1c.tar.lz
hypervideo-pre-2181983a0c97c6fd3bb681c86d64699051061c1c.tar.xz
hypervideo-pre-2181983a0c97c6fd3bb681c86d64699051061c1c.zip
Update to ytdl-2021.02.04.1 except youtube
-rw-r--r--youtube_dlc/extractor/abcnews.py134
-rw-r--r--youtube_dlc/extractor/adn.py38
-rw-r--r--youtube_dlc/extractor/aenetworks.py2
-rw-r--r--youtube_dlc/extractor/amp.py3
-rw-r--r--youtube_dlc/extractor/awaan.py2
-rw-r--r--youtube_dlc/extractor/azmedien.py2
-rw-r--r--youtube_dlc/extractor/bleacherreport.py10
-rw-r--r--youtube_dlc/extractor/bravotv.py14
-rw-r--r--youtube_dlc/extractor/ccma.py65
-rw-r--r--youtube_dlc/extractor/cda.py2
-rw-r--r--youtube_dlc/extractor/egghead.py29
-rw-r--r--youtube_dlc/extractor/extractors.py3
-rw-r--r--youtube_dlc/extractor/generic.py23
-rw-r--r--youtube_dlc/extractor/googledrive.py8
-rw-r--r--youtube_dlc/extractor/medialaan.py307
-rw-r--r--youtube_dlc/extractor/pornhub.py173
-rw-r--r--youtube_dlc/extractor/svt.py4
-rw-r--r--youtube_dlc/extractor/tv2.py82
-rw-r--r--youtube_dlc/extractor/tv4.py6
-rw-r--r--youtube_dlc/extractor/vidio.py86
-rw-r--r--youtube_dlc/extractor/vlive.py2
-rw-r--r--youtube_dlc/extractor/vtm.py62
-rw-r--r--youtube_dlc/extractor/vvvvid.py35
-rw-r--r--youtube_dlc/extractor/zype.py15
24 files changed, 668 insertions, 439 deletions
diff --git a/youtube_dlc/extractor/abcnews.py b/youtube_dlc/extractor/abcnews.py
index 8b407bf9c..908c83377 100644
--- a/youtube_dlc/extractor/abcnews.py
+++ b/youtube_dlc/extractor/abcnews.py
@@ -1,14 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
-import calendar
import re
-import time
from .amp import AMPIE
from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import compat_urlparse
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ try_get,
+)
class AbcNewsVideoIE(AMPIE):
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
(?:
abcnews\.go\.com/
(?:
- [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
- video/embed\?.*?\bid=
+ (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
+ video/(?:embed|itemfeed)\?.*?\bid=
)|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
)
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
'duration': 180,
'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1380454200,
+ 'upload_date': '20130929',
},
'params': {
# m3u8 download
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
}, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True,
+ }, {
+ 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+ # Youtube Embeds
+ 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
'info_dict': {
- 'id': '10505354',
- 'ext': 'flv',
- 'display_id': 'dramatic-video-rare-death-job-america',
- 'title': 'Occupational Hazards',
- 'description': 'Nightline investigates the dangers that lurk at various jobs.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20100428',
- 'timestamp': 1272412800,
+ 'id': '51286501',
+ 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+ 'description': 'Billingsley went from a child actor to Hollywood power player.',
},
- 'add_ie': ['AbcNewsVideo'],
+ 'playlist_count': 5,
}, {
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
'info_dict': {
'id': '38897857',
'ext': 'mp4',
- 'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
'title': 'Justin Timberlake Drops Hints For Secret Single',
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
- 'upload_date': '20160515',
- 'timestamp': 1463329500,
+ 'upload_date': '20160505',
+ 'timestamp': 1462442280,
},
'params': {
# m3u8 download
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
}, {
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
'only_matching': True,
+ }, {
+ # inline.type == 'video'
+ 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
- full_video_url = compat_urlparse.urljoin(url, video_url)
-
- youtube_url = YoutubeIE._extract_url(webpage)
-
- timestamp = None
- date_str = self._html_search_regex(
- r'<span[^>]+class="timestamp">([^<]+)</span>',
- webpage, 'timestamp', fatal=False)
- if date_str:
- tz_offset = 0
- if date_str.endswith(' ET'): # Eastern Time
- tz_offset = -5
- date_str = date_str[:-3]
- date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
- for date_format in date_formats:
- try:
- timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
- except ValueError:
- continue
- if timestamp is not None:
- timestamp -= tz_offset * 3600
-
- entry = {
- '_type': 'url_transparent',
- 'ie_key': AbcNewsVideoIE.ie_key(),
- 'url': full_video_url,
- 'id': video_id,
- 'display_id': display_id,
- 'timestamp': timestamp,
- }
-
- if youtube_url:
- entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
- return self.playlist_result(entries)
-
- return entry
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+ story = self._parse_json(self._search_regex(
+ r"window\['__abcnews__'\]\s*=\s*({.+?});",
+ webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
+ article_contents = story.get('articleContents') or {}
+
+ def entries():
+ featured_video = story.get('featuredVideo') or {}
+ feed = try_get(featured_video, lambda x: x['video']['feed'])
+ if feed:
+ yield {
+ '_type': 'url',
+ 'id': featured_video.get('id'),
+ 'title': featured_video.get('name'),
+ 'url': feed,
+ 'thumbnail': featured_video.get('images'),
+ 'description': featured_video.get('description'),
+ 'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+ 'duration': parse_duration(featured_video.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
+
+ for inline in (article_contents.get('inlines') or []):
+ inline_type = inline.get('type')
+ if inline_type == 'iframe':
+ iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+ if iframe_url:
+ yield self.url_result(iframe_url)
+ elif inline_type == 'video':
+ video_id = inline.get('id')
+ if video_id:
+ yield {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
+ 'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+ 'description': inline.get('description'),
+ 'duration': parse_duration(inline.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), story_id, article_contents.get('headline'),
+ article_contents.get('subHead'))
diff --git a/youtube_dlc/extractor/adn.py b/youtube_dlc/extractor/adn.py
index d611ee237..a55ebbcbd 100644
--- a/youtube_dlc/extractor/adn.py
+++ b/youtube_dlc/extractor/adn.py
@@ -26,6 +26,7 @@ from ..utils import (
strip_or_none,
try_get,
unified_strdate,
+ urlencode_postdata,
)
@@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
}
}
+ _NETRC_MACHINE = 'animedigitalnetwork'
_BASE_URL = 'http://animedigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+ _HEADERS = {}
+ _LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
@@ -129,19 +133,42 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
}])
return subtitles
+ def _real_initialize(self):
+ username, password = self._get_login_info()
+ if not username:
+ return
+ try:
+ access_token = (self._download_json(
+ self._API_BASE_URL + 'authentication/login', None,
+ 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
+ data=urlencode_postdata({
+ 'password': password,
+ 'rememberMe': False,
+ 'source': 'Web',
+ 'username': username,
+ })) or {}).get('accessToken')
+ if access_token:
+ self._HEADERS = {'authorization': 'Bearer ' + access_token}
+ except ExtractorError as e:
+ message = None
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(
+ e.cause.read().decode(), None, fatal=False) or {}
+ message = resp.get('message') or resp.get('code')
+ self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
def _real_extract(self, url):
video_id = self._match_id(url)
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
player = self._download_json(
video_base_url + 'configuration', video_id,
- 'Downloading player config JSON metadata')['player']
+ 'Downloading player config JSON metadata',
+ headers=self._HEADERS)['player']
options = player['options']
user = options['user']
if not user.get('hasAccess'):
- raise ExtractorError(
- 'This video is only available for paying users', expected=True)
- # self.raise_login_required() # FIXME: Login is not implemented
+ self.raise_login_required()
token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@@ -188,8 +215,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message)
- else:
- raise ExtractorError(message)
+ raise ExtractorError(message)
else:
raise ExtractorError('Giving up retrying')
diff --git a/youtube_dlc/extractor/aenetworks.py b/youtube_dlc/extractor/aenetworks.py
index a5d88ebbe..e55c03fd7 100644
--- a/youtube_dlc/extractor/aenetworks.py
+++ b/youtube_dlc/extractor/aenetworks.py
@@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
_TESTS = [{
'url': 'http://www.history.com/shows/ancient-aliens',
'info_dict': {
- 'id': 'SH012427480000',
+ 'id': 'SERIES1574',
'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
},
diff --git a/youtube_dlc/extractor/amp.py b/youtube_dlc/extractor/amp.py
index 7ff098cfa..24c684cad 100644
--- a/youtube_dlc/extractor/amp.py
+++ b/youtube_dlc/extractor/amp.py
@@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
+ unified_timestamp,
url_or_none,
)
@@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
self._sort_formats(formats)
- timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+ timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
return {
'id': video_id,
diff --git a/youtube_dlc/extractor/awaan.py b/youtube_dlc/extractor/awaan.py
index a2603bbff..3a7700cd4 100644
--- a/youtube_dlc/extractor/awaan.py
+++ b/youtube_dlc/extractor/awaan.py
@@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
'is_live': is_live,
+ 'uploader_id': video_data.get('user_id'),
}
@@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'upload_date': '20150107',
'timestamp': 1420588800,
+ 'uploader_id': '71',
},
'params': {
# m3u8 download
diff --git a/youtube_dlc/extractor/azmedien.py b/youtube_dlc/extractor/azmedien.py
index b1e20def5..930266990 100644
--- a/youtube_dlc/extractor/azmedien.py
+++ b/youtube_dlc/extractor/azmedien.py
@@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
}]
- _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+ _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
_PARTNER_ID = '1719221'
def _real_extract(self, url):
diff --git a/youtube_dlc/extractor/bleacherreport.py b/youtube_dlc/extractor/bleacherreport.py
index dc60224d0..d1bf8e829 100644
--- a/youtube_dlc/extractor/bleacherreport.py
+++ b/youtube_dlc/extractor/bleacherreport.py
@@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
- 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
+ 'md5': '670b2d73f48549da032861130488c681',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+ 'upload_date': '20150723',
+ 'timestamp': 1437679032,
+
},
+ 'expected_warnings': [
+ 'Unable to download f4m manifest'
+ ]
}]
def _real_extract(self, url):
diff --git a/youtube_dlc/extractor/bravotv.py b/youtube_dlc/extractor/bravotv.py
index b9715df00..bae2aedce 100644
--- a/youtube_dlc/extractor/bravotv.py
+++ b/youtube_dlc/extractor/bravotv.py
@@ -12,7 +12,7 @@ from ..utils import (
class BravoTVIE(AdobePassIE):
- _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
@@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
}, {
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
'only_matching': True,
+ }, {
+ 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
+ site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex(
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
tp_path = release_pid = tve['release_pid']
if tve.get('entitlement') == 'auth':
adobe_pass = settings.get('tve_adobe_auth', {})
+ if site == 'bravotv':
+ site = 'bravo'
resource = self._get_mvpd_resource(
- adobe_pass.get('adobePassResourceId', 'bravo'),
+ adobe_pass.get('adobePassResourceId') or site,
tve['title'], release_pid, tve.get('rating'))
query['auth'] = self._extract_mvpd_auth(
- url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+ url, release_pid,
+ adobe_pass.get('adobePassRequestorId') or site, resource)
else:
shared_playlist = settings['ls_playlist']
account_pid = shared_playlist['account_pid']
diff --git a/youtube_dlc/extractor/ccma.py b/youtube_dlc/extractor/ccma.py
index 544647f92..4db51e650 100644
--- a/youtube_dlc/extractor/ccma.py
+++ b/youtube_dlc/extractor/ccma.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import datetime
import re
from .common import InfoExtractor
@@ -8,8 +9,8 @@ from ..utils import (
clean_html,
int_or_none,
parse_duration,
- parse_iso8601,
parse_resolution,
+ try_get,
url_or_none,
)
@@ -24,8 +25,9 @@ class CCMAIE(InfoExtractor):
'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
- 'timestamp': 1470918540,
- 'upload_date': '20160811',
+ 'timestamp': 1478608140,
+ 'upload_date': '20161108',
+ 'age_limit': 0,
}
}, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
@@ -35,8 +37,24 @@ class CCMAIE(InfoExtractor):
'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
- 'upload_date': '20171205',
- 'timestamp': 1512507300,
+ 'upload_date': '20170512',
+ 'timestamp': 1494622500,
+ 'vcodec': 'none',
+ 'categories': ['Esports'],
+ }
+ }, {
+ 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
+ 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
+ 'info_dict': {
+ 'id': '6031387',
+ 'ext': 'mp4',
+ 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
+ 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
+ 'timestamp': 1582577700,
+ 'upload_date': '20200224',
+ 'subtitles': 'mincount:4',
+ 'age_limit': 16,
+ 'series': 'Crims',
}
}]
@@ -72,17 +90,27 @@ class CCMAIE(InfoExtractor):
informacio = media['informacio']
title = informacio['titol']
- durada = informacio.get('durada', {})
+ durada = informacio.get('durada') or {}
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
- timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
+ tematica = try_get(informacio, lambda x: x['tematica']['text'])
+
+ timestamp = None
+ data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
+ try:
+ timestamp = datetime.datetime.strptime(
+ data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
+ except TypeError:
+ pass
subtitles = {}
- subtitols = media.get('subtitols', {})
- if subtitols:
- sub_url = subtitols.get('url')
+ subtitols = media.get('subtitols') or []
+ if isinstance(subtitols, dict):
+ subtitols = [subtitols]
+ for st in subtitols:
+ sub_url = st.get('url')
if sub_url:
subtitles.setdefault(
- subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
+ st.get('iso') or st.get('text') or 'ca', []).append({
'url': sub_url,
})
@@ -97,6 +125,16 @@ class CCMAIE(InfoExtractor):
'height': int_or_none(imatges.get('alcada')),
}]
+ age_limit = None
+ codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
+ if codi_etic:
+ codi_etic_s = codi_etic.split('_')
+ if len(codi_etic_s) == 2:
+ if codi_etic_s[1] == 'TP':
+ age_limit = 0
+ else:
+ age_limit = int_or_none(codi_etic_s[1])
+
return {
'id': media_id,
'title': title,
@@ -106,4 +144,9 @@ class CCMAIE(InfoExtractor):
'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
+ 'age_limit': age_limit,
+ 'alt_title': informacio.get('titol_complet'),
+ 'episode_number': int_or_none(informacio.get('capitol')),
+ 'categories': [tematica] if tematica else None,
+ 'series': informacio.get('programa'),
}
diff --git a/youtube_dlc/extractor/cda.py b/youtube_dlc/extractor/cda.py
index d67900e62..6429454fb 100644
--- a/youtube_dlc/extractor/cda.py
+++ b/youtube_dlc/extractor/cda.py
@@ -96,7 +96,7 @@ class CDAIE(InfoExtractor):
raise ExtractorError('This video is only available for premium users.', expected=True)
need_confirm_age = False
- if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+ if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
webpage, 'birthday validate form', default=None):
webpage = self._download_age_confirm_page(
url, video_id, note='Confirming age')
diff --git a/youtube_dlc/extractor/egghead.py b/youtube_dlc/extractor/egghead.py
index df11dc206..aff9b88c0 100644
--- a/youtube_dlc/extractor/egghead.py
+++ b/youtube_dlc/extractor/egghead.py
@@ -12,7 +12,14 @@ from ..utils import (
)
-class EggheadCourseIE(InfoExtractor):
+class EggheadBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, resource, fatal=True):
+ return self._download_json(
+ 'https://app.egghead.io/api/v1/' + path,
+ video_id, 'Downloading %s JSON' % resource, fatal=fatal)
+
+
+class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
@@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
-
- lessons = self._download_json(
- 'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
- playlist_id, 'Downloading course lessons JSON')
+ series_path = 'series/' + playlist_id
+ lessons = self._call_api(
+ series_path + '/lessons', playlist_id, 'course lessons')
entries = []
for lesson in lessons:
@@ -44,9 +50,8 @@ class EggheadCourseIE(InfoExtractor):
entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
- course = self._download_json(
- 'https://egghead.io/api/v1/series/%s' % playlist_id,
- playlist_id, 'Downloading course JSON', fatal=False) or {}
+ course = self._call_api(
+ series_path, playlist_id, 'course', False) or {}
playlist_id = course.get('id')
if playlist_id:
@@ -57,7 +62,7 @@ class EggheadCourseIE(InfoExtractor):
course.get('description'))
-class EggheadLessonIE(InfoExtractor):
+class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
@@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
'upload_date': '20161209',
'duration': 304,
'view_count': 0,
- 'tags': ['javascript', 'free'],
+ 'tags': 'count:2',
},
'params': {
'skip_download': True,
@@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- lesson = self._download_json(
- 'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+ lesson = self._call_api(
+ 'lessons/' + display_id, display_id, 'lesson')
lesson_id = compat_str(lesson['id'])
title = lesson['title']
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
index d9e48680e..752e7bee5 100644
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -1308,6 +1308,7 @@ from .tv2 import (
TV2IE,
TV2ArticleIE,
KatsomoIE,
+ MTVUutisetArticleIE,
)
from .tv2dk import (
TV2DKIE,
@@ -1448,7 +1449,6 @@ from .vidme import (
VidmeUserIE,
VidmeUserLikesIE,
)
-from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
@@ -1508,6 +1508,7 @@ from .vrv import (
VRVSeriesIE,
)
from .vshare import VShareIE
+from .vtm import VTMIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE
diff --git a/youtube_dlc/extractor/generic.py b/youtube_dlc/extractor/generic.py
index 6b4c84261..d5d8ed94b 100644
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@@ -131,6 +131,7 @@ from .gedi import GediEmbedsIE
from .rcs import RCSEmbedsIE
from .bitchute import BitChuteIE
from .arcpublishing import ArcPublishingIE
+from .medialaan import MedialaanIE
class GenericIE(InfoExtractor):
@@ -2224,6 +2225,20 @@ class GenericIE(InfoExtractor):
'duration': 1581,
},
},
+ {
+ # MyChannels SDK embed
+ # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
+ 'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
+ 'md5': '90c0699c37006ef18e198c032d81739c',
+ 'info_dict': {
+ 'id': '194165',
+ 'ext': 'mp4',
+ 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
+ 'timestamp': 1611740340,
+ 'upload_date': '20210127',
+ 'duration': 159,
+ },
+ },
]
def report_following_redirect(self, new_url):
@@ -2463,6 +2478,9 @@ class GenericIE(InfoExtractor):
webpage = self._webpage_read_content(
full_response, url, video_id, prefix=first_bytes)
+ if '<title>DPG Media Privacy Gate</title>' in webpage:
+ webpage = self._download_webpage(url, video_id)
+
self.report_extraction(video_id)
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@@ -2594,6 +2612,11 @@ class GenericIE(InfoExtractor):
if arc_urls:
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
+ mychannels_urls = MedialaanIE._extract_urls(webpage)
+ if mychannels_urls:
+ return self.playlist_from_matches(
+ mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
+
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
diff --git a/youtube_dlc/extractor/googledrive.py b/youtube_dlc/extractor/googledrive.py
index fdb15795a..4eefcb70c 100644
--- a/youtube_dlc/extractor/googledrive.py
+++ b/youtube_dlc/extractor/googledrive.py
@@ -7,6 +7,7 @@ from ..compat import compat_parse_qs
from ..utils import (
determine_ext,
ExtractorError,
+ get_element_by_class,
int_or_none,
lowercase_escape,
try_get,
@@ -237,7 +238,7 @@ class GoogleDriveIE(InfoExtractor):
if confirmation_webpage:
confirm = self._search_regex(
r'confirm=([^&"\']+)', confirmation_webpage,
- 'confirmation code', fatal=False)
+ 'confirmation code', default=None)
if confirm:
confirmed_source_url = update_url_query(source_url, {
'confirm': confirm,
@@ -245,6 +246,11 @@ class GoogleDriveIE(InfoExtractor):
urlh = request_source_file(confirmed_source_url, 'confirmed source')
if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh)
+ else:
+ self.report_warning(
+ get_element_by_class('uc-error-subcaption', confirmation_webpage)
+ or get_element_by_class('uc-error-caption', confirmation_webpage)
+ or 'unable to extract confirmation code')
if not formats and reason:
raise ExtractorError(reason, expected=True)
diff --git a/youtube_dlc/extractor/medialaan.py b/youtube_dlc/extractor/medialaan.py
index 50d5db802..788acf7fb 100644
--- a/youtube_dlc/extractor/medialaan.py
+++ b/youtube_dlc/extractor/medialaan.py
@@ -2,268 +2,113 @@ from __future__ import unicode_literals
import re
-from .gigya import GigyaBaseIE
-
-from ..compat import compat_str
+from .common import InfoExtractor
from ..utils import (
+ extract_attributes,
int_or_none,
- parse_duration,
- try_get,
- unified_timestamp,
+ mimetype2ext,
+ parse_iso8601,
)
-class MedialaanIE(GigyaBaseIE):
+class MedialaanIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
- (?:www\.|nieuws\.)?
(?:
- (?P<site_id>vtm|q2|vtmkzoom)\.be/
- (?:
- video(?:/[^/]+/id/|/?\?.*?\baid=)|
- (?:[^/]+/)*
- )
+ (?:embed\.)?mychannels.video/embed/|
+ embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
+ (?:www\.)?(?:
+ (?:
+ 7sur7|
+ demorgen|
+ hln|
+ joe|
+ qmusic
+ )\.be|
+ (?:
+ [abe]d|
+ bndestem|
+ destentor|
+ gelderlander|
+ pzc|
+ tubantia|
+ volkskrant
+ )\.nl
+ )/video/(?:[^/]+/)*[^/?&#]+~p
)
- (?P<id>[^/?#&]+)
+ (?P<id>\d+)
'''
- _NETRC_MACHINE = 'medialaan'
- _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
- _SITE_TO_APP_ID = {
- 'vtm': 'vtm_watch',
- 'q2': 'q2',
- 'vtmkzoom': 'vtmkzoom',
- }
_TESTS = [{
- # vod
- 'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
+ 'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
'info_dict': {
- 'id': 'vtm_20170219_VM0678361_vtmwatch',
+ 'id': '193993',
'ext': 'mp4',
- 'title': 'Allemaal Chris afl. 6',
- 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
- 'timestamp': 1487533280,
- 'upload_date': '20170219',
- 'duration': 2562,
- 'series': 'Allemaal Chris',
- 'season': 'Allemaal Chris',
- 'season_number': 1,
- 'season_id': '256936078124527',
- 'episode': 'Allemaal Chris afl. 6',
- 'episode_number': 6,
- 'episode_id': '256936078591527',
+ 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
+ 'timestamp': 1611663540,
+ 'upload_date': '20210126',
+ 'duration': 238,
},
'params': {
'skip_download': True,
},
- 'skip': 'Requires account credentials',
- }, {
- # clip
- 'url': 'http://vtm.be/video?aid=168332',
- 'info_dict': {
- 'id': '168332',
- 'ext': 'mp4',
- 'title': '"Veronique liegt!"',
- 'description': 'md5:1385e2b743923afe54ba4adc38476155',
- 'timestamp': 1489002029,
- 'upload_date': '20170308',
- 'duration': 96,
- },
}, {
- # vod
- 'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
+ 'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
'only_matching': True,
}, {
- # vod
- 'url': 'http://vtm.be/video?aid=163157',
+ 'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
'only_matching': True,
}, {
- # vod
- 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
+ 'url': 'https://embed.mychannels.video/script/production/193993',
'only_matching': True,
}, {
- # clip
- 'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
+ 'url': 'https://embed.mychannels.video/production/193993',
'only_matching': True,
}, {
- # http/s redirect
- 'url': 'https://vtmkzoom.be/video?aid=45724',
- 'info_dict': {
- 'id': '257136373657000',
- 'ext': 'mp4',
- 'title': 'K3 Dansstudio Ushuaia afl.6',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Requires account credentials',
+ 'url': 'https://mychannels.video/embed/193993',
+ 'only_matching': True,
}, {
- # nieuws.vtm.be
- 'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
+ 'url': 'https://embed.mychannels.video/embed/193993',
'only_matching': True,
}]
- def _real_initialize(self):
- self._logged_in = False
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- self.raise_login_required()
-
- auth_data = {
- 'APIKey': self._APIKEY,
- 'sdk': 'js_6.1',
- 'format': 'json',
- 'loginID': username,
- 'password': password,
- }
-
- auth_info = self._gigya_login(auth_data)
-
- self._uid = auth_info['UID']
- self._uid_signature = auth_info['UIDSignature']
- self._signature_timestamp = auth_info['signatureTimestamp']
-
- self._logged_in = True
+ @staticmethod
+ def _extract_urls(webpage):
+ entries = []
+ for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
+ mychannels_id = extract_attributes(element).get('data-mychannels-id')
+ if mychannels_id:
+ entries.append('https://mychannels.video/embed/' + mychannels_id)
+ return entries
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, site_id = mobj.group('id', 'site_id')
-
- webpage = self._download_webpage(url, video_id)
-
- config = self._parse_json(
- self._search_regex(
- r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
- webpage, 'config', default='{}'), video_id,
- transform_source=lambda s: s.replace(
- '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
-
- vod_id = config.get('vodId') or self._search_regex(
- (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
- r'"vodId"\s*:\s*"(.+?)"',
- r'<[^>]+id=["\']vod-(\d+)'),
- webpage, 'video_id', default=None)
-
- # clip, no authentication required
- if not vod_id:
- player = self._parse_json(
- self._search_regex(
- r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
- default=''),
- video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
- if player:
- video = player[-1]
- if video['videoUrl'] in ('http', 'https'):
- return self.url_result(video['url'], MedialaanIE.ie_key())
- info = {
- 'id': video_id,
- 'url': video['videoUrl'],
- 'title': video['title'],
- 'thumbnail': video.get('imageUrl'),
- 'timestamp': int_or_none(video.get('createdDate')),
- 'duration': int_or_none(video.get('duration')),
- }
+ production_id = self._match_id(url)
+ production = self._download_json(
+ 'https://embed.mychannels.video/sdk/production/' + production_id,
+ production_id, query={'options': 'UUUU_default'})['productions'][0]
+ title = production['title']
+
+ formats = []
+ for source in (production.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ ext = mimetype2ext(source.get('type'))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, production_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
else:
- info = self._parse_html5_media_entries(
- url, webpage, video_id, m3u8_id='hls')[0]
- info.update({
- 'id': video_id,
- 'title': self._html_search_meta('description', webpage),
- 'duration': parse_duration(self._html_search_meta('duration', webpage)),
- })
- # vod, authentication required
- else:
- if not self._logged_in:
- self._login()
-
- settings = self._parse_json(
- self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings', default='{}'),
- video_id)
-
- def get(container, item):
- return try_get(
- settings, lambda x: x[container][item],
- compat_str) or self._search_regex(
- r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
- default=None)
-
- app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
- sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
-
- data = self._download_json(
- 'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
- video_id, query={
- 'app_id': app_id,
- 'user_network': sso,
- 'UID': self._uid,
- 'UIDSignature': self._uid_signature,
- 'signatureTimestamp': self._signature_timestamp,
+ formats.append({
+ 'ext': ext,
+ 'url': src,
})
-
- formats = self._extract_m3u8_formats(
- data['response']['uri'], video_id, entry_protocol='m3u8_native',
- ext='mp4', m3u8_id='hls')
-
- self._sort_formats(formats)
-
- info = {
- 'id': vod_id,
- 'formats': formats,
- }
-
- api_key = get('vod', 'apiKey')
- channel = get('medialaanGigya', 'channel')
-
- if api_key:
- videos = self._download_json(
- 'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
- query={
- 'channels': channel,
- 'ids': vod_id,
- 'limit': 1,
- 'apikey': api_key,
- })
- if videos:
- video = try_get(
- videos, lambda x: x['response']['videos'][0], dict)
- if video:
- def get(container, item, expected_type=None):
- return try_get(
- video, lambda x: x[container][item], expected_type)
-
- def get_string(container, item):
- return get(container, item, compat_str)
-
- info.update({
- 'series': get_string('program', 'title'),
- 'season': get_string('season', 'title'),
- 'season_number': int_or_none(get('season', 'number')),
- 'season_id': get_string('season', 'id'),
- 'episode': get_string('episode', 'title'),
- 'episode_number': int_or_none(get('episode', 'number')),
- 'episode_id': get_string('episode', 'id'),
- 'duration': int_or_none(
- video.get('duration')) or int_or_none(
- video.get('durationMillis'), scale=1000),
- 'title': get_string('episode', 'title'),
- 'description': get_string('episode', 'text'),
- 'timestamp': unified_timestamp(get_string(
- 'publication', 'begin')),
- })
-
- if not info.get('title'):
- info['title'] = try_get(
- config, lambda x: x['videoConfig']['title'],
- compat_str) or self._html_search_regex(
- r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
- default=None) or self._og_search_title(webpage)
-
- if not info.get('description'):
- info['description'] = self._html_search_regex(
- r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
- webpage, 'description', default=None)
-
- return info
+ self._sort_formats(formats)
+
+ return {
+ 'id': production_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': production.get('posterUrl'),
+ 'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
+ 'duration': int_or_none(production.get('duration')) or None,
+ }
diff --git a/youtube_dlc/extractor/pornhub.py b/youtube_dlc/extractor/pornhub.py
index 2fcbd186f..b7631e4e1 100644
--- a/youtube_dlc/extractor/pornhub.py
+++ b/youtube_dlc/extractor/pornhub.py
@@ -22,11 +22,15 @@ from ..utils import (
orderedSet,
remove_quotes,
str_to_int,
+ update_url_query,
+ urlencode_postdata,
url_or_none,
)
class PornHubBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'pornhub'
+
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@@ -52,6 +56,66 @@ class PornHubBaseIE(InfoExtractor):
return webpage, urlh
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, host):
+ if self._logged_in:
+ return
+
+ site = host.split('.')[0]
+
+ # Both sites pornhub and pornhubpremium have separate accounts
+ # so there should be an option to provide credentials for both.
+ # At the same time some videos are available under the same video id
+ # on both sites so that we have to identify them as the same video.
+ # For that purpose we have to keep both in the same extractor
+ # but under different netrc machines.
+ username, password = self._get_login_info(netrc_machine=site)
+ if username is None:
+ return
+
+ login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
+ login_page = self._download_webpage(
+ login_url, None, 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']signOut',
+ r'>Sign\s+[Oo]ut\s*<'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ response = self._download_json(
+ 'https://www.%s/front/authenticate' % host, None,
+ 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': login_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ if response.get('success') == '1':
+ self._logged_in = True
+ return
+
+ message = response.get('message')
+ if message is not None:
+ raise ExtractorError(
+ 'Unable to login: %s' % message, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla'
@@ -163,12 +227,20 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
'only_matching': True,
+ }, {
+ # Some videos are available with the same id on both premium
+ # and non-premium sites (e.g. this and the following test)
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+ 'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
webpage)
def _extract_count(self, pattern, webpage, name):
@@ -180,12 +252,7 @@ class PornHubIE(PornHubBaseIE):
host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id')
- if 'premium' in host:
- if not self._downloader.params.get('cookiefile'):
- raise ExtractorError(
- 'PornHub Premium requires authentication.'
- ' You may want to use --cookies.',
- expected=True)
+ self._login(host)
self._set_cookie(host, 'age_verified', '1')
@@ -405,6 +472,10 @@ class PornHubIE(PornHubBaseIE):
class PornHubPlaylistBaseIE(PornHubBaseIE):
+ def _extract_page(self, url):
+ return int_or_none(self._search_regex(
+ r'\bpage=(\d+)', url, 'page', default=None))
+
def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
@@ -422,26 +493,6 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
container))
]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- playlist_id = mobj.group('id')
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = self._extract_entries(webpage, host)
-
- playlist = self._parse_json(
- self._search_regex(
- r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
- 'playlist', default='{}'),
- playlist_id, fatal=False)
- title = playlist.get('title') or self._search_regex(
- r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
-
- return self.playlist_result(
- entries, playlist_id, title, playlist.get('description'))
-
class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
@@ -463,14 +514,27 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
}, {
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
'only_matching': True,
+ }, {
+ # Unavailable via /videos page, but available with direct pagination
+ # on pornstar page (see [1]), requires premium
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+ 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
+ 'only_matching': True,
+ }, {
+ # Same as before, multi page
+ 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
+ 'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('id')
+ videos_url = '%s/videos' % mobj.group('url')
+ page = self._extract_page(url)
+ if page:
+ videos_url = update_url_query(videos_url, {'page': page})
return self.url_result(
- '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
- video_id=user_id)
+ videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@@ -483,32 +547,55 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
<button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- item_id = mobj.group('id')
+ def _entries(self, url, host, item_id):
+ page = self._extract_page(url)
- page = int_or_none(self._search_regex(
- r'\bpage=(\d+)', url, 'page', default=None))
+ VIDEOS = '/videos'
+
+ def download_page(base_url, num, fallback=False):
+ note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
+ return self._download_webpage(
+ base_url, item_id, note, query={'page': num})
- entries = []
- for page_num in (page, ) if page is not None else itertools.count(1):
+ def is_404(e):
+ return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+
+ base_url = url
+ has_page = page is not None
+ first_page = page if has_page else 1
+ for page_num in (first_page, ) if has_page else itertools.count(first_page):
try:
- webpage = self._download_webpage(
- url, item_id, 'Downloading page %d' % page_num,
- query={'page': page_num})
+ try:
+ webpage = download_page(base_url, page_num)
+ except ExtractorError as e:
+ # Some sources may not be available via /videos page,
+ # trying to fallback to main page pagination (see [1])
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+ if is_404(e) and page_num == first_page and VIDEOS in base_url:
+ base_url = base_url.replace(VIDEOS, '')
+ webpage = download_page(base_url, page_num, fallback=True)
+ else:
+ raise
except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ if is_404(e) and page_num != first_page:
break
raise
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
- entries.extend(page_entries)
+ for e in page_entries:
+ yield e
if not self._has_more(webpage):
break
- return self.playlist_result(orderedSet(entries), item_id)
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ self._login(host)
+
+ return self.playlist_result(self._entries(url, host, item_id), item_id)
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
diff --git a/youtube_dlc/extractor/svt.py b/youtube_dlc/extractor/svt.py
index a0b6ef4db..4acc29fce 100644
--- a/youtube_dlc/extractor/svt.py
+++ b/youtube_dlc/extractor/svt.py
@@ -255,8 +255,10 @@ class SVTPlayIE(SVTPlayBaseIE):
svt_id = self._search_regex(
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
+ r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
- r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
+ r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
+ r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
webpage, 'video id')
info_dict = self._extract_by_video_id(svt_id, webpage)
diff --git a/youtube_dlc/extractor/tv2.py b/youtube_dlc/extractor/tv2.py
index 4a19b9be6..334b7d540 100644
--- a/youtube_dlc/extractor/tv2.py
+++ b/youtube_dlc/extractor/tv2.py
@@ -20,7 +20,7 @@ from ..utils import (
class TV2IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.tv2.no/v/916509/',
'info_dict': {
'id': '916509',
@@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
'view_count': int,
'categories': list,
},
- }
+ }]
_API_DOMAIN = 'sumo.tv2.no'
_PROTOCOLS = ('HDS', 'HLS', 'DASH')
_GEO_COUNTRIES = ['NO']
@@ -42,6 +42,12 @@ class TV2IE(InfoExtractor):
video_id = self._match_id(url)
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
+ asset = self._download_json(
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
+ title = asset.get('subtitle') or asset['title']
+ is_live = asset.get('live') is True
+
formats = []
format_urls = []
for protocol in self._PROTOCOLS:
@@ -81,7 +87,8 @@ class TV2IE(InfoExtractor):
elif ext == 'm3u8':
if not data.get('drmProtected'):
formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ video_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
m3u8_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@@ -99,11 +106,6 @@ class TV2IE(InfoExtractor):
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats)
- asset = self._download_json(
- api_base + '.json', video_id,
- 'Downloading metadata JSON')['asset']
- title = asset['title']
-
thumbnails = [{
'id': thumbnail.get('@type'),
'url': thumbnail.get('url'),
@@ -112,7 +114,7 @@ class TV2IE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'description': strip_or_none(asset.get('description')),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(asset.get('createTime')),
@@ -120,6 +122,7 @@ class TV2IE(InfoExtractor):
'view_count': int_or_none(asset.get('views')),
'categories': asset.get('keywords', '').split(','),
'formats': formats,
+ 'is_live': is_live,
}
@@ -168,13 +171,13 @@ class TV2ArticleIE(InfoExtractor):
class KatsomoIE(TV2IE):
- _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
+ _TESTS = [{
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
'info_dict': {
'id': '1181321',
'ext': 'mp4',
- 'title': 'MTV Uutiset Live',
+ 'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
'description': 'Päätöksen teki Pelicansin hallitus.',
'timestamp': 1575116484,
'upload_date': '20191130',
@@ -186,7 +189,60 @@ class KatsomoIE(TV2IE):
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mtvuutiset.fi/video/prog1311159',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.katsomo.fi/#!/jakso/1311159',
+ 'only_matching': True,
+ }]
_API_DOMAIN = 'api.katsomo.fi'
_PROTOCOLS = ('HLS', 'MPD')
_GEO_COUNTRIES = ['FI']
+
+
+class MTVUutisetArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
+ 'info_dict': {
+ 'id': '1311159',
+ 'ext': 'mp4',
+ 'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+ 'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+ 'timestamp': 1600608966,
+ 'upload_date': '20200920',
+ 'duration': 153.7886666,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # multiple Youtube embeds
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ article = self._download_json(
+ 'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
+ article_id)
+
+ def entries():
+ for video in (article.get('videos') or []):
+ video_type = video.get('videotype')
+ video_url = video.get('url')
+ if not (video_url and video_type in ('katsomo', 'youtube')):
+ continue
+ yield self.url_result(
+ video_url, video_type.capitalize(), video.get('video_id'))
+
+ return self.playlist_result(
+ entries(), article_id, article.get('title'), article.get('description'))
diff --git a/youtube_dlc/extractor/tv4.py b/youtube_dlc/extractor/tv4.py
index c498b0191..b73bab9a8 100644
--- a/youtube_dlc/extractor/tv4.py
+++ b/youtube_dlc/extractor/tv4.py
@@ -17,7 +17,7 @@ class TV4IE(InfoExtractor):
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
tv4play\.se/
(?:
- (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
+ (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
iframe/video/|
film/|
sport/|
@@ -65,6 +65,10 @@ class TV4IE(InfoExtractor):
{
'url': 'http://www.tv4play.se/program/farang/3922081',
'only_matching': True,
+ },
+ {
+ 'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
+ 'only_matching': True,
}
]
diff --git a/youtube_dlc/extractor/vidio.py b/youtube_dlc/extractor/vidio.py
index b48baf00b..b1243e847 100644
--- a/youtube_dlc/extractor/vidio.py
+++ b/youtube_dlc/extractor/vidio.py
@@ -4,7 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ strip_or_none,
+ try_get,
+)
class VidioIE(InfoExtractor):
@@ -21,57 +27,63 @@ class VidioIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 149,
'like_count': int,
+ 'uploader': 'TWELVE Pic',
+ 'timestamp': 1444902800,
+ 'upload_date': '20151015',
+ 'uploader_id': 'twelvepictures',
+ 'channel': 'Cover Music Video',
+ 'channel_id': '280236',
+ 'view_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'tags': 'count:4',
},
}, {
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
'only_matching': True,
}]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, display_id = mobj.group('id', 'display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._og_search_title(webpage)
+ def _real_initialize(self):
+ self._api_key = self._download_json(
+ 'https://www.vidio.com/auth', None, data=b'')['api_key']
- m3u8_url, duration, thumbnail = [None] * 3
-
- clips = self._parse_json(
- self._html_search_regex(
- r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
- webpage, 'video data', default='[]', group='data'),
- display_id, fatal=False)
- if clips:
- clip = clips[0]
- m3u8_url = clip.get('sources', [{}])[0].get('file')
- duration = clip.get('clip_duration')
- thumbnail = clip.get('image')
+ def _real_extract(self, url):
+ video_id, display_id = re.match(self._VALID_URL, url).groups()
+ data = self._download_json(
+ 'https://api.vidio.com/videos/' + video_id, display_id, headers={
+ 'Content-Type': 'application/vnd.api+json',
+ 'X-API-KEY': self._api_key,
+ })
+ video = data['videos'][0]
+ title = video['title'].strip()
- m3u8_url = m3u8_url or self._search_regex(
- r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'hls url', group='url')
formats = self._extract_m3u8_formats(
- m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
+ data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
- duration = int_or_none(duration or self._search_regex(
- r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
- 'duration', fatal=False, group='duration'))
- thumbnail = thumbnail or self._og_search_thumbnail(webpage)
-
- like_count = int_or_none(self._search_regex(
- (r'<span[^>]+data-comment-vote-count=["\'](\d+)',
- r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
- webpage, 'like count', fatal=False))
+ get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
+ channel = get_first('channel')
+ user = get_first('user')
+ username = user.get('username')
+ get_count = lambda x: int_or_none(video.get('total_' + x))
return {
'id': video_id,
'display_id': display_id,
'title': title,
- 'description': self._og_search_description(webpage),
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'like_count': like_count,
+ 'description': strip_or_none(video.get('description')),
+ 'thumbnail': video.get('image_url_medium'),
+ 'duration': int_or_none(video.get('duration')),
+ 'like_count': get_count('likes'),
'formats': formats,
+ 'uploader': user.get('name'),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'uploader_id': username,
+ 'uploader_url': 'https://www.vidio.com/@' + username if username else None,
+ 'channel': channel.get('name'),
+ 'channel_id': str_or_none(channel.get('id')),
+ 'view_count': get_count('view_count'),
+ 'dislike_count': get_count('dislikes'),
+ 'comment_count': get_count('comments'),
+ 'tags': video.get('tag_list'),
}
diff --git a/youtube_dlc/extractor/vlive.py b/youtube_dlc/extractor/vlive.py
index fde6c0149..533bfd5da 100644
--- a/youtube_dlc/extractor/vlive.py
+++ b/youtube_dlc/extractor/vlive.py
@@ -125,7 +125,7 @@ class VLiveIE(VLiveBaseIE):
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self.raise_login_required(json.loads(e.cause.read().decode())['message'])
+ self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
raise
def _real_extract(self, url):
diff --git a/youtube_dlc/extractor/vtm.py b/youtube_dlc/extractor/vtm.py
new file mode 100644
index 000000000..093f1aa69
--- /dev/null
+++ b/youtube_dlc/extractor/vtm.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class VTMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+ _TEST = {
+ 'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
+ 'md5': '37dca85fbc3a33f2de28ceb834b071f8',
+ 'info_dict': {
+ 'id': '192445',
+ 'ext': 'mp4',
+ 'title': 'Gast vernielt Genkse hotelkamer',
+ 'timestamp': 1611060180,
+ 'upload_date': '20210119',
+ 'duration': 74,
+ # TODO: fix url _type result processing
+ # 'series': 'Op Interventie',
+ }
+ }
+
+ def _real_extract(self, url):
+ uuid = self._match_id(url)
+ video = self._download_json(
+ 'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
+ uuid, query={
+ 'query': '''{
+ getComponent(type: Video, uuid: "%s") {
+ ... on Video {
+ description
+ duration
+ myChannelsVideo
+ program {
+ title
+ }
+ publishedAt
+ title
+ }
+ }
+}''' % uuid,
+ }, headers={
+ 'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
+ })['data']['getComponent']
+
+ return {
+ '_type': 'url',
+ 'id': uuid,
+ 'title': video.get('title'),
+ 'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('publishedAt')),
+ 'duration': int_or_none(video.get('duration')),
+ 'series': try_get(video, lambda x: x['program']['title']),
+ 'ie_key': 'Medialaan',
+ }
diff --git a/youtube_dlc/extractor/vvvvid.py b/youtube_dlc/extractor/vvvvid.py
index f4cae7fe9..778ce8b76 100644
--- a/youtube_dlc/extractor/vvvvid.py
+++ b/youtube_dlc/extractor/vvvvid.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
int_or_none,
@@ -48,6 +49,22 @@ class VVVVIDIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # video_type == 'video/youtube'
+ 'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
+ 'md5': '33e0edfba720ad73a8782157fdebc648',
+ 'info_dict': {
+ 'id': 'RzmFKUDOUgw',
+ 'ext': 'mp4',
+ 'title': 'Trailer',
+ 'upload_date': '20150906',
+ 'description': 'md5:a5e802558d35247fee285875328c0b80',
+ 'uploader_id': 'BandaiVisual',
+ 'uploader': 'BANDAI NAMCO Arts Channel',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True
}]
@@ -154,12 +171,13 @@ class VVVVIDIE(InfoExtractor):
if season_number:
info['season_number'] = int(season_number)
- for quality in ('_sd', ''):
+ video_type = video_data.get('video_type')
+ is_youtube = False
+ for quality in ('', '_sd'):
embed_code = video_data.get('embed_info' + quality)
if not embed_code:
continue
embed_code = ds(embed_code)
- video_type = video_data.get('video_type')
if video_type in ('video/rcs', 'video/kenc'):
if video_type == 'video/kenc':
kenc = self._download_json(
@@ -172,19 +190,28 @@ class VVVVIDIE(InfoExtractor):
if kenc_message:
embed_code += '?' + ds(kenc_message)
formats.extend(self._extract_akamai_formats(embed_code, video_id))
+ elif video_type == 'video/youtube':
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'url': embed_code,
+ })
+ is_youtube = True
+ break
else:
formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
metadata_from_url(embed_code)
- self._sort_formats(formats)
+ if not is_youtube:
+ self._sort_formats(formats)
+ info['formats'] = formats
metadata_from_url(video_data.get('thumbnail'))
info.update(self._extract_common_video_info(video_data))
info.update({
'id': video_id,
'title': title,
- 'formats': formats,
'duration': int_or_none(video_data.get('length')),
'series': video_data.get('show_title'),
'season_id': season_id,
diff --git a/youtube_dlc/extractor/zype.py b/youtube_dlc/extractor/zype.py
index 5288f40d8..f20f953cb 100644
--- a/youtube_dlc/extractor/zype.py
+++ b/youtube_dlc/extractor/zype.py
@@ -87,11 +87,16 @@ class ZypeIE(InfoExtractor):
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
body, 'm3u8 url', group='url', default=None)
if not m3u8_url:
- source = self._parse_json(self._search_regex(
- r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
- 'source'), video_id, js_to_json)
- if source.get('integration') == 'verizon-media':
- m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
+ source = self._search_regex(
+ r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
+
+ def get_attr(key):
+ return self._search_regex(
+ r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
+ source, key, group='val')
+
+ if get_attr('integration') == 'verizon-media':
+ m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
text_tracks = self._search_regex(