aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dlc/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dlc/extractor')
-rw-r--r--youtube_dlc/extractor/aenetworks.py2
-rw-r--r--youtube_dlc/extractor/aljazeera.py41
-rw-r--r--youtube_dlc/extractor/americastestkitchen.py97
-rw-r--r--youtube_dlc/extractor/aol.py12
-rw-r--r--youtube_dlc/extractor/ard.py36
-rw-r--r--youtube_dlc/extractor/comedycentral.py143
-rw-r--r--youtube_dlc/extractor/extractors.py22
-rw-r--r--youtube_dlc/extractor/franceculture.py20
-rw-r--r--youtube_dlc/extractor/lbry.py9
-rw-r--r--youtube_dlc/extractor/minds.py196
-rw-r--r--youtube_dlc/extractor/mtv.py23
-rw-r--r--youtube_dlc/extractor/ninegag.py189
-rw-r--r--youtube_dlc/extractor/njpwworld.py54
-rw-r--r--youtube_dlc/extractor/spike.py25
-rw-r--r--youtube_dlc/extractor/spotify.py156
-rw-r--r--youtube_dlc/extractor/trovo.py193
-rw-r--r--youtube_dlc/extractor/wat.py64
-rw-r--r--youtube_dlc/extractor/yahoo.py80
18 files changed, 961 insertions, 401 deletions
diff --git a/youtube_dlc/extractor/aenetworks.py b/youtube_dlc/extractor/aenetworks.py
index 8e4963131..a5d88ebbe 100644
--- a/youtube_dlc/extractor/aenetworks.py
+++ b/youtube_dlc/extractor/aenetworks.py
@@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
},
- 'playlist_mincount': 168,
+ 'playlist_mincount': 150,
}]
_RESOURCE = 'series'
_ITEMS_KEY = 'episodes'
diff --git a/youtube_dlc/extractor/aljazeera.py b/youtube_dlc/extractor/aljazeera.py
index c68be3134..c4f915a3c 100644
--- a/youtube_dlc/extractor/aljazeera.py
+++ b/youtube_dlc/extractor/aljazeera.py
@@ -1,13 +1,16 @@
from __future__ import unicode_literals
+import json
+import re
+
from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
_TESTS = [{
- 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+ 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
'info_dict': {
'id': '3792260579001',
'ext': 'mp4',
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}, {
- 'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+ 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
'only_matching': True,
}]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
- program_name = self._match_id(url)
- webpage = self._download_webpage(url, program_name)
- brightcove_id = self._search_regex(
- r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+ post_type, name = re.match(self._VALID_URL, url).groups()
+ post_type = {
+ 'features': 'post',
+ 'program': 'episode',
+ 'videos': 'video',
+ }[post_type.split('/')[0]]
+ video = self._download_json(
+ 'https://www.aljazeera.com/graphql', name, query={
+ 'operationName': 'SingleArticleQuery',
+ 'variables': json.dumps({
+ 'name': name,
+ 'postType': post_type,
+ }),
+ }, headers={
+ 'wp-site': 'aje',
+ })['data']['article']['video']
+ video_id = video['id']
+ account_id = video.get('accountId') or '665003303001'
+ player_id = video.get('playerId') or 'BkeSH5BDb'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+ 'BrightcoveNew', video_id)
diff --git a/youtube_dlc/extractor/americastestkitchen.py b/youtube_dlc/extractor/americastestkitchen.py
index e20f00fc3..be960c0f9 100644
--- a/youtube_dlc/extractor/americastestkitchen.py
+++ b/youtube_dlc/extractor/americastestkitchen.py
@@ -1,13 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
+ int_or_none,
try_get,
unified_strdate,
+ unified_timestamp,
)
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
'ext': 'mp4',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
- 'timestamp': 1523664000,
- 'upload_date': '20180414',
+ 'timestamp': 1523318400,
+ 'upload_date': '20180410',
'release_date': '20180410',
'series': "America's Test Kitchen",
'season_number': 18,
@@ -34,6 +37,27 @@ class AmericasTestKitchenIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+ 'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+ 'md5': '06451608c57651e985a498e69cec17e5',
+ 'info_dict': {
+ 'id': '5fbe8c61bda2010001c6763b',
+ 'title': 'Simple Chicken Dinner',
+ 'ext': 'mp4',
+ 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1610755200,
+ 'upload_date': '20210116',
+ 'release_date': '20210116',
+ 'series': "America's Test Kitchen",
+ 'season_number': 21,
+ 'episode': 'Simple Chicken Dinner',
+ 'episode_number': 3,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}, {
@@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
+ 'timestamp': unified_timestamp(video.get('publishDate')),
'release_date': unified_strdate(video.get('publishDate')),
+ 'episode_number': int_or_none(episode.get('number')),
+ 'season_number': int_or_none(episode.get('season')),
'series': try_get(episode, lambda x: x['show']['title']),
'episode': episode.get('title'),
}
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+ _TESTS = [{
+ # ATK Season
+ 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+ 'info_dict': {
+ 'id': 'season_1',
+ 'title': 'Season 1',
+ },
+ 'playlist_count': 13,
+ }, {
+ # Cooks Country Season
+ 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+ 'info_dict': {
+ 'id': 'season_12',
+ 'title': 'Season 12',
+ },
+ 'playlist_count': 13,
+ }]
+
+ def _real_extract(self, url):
+ show_name, season_number = re.match(self._VALID_URL, url).groups()
+ season_number = int(season_number)
+
+ slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+
+ season = 'Season %d' % season_number
+
+ season_search = self._download_json(
+ 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+ season, headers={
+ 'Origin': 'https://www.%s.com' % show_name,
+ 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+ 'X-Algolia-Application-Id': 'Y1FNZXUI30',
+ }, query={
+ 'facetFilters': json.dumps([
+ 'search_season_list:' + season,
+ 'search_document_klass:episode',
+ 'search_show_slug:' + slug,
+ ]),
+ 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+ 'attributesToHighlight': '',
+ 'hitsPerPage': 1000,
+ })
+
+ def entries():
+ for episode in (season_search.get('hits') or []):
+ search_url = episode.get('search_url')
+ if not search_url:
+ continue
+ yield {
+ '_type': 'url',
+ 'url': 'https://www.%s.com%s' % (show_name, search_url),
+ 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'timestamp': unified_timestamp(episode.get('search_document_date')),
+ 'season_number': season_number,
+ 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+ 'ie_key': AmericasTestKitchenIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), 'season_%d' % season_number, season)
diff --git a/youtube_dlc/extractor/aol.py b/youtube_dlc/extractor/aol.py
index e87994a6a..f6ecb8438 100644
--- a/youtube_dlc/extractor/aol.py
+++ b/youtube_dlc/extractor/aol.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .yahoo import YahooIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@@ -15,9 +15,9 @@ from ..utils import (
)
-class AolIE(InfoExtractor):
+class AolIE(YahooIE):
IE_NAME = 'aol.com'
- _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
_TESTS = [{
# video with 5min ID
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
}, {
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
+ }, {
+ # Yahoo video
+ 'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if '-' in video_id:
+ return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
diff --git a/youtube_dlc/extractor/ard.py b/youtube_dlc/extractor/ard.py
index 6f1e477a9..733793145 100644
--- a/youtube_dlc/extractor/ard.py
+++ b/youtube_dlc/extractor/ard.py
@@ -226,13 +226,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
if doc.tag == 'rss':
return GenericIE()._extract_rss(url, video_id, doc)
- title = self._html_search_regex(
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>',
r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
- description = self._html_search_meta(
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
@@ -289,18 +289,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
class ARDIE(InfoExtractor):
- _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+ _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
_TESTS = [{
- # available till 14.02.2019
- 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
- 'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
+ # available till 7.01.2022
+ 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
+ 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': {
- 'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
- 'id': '102',
+ 'display_id': 'maischberger-die-woche',
+ 'id': '100',
'ext': 'mp4',
- 'duration': 4435.0,
- 'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
- 'upload_date': '20180214',
+ 'duration': 3687.0,
+ 'title': 'maischberger. die woche vom 7. Januar 2021',
+ 'upload_date': '20210107',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
@@ -355,17 +355,17 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
_TESTS = [{
- 'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
- 'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
+ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
+ 'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
'info_dict': {
'display_id': 'die-robuste-roswita',
- 'id': '70153354',
+ 'id': '78566716',
'title': 'Die robuste Roswita',
- 'description': r're:^Der Mord.*trüber ist als die Ilm.',
+ 'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
'duration': 5316,
- 'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
- 'timestamp': 1577047500,
- 'upload_date': '20191222',
+ 'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
+ 'timestamp': 1596658200,
+ 'upload_date': '20200805',
'ext': 'mp4',
},
}, {
diff --git a/youtube_dlc/extractor/comedycentral.py b/youtube_dlc/extractor/comedycentral.py
index f54c4adeb..1bfa912be 100644
--- a/youtube_dlc/extractor/comedycentral.py
+++ b/youtube_dlc/extractor/comedycentral.py
@@ -1,142 +1,51 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
-from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
- /(?P<title>.*)'''
+ _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
- 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
- 'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+ 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+ 'md5': 'b8acb347177c680ff18a292aa2166f80',
'info_dict': {
- 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+ 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
'ext': 'mp4',
- 'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
- 'description': 'After a certain point, breastfeeding becomes c**kblocking.',
- 'timestamp': 1376798400,
- 'upload_date': '20130818',
+ 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+ 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+ 'timestamp': 1598670000,
+ 'upload_date': '20200829',
},
}, {
- 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+ 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
'only_matching': True,
- }]
-
-
-class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (?:full-episodes|shows(?=/[^/]+/full-episodes))
- /(?P<id>[^?]+)'''
- _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
-
- _TESTS = [{
- 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
- 'info_dict': {
- 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
- 'title': 'November 28, 2016 - Ryan Speedo Green',
- },
- 'playlist_count': 4,
}, {
- 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
- videos_info = self._get_videos_info(mgid)
- return videos_info
-
-
-class ToshIE(MTVServicesInfoExtractor):
- IE_DESC = 'Tosh.0'
- _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
- _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-
- _TESTS = [{
- 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
- 'info_dict': {
- 'description': 'Tosh asked fans to share their summer plans.',
- 'title': 'Twitter Users Share Summer Plans',
- },
- 'playlist': [{
- 'md5': 'f269e88114c1805bb6d7653fecea9e06',
- 'info_dict': {
- 'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
- 'ext': 'mp4',
- 'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
- 'description': 'Tosh asked fans to share their summer plans.',
- 'thumbnail': r're:^https?://.*\.jpg',
- # It's really reported to be published on year 2077
- 'upload_date': '20770610',
- 'timestamp': 3390510600,
- 'subtitles': {
- 'en': 'mincount:3',
- },
- },
- }]
- }, {
- 'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
+ 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
'only_matching': True,
}]
class ComedyCentralTVIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
_TESTS = [{
- 'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+ 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
'info_dict': {
- 'id': 'local_playlist-f99b626bdfe13568579a',
- 'ext': 'flv',
- 'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
+ 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Josh Investigates',
+ 'description': 'Steht uns das Ende der Welt bevor?',
},
- }, {
- 'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
- 'only_matching': True,
- }, {
- 'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
- 'only_matching': True,
}]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- mrss_url = self._search_regex(
- r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'mrss url', group='url')
-
- return self._get_videos_info_from_url(mrss_url, video_id)
-
-
-class ComedyCentralShortnameIE(InfoExtractor):
- _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
- _TESTS = [{
- 'url': ':tds',
- 'only_matching': True,
- }, {
- 'url': ':thedailyshow',
- 'only_matching': True,
- }, {
- 'url': ':theopposition',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- shortcut_map = {
- 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['DE']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'web.cc.tv',
+ 'ep': 'b9032c3a',
+ 'imageEp': 'web.cc.tv',
+ 'mgid': uri,
}
- return self.url_result(shortcut_map[video_id])
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
index 6ea86c097..10fd4a0b5 100644
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -50,7 +50,10 @@ from .animelab import (
AnimeLabIE,
AnimeLabShowsIE,
)
-from .americastestkitchen import AmericasTestKitchenIE
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE
from .aol import AolIE
@@ -244,11 +247,8 @@ from .cnn import (
)
from .coub import CoubIE
from .comedycentral import (
- ComedyCentralFullEpisodesIE,
ComedyCentralIE,
- ComedyCentralShortnameIE,
ComedyCentralTVIE,
- ToshIE,
)
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
@@ -682,6 +682,11 @@ from .mildom import (
MildomVodIE,
MildomUserVodIE,
)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
@@ -1162,6 +1167,10 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
@@ -1270,7 +1279,10 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
-from .trovolive import TrovoLiveIE
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+)
from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
diff --git a/youtube_dlc/extractor/franceculture.py b/youtube_dlc/extractor/franceculture.py
index 306b45fc9..14f4cb489 100644
--- a/youtube_dlc/extractor/franceculture.py
+++ b/youtube_dlc/extractor/franceculture.py
@@ -11,7 +11,7 @@ from ..utils import (
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
'info_dict': {
'id': 'rendez-vous-au-pays-des-geeks',
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
- 'timestamp': 1393642916,
+ 'timestamp': 1393700400,
'vcodec': 'none',
}
- }
+ }, {
+ # no thumbnail
+ 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
</h1>|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
).*?
- (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+ (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
''',
webpage, 'video data'))
- video_url = video_data['data-asset-source']
- title = video_data.get('data-asset-title') or self._og_search_title(webpage)
+ video_url = video_data.get('data-url') or video_data['data-asset-source']
+ title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
webpage, 'description', default=None)
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
+ webpage, 'thumbnail', default=None)
uploader = self._html_search_regex(
r'(?s)<span class="author">(.*?)</span>',
webpage, 'uploader', default=None)
@@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
'ext': ext,
'vcodec': 'none' if ext == 'mp3' else None,
'uploader': uploader,
- 'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+ 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
'duration': int_or_none(video_data.get('data-duration')),
}
diff --git a/youtube_dlc/extractor/lbry.py b/youtube_dlc/extractor/lbry.py
index 41cc245eb..413215a99 100644
--- a/youtube_dlc/extractor/lbry.py
+++ b/youtube_dlc/extractor/lbry.py
@@ -5,7 +5,10 @@ import functools
import json
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
determine_ext,
ExtractorError,
@@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
}, {
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -139,6 +145,7 @@ class LBRYIE(LBRYBaseIE):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
+ display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
result_value = result['value']
diff --git a/youtube_dlc/extractor/minds.py b/youtube_dlc/extractor/minds.py
new file mode 100644
index 000000000..8e9f0f825
--- /dev/null
+++ b/youtube_dlc/extractor/minds.py
@@ -0,0 +1,196 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+)
+
+
+class MindsBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
+
+ def _call_api(self, path, video_id, resource, query=None):
+ api_url = 'https://www.minds.com/api/' + path
+ token = self._get_cookies(api_url).get('XSRF-TOKEN')
+ return self._download_json(
+ api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
+ 'Referer': 'https://www.minds.com/',
+ 'X-XSRF-TOKEN': token.value if token else '',
+ }, query=query)
+
+
+class MindsIE(MindsBaseIE):
+ IE_NAME = 'minds'
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.minds.com/media/100000000000086822',
+ 'md5': '215a658184a419764852239d4970b045',
+ 'info_dict': {
+ 'id': '100000000000086822',
+ 'ext': 'mp4',
+ 'title': 'Minds intro sequence',
+ 'thumbnail': r're:https?://.+\.png',
+ 'uploader_id': 'ottman',
+ 'upload_date': '20130524',
+ 'timestamp': 1369404826,
+ 'uploader': 'Bill Ottman',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['animation'],
+ 'comment_count': int,
+ 'license': 'attribution-cc',
+ },
+ }, {
+ # entity.type == 'activity' and empty title
+ 'url': 'https://www.minds.com/newsfeed/798025111988506624',
+ 'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
+ 'info_dict': {
+ 'id': '798022190320226304',
+ 'ext': 'mp4',
+ 'title': '798022190320226304',
+ 'uploader': 'ColinFlaherty',
+ 'upload_date': '20180111',
+ 'timestamp': 1515639316,
+ 'uploader_id': 'ColinFlaherty',
+ },
+ }, {
+ 'url': 'https://www.minds.com/archive/view/715172106794442752',
+ 'only_matching': True,
+ }, {
+ # youtube perma_url
+ 'url': 'https://www.minds.com/newsfeed/1197131838022602752',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ entity_id = self._match_id(url)
+ entity = self._call_api(
+ 'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
+ if entity.get('type') == 'activity':
+ if entity.get('custom_type') == 'video':
+ video_id = entity['entity_guid']
+ else:
+ return self.url_result(entity['perma_url'])
+ else:
+ assert(entity['subtype'] == 'video')
+ video_id = entity_id
+ # 1080p and webm formats available only on the sources array
+ video = self._call_api(
+ 'v2/media/video/' + video_id, video_id, 'video')
+
+ formats = []
+ for source in (video.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ formats.append({
+ 'format_id': source.get('label'),
+ 'height': int_or_none(source.get('size')),
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ entity = video.get('entity') or entity
+ owner = entity.get('ownerObj') or {}
+ uploader_id = owner.get('username')
+
+ tags = entity.get('tags')
+ if tags and isinstance(tags, compat_str):
+ tags = [tags]
+
+ thumbnail = None
+ poster = video.get('poster') or entity.get('thumbnail_src')
+ if poster:
+ urlh = self._request_webpage(poster, video_id, fatal=False)
+ if urlh:
+ thumbnail = urlh.geturl()
+
+ return {
+ 'id': video_id,
+ 'title': entity.get('title') or video_id,
+ 'formats': formats,
+ 'description': clean_html(entity.get('description')) or None,
+ 'license': str_or_none(entity.get('license')),
+ 'timestamp': int_or_none(entity.get('time_created')),
+ 'uploader': strip_or_none(owner.get('name')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
+ 'view_count': int_or_none(entity.get('play:count')),
+ 'like_count': int_or_none(entity.get('thumbs:up:count')),
+ 'dislike_count': int_or_none(entity.get('thumbs:down:count')),
+ 'tags': tags,
+ 'comment_count': int_or_none(entity.get('comments:count')),
+ 'thumbnail': thumbnail,
+ }
+
+
+class MindsFeedBaseIE(MindsBaseIE):
+ _PAGE_SIZE = 150
+
+ def _entries(self, feed_id):
+ query = {'limit': self._PAGE_SIZE, 'sync': 1}
+ i = 1
+ while True:
+ data = self._call_api(
+ 'v2/feeds/container/%s/videos' % feed_id,
+ feed_id, 'page %s' % i, query)
+ entities = data.get('entities') or []
+ for entity in entities:
+ guid = entity.get('guid')
+ if not guid:
+ continue
+ yield self.url_result(
+ 'https://www.minds.com/newsfeed/' + guid,
+ MindsIE.ie_key(), guid)
+ query['from_timestamp'] = data['load-next']
+ if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
+ break
+ i += 1
+
+ def _real_extract(self, url):
+ feed_id = self._match_id(url)
+ feed = self._call_api(
+ 'v1/%s/%s' % (self._FEED_PATH, feed_id),
+ feed_id, self._FEED_TYPE)[self._FEED_TYPE]
+
+ return self.playlist_result(
+ self._entries(feed['guid']), feed_id,
+ strip_or_none(feed.get('name')),
+ feed.get('briefdescription'))
+
+
+class MindsChannelIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'channel'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
+ _FEED_PATH = 'channel'
+ _TEST = {
+ 'url': 'https://www.minds.com/ottman',
+ 'info_dict': {
+ 'id': 'ottman',
+ 'title': 'Bill Ottman',
+ 'description': 'Co-creator & CEO @minds',
+ },
+ 'playlist_mincount': 54,
+ }
+
+
+class MindsGroupIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'group'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
+ _FEED_PATH = 'groups/group'
+ _TEST = {
+ 'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
+ 'info_dict': {
+ 'id': '785582576369672204',
+ 'title': 'Cooking Videos',
+ },
+ 'playlist_mincount': 1,
+ }
diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py
index d31f53137..68e81ad47 100644
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -255,6 +255,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+ @staticmethod
+ def _extract_child_with_type(parent, t):
+ return next(c for c in parent['children'] if c.get('type') == t)
+
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
if url == '':
return
@@ -332,6 +336,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
if not mgid:
mgid = self._extract_triforce_mgid(webpage, data_zone)
+ if not mgid:
+ data = self._parse_json(self._search_regex(
+ r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+ main_container = self._extract_child_with_type(data, 'MainContainer')
+ video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+ mgid = video_player['props']['media']['video']['config']['uri']
+
return mgid
def _real_extract(self, url):
@@ -403,18 +414,6 @@ class MTVIE(MTVServicesInfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def extract_child_with_type(parent, t):
- children = parent['children']
- return next(c for c in children if c.get('type') == t)
-
- def _extract_mgid(self, webpage):
- data = self._parse_json(self._search_regex(
- r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
- main_container = self.extract_child_with_type(data, 'MainContainer')
- video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
- return video_player['props']['media']['video']['config']['uri']
-
class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvjapan'
diff --git a/youtube_dlc/extractor/ninegag.py b/youtube_dlc/extractor/ninegag.py
index dc6a27d36..440f865bc 100644
--- a/youtube_dlc/extractor/ninegag.py
+++ b/youtube_dlc/extractor/ninegag.py
@@ -1,104 +1,125 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import str_to_int
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ try_get,
+ url_or_none,
+)
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
- _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
+ _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
- _TESTS = [{
- 'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
- 'info_dict': {
- 'id': 'kXzwOKyGlSA',
- 'ext': 'mp4',
- 'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
- 'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
- 'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
- 'uploader': 'CompilationChannel',
- 'upload_date': '20131110',
- 'view_count': int,
- },
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'http://9gag.com/tv/p/aKolP3',
+ _TEST = {
+ 'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': {
- 'id': 'aKolP3',
+ 'id': 'ae5Ag7B',
'ext': 'mp4',
- 'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
- 'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
- 'uploader_id': 'rickmereki',
- 'uploader': 'Rick Mereki',
- 'upload_date': '20110803',
- 'view_count': int,
- },
- 'add_ie': ['Vimeo'],
- }, {
- 'url': 'http://9gag.com/tv/p/KklwM',
- 'only_matching': True,
- }, {
- 'url': 'http://9gag.tv/p/Kk2X5',
- 'only_matching': True,
- }, {
- 'url': 'http://9gag.com/tv/embed/a5Dmvl',
- 'only_matching': True,
- }]
-
- _EXTERNAL_VIDEO_PROVIDER = {
- '1': {
- 'url': '%s',
- 'ie_key': 'Youtube',
- },
- '2': {
- 'url': 'http://player.vimeo.com/video/%s',
- 'ie_key': 'Vimeo',
- },
- '3': {
- 'url': 'http://instagram.com/p/%s',
- 'ie_key': 'Instagram',
- },
- '4': {
- 'url': 'http://vine.co/v/%s',
- 'ie_key': 'Vine',
- },
+ 'title': 'Capybara Agility Training',
+ 'upload_date': '20191108',
+ 'timestamp': 1573237208,
+ 'categories': ['Awesome'],
+ 'tags': ['Weimaraner', 'American Pit Bull Terrier'],
+ 'duration': 44,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ }
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
+ post_id = self._match_id(url)
+ post = self._download_json(
+ 'https://9gag.com/v1/post', post_id, query={
+ 'id': post_id
+ })['data']['post']
+
+ if post.get('type') != 'Animated':
+ raise ExtractorError(
+ 'The given url does not contain a video',
+ expected=True)
+
+ title = post['title']
+
+ duration = None
+ formats = []
+ thumbnails = []
+ for key, image in (post.get('images') or {}).items():
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ ext = determine_ext(image_url)
+ image_id = key.strip('image')
+ common = {
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ }
+ if ext in ('jpg', 'png'):
+ webp_url = image.get('webpUrl')
+ if webp_url:
+ t = common.copy()
+ t.update({
+ 'id': image_id + '-webp',
+ 'url': webp_url,
+ })
+ thumbnails.append(t)
+ common.update({
+ 'id': image_id,
+ 'ext': ext,
+ })
+ thumbnails.append(common)
+ elif ext in ('webm', 'mp4'):
+ if not duration:
+ duration = int_or_none(image.get('duration'))
+ common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
+ for vcodec in ('vp8', 'vp9', 'h265'):
+ c_url = image.get(vcodec + 'Url')
+ if not c_url:
+ continue
+ c_f = common.copy()
+ c_f.update({
+ 'format_id': image_id + '-' + vcodec,
+ 'url': c_url,
+ 'vcodec': vcodec,
+ })
+ formats.append(c_f)
+ common.update({
+ 'ext': ext,
+ 'format_id': image_id,
+ })
+ formats.append(common)
+ self._sort_formats(formats)
- webpage = self._download_webpage(url, display_id)
+ section = try_get(post, lambda x: x['postSection']['name'])
- post_view = self._parse_json(
- self._search_regex(
- r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
- webpage, 'post view'),
- display_id)
+ tags = None
+ post_tags = post.get('tags')
+ if post_tags:
+ tags = []
+ for tag in post_tags:
+ tag_key = tag.get('key')
+ if not tag_key:
+ continue
+ tags.append(tag_key)
- ie_key = None
- source_url = post_view.get('sourceUrl')
- if not source_url:
- external_video_id = post_view['videoExternalId']
- external_video_provider = post_view['videoExternalProvider']
- source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
- ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
- title = post_view['title']
- description = post_view.get('description')
- view_count = str_to_int(post_view.get('externalView'))
- thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
+ get_count = lambda x: int_or_none(post.get(x + 'Count'))
return {
- '_type': 'url_transparent',
- 'url': source_url,
- 'ie_key': ie_key,
- 'id': video_id,
- 'display_id': display_id,
+ 'id': post_id,
'title': title,
- 'description': description,
- 'view_count': view_count,
- 'thumbnail': thumbnail,
+ 'timestamp': int_or_none(post.get('creationTs')),
+ 'duration': duration,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'like_count': get_count('upVote'),
+ 'dislike_count': get_count('downVote'),
+ 'comment_count': get_count('comments'),
+ 'age_limit': 18 if post.get('nsfw') == 1 else None,
+ 'categories': [section] if section else None,
+ 'tags': tags,
}
diff --git a/youtube_dlc/extractor/njpwworld.py b/youtube_dlc/extractor/njpwworld.py
index 025c5d249..3639d142f 100644
--- a/youtube_dlc/extractor/njpwworld.py
+++ b/youtube_dlc/extractor/njpwworld.py
@@ -6,30 +6,40 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- extract_attributes,
get_element_by_class,
urlencode_postdata,
)
class NJPWWorldIE(InfoExtractor):
- _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
+ _VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
IE_DESC = '新日本プロレスワールド'
_NETRC_MACHINE = 'njpwworld'
- _TEST = {
+ _TESTS = [{
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
'info_dict': {
'id': 's_series_00155_1_9',
'ext': 'mp4',
- 'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
+ 'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
'tags': list,
},
'params': {
'skip_download': True, # AES-encrypted m3u8
},
'skip': 'Requires login',
- }
+ }, {
+ 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
+ 'info_dict': {
+ 'id': 's_series_00563_16_bs',
+ 'ext': 'mp4',
+ 'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
+ 'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
@@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
formats = []
- for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
- player = extract_attributes(mobj.group(0))
- player_path = player.get('href')
- if not player_path:
- continue
- kind = self._search_regex(
- r'(low|high)$', player.get('class') or '', 'kind',
- default='low')
+ for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
+ player_path = '/intent?id=%s&type=url' % vid
player_url = compat_urlparse.urljoin(url, player_path)
- player_page = self._download_webpage(
- player_url, video_id, note='Downloading player page')
- entries = self._parse_html5_media_entries(
- player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
- m3u8_entry_protocol='m3u8_native')
- kind_formats = entries[0]['formats']
- for f in kind_formats:
- f['quality'] = 2 if kind == 'high' else 1
- formats.extend(kind_formats)
+ formats.append({
+ 'url': player_url,
+ 'format_id': kind,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ 'quality': 2 if kind == 'high' else 1,
+ })
self._sort_formats(formats)
- post_content = get_element_by_class('post-content', webpage)
+ tag_block = get_element_by_class('tag-block', webpage)
tags = re.findall(
- r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
- ) if post_content else None
+ r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
+ ) if tag_block else None
return {
'id': video_id,
- 'title': self._og_search_title(webpage),
+ 'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
'formats': formats,
'tags': tags,
}
diff --git a/youtube_dlc/extractor/spike.py b/youtube_dlc/extractor/spike.py
index 4180e71ef..5805f3d44 100644
--- a/youtube_dlc/extractor/spike.py
+++ b/youtube_dlc/extractor/spike.py
@@ -20,19 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
- def _extract_mgid(self, webpage, url):
- mgid = None
-
- if not mgid:
- mgid = self._extract_triforce_mgid(webpage)
-
- if not mgid:
- mgid = self._extract_new_triforce_mgid(webpage, url)
-
- return mgid
-
-# TODO Remove - Reason: Outdated Site
-
class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@@ -56,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
def _get_feed_query(self, uri):
return {
'arcEp': 'paramountnetwork.com',
+ 'imageEp': 'paramountnetwork.com',
'mgid': uri,
}
-
- def _extract_mgid(self, webpage, url):
- root_data = self._parse_json(self._search_regex(
- r'window\.__DATA__\s*=\s*({.+})',
- webpage, 'data'), None)
-
- def find_sub_data(data, data_type):
- return next(c for c in data['children'] if c.get('type') == data_type)
-
- c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
- return c['props']['media']['video']['config']['uri']
diff --git a/youtube_dlc/extractor/spotify.py b/youtube_dlc/extractor/spotify.py
new file mode 100644
index 000000000..826f98cff
--- /dev/null
+++ b/youtube_dlc/extractor/spotify.py
@@ -0,0 +1,156 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class SpotifyBaseIE(InfoExtractor):
+ _ACCESS_TOKEN = None
+ _OPERATION_HASHES = {
+ 'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
+ 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
+ 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
+ }
+ _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
+
+ def _real_initialize(self):
+ self._ACCESS_TOKEN = self._download_json(
+ 'https://open.spotify.com/get_access_token', None)['accessToken']
+
+ def _call_api(self, operation, video_id, variables):
+ return self._download_json(
+ 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
+ 'operationName': 'query' + operation,
+ 'variables': json.dumps(variables),
+ 'extensions': json.dumps({
+ 'persistedQuery': {
+ 'sha256Hash': self._OPERATION_HASHES[operation],
+ },
+ })
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+
+ def _extract_episode(self, episode, series):
+ episode_id = episode['id']
+ title = episode['name'].strip()
+
+ formats = []
+ audio_preview = episode.get('audioPreview') or {}
+ audio_preview_url = audio_preview.get('url')
+ if audio_preview_url:
+ f = {
+ 'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
+ 'vcodec': 'none',
+ }
+ audio_preview_format = audio_preview.get('format')
+ if audio_preview_format:
+ f['format_id'] = audio_preview_format
+ mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
+ if mobj:
+ f.update({
+ 'abr': int(mobj.group(2)),
+ 'ext': mobj.group(1).lower(),
+ })
+ formats.append(f)
+
+ for item in (try_get(episode, lambda x: x['audio']['items']) or []):
+ item_url = item.get('url')
+ if not (item_url and item.get('externallyHosted')):
+ continue
+ formats.append({
+ 'url': clean_podcast_url(item_url),
+ 'vcodec': 'none',
+ })
+
+ thumbnails = []
+ for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ thumbnails.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ return {
+ 'id': episode_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': strip_or_none(episode.get('description')),
+ 'duration': float_or_none(try_get(
+ episode, lambda x: x['duration']['totalMilliseconds']), 1000),
+ 'release_date': unified_strdate(try_get(
+ episode, lambda x: x['releaseDate']['isoString'])),
+ 'series': series,
+ }
+
+
+class SpotifyIE(SpotifyBaseIE):
+ IE_NAME = 'spotify'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
+ _TEST = {
+ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
+ 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
+ 'info_dict': {
+ 'id': '4Z7GAJ50bgctf6uclHlWKo',
+ 'ext': 'mp3',
+ 'title': 'From the archive: Why time management is ruining our lives',
+ 'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
+ 'duration': 2083.605,
+ 'release_date': '20201217',
+ 'series': "The Guardian's Audio Long Reads",
+ }
+ }
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api('Episode', episode_id, {
+ 'uri': 'spotify:episode:' + episode_id
+ })['episode']
+ return self._extract_episode(
+ episode, try_get(episode, lambda x: x['podcast']['name']))
+
+
+class SpotifyShowIE(SpotifyBaseIE):
+ IE_NAME = 'spotify:show'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
+ _TEST = {
+ 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
+ 'info_dict': {
+ 'id': '4PM9Ke6l66IRNpottHKV9M',
+ 'title': 'The Story from the Guardian',
+ 'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
+ },
+ 'playlist_mincount': 36,
+ }
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ podcast = self._call_api('ShowEpisodes', show_id, {
+ 'limit': 1000000000,
+ 'offset': 0,
+ 'uri': 'spotify:show:' + show_id,
+ })['podcast']
+ podcast_name = podcast.get('name')
+
+ entries = []
+ for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
+ episode = item.get('episode')
+ if not episode:
+ continue
+ entries.append(self._extract_episode(episode, podcast_name))
+
+ return self.playlist_result(
+ entries, show_id, podcast_name, podcast.get('description'))
diff --git a/youtube_dlc/extractor/trovo.py b/youtube_dlc/extractor/trovo.py
new file mode 100644
index 000000000..43745213d
--- /dev/null
+++ b/youtube_dlc/extractor/trovo.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class TrovoBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
+
+ def _extract_streamer_info(self, data):
+ streamer_info = data.get('streamerInfo') or {}
+ username = streamer_info.get('userName')
+ return {
+ 'uploader': streamer_info.get('nickName'),
+ 'uploader_id': str_or_none(streamer_info.get('uid')),
+ 'uploader_url': 'https://trovo.live/' + username if username else None,
+ }
+
+
+class TrovoIE(TrovoBaseIE):
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ live_info = self._download_json(
+ 'https://gql.trovo.live/', username, query={
+ 'query': '''{
+ getLiveInfo(params: {userName: "%s"}) {
+ isLive
+ programInfo {
+ coverUrl
+ id
+ streamInfo {
+ desc
+ playUrl
+ }
+ title
+ }
+ streamerInfo {
+ nickName
+ uid
+ userName
+ }
+ }
+}''' % username,
+ })['data']['getLiveInfo']
+ if live_info.get('isLive') == 0:
+ raise ExtractorError('%s is offline' % username, expected=True)
+ program_info = live_info['programInfo']
+ program_id = program_info['id']
+ title = self._live_title(program_info['title'])
+
+ formats = []
+ for stream_info in (program_info.get('streamInfo') or []):
+ play_url = stream_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = stream_info.get('desc')
+ formats.append({
+ 'format_id': format_id,
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'url': play_url,
+ })
+ self._sort_formats(formats)
+
+ info = {
+ 'id': program_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': program_info.get('coverUrl'),
+ 'is_live': True,
+ }
+ info.update(self._extract_streamer_info(live_info))
+ return info
+
+
+class TrovoVodIE(TrovoBaseIE):
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+ 'info_dict': {
+ 'id': 'ltv-100095501_100095501_1609596043',
+ 'ext': 'mp4',
+ 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
+ 'uploader': 'Exsl',
+ 'timestamp': 1609640305,
+ 'upload_date': '20210103',
+ 'uploader_id': '100095501',
+ 'duration': 43977,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': 'mincount:8',
+ 'categories': ['Grand Theft Auto V'],
+ },
+ }, {
+ 'url': 'https://trovo.live/clip/lc-5285890810184026005',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ vid = self._match_id(url)
+ resp = self._download_json(
+ 'https://gql.trovo.live/', vid, data=json.dumps([{
+ 'query': '''{
+ batchGetVodDetailInfo(params: {vids: ["%s"]}) {
+ VodDetailInfos
+ }
+}''' % vid,
+ }, {
+ 'query': '''{
+ getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
+ commentList {
+ author {
+ nickName
+ uid
+ }
+ commentID
+ content
+ createdAt
+ parentID
+ }
+ }
+}''' % vid,
+ }]).encode(), headers={
+ 'Content-Type': 'application/json',
+ })
+ vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
+ vod_info = vod_detail_info['vodInfo']
+ title = vod_info['title']
+
+ language = vod_info.get('languageName')
+ formats = []
+ for play_info in (vod_info.get('playInfos') or []):
+ play_url = play_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = play_info.get('desc')
+ formats.append({
+ 'ext': 'mp4',
+ 'filesize': int_or_none(play_info.get('fileSize')),
+ 'format_id': format_id,
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'language': language,
+ 'protocol': 'm3u8_native',
+ 'tbr': int_or_none(play_info.get('bitrate')),
+ 'url': play_url,
+ })
+ self._sort_formats(formats)
+
+ category = vod_info.get('categoryName')
+ get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
+
+ comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
+ comments = []
+ for comment in comment_list:
+ content = comment.get('content')
+ if not content:
+ continue
+ author = comment.get('author') or {}
+ parent = comment.get('parentID')
+ comments.append({
+ 'author': author.get('nickName'),
+ 'author_id': str_or_none(author.get('uid')),
+ 'id': str_or_none(comment.get('commentID')),
+ 'text': content,
+ 'timestamp': int_or_none(comment.get('createdAt')),
+ 'parent': 'root' if parent == 0 else str_or_none(parent),
+ })
+
+ info = {
+ 'id': vid,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': vod_info.get('coverUrl'),
+ 'timestamp': int_or_none(vod_info.get('publishTs')),
+ 'duration': int_or_none(vod_info.get('duration')),
+ 'view_count': get_count('watch'),
+ 'like_count': get_count('like'),
+ 'comment_count': get_count('comment'),
+ 'comments': comments,
+ 'categories': [category] if category else None,
+ }
+ info.update(self._extract_streamer_info(vod_detail_info))
+ return info
diff --git a/youtube_dlc/extractor/wat.py b/youtube_dlc/extractor/wat.py
index 8ef3e0906..f6940b371 100644
--- a/youtube_dlc/extractor/wat.py
+++ b/youtube_dlc/extractor/wat.py
@@ -1,12 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- ExtractorError,
unified_strdate,
HEADRequest,
int_or_none,
@@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
},
]
- _FORMATS = (
- (200, 416, 234),
- (400, 480, 270),
- (600, 640, 360),
- (1200, 640, 360),
- (1800, 960, 540),
- (2500, 1280, 720),
- )
-
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
@@ -97,46 +85,20 @@ class WatIE(InfoExtractor):
return red_url
return None
- def remove_bitrate_limit(manifest_url):
- return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
-
formats = []
- try:
- alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
- manifest_urls = self._download_json(
- 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
- m3u8_url = manifest_urls.get('hls')
- if m3u8_url:
- m3u8_url = remove_bitrate_limit(m3u8_url)
- for m3u8_alt_url in alt_urls(m3u8_url):
- formats.extend(self._extract_m3u8_formats(
- m3u8_alt_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- mpd_url = manifest_urls.get('mpd')
- if mpd_url:
- mpd_url = remove_bitrate_limit(mpd_url)
- for mpd_alt_url in alt_urls(mpd_url):
- formats.extend(self._extract_mpd_formats(
- mpd_alt_url, video_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
- except ExtractorError:
- abr = 64
- for vbr, width, height in self._FORMATS:
- tbr = vbr + abr
- format_id = 'http-%s' % tbr
- fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
- if self._is_valid_url(fmt_url, video_id, format_id):
- formats.append({
- 'format_id': format_id,
- 'url': fmt_url,
- 'vbr': vbr,
- 'abr': abr,
- 'width': width,
- 'height': height,
- })
+ manifest_urls = self._download_json(
+ 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
+ m3u8_url = manifest_urls.get('hls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ mpd_url = manifest_urls.get('mpd')
+ if mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
+ video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
diff --git a/youtube_dlc/extractor/yahoo.py b/youtube_dlc/extractor/yahoo.py
index e4615376c..a17b10d6e 100644
--- a/youtube_dlc/extractor/yahoo.py
+++ b/youtube_dlc/extractor/yahoo.py
@@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
'only_matching': True,
}]
- def _real_extract(self, url):
- url, country, display_id = re.match(self._VALID_URL, url).groups()
- if not country:
- country = 'us'
- else:
- country = country.split('-')[0]
- api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
-
- for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
- content = self._download_json(
- api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
- display_id, 'Downloading content JSON metadata', fatal=i == 1)
- if content:
- item = content['items'][0]
- break
-
- if item.get('type') != 'video':
- entries = []
-
- cover = item.get('cover') or {}
- if cover.get('type') == 'yvideo':
- cover_url = cover.get('url')
- if cover_url:
- entries.append(self.url_result(
- cover_url, 'Yahoo', cover.get('uuid')))
-
- for e in item.get('body', []):
- if e.get('type') == 'videoIframe':
- iframe_url = e.get('url')
- if not iframe_url:
- continue
- entries.append(self.url_result(iframe_url))
-
- return self.playlist_result(
- entries, item.get('uuid'),
- item.get('title'), item.get('summary'))
-
- video_id = item['uuid']
+ def _extract_yahoo_video(self, video_id, country):
video = self._download_json(
- api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
+ 'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
video_id, 'Downloading video JSON metadata')[0]
title = video['title']
@@ -298,7 +261,6 @@ class YahooIE(InfoExtractor):
'id': video_id,
'title': self._live_title(title) if is_live else title,
'formats': formats,
- 'display_id': display_id,
'thumbnails': thumbnails,
'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
@@ -311,6 +273,44 @@ class YahooIE(InfoExtractor):
'episode_number': int_or_none(series_info.get('episode_number')),
}
+ def _real_extract(self, url):
+ url, country, display_id = re.match(self._VALID_URL, url).groups()
+ if not country:
+ country = 'us'
+ else:
+ country = country.split('-')[0]
+
+ item = self._download_json(
+ 'https://%s.yahoo.com/caas/content/article' % country, display_id,
+ 'Downloading content JSON metadata', query={
+ 'url': url
+ })['items'][0]['data']['partnerData']
+
+ if item.get('type') != 'video':
+ entries = []
+
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+
+ for e in (item.get('body') or []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if not iframe_url:
+ continue
+ entries.append(self.url_result(iframe_url))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ info = self._extract_yahoo_video(item['uuid'], country)
+ info['display_id'] = display_id
+ return info
+
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'