aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> 2021-11-24 03:01:49 -0500
committer: GitHub <noreply@github.com> 2021-11-24 13:31:49 +0530
commit: fec41d17a587ff18f375c9ec96ee8bc748b57236 (patch)
tree: 8d027c6d77271ebf7d1edcdfa8fdce13fb208238
parent: a61fd4cf6fa23b05729396ae342a5fe9785c231f (diff)
download: hypervideo-pre-fec41d17a587ff18f375c9ec96ee8bc748b57236.tar.lz
hypervideo-pre-fec41d17a587ff18f375c9ec96ee8bc748b57236.tar.xz
hypervideo-pre-fec41d17a587ff18f375c9ec96ee8bc748b57236.zip
[MTV] Improve mgid extraction (#1713)
Original PR: https://github.com/ytdl-org/youtube-dl/pull/30149
Fixes: #713, #1580, https://github.com/ytdl-org/youtube-dl/issues/30139
Authored by: Sipherdrakon, kikuyan
-rw-r--r-- yt_dlp/extractor/mtv.py 20
-rw-r--r-- yt_dlp/extractor/southpark.py 17
2 files changed, 19 insertions, 18 deletions
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
index 4812f11cc..be5de0a70 100644
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -307,20 +307,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
mgid = self._extract_triforce_mgid(webpage)
if not mgid:
- mgid = self._search_regex(
- r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
- if not mgid:
- mgid = self._search_regex(
- r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
- if not mgid:
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self._extract_child_with_type(data, 'MainContainer')
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
- mgid = video_player['props']['media']['video']['config']['uri']
+ if video_player:
+ mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri'])
+ else:
+ flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper')
+ auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper')
+ player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player')
+ if player:
+ mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid'])
+
+ if not mgid:
+ raise ExtractorError('Could not extract mgid')
return mgid
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index d49749467..942a52dcf 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -6,19 +6,18 @@ from .mtv import MTVServicesInfoExtractor
class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southpark.cc.com'
- _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/((?:video-)?clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
_TESTS = [{
- 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
+ 'url': 'https://southpark.cc.com/video-clips/d7wr06/south-park-you-all-agreed-to-counseling',
'info_dict': {
- 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4',
- 'title': 'South Park|Bat Daded',
- 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
- 'timestamp': 1112760000,
- 'upload_date': '20050406',
+ 'title': 'You All Agreed to Counseling',
+ 'description': 'Kenny, Cartman, Stan, and Kyle visit Mr. Mackey and ask for his help getting Mrs. Nelson to come back. Mr. Mackey reveals the only way to get things back to normal is to get the teachers vaccinated.',
+ 'timestamp': 1615352400,
+ 'upload_date': '20210310',
},
}, {
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
@@ -40,11 +39,11 @@ class SouthParkIE(MTVServicesInfoExtractor):
class SouthParkEsIE(SouthParkIE):
IE_NAME = 'southpark.cc.com:español'
- _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))'
_LANG = 'es'
_TESTS = [{
- 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+ 'url': 'http://southpark.cc.com/es/episodios/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
'info_dict': {
'title': 'Cartman Consigue Una Sonda Anal',
'description': 'Cartman Consigue Una Sonda Anal',