about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> 2022-10-25 19:30:03 +0900
committer: GitHub <noreply@github.com> 2022-10-25 16:00:03 +0530
commit: e091fb92dab691be2ba54644e2dc6125a3a6a7cd (patch)
tree: 112e2ba6e235e8789ee43f39e7e028b6072588ab
parent: c9bd65185c0b3b490d0353e139d5484c93bd9774 (diff)
download: hypervideo-pre-e091fb92dab691be2ba54644e2dc6125a3a6a7cd.tar.lz
download: hypervideo-pre-e091fb92dab691be2ba54644e2dc6125a3a6a7cd.tar.xz
download: hypervideo-pre-e091fb92dab691be2ba54644e2dc6125a3a6a7cd.zip
[extractor/mlb] Add `MLBArticle` extractor (#4832)
Closes #3475
Authored by: HobbyistDev
-rw-r--r-- yt_dlp/extractor/_extractors.py | 1
-rw-r--r-- yt_dlp/extractor/mlb.py | 33
2 files changed, 34 insertions, 0 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 2b35cc964..0e1fec152 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1003,6 +1003,7 @@ from .mlb import (
MLBIE,
MLBVideoIE,
MLBTVIE,
+ MLBArticleIE,
)
from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE
diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py
index 5e1b28105..2f0f2deab 100644
--- a/yt_dlp/extractor/mlb.py
+++ b/yt_dlp/extractor/mlb.py
@@ -348,3 +348,36 @@ class MLBTVIE(InfoExtractor):
'subtitles': subtitles,
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
}
+
+
+class MLBArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.mlb\.com/news/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.mlb.com/news/manny-machado-robs-guillermo-heredia-reacts',
+ 'info_dict': {
+ 'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
+ 'title': 'Machado\'s grab draws hilarious irate reaction',
+ 'modified_timestamp': 1650130737,
+ 'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
+ 'modified_date': '20220416',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
+
+ content_data_id = traverse_obj(
+ apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)
+
+ content_real_info = apollo_cache_json[content_data_id]
+
+ return self.playlist_from_matches(
+ traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
+ getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
+ ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
+ title=self._html_search_meta('og:title', webpage),
+ description=content_real_info.get('summary'),
+ modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))