aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoru-spec-png <54671367+u-spec-png@users.noreply.github.com>2021-12-28 18:58:06 +0000
committerGitHub <noreply@github.com>2021-12-29 00:28:06 +0530
commitdd5e60b15dd720727a0242eec2bd6698cb03b8f2 (patch)
tree8762880e9a11110483a6e8fc14f86438b2ed781b
parente540c56f397b839b826b430a0034e651027ad07c (diff)
downloadhypervideo-pre-dd5e60b15dd720727a0242eec2bd6698cb03b8f2.tar.lz
hypervideo-pre-dd5e60b15dd720727a0242eec2bd6698cb03b8f2.tar.xz
hypervideo-pre-dd5e60b15dd720727a0242eec2bd6698cb03b8f2.zip
[Instagram] Add story/highlight extractor (#2006)
Fixes https://github.com/ytdl-org/youtube-dl/issues/25575 Authored by: u-spec-png
-rw-r--r--yt_dlp/extractor/extractors.py1
-rw-r--r--yt_dlp/extractor/instagram.py74
2 files changed, 75 insertions, 0 deletions
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index fd5c86afe..e15b5a4e5 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -621,6 +621,7 @@ from .instagram import (
InstagramIOSIE,
InstagramUserIE,
InstagramTagIE,
+ InstagramStoryIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 0dd4aa54a..ab14e5b0a 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -542,3 +542,77 @@ class InstagramTagIE(InstagramPlaylistBaseIE):
'tag_name':
data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
}
+
+
+class InstagramStoryIE(InstagramBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
+ IE_NAME = 'instagram:story'
+
+ _TESTS = [{
+ 'url': 'https://www.instagram.com/stories/highlights/18090946048123978/',
+ 'info_dict': {
+ 'id': '18090946048123978',
+ 'title': 'Rare',
+ },
+ 'playlist_mincount': 50
+ }]
+
+ def _real_extract(self, url):
+ username, story_id = self._match_valid_url(url).groups()
+
+ story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
+ story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
+ 'X-IG-App-ID': 936619743392459,
+ 'X-ASBD-ID': 198387,
+ 'X-IG-WWW-Claim': 0,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': url,
+ })
+ user_id = story_info['user']['id']
+ highlight_title = traverse_obj(story_info, ('highlight', 'title'))
+
+ story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
+ videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
+ 'X-IG-App-ID': 936619743392459,
+ 'X-ASBD-ID': 198387,
+ 'X-IG-WWW-Claim': 0,
+ })['reels']
+ entites = []
+
+ videos = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
+ for video_info in videos:
+ formats = []
+ if isinstance(video_info, list):
+ video_info = video_info[0]
+ vcodec = video_info.get('video_codec')
+ dash_manifest_raw = video_info.get('video_dash_manifest')
+ videos_list = video_info.get('video_versions')
+ if not (dash_manifest_raw or videos_list):
+ continue
+ for format in videos_list:
+ formats.append({
+ 'url': format.get('url'),
+ 'width': format.get('width'),
+ 'height': format.get('height'),
+ 'vcodec': vcodec,
+ })
+ if dash_manifest_raw:
+ formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, story_id), mpd_id='dash'))
+ self._sort_formats(formats)
+ thumbnails = [{
+ 'url': thumbnail.get('url'),
+ 'width': thumbnail.get('width'),
+ 'height': thumbnail.get('height')
+ } for thumbnail in traverse_obj(video_info, ('image_versions2', 'candidates')) or []]
+ entites.append({
+ 'id': video_info.get('id'),
+ 'title': f'Story by {username}',
+ 'timestamp': int_or_none(video_info.get('taken_at')),
+ 'uploader': traverse_obj(videos, ('user', 'full_name')),
+ 'duration': float_or_none(video_info.get('video_duration')),
+ 'uploader_id': user_id,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entites, playlist_id=story_id, playlist_title=highlight_title)