diff options
author | Tom-Oliver Heidel <github@tom-oliver.eu> | 2020-11-30 02:50:39 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-30 02:50:39 +0100 |
commit | 929576bb9e4aa31f0516f1437d2ae762afdd9f2c (patch) | |
tree | 78fa7b09f90eae4d6eb064fa854ee63d4b597c0d | |
parent | 7dde463e86e03570b36fb59590b59f62bf9af2fa (diff) | |
parent | a1d6041497c50d59c6d275125d21cd3b613f6a1c (diff) | |
download | hypervideo-pre-929576bb9e4aa31f0516f1437d2ae762afdd9f2c.tar.lz hypervideo-pre-929576bb9e4aa31f0516f1437d2ae762afdd9f2c.tar.xz hypervideo-pre-929576bb9e4aa31f0516f1437d2ae762afdd9f2c.zip |
Merge pull request #126 from nao20010128nao/master
[instagram] Fix extractor
-rw-r--r-- | youtube_dlc/extractor/instagram.py | 23 |
1 file changed, 15 insertions, 8 deletions
diff --git a/youtube_dlc/extractor/instagram.py b/youtube_dlc/extractor/instagram.py index b061850a1..c3eba0114 100644 --- a/youtube_dlc/extractor/instagram.py +++ b/youtube_dlc/extractor/instagram.py @@ -126,16 +126,23 @@ class InstagramIE(InfoExtractor): uploader_id, like_count, comment_count, comments, height, width) = [None] * 11 - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - webpage, 'shared data', default='{}'), - video_id, fatal=False) + shared_data = try_get(webpage, + (lambda x: self._parse_json( + self._search_regex( + r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);', + x, 'additional data', default='{}'), + video_id, fatal=False), + lambda x: self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + x, 'shared data', default='{}'), + video_id, fatal=False)['entry_data']['PostPage'][0]), + None) if shared_data: media = try_get( shared_data, - (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], - lambda x: x['entry_data']['PostPage'][0]['media']), + (lambda x: x['graphql']['shortcode_media'], + lambda x: x['media']), dict) if media: video_url = media.get('video_url') @@ -144,7 +151,7 @@ class InstagramIE(InfoExtractor): description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') - thumbnail = media.get('display_src') + thumbnail = media.get('display_src') or media.get('thumbnail_src') timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) uploader = media.get('owner', {}).get('full_name') uploader_id = media.get('owner', {}).get('username') |