about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tom-Oliver Heidel <github@tom-oliver.eu> 2020-11-30 02:50:39 +0100
committer GitHub <noreply@github.com> 2020-11-30 02:50:39 +0100
commit 929576bb9e4aa31f0516f1437d2ae762afdd9f2c (patch)
tree 78fa7b09f90eae4d6eb064fa854ee63d4b597c0d
parent 7dde463e86e03570b36fb59590b59f62bf9af2fa (diff)
parent a1d6041497c50d59c6d275125d21cd3b613f6a1c (diff)
download hypervideo-pre-929576bb9e4aa31f0516f1437d2ae762afdd9f2c.tar.lz
hypervideo-pre-929576bb9e4aa31f0516f1437d2ae762afdd9f2c.tar.xz
hypervideo-pre-929576bb9e4aa31f0516f1437d2ae762afdd9f2c.zip
Merge pull request #126 from nao20010128nao/master
[instagram] Fix extractor
-rw-r--r-- youtube_dlc/extractor/instagram.py 23
1 file changed, 15 insertions, 8 deletions
diff --git a/youtube_dlc/extractor/instagram.py b/youtube_dlc/extractor/instagram.py
index b061850a1..c3eba0114 100644
--- a/youtube_dlc/extractor/instagram.py
+++ b/youtube_dlc/extractor/instagram.py
@@ -126,16 +126,23 @@ class InstagramIE(InfoExtractor):
uploader_id, like_count, comment_count, comments, height,
width) = [None] * 11
- shared_data = self._parse_json(
- self._search_regex(
- r'window\._sharedData\s*=\s*({.+?});',
- webpage, 'shared data', default='{}'),
- video_id, fatal=False)
+ shared_data = try_get(webpage,
+ (lambda x: self._parse_json(
+ self._search_regex(
+ r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
+ x, 'additional data', default='{}'),
+ video_id, fatal=False),
+ lambda x: self._parse_json(
+ self._search_regex(
+ r'window\._sharedData\s*=\s*({.+?});',
+ x, 'shared data', default='{}'),
+ video_id, fatal=False)['entry_data']['PostPage'][0]),
+ None)
if shared_data:
media = try_get(
shared_data,
- (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
- lambda x: x['entry_data']['PostPage'][0]['media']),
+ (lambda x: x['graphql']['shortcode_media'],
+ lambda x: x['media']),
dict)
if media:
video_url = media.get('video_url')
@@ -144,7 +151,7 @@ class InstagramIE(InfoExtractor):
description = try_get(
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
compat_str) or media.get('caption')
- thumbnail = media.get('display_src')
+ thumbnail = media.get('display_src') or media.get('thumbnail_src')
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
uploader = media.get('owner', {}).get('full_name')
uploader_id = media.get('owner', {}).get('username')