diff options
Diffstat (limited to 'hypervideo_dl/extractor/twitter.py')
-rw-r--r-- | hypervideo_dl/extractor/twitter.py | 46 |
1 files changed, 37 insertions, 9 deletions
diff --git a/hypervideo_dl/extractor/twitter.py b/hypervideo_dl/extractor/twitter.py index cfa7a73..485b781 100644 --- a/hypervideo_dl/extractor/twitter.py +++ b/hypervideo_dl/extractor/twitter.py @@ -37,9 +37,9 @@ class TwitterBaseIE(InfoExtractor): def _extract_variant_formats(self, variant, video_id): variant_url = variant.get('url') if not variant_url: - return [] + return [], {} elif '.m3u8' in variant_url: - return self._extract_m3u8_formats( + return self._extract_m3u8_formats_and_subtitles( variant_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) else: @@ -50,7 +50,7 @@ class TwitterBaseIE(InfoExtractor): 'tbr': tbr, } self._search_dimensions_in_video_url(f, variant_url) - return [f] + return [f], {} def _extract_formats_from_vmap_url(self, vmap_url, video_id): vmap_url = url_or_none(vmap_url) @@ -58,17 +58,22 @@ class TwitterBaseIE(InfoExtractor): return [] vmap_data = self._download_xml(vmap_url, video_id) formats = [] + subtitles = {} urls = [] for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'): video_variant.attrib['url'] = compat_urllib_parse_unquote( video_variant.attrib['url']) urls.append(video_variant.attrib['url']) - formats.extend(self._extract_variant_formats( - video_variant.attrib, video_id)) + fmts, subs = self._extract_variant_formats( + video_variant.attrib, video_id) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile')) if video_url not in urls: - formats.extend(self._extract_variant_formats({'url': video_url}, video_id)) - return formats + fmts, subs = self._extract_variant_formats({'url': video_url}, video_id) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + return formats, subtitles @staticmethod def _search_dimensions_in_video_url(a_format, video_url): @@ -475,8 +480,11 @@ class TwitterIE(TwitterBaseIE): video_info = media.get('video_info') or {} formats = [] + subtitles = {} for variant in video_info.get('variants', []): - formats.extend(self._extract_variant_formats(variant, twid)) + fmts, subs = self._extract_variant_formats(variant, twid) + subtitles = self._merge_subtitles(subtitles, subs) + formats.extend(fmts) self._sort_formats(formats) thumbnails = [] @@ -495,6 +503,7 @@ class TwitterIE(TwitterBaseIE): info.update({ 'formats': formats, + 'subtitles': subtitles, 'thumbnails': thumbnails, 'duration': float_or_none(video_info.get('duration_millis'), 1000), }) @@ -544,7 +553,7 @@ class TwitterIE(TwitterBaseIE): is_amplify = card_name == 'amplify' vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url') content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player')) - formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid) + formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid) self._sort_formats(formats) thumbnails = [] @@ -562,6 +571,7 @@ class TwitterIE(TwitterBaseIE): info.update({ 'formats': formats, + 'subtitles': subtitles, 'thumbnails': thumbnails, 'duration': int_or_none(get_binding_value( 'content_duration_seconds')), @@ -667,3 +677,21 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): info['formats'] = self._extract_pscp_m3u8_formats( m3u8_url, broadcast_id, m3u8_id, state, width, height) return info + + +class TwitterShortenerIE(TwitterBaseIE): + IE_NAME = 'twitter:shortener' + _VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)' + _BASE_URL = 'https://t.co/' + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + eid, id = mobj.group('eid', 'id') + if eid: + id = eid + url = self._BASE_URL + id + new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).geturl() + __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link=" + if new_url.startswith(__UNSAFE_LINK): + new_url = new_url.replace(__UNSAFE_LINK, "") + return self.url_result(new_url) |