diff options
Diffstat (limited to 'youtube_dl/extractor/storyfire.py')
-rw-r--r-- | youtube_dl/extractor/storyfire.py | 255 |
1 files changed, 0 insertions, 255 deletions
diff --git a/youtube_dl/extractor/storyfire.py b/youtube_dl/extractor/storyfire.py deleted file mode 100644 index 67457cc94..000000000 --- a/youtube_dl/extractor/storyfire.py +++ /dev/null @@ -1,255 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools -from .common import InfoExtractor - - -class StoryFireIE(InfoExtractor): - _VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire.app.link))/(?P<id>[^/\s]+)' - _TESTS = [{ - 'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181', - 'md5': '560953bfca81a69003cfa5e53ac8a920', - 'info_dict': { - 'id': '5df1d132b6378700117f9181', - 'ext': 'mp4', - 'title': 'Buzzfeed Teaches You About Memes', - 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1', - 'timestamp': 1576129028, - 'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies', - 'uploader': 'whang!', - 'upload_date': '20191212', - }, - 'params': {'format': 'bestvideo'} # There are no merged formats in the playlist. - }, { - 'url': 'https://storyfire.app.link/5GxAvWOQr8', # Alternate URL format, with unrelated short ID - 'md5': '7a2dc6d60c4889edfed459c620fe690d', - 'info_dict': { - 'id': '5f1e11ecd78a57b6c702001d', - 'ext': 'm4a', - 'title': 'Weird Nintendo Prototype Leaks', - 'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis', - 'timestamp': 1595808576, - 'upload_date': '20200727', - 'uploader': 'whang!', - 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1', - }, - 'params': {'format': 'bestaudio'} # Verifying audio extraction - - }] - - _aformats = { - 'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10}, - 'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1}, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # Extracting the json blob is mandatory to proceed with extraction. - jsontext = self._html_search_regex( - r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>', - webpage, 'json_data') - - json = self._parse_json(jsontext, video_id) - - # The currentVideo field in the json is mandatory - # because it contains the only link to the m3u playlist - video = json['props']['initialState']['video']['currentVideo'] - videourl = video['vimeoVideoURL'] # Video URL is mandatory - - # Extract other fields from the json in an error tolerant fashion - # ID may be incorrect (on short URL format), correct it. - parsed_id = video.get('_id') - if parsed_id: - video_id = parsed_id - - title = video.get('title') - description = video.get('description') - - thumbnail = video.get('storyImage') - views = video.get('views') - likes = video.get('likesCount') - comments = video.get('commentsCount') - duration = video.get('videoDuration') - publishdate = video.get('publishDate') # Apparently epoch time, day only - - uploader = video.get('username') - uploader_id = video.get('hostID') - # Construct an uploader URL - uploader_url = None - if uploader_id: - uploader_url = "https://storyfire.com/user/%s/video" % uploader_id - - # Collect root playlist to determine formats - formats = self._extract_m3u8_formats( - videourl, video_id, 'mp4', 'm3u8_native') - - # Modify formats to fill in missing information about audio codecs - for format in formats: - aformat = self._aformats.get(format['format_id']) - if aformat: - format['acodec'] = aformat['acodec'] - format['abr'] = aformat['abr'] - format['preference'] = aformat['preference'] - format['ext'] = 'm4a' - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'ext': "mp4", - 'url': videourl, - 'formats': formats, - - 'thumbnail': thumbnail, - 'view_count': views, - 'like_count': likes, - 'comment_count': comments, - 'duration': duration, - 'timestamp': publishdate, - - 'uploader': uploader, - 'uploader_id': uploader_id, - 'uploader_url': uploader_url, - - } - - -class StoryFireUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video' - _TESTS = [{ - 'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video', - 'info_dict': { - 'id': 'ntZAJFECERSgqHSxzonV5K2E89s1', - 'title': 'whang!', - }, - 'playlist_mincount': 18 - }, { - 'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video', - 'info_dict': { - 'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2', - 'title': 'McJuggerNuggets', - }, - 'playlist_mincount': 143 - - }] - - # Generator for fetching playlist items - def _enum_videos(self, baseurl, user_id, firstjson): - totalVideos = int(firstjson['videosCount']) - haveVideos = 0 - json = firstjson - - for page in itertools.count(1): - for video in json['videos']: - id = video['_id'] - url = "https://storyfire.com/video-details/%s" % id - haveVideos += 1 - yield { - '_type': 'url', - 'id': id, - 'url': url, - 'ie_key': 'StoryFire', - - 'title': video.get('title'), - 'description': video.get('description'), - 'view_count': video.get('views'), - 'comment_count': video.get('commentsCount'), - 'duration': video.get('videoDuration'), - 'timestamp': video.get('publishDate'), - } - # Are there more pages we could fetch? - if haveVideos < totalVideos: - pageurl = baseurl + ("%i" % haveVideos) - json = self._download_json(pageurl, user_id, - note='Downloading page %s' % page) - - # Are there any videos in the new json? - videos = json.get('videos') - if not videos or len(videos) == 0: - break # no videos - - else: - break # We have fetched all the videos, stop - - def _real_extract(self, url): - user_id = self._match_id(url) - - baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id - - # Download first page to ensure it can be downloaded, and get user information if available. - firstpage = baseurl + "0" - firstjson = self._download_json(firstpage, user_id) - - title = None - videos = firstjson.get('videos') - if videos and len(videos): - title = videos[1].get('username') - - return { - '_type': 'playlist', - 'entries': self._enum_videos(baseurl, user_id, firstjson), - 'id': user_id, - 'title': title, - } - - -class StoryFireSeriesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)' - _TESTS = [{ - 'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/', - 'info_dict': { - 'id': '-Lq6MsuIHLODO6d2dDkr', - }, - 'playlist_mincount': 13 - }, { - 'url': 'https://storyfire.com/write/series/stories/the_mortal_one/', - 'info_dict': { - 'id': 'the_mortal_one', - }, - 'playlist_count': 0 # This playlist has entries, but no videos. - }, { - 'url': 'https://storyfire.com/write/series/stories/story_time', - 'info_dict': { - 'id': 'story_time', - }, - 'playlist_mincount': 10 - }] - - # Generator for returning playlist items - # This object is substantially different than the one in the user videos page above - def _enum_videos(self, jsonlist): - for video in jsonlist: - id = video['_id'] - if video.get('hasVideo'): # Boolean element - url = "https://storyfire.com/video-details/%s" % id - yield { - '_type': 'url', - 'id': id, - 'url': url, - 'ie_key': 'StoryFire', - - 'title': video.get('title'), - 'description': video.get('description'), - 'view_count': video.get('views'), - 'likes_count': video.get('likesCount'), - 'comment_count': video.get('commentsCount'), - 'duration': video.get('videoDuration'), - 'timestamp': video.get('publishDate'), - } - - def _real_extract(self, url): - list_id = self._match_id(url) - - listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id - json = self._download_json(listurl, list_id) - - return { - '_type': 'playlist', - 'entries': self._enum_videos(json), - 'id': list_id - } |