aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/linkedin.py
diff options
context:
space:
mode:
authorJesús <heckyel@hyperbola.info>2021-10-18 15:24:21 -0500
committerJesús <heckyel@hyperbola.info>2021-10-18 15:24:21 -0500
commit5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch)
tree65209bc739db35e31f1c9b5b868eb5df4fe12ae3 /hypervideo_dl/extractor/linkedin.py
parent27fe903c511691c078942bef5ee9a05a43b15c8f (diff)
downloadhypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz
hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz
hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip
update from upstream
Diffstat (limited to 'hypervideo_dl/extractor/linkedin.py')
-rw-r--r--hypervideo_dl/extractor/linkedin.py32
1 files changed, 29 insertions, 3 deletions
diff --git a/hypervideo_dl/extractor/linkedin.py b/hypervideo_dl/extractor/linkedin.py
index 26fc703..3ce906e 100644
--- a/hypervideo_dl/extractor/linkedin.py
+++ b/hypervideo_dl/extractor/linkedin.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+from itertools import zip_longest
import re
from .common import InfoExtractor
@@ -8,6 +9,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ srt_subtitles_timecode,
+ try_get,
urlencode_postdata,
urljoin,
)
@@ -86,8 +89,18 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
},
}
+ # Build the text of an SRT subtitle file from a list of transcript entries.
+ #
+ # transcript_lines: sliceable sequence of dicts (the caller passes a list);
+ # each entry is read via its 'transcriptStartAt' and 'caption' keys.
+ # 'transcriptStartAt' is divided by 1000 before formatting
+ # (presumably milliseconds -> seconds; TODO confirm against the API).
+ # duration: optional fallback end time for the last cue, used only when truthy.
+ #
+ # Returns the accumulated SRT text; an empty string when there are no entries.
+ # Raises KeyError if an entry lacks one of the two keys read above.
+ def json2srt(self, transcript_lines, duration=None):
+ srt_data = ''
+ # zip_longest pairs every entry with its successor; the last entry is
+ # paired with None because the shifted copy is one element shorter.
+ for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+ start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
+ # A cue ends where the next one starts. For the last cue (next_dict is None)
+ # this parses as `duration or (start_time + 1)`: use the given duration,
+ # otherwise end the cue one unit after its start.
+ end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
+ # Cue numbers are emitted 1-based (line + 1); each cue ends with a blank line.
+ # NOTE(review): srt_subtitles_timecode comes from ..utils and is not shown
+ # here - presumably it renders the value as an SRT timecode; confirm.
+ srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
+ srt_subtitles_timecode(end_time),
+ caption)
+ return srt_data
+
def _real_extract(self, url):
- course_slug, video_slug = re.match(self._VALID_URL, url).groups()
+ course_slug, video_slug = self._match_valid_url(url).groups()
video_data = None
formats = []
@@ -101,6 +114,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
formats.append({
'format_id': 'progressive-%dp' % height,
'url': progressive_url,
+ 'ext': 'mp4',
'height': height,
'width': width,
'source_preference': 1,
@@ -124,7 +138,18 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
streaming_url, video_slug, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
+ # It seems like this would be correctly handled by default.
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is.
+ self._sort_formats(formats, ('res', 'source_preference'))
+ subtitles = {}
+ duration = int_or_none(video_data.get('durationInSeconds'))
+ transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
+ if transcript_lines:
+ subtitles['en'] = [{
+ 'ext': 'srt',
+ 'data': self.json2srt(transcript_lines, duration)
+ }]
return {
'id': self._get_video_id(video_data, course_slug, video_slug),
@@ -132,7 +157,8 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
'formats': formats,
'thumbnail': video_data.get('defaultThumbnail'),
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
- 'duration': int_or_none(video_data.get('durationInSeconds')),
+ 'duration': duration,
+ 'subtitles': subtitles,
}