aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAshish Gupta <39122144+Ashish0804@users.noreply.github.com>2021-09-25 16:55:33 +0530
committerGitHub <noreply@github.com>2021-09-25 16:55:33 +0530
commit8dc831f7150bcd2cd07629fb41764778b85a4455 (patch)
tree0ab80e52fa76ddfbeea87776b053e7be220a448f
parente99b2d2771f9373da346222e6b5a88c6e1890457 (diff)
downloadhypervideo-pre-8dc831f7150bcd2cd07629fb41764778b85a4455.tar.lz
hypervideo-pre-8dc831f7150bcd2cd07629fb41764778b85a4455.tar.xz
hypervideo-pre-8dc831f7150bcd2cd07629fb41764778b85a4455.zip
[LinkedInLearning] Add subtitles (#1077)
Authored by: Ashish0804 Closes #1072
-rw-r--r--yt_dlp/extractor/linkedin.py25
1 files changed, 24 insertions, 1 deletions
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index 6d54d638a..f47d59a38 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+from itertools import zip_longest
import re
from .common import InfoExtractor
@@ -8,6 +9,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ srt_subtitles_timecode,
+ try_get,
urlencode_postdata,
urljoin,
)
@@ -86,6 +89,16 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
},
}
+ def json2srt(self, transcript_lines, duration=None):
+ srt_data = ''
+ for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+ start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
+ end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
+ srt_data += '%d\n%s --> %s\n%s\n' % (line + 1, srt_subtitles_timecode(start_time),
+ srt_subtitles_timecode(end_time),
+ caption)
+ return srt_data
+
def _real_extract(self, url):
course_slug, video_slug = self._match_valid_url(url).groups()
@@ -101,6 +114,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
formats.append({
'format_id': 'progressive-%dp' % height,
'url': progressive_url,
+ 'ext': 'mp4',
'height': height,
'width': width,
'source_preference': 1,
@@ -128,6 +142,14 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
# However, unless someone can confirm this, the old
# behaviour is being kept as-is
self._sort_formats(formats, ('res', 'source_preference'))
+ subtitles = {}
+ duration = int_or_none(video_data.get('durationInSeconds'))
+ transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
+ if transcript_lines:
+ subtitles['en'] = [{
+ 'ext': 'srt',
+ 'data': self.json2srt(transcript_lines, duration)
+ }]
return {
'id': self._get_video_id(video_data, course_slug, video_slug),
@@ -135,7 +157,8 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
'formats': formats,
'thumbnail': video_data.get('defaultThumbnail'),
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
- 'duration': int_or_none(video_data.get('durationInSeconds')),
+ 'duration': duration,
+ 'subtitles': subtitles,
}