diff options
author | Unknown <blackjack4494@web.de> | 2020-09-02 20:25:25 +0200 |
---|---|---|
committer | Unknown <blackjack4494@web.de> | 2020-09-02 20:25:25 +0200 |
commit | cefecac12cd3c70f9c7a30992c60b05c2eb5d34e (patch) | |
tree | f7b8e3f8ca2f6e402c83a501f72c09854ae04887 /youtube_dlc/extractor/techtalks.py | |
parent | 9688f237163b6aa546fde00bb3fd1e3445dd4c31 (diff) | |
download | hypervideo-pre-cefecac12cd3c70f9c7a30992c60b05c2eb5d34e.tar.lz hypervideo-pre-cefecac12cd3c70f9c7a30992c60b05c2eb5d34e.tar.xz hypervideo-pre-cefecac12cd3c70f9c7a30992c60b05c2eb5d34e.zip |
[skip travis] renaming
to avoid using same folder when using pip install for example
Diffstat (limited to 'youtube_dlc/extractor/techtalks.py')
-rw-r--r-- | youtube_dlc/extractor/techtalks.py | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/youtube_dlc/extractor/techtalks.py b/youtube_dlc/extractor/techtalks.py new file mode 100644 index 000000000..a5b62c717 --- /dev/null +++ b/youtube_dlc/extractor/techtalks.py @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + get_element_by_attribute, + clean_html, +) + + +class TechTalksIE(InfoExtractor): + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', + 'info_dict': { + 'id': '57758', + 'title': 'Learning Topic Models --- Going beyond SVD', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '57758', + 'ext': 'flv', + 'title': 'Learning Topic Models --- Going beyond SVD', + }, + }, + { + 'info_dict': { + 'id': '57758-slides', + 'ext': 'flv', + 'title': 'Learning Topic Models --- Going beyond SVD', + }, + }, + ], + 'params': { + # rtmp download + 'skip_download': True, + }, + }, { + 'url': 'http://techtalks.tv/talks/57758', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + talk_id = mobj.group('id') + webpage = self._download_webpage(url, talk_id) + rtmp_url = self._search_regex( + r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') + play_path = self._search_regex( + r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', + webpage, 'presenter play path') + title = clean_html(get_element_by_attribute('class', 'title', webpage)) + video_info = { + 'id': talk_id, + 'title': title, + 'url': rtmp_url, + 'play_path': play_path, + 'ext': 'flv', + } + m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) + if m_slides is None: + return video_info + else: + return { + '_type': 'playlist', + 'id': talk_id, + 'title': title, + 'entries': [ + video_info, + # The slides video + { + 'id': talk_id + '-slides', + 'title': title, + 'url': rtmp_url, + 'play_path': m_slides.group(1), + 'ext': 'flv', + }, + ], + } |