about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ha Tien Loi <loiht.b17vt220@stu.ptit.edu.vn> 2022-03-04 18:18:46 +0700
committer GitHub <noreply@github.com> 2022-03-04 03:18:46 -0800
commit 5bcccbfec3de4bc69d529016f784d04db7b11e04 (patch)
tree 32be86863c501e02a171ebce59648f4b1359a5d9
parent ded9f32667d7fc8db672b9360162bc2ec6b095f7 (diff)
download hypervideo-pre-5bcccbfec3de4bc69d529016f784d04db7b11e04.tar.lz
hypervideo-pre-5bcccbfec3de4bc69d529016f784d04db7b11e04.tar.xz
hypervideo-pre-5bcccbfec3de4bc69d529016f784d04db7b11e04.zip
[telegram] Add extractor (#2922)
Closes #2910
Authored by: hatienl0i261299
-rw-r--r-- yt_dlp/extractor/extractors.py 1
-rw-r--r-- yt_dlp/extractor/telegram.py 37
2 files changed, 38 insertions, 0 deletions
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 0f26dc24f..c9e1a3f4e 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1593,6 +1593,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
+from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (
diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py
new file mode 100644
index 000000000..2dfa261e9
--- /dev/null
+++ b/yt_dlp/extractor/telegram.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+
+
+class TelegramEmbedIE(InfoExtractor):
+    IE_NAME = 'telegram:embed'
+    _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://t.me/europa_press/613',
+        'info_dict': {
+            'id': '613',
+            'ext': 'mp4',
+            'title': 'Europa Press',
+            # 'md5:' prefix: the value is a digest of the description, not its literal text
+            'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
+            'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video in the post at *url*; a missing source or title is fatal."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # Pass embed=1 via `query` so a URL that already has a query string is not given a second '?'
+        webpage_embed = self._download_webpage(url, video_id, query={'embed': 1})
+        video_url = self._search_regex(r'<video[^>]+src="([^"]+)"', webpage_embed, 'source')
+        formats = [{'url': self._proto_relative_url(video_url), 'ext': 'mp4'}]
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True),
+            # description and thumbnail are optional metadata: their absence must not abort extraction
+            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+            'thumbnail': self._search_regex(
+                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
+                webpage_embed, 'thumbnail', fatal=False),
+            'formats': formats,
+        }