aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-05-20 06:01:08 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-05-20 06:01:08 +0530
commit: b801cd7179c9546f4054dc534ec4b713e09976a7 (patch)
tree: e637c21d8a2482930e1b29d5eee4091d1fe78dc8
parent: 0b9c08b47bb5e95c21b067044ace4e824d19a9c2 (diff)
download: hypervideo-pre-b801cd7179c9546f4054dc534ec4b713e09976a7.tar.lz
hypervideo-pre-b801cd7179c9546f4054dc534ec4b713e09976a7.tar.xz
hypervideo-pre-b801cd7179c9546f4054dc534ec4b713e09976a7.zip
[tiktok] Detect embeds
Closes #3799
-rw-r--r-- yt_dlp/extractor/generic.py | 6
-rw-r--r-- yt_dlp/extractor/tiktok.py | 28
2 files changed, 24 insertions, 10 deletions
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index b0fc176ef..c7e9ea059 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -74,6 +74,7 @@ from .teachable import TeachableIE
from .ted import TedEmbedIE
from .theplatform import ThePlatformIE
from .threeqsdn import ThreeQSDNIE
+from .tiktok import TikTokIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .tube8 import Tube8IE
from .tunein import TuneInBaseIE
@@ -3756,6 +3757,11 @@ class GenericIE(InfoExtractor):
if ruutu_urls:
return self.playlist_from_matches(ruutu_urls, video_id, video_title)
+ # Look for Tiktok embeds
+ tiktok_urls = TikTokIE._extract_urls(webpage)
+ if tiktok_urls:
+ return self.playlist_from_matches(tiktok_urls, video_id, video_title)
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 4ba993582..4926096c0 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,28 +1,26 @@
import itertools
+import json
import random
+import re
import string
import time
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse
-)
+from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
HEADRequest,
+ LazyList,
UnsupportedError,
get_first,
int_or_none,
join_nonempty,
- LazyList,
+ qualities,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_get,
url_or_none,
- qualities,
)
@@ -36,6 +34,10 @@ class TikTokBaseIE(InfoExtractor):
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
+ @staticmethod
+ def _create_url(user_id, video_id):
+ return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
+
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
@@ -361,7 +363,7 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@@ -466,7 +468,7 @@ class TikTokIE(TikTokBaseIE):
'info_dict': {
'id': '7059698374567611694',
'ext': 'mp4',
- 'title': 'tiktok video #7059698374567611694',
+ 'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
'creator': 'Pokemon',
@@ -490,6 +492,11 @@ class TikTokIE(TikTokBaseIE):
'only_matching': True
}]
+ @classmethod
+ def _extract_urls(cls, webpage):
+ return [mobj.group('url') for mobj in re.finditer(
+ rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)]
+
def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
@@ -506,7 +513,8 @@ class TikTokIE(TikTokBaseIE):
return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
+ url = self._create_url(user_id, video_id)
try:
return self._extract_aweme_app(video_id)