diff options
Diffstat (limited to 'youtube_dlc/extractor/youtube.py')
-rw-r--r-- | youtube_dlc/extractor/youtube.py | 142 |
1 files changed, 84 insertions, 58 deletions
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index b2b02f5e2..8fc3706df 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -324,7 +324,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'^([\d,]+)', re.sub(r'\s', '', view_count_text), 'view count', default=None)) uploader = try_get( - renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str) + renderer, + (lambda x: x['ownerText']['runs'][0]['text'], + lambda x: x['shortBylineText']['runs'][0]['text']), compat_str) return { '_type': 'url_transparent', 'ie_key': YoutubeIE.ie_key(), @@ -340,64 +342,70 @@ class YoutubeBaseInfoExtractor(InfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com' + _INVIDIOUS_SITES = ( + # invidious-redirect websites + r'(?:www\.)?redirect\.invidious\.io', + r'(?:(?:www|dev)\.)?invidio\.us', + # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md + r'(?:www\.)?invidious\.pussthecat\.org', + r'(?:www\.)?invidious\.048596\.xyz', + r'(?:www\.)?invidious\.zee\.li', + r'(?:www\.)?vid\.puffyan\.us', + r'(?:(?:www|au)\.)?ytprivate\.com', + r'(?:www\.)?invidious\.namazso\.eu', + r'(?:www\.)?invidious\.ethibox\.fr', + r'(?:www\.)?inv\.skyn3t\.in', + r'(?:www\.)?invidious\.himiko\.cloud', + r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', + r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', + r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion', + r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', + # youtube-dl invidious instances list + r'(?:(?:www|no)\.)?invidiou\.sh', + r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', + r'(?:www\.)?invidious\.kabi\.tk', + r'(?:www\.)?invidious\.13ad\.de', + r'(?:www\.)?invidious\.mastodon\.host', + r'(?:www\.)?invidious\.zapashcanon\.fr', + r'(?:www\.)?invidious\.kavin\.rocks', + r'(?:www\.)?invidious\.tube', + r'(?:www\.)?invidiou\.site', + r'(?:www\.)?invidious\.site', + r'(?:www\.)?invidious\.xyz', + r'(?:www\.)?invidious\.nixnet\.xyz', + r'(?:www\.)?invidious\.drycat\.fr', + r'(?:www\.)?tube\.poal\.co', + r'(?:www\.)?tube\.connect\.cafe', + r'(?:www\.)?vid\.wxzm\.sx', + r'(?:www\.)?vid\.mint\.lgbt', + r'(?:www\.)?yewtu\.be', + r'(?:www\.)?yt\.elukerio\.org', + r'(?:www\.)?yt\.lelux\.fi', + r'(?:www\.)?invidious\.ggc-project\.de', + r'(?:www\.)?yt\.maisputain\.ovh', + r'(?:www\.)?invidious\.toot\.koeln', + r'(?:www\.)?invidious\.fdn\.fr', + r'(?:www\.)?watch\.nettohikari\.com', + r'(?:www\.)?kgg2m7yk5aybusll\.onion', + r'(?:www\.)?qklhadlycap4cnod\.onion', + r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', + r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion', + r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion', + r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', + r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', + r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', + ) _VALID_URL = r"""(?x)^ ( (?:https?://|//) # http(s):// or protocol-independent URL - (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/| - (?:www\.)?deturl\.com/www\.youtube\.com/| - (?:www\.)?pwnyoutube\.com/| - (?:www\.)?hooktube\.com/| - (?:www\.)?yourepeat\.com/| - tube\.majestyc\.net/| - # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances - (?:www\.)?invidious\.pussthecat\.org/| - (?:www\.)?invidious\.048596\.xyz/| - (?:www\.)?invidious\.zee\.li/| - (?:www\.)?vid\.puffyan\.us/| - (?:(?:www|au)\.)?ytprivate\.com/| - (?:www\.)?invidious\.namazso\.eu/| - (?:www\.)?invidious\.ethibox\.fr/| - (?:www\.)?inv\.skyn3t\.in/| - (?:www\.)?invidious\.himiko\.cloud/| - (?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion/| - (?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion/| - (?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion/| - (?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion/| - (?:(?:www|dev)\.)?invidio\.us/| - (?:(?:www|no)\.)?invidiou\.sh/| - (?:(?:www|fi)\.)?invidious\.snopyta\.org/| - (?:www\.)?invidious\.kabi\.tk/| - (?:www\.)?invidious\.13ad\.de/| - (?:www\.)?invidious\.mastodon\.host/| - (?:www\.)?invidious\.zapashcanon\.fr/| - (?:www\.)?invidious\.kavin\.rocks/| - (?:www\.)?invidious\.tube/| - (?:www\.)?invidiou\.site/| - (?:www\.)?invidious\.site/| - (?:www\.)?invidious\.xyz/| - (?:www\.)?invidious\.nixnet\.xyz/| - (?:www\.)?invidious\.drycat\.fr/| - (?:www\.)?tube\.poal\.co/| - (?:www\.)?tube\.connect\.cafe/| - (?:www\.)?vid\.wxzm\.sx/| - (?:www\.)?vid\.mint\.lgbt/| - (?:www\.)?yewtu\.be/| - (?:www\.)?yt\.elukerio\.org/| - (?:www\.)?yt\.lelux\.fi/| - (?:www\.)?invidious\.ggc-project\.de/| - (?:www\.)?yt\.maisputain\.ovh/| - (?:www\.)?invidious\.toot\.koeln/| - (?:www\.)?invidious\.fdn\.fr/| - (?:www\.)?watch\.nettohikari\.com/| - (?:www\.)?kgg2m7yk5aybusll\.onion/| - (?:www\.)?qklhadlycap4cnod\.onion/| - (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| - (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/| - (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| - (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| - (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/| - (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/| - youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains + (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com| + (?:www\.)?deturl\.com/www\.youtube\.com| + (?:www\.)?pwnyoutube\.com| + (?:www\.)?hooktube\.com| + (?:www\.)?yourepeat\.com| + tube\.majestyc\.net| + %(invidious)s| + youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/ @@ -412,6 +420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): youtu\.be| # just youtu.be/xxxx vid\.plus| # or vid.plus/xxxx zwearz\.com/watch| # or zwearz.com/watch/xxxx + %(invidious)s )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) @@ -424,7 +433,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) ) (?(1).+)? # if we found the ID, everything can follow - $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} + $""" % { + 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, + 'invidious': '|'.join(_INVIDIOUS_SITES), + } _PLAYER_INFO_RE = ( r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', @@ -1032,6 +1044,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { + 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc', + 'only_matching': True, + }, + { + # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m + 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA', + 'only_matching': True, + }, + { # DRM protected 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', 'only_matching': True, @@ -1169,6 +1190,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # controversial video, only works with bpctr when authenticated with cookies + 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -1426,7 +1452,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) base_url = self.http_scheme() + '//www.youtube.com/' - webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1' + webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1&bpctr=9999999999' webpage = self._download_webpage(webpage_url, video_id, fatal=False) player_response = None |