aboutsummaryrefslogtreecommitdiffstats
path: root/lib/WWW/FairViewer.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/WWW/FairViewer.pm')
-rw-r--r--lib/WWW/FairViewer.pm393
1 files changed, 317 insertions, 76 deletions
diff --git a/lib/WWW/FairViewer.pm b/lib/WWW/FairViewer.pm
index a192396..dff63f4 100644
--- a/lib/WWW/FairViewer.pm
+++ b/lib/WWW/FairViewer.pm
@@ -6,12 +6,14 @@ use warnings;
use Memoize;
-memoize('_get_video_info');
+#memoize('_get_video_info');
memoize('_ytdl_is_available');
+memoize('_info_from_ytdl');
memoize('_extract_from_ytdl');
memoize('_extract_from_invidious');
use parent qw(
+ WWW::FairViewer::InitialData
WWW::FairViewer::Search
WWW::FairViewer::Videos
WWW::FairViewer::Channels
@@ -23,11 +25,11 @@ use parent qw(
WWW::FairViewer::CommentThreads
WWW::FairViewer::Authentication
WWW::FairViewer::VideoCategories
- );
+);
=head1 NAME
-WWW::FairViewer - A very easy interface to YouTube, using the API of invidio.us.
+WWW::FairViewer - A very easy interface to YouTube, using the API of invidious.
=cut
@@ -79,10 +81,11 @@ my %valid_options = (
ytdl_cmd => {valid => qr/\w/, default => "hypervideo"},
# Booleans
- env_proxy => {valid => [1, 0], default => 1},
- escape_utf8 => {valid => [1, 0], default => 0},
- prefer_mp4 => {valid => [1, 0], default => 0},
- prefer_av1 => {valid => [1, 0], default => 0},
+ env_proxy => {valid => [1, 0], default => 1},
+ escape_utf8 => {valid => [1, 0], default => 0},
+ prefer_mp4 => {valid => [1, 0], default => 0},
+ prefer_av1 => {valid => [1, 0], default => 0},
+ prefer_invidious => {valid => [1, 0], default => 0},
# API/OAuth
key => {valid => qr/^.{15}/, default => undef},
@@ -95,16 +98,21 @@ my %valid_options = (
authentication_file => {valid => qr/^./, default => undef},
api_host => {valid => qr/\w/, default => "auto"},
+#<<<
# No input value allowed
api_path => {valid => q[], default => '/api/v1/'},
video_info_url => {valid => q[], default => 'https://www.youtube.com/get_video_info'},
oauth_url => {valid => q[], default => 'https://accounts.google.com/o/oauth2/'},
- video_info_args => {valid => q[], default => '?video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en'},
+ video_info_args => {valid => q[], default => '?video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en&html5=1&c=TVHTML5&cver=6.20180913'},
www_content_type => {valid => q[], default => 'application/x-www-form-urlencoded'},
+ m_youtube_url => {valid => q[], default => 'https://m.youtube.com'},
+ youtubei_url => {valid => q[], default => 'https://youtubei.googleapis.com/youtubei/v1/%s?key=' . reverse("8Wcq11_9Y_wliCGLHETS4Q8UqlS2JF_OAySazIA")},
+#>>>
#<<<
# LWP user agent
- user_agent => {valid => qr/^.{5}/, default => 'Mozilla/5.0 (Windows NT 10.0; Win64; gzip; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.0.0 Safari/537.36'},
+ #user_agent => {valid => qr/^.{5}/, default => 'Mozilla/5.0 (iPad; CPU OS 7_1_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201 Safari/9537.53'},
+ user_agent => {valid => qr/^.{5}/, default => 'Mozilla/5.0 (Android 11; Tablet; rv:83.0) Gecko/83.0 Firefox/83.0,gzip(gfe)'},
#>>>
);
@@ -297,7 +305,7 @@ sub set_lwp_useragent {
require LWP::ConnCache;
state $cache = LWP::ConnCache->new;
- $cache->total_capacity(undef); # no limit
+ $cache->total_capacity(undef); # no limit
state $accepted_encodings = do {
require HTTP::Message;
@@ -319,14 +327,11 @@ sub set_lwp_useragent {
## Netscape HTTP Cookies
- # Chrome extension:
- # https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg
-
# Firefox extension:
# https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/
# See also:
- # https://libregit.org/heckyel/hypervideo#how-do-i-pass-cookies-to-hypervideo
+ # https://git.conocimientoslibres.ga/software/hypervideo.git/about/#how-do-i-pass-cookies-to-hypervideo
require HTTP::Cookies::Netscape;
@@ -339,6 +344,19 @@ sub set_lwp_useragent {
$cookies->load;
$agent->cookie_jar($cookies);
}
+ else {
+
+ require HTTP::Cookies;
+
+ my $cookies = HTTP::Cookies->new();
+
+ # Consent cookie
+ $cookies->set_cookie(0, "CONSENT", "YES+cb-m.20210615-14-p0.en+FX+096",
+ "/", ".youtube.com", undef, 0, 1, '21' . join('', map { int(rand(10)) } 1 .. 8),
+ 0, {});
+
+ $agent->cookie_jar($cookies);
+ }
push @{$agent->requests_redirectable}, 'POST';
$self->{lwp} = $agent;
@@ -396,56 +414,57 @@ sub lwp_get {
$url // return;
$self->{lwp} // $self->set_lwp_useragent();
- my %lwp_header = ($opt{simple} ? () : $self->_auth_lwp_header);
- my $response = $self->{lwp}->get($url, %lwp_header);
+ if ($url =~ m{^//}) {
+ $url = 'https:' . $url;
+ }
- if ($response->is_success) {
- return $response->decoded_content;
+ if ($url =~ m{^/vi/}) {
+ $url = 'https://i.ytimg.com' . $url;
}
- if ($response->status_line() =~ /^401 / and defined($self->get_refresh_token)) {
- if (defined(my $refresh_token = $self->oauth_refresh_token())) {
- if (defined $refresh_token->{access_token}) {
+ # Fix YouTube thumbnails for results from invidious instances
+ $url =~ s{^https?://[^/]+(/vi/.*\.jpg)\z}{https://i.ytimg.com$1};
- $self->set_access_token($refresh_token->{access_token});
+ my %lwp_header = ($opt{simple} ? () : $self->_auth_lwp_header);
+
+ my $response = do {
+ my $r;
- # Don't be tempted to use recursion here, because bad things will happen!
- $response = $self->{lwp}->get($url, $self->_auth_lwp_header);
+ if ($url =~ m{^https?://[^/]+\.onion/}) { # onion URL
- if ($response->is_success) {
- $self->save_authentication_tokens();
- return $response->decoded_content;
+ if (not defined($self->get_http_proxy)) { # no proxy defined
+ if ($self->get_env_proxy and (defined($ENV{HTTP_PROXY}) or defined($ENV{HTTPS_PROXY}))) {
+ ## ok -- LWP::UserAgent will use proxy defined in ENV
}
- elsif ($response->status_line() =~ /^401 /) {
- $self->set_refresh_token(); # refresh token was invalid
- $self->set_access_token(); # access token is also broken
- warn "[!] Can't refresh the access token! Logging out...\n";
+ else {
+ say ":: Setting proxy for onion websites..." if $self->get_debug;
+ $self->{lwp}->proxy(['http', 'https'], 'socks://localhost:9050');
+ $r = $self->{lwp}->get($url, %lwp_header);
+ $self->{lwp}->proxy(['http', 'https'], undef);
}
}
- else {
- warn "[!] Can't get the access_token! Logging out...\n";
- $self->set_refresh_token();
- $self->set_access_token();
- }
- }
- else {
- warn "[!] Invalid refresh_token! Logging out...\n";
- $self->set_refresh_token();
- $self->set_access_token();
}
+
+ $r // $self->{lwp}->get($url, %lwp_header);
+ };
+
+ if ($response->is_success) {
+ return $response->decoded_content;
}
$opt{depth} ||= 0;
# Try again on 500+ HTTP errors
- if ( $opt{depth} < 3
+ if ( $opt{depth} < 1
and $response->code() >= 500
and $response->status_line() =~ /(?:Temporary|Server) Error|Timeout|Service Unavailable/i) {
return $self->lwp_get($url, %opt, depth => $opt{depth} + 1);
}
# Too many errors. Pick another invidious instance.
- $self->pick_and_set_random_instance();
+ if ($url !~ m{\byoutube\.com\b/}) {
+ $self->pick_and_set_random_instance();
+ }
_warn_reponse_error($response, $url);
return;
@@ -527,7 +546,7 @@ sub get_invidious_instances {
my $lwp = LWP::UserAgent->new(timeout => $self->get_timeout);
$lwp->show_progress(1) if $self->get_debug;
- my $resp = $lwp->get("https://instances.invidio.us/instances.json");
+ my $resp = $lwp->get("https://api.invidious.io/instances.json");
$resp->is_success() or return;
@@ -558,12 +577,17 @@ sub select_good_invidious_instances {
'yewtu.be' => 1,
'invidious.tube' => 1,
'invidiou.site' => 0,
+ 'invidious.site' => 1,
+ 'invidious.zee.li' => 1,
+ 'invidious.048596.xyz' => 1,
'invidious.xyz' => 1,
'vid.mint.lgbt' => 1,
'invidious.ggc-project.de' => 1,
'invidious.toot.koeln' => 1,
- 'invidious.kavin.rocks' => 0,
+ 'invidious.kavin.rocks' => 1,
'invidious.snopyta.org' => 0,
+ 'invidious.silkky.cloud' => 1, # broken thumbnail URLs for popular videos
+ 'invidious.moomoo.me' => 1, # ==//==
);
#<<<
@@ -587,25 +611,24 @@ sub select_good_invidious_instances {
return @candidates;
}
-sub pick_good_random_instance {
- my ($self) = @_;
-
- my @candidates = $self->select_good_invidious_instances();
- my @extra_candidates = $self->select_good_invidious_instances(lax => 1);
+sub _find_working_instance {
+ my ($self, $candidates, $extra_candidates) = @_;
require List::Util;
require WWW::FairViewer::Utils;
state $yv_utils = WWW::FairViewer::Utils->new();
- foreach my $instance (List::Util::shuffle(@candidates), List::Util::shuffle(@extra_candidates)) {
+ foreach my $instance (List::Util::shuffle(@$candidates), List::Util::shuffle(@$extra_candidates)) {
ref($instance) eq 'ARRAY' or next;
my $uri = $instance->[1]{uri} // next;
$uri =~ s{/+\z}{}; # remove trailing '/'
- local $self->{api_host} = $uri;
+ local $self->{api_host} = $uri;
+ local $self->{prefer_invidious} = 1;
+
my $results = $self->search_videos('test');
if ($yv_utils->has_entries($results)) {
@@ -613,13 +636,32 @@ sub pick_good_random_instance {
}
}
+ return;
+}
+
+sub pick_random_instance {    # Pick one invidious instance entry; the final expression yields a random element of @candidates.
+ my ($self) = @_;
+
+ my @candidates = $self->select_good_invidious_instances();    # default selection
+ my @extra_candidates = $self->select_good_invidious_instances(lax => 1);    # selection made with the `lax` flag set
+
+ if ($self->get_prefer_invidious) {    # only probe for a working instance when invidious is preferred
+ if (defined(my $instance = $self->_find_working_instance(\@candidates, \@extra_candidates))) {    # both pools are handed over
+ return $instance;
+ }
+ }
+
+ if (not @candidates) {    # default pool is empty: fall back to the lax pool
+ @candidates = @extra_candidates;
+ }
+
$candidates[rand @candidates];
}
sub pick_and_set_random_instance {
my ($self) = @_;
- my $instance = $self->pick_good_random_instance() // return;
+ my $instance = $self->pick_random_instance() // return;
ref($instance) eq 'ARRAY' or return;
@@ -640,8 +682,15 @@ sub get_api_url {
$host =~ s{/+\z}{}; # remove trailing '/'
- if ($host =~ m{^[-\w]+(?>\.[-\w]+)+\z}) { # no protocol specified
- $host = 'https://' . $host; # default to HTTPS
+ if ($host =~ /\w\.\w/ and $host !~ m{^\w+://}) { # no protocol specified
+
+ my $protocol = 'https://'; # default to HTTPS
+
+ if ($host =~ m{^[^/]+\.onion\z}) { # onion URL
+ $protocol = 'http://'; # default to HTTP
+ }
+
+ $host = $protocol . $host;
}
# Pick a random instance when `--instance=auto` or `--instance=invidio.us`.
@@ -725,7 +774,7 @@ sub _extract_from_invidious {
invidious.site
invidious.fdn.fr
invidious.snopyta.org
- );
+ );
}
if ($self->get_debug) {
@@ -768,7 +817,7 @@ sub _ytdl_is_available {
($self->proxy_stdout($self->get_ytdl_cmd(), '--version') // '') =~ /\d/;
}
-sub _extract_from_ytdl {
+sub _info_from_ytdl {
my ($self, $videoID) = @_;
$self->_ytdl_is_available() || return;
@@ -782,9 +831,23 @@ sub _extract_from_ytdl {
}
my $json = $self->proxy_stdout(@ytdl_cmd, quotemeta("https://www.youtube.com/watch?v=" . $videoID));
- my $ref = $self->parse_json_string($json);
+ my $ref = $self->parse_json_string($json // return);
+
+ if ($self->get_debug >= 3) {
+ require Data::Dump;
+ Data::Dump::pp($ref);
+ }
+
+ return $ref;
+}
+
+sub _extract_from_ytdl {
+ my ($self, $videoID) = @_;
+
+ my $ref = $self->_info_from_ytdl($videoID) // return;
my @formats;
+
if (ref($ref) eq 'HASH' and exists($ref->{formats}) and ref($ref->{formats}) eq 'ARRAY') {
foreach my $format (@{$ref->{formats}}) {
if (exists($format->{format_id}) and exists($format->{url})) {
@@ -825,9 +888,9 @@ sub _fallback_extract_urls {
@formats && return @formats;
}
- # Use the API of invidio.us
+ # Use the API of invidious
if ($self->get_debug) {
- say STDERR ":: Using invidio.us to extract the streaming URLs...";
+ say STDERR ":: Using invidious to extract the streaming URLs...";
}
push @formats, $self->_extract_from_invidious($videoID);
@@ -1029,7 +1092,7 @@ sub _extract_streaming_urls {
@results = grep { $_->{itag} == 22 or (exists($_->{contentLength}) and $_->{contentLength} > 0) } @results;
# Filter out streams with "dur=0.000"
- @results = grep { $_->{url} !~ /\bdur=0\.000\b/ } @results;
+ @results = grep { $_->{url} !~ /\bdur=0\.000\b/ } grep { defined($_->{url}) } @results;
# Detect livestream
if (!@results and exists($json->{streamingData}) and exists($json->{streamingData}{hlsManifestUrl})) {
@@ -1053,7 +1116,36 @@ sub _extract_streaming_urls {
return @results;
}
-sub _get_video_info {
+# POST a JSON request for $videoID to one of the youtubei endpoints and
+# return whatever post_as_json() hands back (called in scalar context).
+#
+# Valid endpoints: browse, player, next
+sub _get_youtubei_content {
+    my ($self, $endpoint, $videoID) = @_;
+
+    # The configured URL is a sprintf template taking the endpoint name.
+    my $request_url = sprintf($self->get_youtubei_url(), $endpoint);
+
+    require Time::Piece;
+
+    # The client version embeds the current date as YYYYMMDD.
+    my $today          = Time::Piece->new(time)->strftime("%Y%m%d");
+    my $client_version = sprintf("2.%s.05.00", $today);
+
+    my %client = (
+        "hl"            => "en",
+        "gl"            => "US",
+        "clientName"    => "WEB",
+        "clientVersion" => $client_version,
+    );
+
+    my %payload = (
+        "videoId" => $videoID,
+        "context" => {"client" => \%client},
+    );
+
+    # The access token is undefined only while this sub runs; `local`
+    # restores the previous value on exit.
+    local $self->{access_token} = undef;
+
+    my $content = $self->post_as_json($request_url, \%payload);
+    return $content;
+}
+
+sub _old_get_video_info {
my ($self, $videoID) = @_;
my $url = $self->get_video_info_url() . sprintf($self->get_video_info_args(), $videoID);
@@ -1063,6 +1155,109 @@ sub _get_video_info {
return %info;
}
+# Return the video info as a key/value list holding `player_response`.
+# When the "player" endpoint yields nothing, the result of
+# _old_get_video_info() is returned instead.
+sub _get_video_info {
+    my ($self, $videoID) = @_;
+
+    my $player_response = $self->_get_youtubei_content('player', $videoID);
+
+    # No answer from the "player" endpoint: defer to the older lookup.
+    if (not defined $player_response) {
+        return $self->_old_get_video_info($videoID);
+    }
+
+    my %info = (player_response => $player_response);
+    return %info;
+}
+
+# Query the "next" endpoint for the given video ID and pass the result on.
+sub _get_video_next_info {
+    my ($self, $videoID) = @_;
+
+    return $self->_get_youtubei_content('next', $videoID);
+}
+
+# Build auto-translated variants of the given caption tracks.
+#
+# $caption_urls is an array ref of caption hashes (`languageCode`, `baseUrl`
+# and any other keys). For every source caption and every language of the
+# built-in list that is not already a source language, a shallow copy is
+# produced with `languageCode` replaced and "&tlang=<code>" appended to
+# `baseUrl`.
+#
+# Returns a (possibly empty) list of hash refs. The input is not modified.
+sub _make_translated_captions {
+    my ($self, $caption_urls) = @_;
+
+    # Without a caption list there is nothing to translate.
+    ref($caption_urls) eq 'ARRAY' or return;
+
+    my @languages = qw(
+        af am ar az be bg bn bs ca ceb co cs cy da de el en eo es et eu fa fi fil
+        fr fy ga gd gl gu ha haw hi hmn hr ht hu hy id ig is it iw ja jv ka kk km
+        kn ko ku ky la lb lo lt lv mg mi mk ml mn mr ms mt my ne nl no ny or pa pl
+        ps pt ro ru rw sd si sk sl sm sn so sq sr st su sv sw ta te tg th tk tr tt
+        ug uk ur uz vi xh yi yo zh-Hans zh-Hant zu
+    );
+
+    # Entries that are not hashes cannot describe a caption track.
+    my @captions = grep { ref($_) eq 'HASH' } @$caption_urls;
+
+    # Languages already available as source captions are not translation targets.
+    my %source_languages = map { $_->{languageCode} => 1 } grep { defined($_->{languageCode}) } @captions;
+    my @targets = grep { not exists $source_languages{$_} } @languages;
+
+    my @translated;
+    foreach my $caption (@captions) {
+
+        # A translation URL can only be derived from an existing base URL.
+        defined($caption->{baseUrl}) or next;
+
+        foreach my $lang_code (@targets) {
+            my %variant = %$caption;    # shallow copy, the source entry stays untouched
+            $variant{languageCode} = $lang_code;
+            $variant{baseUrl}      = $caption->{baseUrl} . "&tlang=$lang_code";
+            push @translated, \%variant;
+        }
+    }
+
+    return @translated;
+}
+
+# Fallback caption extraction based on the data returned by _info_from_ytdl().
+#
+# The `subtitles` and `automatic_captions` sections are inspected in that
+# order and, per language, the first track whose `ext` is `srv1` is kept.
+# Once at least one entry came from `subtitles`, `automatic_captions` is not
+# consulted and translated variants of the collected subtitles are appended.
+#
+# Returns a list of hash refs with the keys `kind` ('asr' for automatic
+# captions, '' otherwise), `languageCode` and `baseUrl`.
+sub _fallback_extract_captions {
+    my ($self, $videoID) = @_;
+
+    if ($self->get_debug) {
+        say STDERR ":: Extracting closed-caption URLs with `hypervideo`...";
+    }
+
+    # Extract closed-caption URLs with hypervideo if our code failed
+    my $ytdl_info = $self->_info_from_ytdl($videoID);
+
+    my @caption_urls;
+
+    # Nothing to work with unless the extractor produced a hash.
+    ref($ytdl_info) eq 'HASH' or return @caption_urls;
+
+    my $has_subtitles = 0;
+
+    foreach my $key (qw(subtitles automatic_captions)) {
+
+        my $ccaps = $ytdl_info->{$key} // next;
+
+        ref($ccaps) eq 'HASH' or next;
+
+        foreach my $lang_code (sort keys %$ccaps) {
+
+            # Each language is expected to map to a list of track hashes;
+            # anything else is skipped rather than dereferenced.
+            my $tracks = $ccaps->{$lang_code};
+            ref($tracks) eq 'ARRAY' or next;
+
+            my ($caption_info) = grep { ref($_) eq 'HASH' and ($_->{ext} // '') eq 'srv1' } @$tracks;
+
+            if (defined($caption_info) and defined($caption_info->{url})) {
+
+                push @caption_urls,
+                  scalar {
+                          kind         => ($key eq 'automatic_captions' ? 'asr' : ''),
+                          languageCode => $lang_code,
+                          baseUrl      => $caption_info->{url},
+                         };
+
+                if ($key eq 'subtitles') {
+                    $has_subtitles = 1;
+                }
+            }
+        }
+
+        # Subtitles take precedence over automatic captions.
+        last if $has_subtitles;
+    }
+
+    # Auto-translated captions
+    if ($has_subtitles) {
+
+        if ($self->get_debug) {
+            say STDERR ":: Generating translated closed-caption URLs...";
+        }
+
+        push @caption_urls, $self->_make_translated_captions(\@caption_urls);
+    }
+
+    return @caption_urls;
+}
+
=head2 get_streaming_urls($videoID)
Returns a list of streaming URLs for a videoID.
@@ -1077,15 +1272,35 @@ sub get_streaming_urls {
my @streaming_urls = $self->_extract_streaming_urls(\%info, $videoID);
my @caption_urls;
- if (exists $info{player_response}) {
+
+ if (defined $info{player_response}) {
my $captions_json = $info{player_response}; # don't run uri_unescape() on this
my $caption_data = $self->parse_json_string($captions_json);
if (eval { ref($caption_data->{captions}{playerCaptionsTracklistRenderer}{captionTracks}) eq 'ARRAY' }) {
- push @caption_urls, @{$caption_data->{captions}{playerCaptionsTracklistRenderer}{captionTracks}};
+
+ my @caption_tracks = @{$caption_data->{captions}{playerCaptionsTracklistRenderer}{captionTracks}};
+ my @human_made_cc = grep { ($_->{kind} // '') ne 'asr' } @caption_tracks;
+
+ push @caption_urls, @human_made_cc, @caption_tracks;
+
+ foreach my $caption (@caption_urls) {
+ $caption->{baseUrl} =~ s{\bfmt=srv[0-9]\b}{fmt=srv1}g;
+ }
+
+ push @caption_urls, $self->_make_translated_captions(\@caption_urls);
+ }
+
+ # Try again with hypervideo
+ if (!@streaming_urls or (($caption_data->{playabilityStatus}{status} // '') =~ /fail|error/i)) {
+ @streaming_urls = $self->_fallback_extract_urls($videoID);
+ push @caption_urls, $self->_fallback_extract_captions($videoID);
}
}
+ else {
+ push @caption_urls, $self->_fallback_extract_captions($videoID);
+ }
if ($self->get_debug) {
my $count = scalar(@streaming_urls);
@@ -1093,8 +1308,9 @@ sub get_streaming_urls {
}
# Try again with hypervideo
- if (!@streaming_urls or $info{status} =~ /fail|error/i) {
+ if (!@streaming_urls or (($info{status} // '') =~ /fail|error/i)) {
@streaming_urls = $self->_fallback_extract_urls($videoID);
+ push @caption_urls, $self->_fallback_extract_captions($videoID);
}
if ($self->get_prefer_mp4 or $self->get_prefer_av1) {
@@ -1208,6 +1424,31 @@ sub post_as_json {
sub next_page_with_token {
my ($self, $url, $token) = @_;
+ if (ref($token) eq 'CODE') {
+ return $token->();
+ }
+
+ if ($token =~ /^yt(search|browse):(\w+):(.*)/) {
+ if ($1 eq 'browse') {
+ return $self->yt_browse_next_page($url, $3, type => $2, url => $url);
+ }
+ else {
+ return $self->yt_search_next_page($url, $3, type => $2, url => $url);
+ }
+ }
+
+ if ($token =~ /^ytplaylist:(\w+):(.*)/) {
+ return $self->yt_playlist_next_page($url, $2, type => $1, url => $url);
+ }
+
+ if ($url =~ m{^https://m\.youtube\.com}) {
+ return
+ scalar {
+ url => $url,
+ results => [],
+ };
+ }
+
if (not $url =~ s{[?&]continuation=\K([^&]+)}{$token}) {
$url = $self->_append_url_args($url, continuation => $token);
}
@@ -1224,6 +1465,14 @@ sub next_page {
return $self->next_page_with_token($url, $token);
}
+ if ($url =~ m{^https://m\.youtube\.com}) {
+ return
+ scalar {
+ url => $url,
+ results => [],
+ };
+ }
+
if (not $url =~ s{[?&]page=\K(\d+)}{$1+1}e) {
$url = $self->_append_url_args($url, page => 2);
}
@@ -1233,16 +1482,6 @@ sub next_page {
return $res;
}
-sub previous_page {
- my ($self, $url) = @_;
-
- $url =~ s{[?&]page=\K(\d+)}{($1 > 2) ? ($1-1) : 1}e;
-
- my $res = $self->_get_results($url);
- $res->{url} = $url;
- return $res;
-}
-
# SUBROUTINE FACTORY
{
no strict 'refs';
@@ -1276,13 +1515,14 @@ Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >>
Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >>
+
=head1 SEE ALSO
https://developers.google.com/youtube/v3/docs/
=head1 LICENSE AND COPYRIGHT
-Copyright 2013-2015 Trizen.
+Copyright 2012-2015 Trizen.
Copyright 2020 Jesus E.
@@ -1322,6 +1562,7 @@ CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
=cut
1; # End of WWW::FairViewer