diff options
author | Jesús <heckyel@hyperbola.info> | 2021-07-09 15:27:16 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-07-09 15:27:16 -0500 |
commit | 739c821a54c01816e60eb5f774c8977a1e221ea0 (patch) | |
tree | e04a7f5a6fe4d450d43fd45c412f9d415bcb7a7e /lib/WWW | |
parent | c1322a4e9a1fb0a286dab1277a740072d0ab30f9 (diff) | |
download | fair-viewer-739c821a54c01816e60eb5f774c8977a1e221ea0.tar.lz fair-viewer-739c821a54c01816e60eb5f774c8977a1e221ea0.tar.xz fair-viewer-739c821a54c01816e60eb5f774c8977a1e221ea0.zip |
upstream
Diffstat (limited to 'lib/WWW')
-rw-r--r-- | lib/WWW/FairViewer.pm | 393 | ||||
-rw-r--r-- | lib/WWW/FairViewer/Channels.pm | 91 | ||||
-rw-r--r-- | lib/WWW/FairViewer/CommentThreads.pm | 1 | ||||
-rw-r--r-- | lib/WWW/FairViewer/GetCaption.pm | 16 | ||||
-rw-r--r-- | lib/WWW/FairViewer/GuideCategories.pm | 1 | ||||
-rw-r--r-- | lib/WWW/FairViewer/InitialData.pm | 1050 | ||||
-rw-r--r-- | lib/WWW/FairViewer/Itags.pm | 207 | ||||
-rw-r--r-- | lib/WWW/FairViewer/ParseJSON.pm | 12 | ||||
-rw-r--r-- | lib/WWW/FairViewer/ParseXML.pm | 3 | ||||
-rw-r--r-- | lib/WWW/FairViewer/PlaylistItems.pm | 8 | ||||
-rw-r--r-- | lib/WWW/FairViewer/Playlists.pm | 12 | ||||
-rw-r--r-- | lib/WWW/FairViewer/Search.pm | 25 | ||||
-rw-r--r-- | lib/WWW/FairViewer/Utils.pm | 351 | ||||
-rw-r--r-- | lib/WWW/FairViewer/Videos.pm | 105 |
14 files changed, 1974 insertions, 301 deletions
diff --git a/lib/WWW/FairViewer.pm b/lib/WWW/FairViewer.pm index a192396..dff63f4 100644 --- a/lib/WWW/FairViewer.pm +++ b/lib/WWW/FairViewer.pm @@ -6,12 +6,14 @@ use warnings; use Memoize; -memoize('_get_video_info'); +#memoize('_get_video_info'); memoize('_ytdl_is_available'); +memoize('_info_from_ytdl'); memoize('_extract_from_ytdl'); memoize('_extract_from_invidious'); use parent qw( + WWW::FairViewer::InitialData WWW::FairViewer::Search WWW::FairViewer::Videos WWW::FairViewer::Channels @@ -23,11 +25,11 @@ use parent qw( WWW::FairViewer::CommentThreads WWW::FairViewer::Authentication WWW::FairViewer::VideoCategories - ); +); =head1 NAME -WWW::FairViewer - A very easy interface to YouTube, using the API of invidio.us. +WWW::FairViewer - A very easy interface to YouTube, using the API of invidious. =cut @@ -79,10 +81,11 @@ my %valid_options = ( ytdl_cmd => {valid => qr/\w/, default => "hypervideo"}, # Booleans - env_proxy => {valid => [1, 0], default => 1}, - escape_utf8 => {valid => [1, 0], default => 0}, - prefer_mp4 => {valid => [1, 0], default => 0}, - prefer_av1 => {valid => [1, 0], default => 0}, + env_proxy => {valid => [1, 0], default => 1}, + escape_utf8 => {valid => [1, 0], default => 0}, + prefer_mp4 => {valid => [1, 0], default => 0}, + prefer_av1 => {valid => [1, 0], default => 0}, + prefer_invidious => {valid => [1, 0], default => 0}, # API/OAuth key => {valid => qr/^.{15}/, default => undef}, @@ -95,16 +98,21 @@ my %valid_options = ( authentication_file => {valid => qr/^./, default => undef}, api_host => {valid => qr/\w/, default => "auto"}, +#<<< # No input value allowed api_path => {valid => q[], default => '/api/v1/'}, video_info_url => {valid => q[], default => 'https://www.youtube.com/get_video_info'}, oauth_url => {valid => q[], default => 'https://accounts.google.com/o/oauth2/'}, - video_info_args => {valid => q[], default => '?video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en'}, + video_info_args => {valid => q[], default => 
'?video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en&html5=1&c=TVHTML5&cver=6.20180913'}, www_content_type => {valid => q[], default => 'application/x-www-form-urlencoded'}, + m_youtube_url => {valid => q[], default => 'https://m.youtube.com'}, + youtubei_url => {valid => q[], default => 'https://youtubei.googleapis.com/youtubei/v1/%s?key=' . reverse("8Wcq11_9Y_wliCGLHETS4Q8UqlS2JF_OAySazIA")}, +#>>> #<<< # LWP user agent - user_agent => {valid => qr/^.{5}/, default => 'Mozilla/5.0 (Windows NT 10.0; Win64; gzip; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.0.0 Safari/537.36'}, + #user_agent => {valid => qr/^.{5}/, default => 'Mozilla/5.0 (iPad; CPU OS 7_1_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201 Safari/9537.53'}, + user_agent => {valid => qr/^.{5}/, default => 'Mozilla/5.0 (Android 11; Tablet; rv:83.0) Gecko/83.0 Firefox/83.0,gzip(gfe)'}, #>>> ); @@ -297,7 +305,7 @@ sub set_lwp_useragent { require LWP::ConnCache; state $cache = LWP::ConnCache->new; - $cache->total_capacity(undef); # no limit + $cache->total_capacity(undef); # no limit state $accepted_encodings = do { require HTTP::Message; @@ -319,14 +327,11 @@ sub set_lwp_useragent { ## Netscape HTTP Cookies - # Chrome extension: - # https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg - # Firefox extension: # https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/ # See also: - # https://libregit.org/heckyel/hypervideo#how-do-i-pass-cookies-to-hypervideo + # https://git.conocimientoslibres.ga/software/hypervideo.git/about/#how-do-i-pass-cookies-to-hypervideo require HTTP::Cookies::Netscape; @@ -339,6 +344,19 @@ sub set_lwp_useragent { $cookies->load; $agent->cookie_jar($cookies); } + else { + + require HTTP::Cookies; + + my $cookies = HTTP::Cookies->new(); + + # Consent cookie + $cookies->set_cookie(0, "CONSENT", "YES+cb-m.20210615-14-p0.en+FX+096", + "/", ".youtube.com", undef, 0, 1, '21' . 
join('', map { int(rand(10)) } 1 .. 8), + 0, {}); + + $agent->cookie_jar($cookies); + } push @{$agent->requests_redirectable}, 'POST'; $self->{lwp} = $agent; @@ -396,56 +414,57 @@ sub lwp_get { $url // return; $self->{lwp} // $self->set_lwp_useragent(); - my %lwp_header = ($opt{simple} ? () : $self->_auth_lwp_header); - my $response = $self->{lwp}->get($url, %lwp_header); + if ($url =~ m{^//}) { + $url = 'https:' . $url; + } - if ($response->is_success) { - return $response->decoded_content; + if ($url =~ m{^/vi/}) { + $url = 'https://i.ytimg.com' . $url; } - if ($response->status_line() =~ /^401 / and defined($self->get_refresh_token)) { - if (defined(my $refresh_token = $self->oauth_refresh_token())) { - if (defined $refresh_token->{access_token}) { + # Fix YouTube thumbnails for results from invidious instances + $url =~ s{^https?://[^/]+(/vi/.*\.jpg)\z}{https://i.ytimg.com$1}; - $self->set_access_token($refresh_token->{access_token}); + my %lwp_header = ($opt{simple} ? () : $self->_auth_lwp_header); + + my $response = do { + my $r; - # Don't be tempted to use recursion here, because bad things will happen! - $response = $self->{lwp}->get($url, $self->_auth_lwp_header); + if ($url =~ m{^https?://[^/]+\.onion/}) { # onion URL - if ($response->is_success) { - $self->save_authentication_tokens(); - return $response->decoded_content; + if (not defined($self->get_http_proxy)) { # no proxy defined + if ($self->get_env_proxy and (defined($ENV{HTTP_PROXY}) or defined($ENV{HTTPS_PROXY}))) { + ## ok -- LWP::UserAgent will use proxy defined in ENV } - elsif ($response->status_line() =~ /^401 /) { - $self->set_refresh_token(); # refresh token was invalid - $self->set_access_token(); # access token is also broken - warn "[!] Can't refresh the access token! Logging out...\n"; + else { + say ":: Setting proxy for onion websites..." 
if $self->get_debug; + $self->{lwp}->proxy(['http', 'https'], 'socks://localhost:9050'); + $r = $self->{lwp}->get($url, %lwp_header); + $self->{lwp}->proxy(['http', 'https'], undef); } } - else { - warn "[!] Can't get the access_token! Logging out...\n"; - $self->set_refresh_token(); - $self->set_access_token(); - } - } - else { - warn "[!] Invalid refresh_token! Logging out...\n"; - $self->set_refresh_token(); - $self->set_access_token(); } + + $r // $self->{lwp}->get($url, %lwp_header); + }; + + if ($response->is_success) { + return $response->decoded_content; } $opt{depth} ||= 0; # Try again on 500+ HTTP errors - if ( $opt{depth} < 3 + if ( $opt{depth} < 1 and $response->code() >= 500 and $response->status_line() =~ /(?:Temporary|Server) Error|Timeout|Service Unavailable/i) { return $self->lwp_get($url, %opt, depth => $opt{depth} + 1); } # Too many errors. Pick another invidious instance. - $self->pick_and_set_random_instance(); + if ($url !~ m{\byoutube\.com\b/}) { + $self->pick_and_set_random_instance(); + } _warn_reponse_error($response, $url); return; @@ -527,7 +546,7 @@ sub get_invidious_instances { my $lwp = LWP::UserAgent->new(timeout => $self->get_timeout); $lwp->show_progress(1) if $self->get_debug; - my $resp = $lwp->get("https://instances.invidio.us/instances.json"); + my $resp = $lwp->get("https://api.invidious.io/instances.json"); $resp->is_success() or return; @@ -558,12 +577,17 @@ sub select_good_invidious_instances { 'yewtu.be' => 1, 'invidious.tube' => 1, 'invidiou.site' => 0, + 'invidious.site' => 1, + 'invidious.zee.li' => 1, + 'invidious.048596.xyz' => 1, 'invidious.xyz' => 1, 'vid.mint.lgbt' => 1, 'invidious.ggc-project.de' => 1, 'invidious.toot.koeln' => 1, - 'invidious.kavin.rocks' => 0, + 'invidious.kavin.rocks' => 1, 'invidious.snopyta.org' => 0, + 'invidious.silkky.cloud' => 1, # broken thumbnail URLs for popular videos + 'invidious.moomoo.me' => 1, # ==//== ); #<<< @@ -587,25 +611,24 @@ sub select_good_invidious_instances { return 
@candidates; } -sub pick_good_random_instance { - my ($self) = @_; - - my @candidates = $self->select_good_invidious_instances(); - my @extra_candidates = $self->select_good_invidious_instances(lax => 1); +sub _find_working_instance { + my ($self, $candidates, $extra_candidates) = @_; require List::Util; require WWW::FairViewer::Utils; state $yv_utils = WWW::FairViewer::Utils->new(); - foreach my $instance (List::Util::shuffle(@candidates), List::Util::shuffle(@extra_candidates)) { + foreach my $instance (List::Util::shuffle(@$candidates), List::Util::shuffle(@$extra_candidates)) { ref($instance) eq 'ARRAY' or next; my $uri = $instance->[1]{uri} // next; $uri =~ s{/+\z}{}; # remove trailing '/' - local $self->{api_host} = $uri; + local $self->{api_host} = $uri; + local $self->{prefer_invidious} = 1; + my $results = $self->search_videos('test'); if ($yv_utils->has_entries($results)) { @@ -613,13 +636,32 @@ sub pick_good_random_instance { } } + return; +} + +sub pick_random_instance { + my ($self) = @_; + + my @candidates = $self->select_good_invidious_instances(); + my @extra_candidates = $self->select_good_invidious_instances(lax => 1); + + if ($self->get_prefer_invidious) { + if (defined(my $instance = $self->_find_working_instance(\@candidates, \@extra_candidates))) { + return $instance; + } + } + + if (not @candidates) { + @candidates = @extra_candidates; + } + $candidates[rand @candidates]; } sub pick_and_set_random_instance { my ($self) = @_; - my $instance = $self->pick_good_random_instance() // return; + my $instance = $self->pick_random_instance() // return; ref($instance) eq 'ARRAY' or return; @@ -640,8 +682,15 @@ sub get_api_url { $host =~ s{/+\z}{}; # remove trailing '/' - if ($host =~ m{^[-\w]+(?>\.[-\w]+)+\z}) { # no protocol specified - $host = 'https://' . 
$host; # default to HTTPS + if ($host =~ /\w\.\w/ and $host !~ m{^\w+://}) { # no protocol specified + + my $protocol = 'https://'; # default to HTTPS + + if ($host =~ m{^[^/]+\.onion\z}) { # onion URL + $protocol = 'http://'; # default to HTTP + } + + $host = $protocol . $host; } # Pick a random instance when `--instance=auto` or `--instance=invidio.us`. @@ -725,7 +774,7 @@ sub _extract_from_invidious { invidious.site invidious.fdn.fr invidious.snopyta.org - ); + ); } if ($self->get_debug) { @@ -768,7 +817,7 @@ sub _ytdl_is_available { ($self->proxy_stdout($self->get_ytdl_cmd(), '--version') // '') =~ /\d/; } -sub _extract_from_ytdl { +sub _info_from_ytdl { my ($self, $videoID) = @_; $self->_ytdl_is_available() || return; @@ -782,9 +831,23 @@ sub _extract_from_ytdl { } my $json = $self->proxy_stdout(@ytdl_cmd, quotemeta("https://www.youtube.com/watch?v=" . $videoID)); - my $ref = $self->parse_json_string($json); + my $ref = $self->parse_json_string($json // return); + + if ($self->get_debug >= 3) { + require Data::Dump; + Data::Dump::pp($ref); + } + + return $ref; +} + +sub _extract_from_ytdl { + my ($self, $videoID) = @_; + + my $ref = $self->_info_from_ytdl($videoID) // return; my @formats; + if (ref($ref) eq 'HASH' and exists($ref->{formats}) and ref($ref->{formats}) eq 'ARRAY') { foreach my $format (@{$ref->{formats}}) { if (exists($format->{format_id}) and exists($format->{url})) { @@ -825,9 +888,9 @@ sub _fallback_extract_urls { @formats && return @formats; } - # Use the API of invidio.us + # Use the API of invidious if ($self->get_debug) { - say STDERR ":: Using invidio.us to extract the streaming URLs..."; + say STDERR ":: Using invidious to extract the streaming URLs..."; } push @formats, $self->_extract_from_invidious($videoID); @@ -1029,7 +1092,7 @@ sub _extract_streaming_urls { @results = grep { $_->{itag} == 22 or (exists($_->{contentLength}) and $_->{contentLength} > 0) } @results; # Filter out streams with "dur=0.000" - @results = grep { $_->{url} 
!~ /\bdur=0\.000\b/ } @results; + @results = grep { $_->{url} !~ /\bdur=0\.000\b/ } grep { defined($_->{url}) } @results; # Detect livestream if (!@results and exists($json->{streamingData}) and exists($json->{streamingData}{hlsManifestUrl})) { @@ -1053,7 +1116,36 @@ sub _extract_streaming_urls { return @results; } -sub _get_video_info { +sub _get_youtubei_content { + my ($self, $endpoint, $videoID) = @_; + + # Valid endpoints: browse, player, next + + my $url = sprintf($self->get_youtubei_url(), $endpoint); + + require Time::Piece; + + local $self->{access_token} = undef; + my $content = $self->post_as_json( + $url, + scalar { + "videoId" => $videoID, + "context" => { + "client" => { + "hl" => "en", + "gl" => "US", + "clientName" => "WEB", + "clientVersion" => + sprintf("2.%s.05.00", Time::Piece->new(time)->strftime("%Y%m%d")), + } + } + } + ); + + return $content; +} + +sub _old_get_video_info { my ($self, $videoID) = @_; my $url = $self->get_video_info_url() . sprintf($self->get_video_info_args(), $videoID); @@ -1063,6 +1155,109 @@ sub _get_video_info { return %info; } +sub _get_video_info { + my ($self, $videoID) = @_; + + my ($content, %info); + + for (1 .. 
1) { + $content = $self->_get_youtubei_content('player', $videoID) // return $self->_old_get_video_info($videoID); + %info = (player_response => $content); + } + + return %info; +} + +sub _get_video_next_info { + my ($self, $videoID) = @_; + $self->_get_youtubei_content('next', $videoID); +} + +sub _make_translated_captions { + my ($self, $caption_urls) = @_; + + my @languages = qw( + af am ar az be bg bn bs ca ceb co cs cy da de el en eo es et eu fa fi fil + fr fy ga gd gl gu ha haw hi hmn hr ht hu hy id ig is it iw ja jv ka kk km + kn ko ku ky la lb lo lt lv mg mi mk ml mn mr ms mt my ne nl no ny or pa pl + ps pt ro ru rw sd si sk sl sm sn so sq sr st su sv sw ta te tg th tk tr tt + ug uk ur uz vi xh yi yo zh-Hans zh-Hant zu + ); + + my %trans_languages = map { $_->{languageCode} => 1 } @$caption_urls; + @languages = grep { not exists $trans_languages{$_} } @languages; + + my @asr; + foreach my $caption (@$caption_urls) { + foreach my $lang_code (@languages) { + my %caption_copy = %$caption; + $caption_copy{languageCode} = $lang_code; + $caption_copy{baseUrl} = $caption_copy{baseUrl} . 
"&tlang=$lang_code"; + push @asr, \%caption_copy; + } + } + + return @asr; +} + +sub _fallback_extract_captions { + my ($self, $videoID) = @_; + + if ($self->get_debug) { + say STDERR ":: Extracting closed-caption URLs with `hypervideo`..."; + } + + # Extract closed-caption URLs with hypervideo if our code failed + my $ytdl_info = $self->_info_from_ytdl($videoID); + + my @caption_urls; + + if (defined($ytdl_info) and ref($ytdl_info) eq 'HASH') { + + my $has_subtitles = 0; + + foreach my $key (qw(subtitles automatic_captions)) { + + my $ccaps = $ytdl_info->{$key} // next; + + ref($ccaps) eq 'HASH' or next; + + foreach my $lang_code (sort keys %$ccaps) { + + my ($caption_info) = grep { $_->{ext} eq 'srv1' } @{$ccaps->{$lang_code}}; + + if (defined($caption_info) and ref($caption_info) eq 'HASH' and defined($caption_info->{url})) { + + push @caption_urls, + scalar { + kind => ($key eq 'automatic_captions' ? 'asr' : ''), + languageCode => $lang_code, + baseUrl => $caption_info->{url}, + }; + + if ($key eq 'subtitles') { + $has_subtitles = 1; + } + } + } + + last if $has_subtitles; + } + + # Auto-translated captions + if ($has_subtitles) { + + if ($self->get_debug) { + say STDERR ":: Generating translated closed-caption URLs..."; + } + + push @caption_urls, $self->_make_translated_captions(\@caption_urls); + } + } + + return @caption_urls; +} + =head2 get_streaming_urls($videoID) Returns a list of streaming URLs for a videoID. 
@@ -1077,15 +1272,35 @@ sub get_streaming_urls { my @streaming_urls = $self->_extract_streaming_urls(\%info, $videoID); my @caption_urls; - if (exists $info{player_response}) { + + if (defined $info{player_response}) { my $captions_json = $info{player_response}; # don't run uri_unescape() on this my $caption_data = $self->parse_json_string($captions_json); if (eval { ref($caption_data->{captions}{playerCaptionsTracklistRenderer}{captionTracks}) eq 'ARRAY' }) { - push @caption_urls, @{$caption_data->{captions}{playerCaptionsTracklistRenderer}{captionTracks}}; + + my @caption_tracks = @{$caption_data->{captions}{playerCaptionsTracklistRenderer}{captionTracks}}; + my @human_made_cc = grep { ($_->{kind} // '') ne 'asr' } @caption_tracks; + + push @caption_urls, @human_made_cc, @caption_tracks; + + foreach my $caption (@caption_urls) { + $caption->{baseUrl} =~ s{\bfmt=srv[0-9]\b}{fmt=srv1}g; + } + + push @caption_urls, $self->_make_translated_captions(\@caption_urls); + } + + # Try again with hypervideo + if (!@streaming_urls or (($caption_data->{playabilityStatus}{status} // '') =~ /fail|error/i)) { + @streaming_urls = $self->_fallback_extract_urls($videoID); + push @caption_urls, $self->_fallback_extract_captions($videoID); } } + else { + push @caption_urls, $self->_fallback_extract_captions($videoID); + } if ($self->get_debug) { my $count = scalar(@streaming_urls); @@ -1093,8 +1308,9 @@ sub get_streaming_urls { } # Try again with hypervideo - if (!@streaming_urls or $info{status} =~ /fail|error/i) { + if (!@streaming_urls or (($info{status} // '') =~ /fail|error/i)) { @streaming_urls = $self->_fallback_extract_urls($videoID); + push @caption_urls, $self->_fallback_extract_captions($videoID); } if ($self->get_prefer_mp4 or $self->get_prefer_av1) { @@ -1208,6 +1424,31 @@ sub post_as_json { sub next_page_with_token { my ($self, $url, $token) = @_; + if (ref($token) eq 'CODE') { + return $token->(); + } + + if ($token =~ /^yt(search|browse):(\w+):(.*)/) { + if ($1 eq 
'browse') { + return $self->yt_browse_next_page($url, $3, type => $2, url => $url); + } + else { + return $self->yt_search_next_page($url, $3, type => $2, url => $url); + } + } + + if ($token =~ /^ytplaylist:(\w+):(.*)/) { + return $self->yt_playlist_next_page($url, $2, type => $1, url => $url); + } + + if ($url =~ m{^https://m\.youtube\.com}) { + return + scalar { + url => $url, + results => [], + }; + } + if (not $url =~ s{[?&]continuation=\K([^&]+)}{$token}) { $url = $self->_append_url_args($url, continuation => $token); } @@ -1224,6 +1465,14 @@ sub next_page { return $self->next_page_with_token($url, $token); } + if ($url =~ m{^https://m\.youtube\.com}) { + return + scalar { + url => $url, + results => [], + }; + } + if (not $url =~ s{[?&]page=\K(\d+)}{$1+1}e) { $url = $self->_append_url_args($url, page => 2); } @@ -1233,16 +1482,6 @@ sub next_page { return $res; } -sub previous_page { - my ($self, $url) = @_; - - $url =~ s{[?&]page=\K(\d+)}{($1 > 2) ? ($1-1) : 1}e; - - my $res = $self->_get_results($url); - $res->{url} = $url; - return $res; -} - # SUBROUTINE FACTORY { no strict 'refs'; @@ -1276,13 +1515,14 @@ Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >> Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >> + =head1 SEE ALSO https://developers.google.com/youtube/v3/docs/ =head1 LICENSE AND COPYRIGHT -Copyright 2013-2015 Trizen. +Copyright 2012-2015 Trizen. Copyright 2020 Jesus E. @@ -1322,6 +1562,7 @@ CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ =cut 1; # End of WWW::FairViewer diff --git a/lib/WWW/FairViewer/Channels.pm b/lib/WWW/FairViewer/Channels.pm index 3ee44d4..55598d0 100644 --- a/lib/WWW/FairViewer/Channels.pm +++ b/lib/WWW/FairViewer/Channels.pm @@ -25,12 +25,18 @@ sub _make_channels_url { sub videos_from_channel_id { my ($self, $channel_id) = @_; - return $self->_get_results($self->_make_feed_url("channels/$channel_id/videos")); + + if (my $results = $self->yt_channel_uploads($channel_id)) { + return $results; + } + + my $url = $self->_make_feed_url("channels/$channel_id/videos"); + return $self->_get_results($url); } sub videos_from_username { my ($self, $channel_id) = @_; - return $self->_get_results($self->_make_feed_url("channels/$channel_id/videos")); + $self->videos_from_channel_id($channel_id); } =head2 popular_videos($channel_id) @@ -46,7 +52,12 @@ sub popular_videos { return $self->_get_results($self->_make_feed_url('popular')); } - return $self->_get_results($self->_make_feed_url("channels/$channel_id/videos", sort_by => 'popular')); + if (my $results = $self->yt_channel_uploads($channel_id, sort_by => 'popular')) { + return $results; + } + + my $url = $self->_make_feed_url("channels/$channel_id/videos", sort_by => 'popular'); + return $self->_get_results($url); } =head2 channels_from_categoryID($category_id) @@ -94,65 +105,34 @@ For all functions, C<$channels->{results}{items}> contains: } } -=head2 my_channel() - -Returns info about the channel of the current authenticated user. - -=cut - -sub my_channel { - my ($self) = @_; - $self->get_access_token() // return; - return $self->_get_results($self->_make_channels_url(part => 'snippet', mine => 'true')); -} - -=head2 my_channel_id() +=head2 channel_id_from_username($username) -Returns the channel ID of the current authenticated user. +Return the channel ID for an username. 
=cut -sub my_channel_id { - my ($self) = @_; +sub channel_id_from_username { + my ($self, $username) = @_; state $cache = {}; - if (exists $cache->{id}) { - return $cache->{id}; + if (exists $cache->{username}) { + return $cache->{username}; } - $cache->{id} = undef; - my $channel = $self->my_channel() // return; - $cache->{id} = $channel->{results}{items}[0]{id} // return; -} - -=head2 channels_my_subscribers() - -Retrieve a list of channels that subscribed to the authenticated user's channel. - -=cut - -sub channels_my_subscribers { - my ($self) = @_; - $self->get_access_token() // return; - return $self->_get_results($self->_make_channels_url(mySubscribers => 'true')); -} - -=head2 channel_id_from_username($username) - -Return the channel ID for an username. - -=cut - -sub channel_id_from_username { - my ($self, $username) = @_; + if (defined(my $id = $self->yt_channel_id($username))) { + if (ref($id) eq '' and $id =~ /\S/) { + $cache->{$username} = $id; + return $id; + } + } # A channel's username (if it doesn't include spaces) is also valid in place of ucid. if ($username =~ /\w/ and not $username =~ /\s/) { return $username; } - # TODO: resolve channel name to channel ID + # Unable to resolve channel name to channel ID (return as it is) return $username; } @@ -165,11 +145,22 @@ Return the channel title for a given channel ID. 
sub channel_title_from_id { my ($self, $channel_id) = @_; - if ($channel_id eq 'mine') { - $channel_id = $self->my_channel_id(); + $channel_id // return; + + state $cache = {}; + + if (exists $cache->{channel_id}) { + return $cache->{channel_id}; + } + + if (defined(my $title = $self->yt_channel_title($channel_id))) { + if (ref($title) eq '' and $title =~ /\S/) { + $cache->{$channel_id} = $title; + return $title; + } } - my $info = $self->channels_info($channel_id // return) // return; + my $info = $self->channels_info($channel_id) // return; ( ref($info) eq 'HASH' and ref($info->{results}) eq 'HASH' diff --git a/lib/WWW/FairViewer/CommentThreads.pm b/lib/WWW/FairViewer/CommentThreads.pm index 760756e..9bceff5 100644 --- a/lib/WWW/FairViewer/CommentThreads.pm +++ b/lib/WWW/FairViewer/CommentThreads.pm @@ -73,6 +73,7 @@ Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >> Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >> + =head1 SUPPORT You can find documentation for this module with the perldoc command. diff --git a/lib/WWW/FairViewer/GetCaption.pm b/lib/WWW/FairViewer/GetCaption.pm index 710a2af..d919fe9 100644 --- a/lib/WWW/FairViewer/GetCaption.pm +++ b/lib/WWW/FairViewer/GetCaption.pm @@ -182,17 +182,6 @@ sub xml2srt { return join("\n\n", @text); } -=head2 get_xml_data($caption_data) - -Get the XML content for a given caption data. - -=cut - -sub get_xml_data { - my ($self, $url) = @_; - $self->{yv_obj}->lwp_get($url, simple => 1); -} - =head2 save_caption($video_ID) Save the caption in a .srt file and return its file path. 
@@ -213,8 +202,9 @@ sub save_caption { return $srt_file if (-e $srt_file); # Get XML data, then transform it to SubRip data - my $xml = $self->get_xml_data($info->{baseUrl} // return) // return; - my $srt = $self->xml2srt($xml) // return; + my $url = $info->{baseUrl} // return; + my $xml = $self->{yv_obj}->lwp_get($url, simple => 1) // return; + my $srt = $self->xml2srt($xml) // return; # Write the SubRib data to the $srt_file open(my $fh, '>:utf8', $srt_file) or return; diff --git a/lib/WWW/FairViewer/GuideCategories.pm b/lib/WWW/FairViewer/GuideCategories.pm index cead9f6..86dfe0f 100644 --- a/lib/WWW/FairViewer/GuideCategories.pm +++ b/lib/WWW/FairViewer/GuideCategories.pm @@ -64,6 +64,7 @@ Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >> Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >> + =head1 SUPPORT You can find documentation for this module with the perldoc command. diff --git a/lib/WWW/FairViewer/InitialData.pm b/lib/WWW/FairViewer/InitialData.pm new file mode 100644 index 0000000..50ea500 --- /dev/null +++ b/lib/WWW/FairViewer/InitialData.pm @@ -0,0 +1,1050 @@ +package WWW::FairViewer::InitialData; + +use utf8; +use 5.014; +use warnings; + +=head1 NAME + +WWW::FairViewer::InitialData - Extract initial data. 
=head1 SYNOPSIS

    use WWW::FairViewer;
    my $obj = WWW::FairViewer->new(%opts);

    my $results   = $obj->yt_search(q => $keywords);
    my $playlists = $obj->yt_channel_playlists($channel_ID);

=head1 SUBROUTINES/METHODS

=cut

# Convert "H:MM:SS", "M:SS" or "S" (the first such pattern found in $time)
# into a number of seconds. Returns 0 when no digits are found.
sub _time_to_seconds {
    my ($time) = @_;

    my ($hours, $minutes, $seconds) = (0, 0, 0);

    if ($time =~ /(\d+):(\d+):(\d+)/) {
        ($hours, $minutes, $seconds) = ($1, $2, $3);
    }
    elsif ($time =~ /(\d+):(\d+)/) {
        ($minutes, $seconds) = ($1, $2);
    }
    elsif ($time =~ /(\d+)/) {
        $seconds = $1;
    }

    $hours * 3600 + $minutes * 60 + $seconds;
}

# Convert a human-readable count ("7.6K", "1,234 views") into an integer.
# Returns 0 when the text contains no number.
sub _human_number_to_int {
    my ($text) = @_;

    # 7.6K -> 7600; 7.6M -> 7600000
    # The suffix is matched case-insensitively, so it is normalised before
    # being compared. The \b keeps a word that merely starts with k/m/b
    # (e.g. "120 members") from being taken for a unit.
    if ($text =~ /([\d,.]+)\s*([KMB])\b/i) {

        my $v = $1;
        my $u = uc($2);
        my $m = ($u eq 'K' ? 1e3 : ($u eq 'M' ? 1e6 : ($u eq 'B' ? 1e9 : 1)));

        # A comma is treated as the decimal separator here.
        $v =~ tr/,/./;

        return int($v * $m);
    }

    if ($text =~ /([\d,.]+)/) {
        my $v = $1;

        # Without a unit suffix, commas and dots are dropped as separators.
        $v =~ tr/,.//d;
        return int($v);
    }

    return 0;
}

# Map a thumbnail width (in pixels) to a quality label.
# An undefined width yields 'medium'.
sub _thumbnail_quality {
    my ($width) = @_;

    $width // return 'medium';

    # Exact widths first.
    if ($width == 1280) {
        return "maxres";
    }

    if ($width == 640) {
        return "sddefault";
    }

    if ($width == 480) {
        return 'high';
    }

    if ($width == 320) {
        return 'medium';
    }

    if ($width == 120) {
        return 'default';
    }

    # Then the closest upper bound.
    if ($width <= 120) {
        return 'small';
    }

    if ($width <= 176) {
        return 'medium';
    }

    if ($width <= 480) {
        return 'high';
    }

    if ($width <= 640) {
        return 'sddefault';
    }

    if ($width <= 1280) {
        return "maxres";
    }

    return 'medium';
}

# Force an "https://" prefix on a URL. Anything up to and including the first
# "//" is replaced; a bare "host.tld..." value gets the prefix prepended.
# Other values (and undef) are returned unchanged.
sub _fix_url_protocol {
    my ($url) = @_;

    $url // return undef;

    if ($url =~ m{^https://}) {    # ok
        return $url;
    }
    if ($url =~ s{^.*?//}{}) {
        return "https://" . $url;
    }
    if ($url =~ /^\w+\./) {
        return "https://" . $url;
    }

    return $url;
}

# Apply a fixed sequence of character rotations to $str and return the result.
# Used below to decode the string literals passed to it.
sub _unscramble {
    my ($str) = @_;

    my $i = my $l = length($str);

    $str =~ s/(.)(.{$i})/$2$1/sg while (--$i > 0);
    $str =~ s/(.)(.{$i})/$2$1/sg while (++$i < $l);

    return $str;
}

# Build a 'playlist' entry from a universalWatchCardRenderer structure.
# Returns nothing when the hero video renderer or the playlist ID is missing.
sub _extract_youtube_mix {
    my ($self, $data) = @_;

    my $info   = eval { $data->{callToAction}{watchCardHeroVideoRenderer} } || return;
    my $header = eval { $data->{header}{watchCardRichHeaderRenderer} };

    my %mix;

    $mix{type} = 'playlist';

    $mix{title} =
         eval { $header->{title}{runs}[0]{text} }
      // eval { $info->{accessibility}{accessibilityData}{label} }
      // eval { $info->{callToActionButton}{callToActionButtonRenderer}{label}{runs}[0]{text} } // 'Youtube Mix';

    $mix{playlistId} = eval { $info->{navigationEndpoint}{watchEndpoint}{playlistId} } || return;

    $mix{playlistThumbnail} = eval { _fix_url_protocol($header->{avatar}{thumbnails}[0]{url}) }
      // eval { _fix_url_protocol($info->{heroImage}{collageHeroImageRenderer}{leftThumbnail}{thumbnails}[0]{url}) };

    $mix{description} = _extract_description({title => $info});

    $mix{author}   = eval { $header->{title}{runs}[0]{text} }                              // "YouTube";
    $mix{authorId} = eval { $header->{titleNavigationEndpoint}{browseEndpoint}{browseId} } // "youtube";

    return \%mix;
}

# Author name: long byline first, short byline as fallback.
sub _extract_author_name {
    my ($info) = @_;
    eval { $info->{longBylineText}{runs}[0]{text} } // eval { $info->{shortBylineText}{runs}[0]{text} };
}

# Video ID: direct field first, then the watch endpoint; undef when absent.
sub _extract_video_id {
    my ($info) = @_;
    eval { $info->{videoId} } || eval { $info->{navigationEndpoint}{watchEndpoint}{videoId} } || undef;
}

# Video length in seconds, taken from the first source that gives a true value.
sub _extract_length_seconds {
    my ($info) = @_;
    eval { $info->{lengthSeconds} }
      || _time_to_seconds(eval { $info->{thumbnailOverlays}[0]{thumbnailOverlayTimeStatusRenderer}{text}{runs}[0]{text} } // 0)
      || _time_to_seconds(eval { $info->{lengthText}{runs}[0]{text} // 0 });
}

# Published-time text, rewritten as "<number> <word> ago" when it contains
# a number followed by a word. Returns undef when the field is missing.
sub _extract_published_text {
    my ($info) = @_;

    my $text = eval { $info->{publishedTimeText}{runs}[0]{text} } || return undef;

    if ($text =~ /(\d+)\s+(\w+)/) {
        return "$1 $2 ago";
    }

    if ($text =~ /(\d+)\s*(\w+)/) {
        return "$1 $2 ago";
    }

    return $text;
}

# Channel ID: direct field, then the byline endpoint, then the item endpoint.
sub _extract_channel_id {
    my ($info) = @_;
    eval { $info->{channelId} }
      // eval { $info->{shortBylineText}{runs}[0]{navigationEndpoint}{browseEndpoint}{browseId} }
      // eval { $info->{navigationEndpoint}{browseEndpoint}{browseId} };
}

# Short view-count text, as provided.
sub _extract_view_count_text {
    my ($info) = @_;
    eval { $info->{shortViewCountText}{runs}[0]{text} };
}

# Array ref of thumbnail hashes, each copied and extended with a 'quality'
# label and an https URL.
sub _extract_thumbnails {
    my ($info) = @_;
    eval {
        [
         map {
             my %thumb = %$_;
             $thumb{quality} = _thumbnail_quality($thumb{width});
             $thumb{url}     = _fix_url_protocol($thumb{url});
             \%thumb;
         } @{$info->{thumbnail}{thumbnails}}
        ]
    };
}

# URL of a playlist thumbnail: the first one of medium/high quality, or the
# first one available. The playlist-specific renderer is tried first.
sub _extract_playlist_thumbnail {
    my ($info) = @_;
    eval {
        _fix_url_protocol(
                          (
                           grep { _thumbnail_quality($_->{width}) =~ /medium|high/ }
                             @{$info->{thumbnailRenderer}{playlistVideoThumbnailRenderer}{thumbnail}{thumbnails}}
                          )[0]{url} // $info->{thumbnailRenderer}{playlistVideoThumbnailRenderer}{thumbnail}{thumbnails}[0]{url}
                         );
    } // eval {
        _fix_url_protocol((grep { _thumbnail_quality($_->{width}) =~ /medium|high/ } @{$info->{thumbnail}{thumbnails}})[0]{url}
                          // $info->{thumbnail}{thumbnails}[0]{url});
    };
}

# Title text, falling back to the accessibility label.
sub _extract_title {
    my ($info) = @_;
    eval { $info->{title}{runs}[0]{text} } // eval { $info->{title}{accessibility}{accessibilityData}{label} };
}

sub _extract_description {
    my ($info) = @_;

    # FIXME: this is not the video description
    eval { $info->{title}{accessibility}{accessibilityData}{label} };
}

# View count as an integer (0 when missing).
sub _extract_view_count {
    my ($info) = @_;
    _human_number_to_int(eval { $info->{viewCountText}{runs}[0]{text} } || 0);
}

# Video count as an integer (0 when missing).
sub _extract_video_count {
    my ($info) = @_;
    _human_number_to_int(   eval { $info->{videoCountShortText}{runs}[0]{text} }
                         || eval { $info->{videoCountText}{runs}[0]{text} }
                         || 0);
}

# Subscriber count as an integer (0 when missing).
sub _extract_subscriber_count {
    my ($info) = @_;
    _human_number_to_int(eval { $info->{subscriberCountText}{runs}[0]{text} } || 0);
}

sub _extract_playlist_id {
    my ($info) = @_;
    eval { $info->{playlistId} };
}

# Turn one renderer entry into a video, playlist or channel hash ref.
# $args{type} restricts which kinds are accepted. Returns nothing for
# unsupported, unplayable or incomplete entries.
sub _extract_itemSection_entry {
    my ($self, $data, %args) = @_;

    ref($data) eq 'HASH' or return;

    # Album
    if ($args{type} eq 'all' and exists $data->{horizontalCardListRenderer}) {    # TODO
        return;
    }

    # Video
    if (exists($data->{compactVideoRenderer}) or exists($data->{playlistVideoRenderer})) {

        my %video;
        my $info = $data->{compactVideoRenderer} // $data->{playlistVideoRenderer};

        $video{type} = 'video';

        # Deleted video
        if (defined(eval { $info->{isPlayable} }) and not $info->{isPlayable}) {
            return;
        }

        $video{videoId}         = _extract_video_id($info) // return;
        $video{title}           = _extract_title($info)    // return;
        $video{lengthSeconds}   = _extract_length_seconds($info) || 0;
        $video{liveNow}         = ($video{lengthSeconds} == 0);
        $video{author}          = _extract_author_name($info);
        $video{authorId}        = _extract_channel_id($info);
        $video{publishedText}   = _extract_published_text($info);
        $video{viewCountText}   = _extract_view_count_text($info);
        $video{videoThumbnails} = _extract_thumbnails($info);
        $video{description}     = _extract_description($info);
        $video{viewCount}       = _extract_view_count($info);

        # Filter out private/deleted videos from playlists
        if (exists($data->{playlistVideoRenderer})) {
            $video{author}   // return;
            $video{authorId} // return;
        }

        return \%video;
    }

    # Playlist
    if ($args{type} ne 'video' and exists $data->{compactPlaylistRenderer}) {

        my %playlist;
        my $info = $data->{compactPlaylistRenderer};

        $playlist{type} = 'playlist';

        $playlist{title}             = _extract_title($info)       // return;
        $playlist{playlistId}        = _extract_playlist_id($info) // return;
        $playlist{author}            = _extract_author_name($info);
        $playlist{authorId}          = _extract_channel_id($info);
        $playlist{videoCount}        = _extract_video_count($info);
        $playlist{playlistThumbnail} = _extract_playlist_thumbnail($info);
        $playlist{description}       = _extract_description($info);

        return \%playlist;
    }

    # Channel
    if ($args{type} ne 'video' and exists $data->{compactChannelRenderer}) {

        my %channel;
        my $info = $data->{compactChannelRenderer};

        $channel{type} = 'channel';

        $channel{author}           = _extract_title($info)      // return;
        $channel{authorId}         = _extract_channel_id($info) // return;
        $channel{subCount}         = _extract_subscriber_count($info);
        $channel{videoCount}       = _extract_video_count($info);
        $channel{authorThumbnails} = _extract_thumbnails($info);
        $channel{description}      = _extract_description($info);

        return \%channel;
    }

    return;
}

# Extract every supported item from $entry->{contents}. When $entry carries
# a continuation, a trailing 'nextpage' item with a "ytplaylist:" token is
# appended. Returns a list of hash refs.
sub _parse_itemSection {
    my ($self, $entry, %args) = @_;

    eval { ref($entry->{contents}) eq 'ARRAY' } || return;

    my @results;

    # NOTE: the loop variable shadows the outer $entry only inside the loop.
    foreach my $entry (@{$entry->{contents}}) {

        my $item = $self->_extract_itemSection_entry($entry, %args);

        if (defined($item) and ref($item) eq 'HASH') {
            push @results, $item;
        }
    }

    if (exists($entry->{continuations}) and ref($entry->{continuations}) eq 'ARRAY') {

        my $token = eval { $entry->{continuations}[0]{nextContinuationData}{continuation} };

        if (defined($token)) {
            push @results,
              scalar {
                      type  => 'nextpage',
                      token => "ytplaylist:$args{type}:$token",
                     };
        }
    }

    return @results;
}

# Look for a continuationItemRenderer inside $entry->{contents} and return
# the first 'nextpage' item (with a "ytbrowse:" token) that has a token.
sub _parse_itemSection_nextpage {
    my ($self, $entry, %args) = @_;

    eval { ref($entry->{contents}) eq 'ARRAY' } || return;

    foreach my $entry (@{$entry->{contents}}) {

        # Continuation page
        if (exists $entry->{continuationItemRenderer}) {

            my $info  = $entry->{continuationItemRenderer};
            my $token = eval { $info->{continuationEndpoint}{continuationCommand}{token} };

            if (defined($token)) {
                return
                  scalar {
                          type  => 'nextpage',
                          token => "ytbrowse:$args{type}:$token",
                         };
            }
        }
    }

    return;
}

# Walk the entries of a sectionListRenderer-like structure and collect all
# results (items, YouTube mixes and continuation tokens) as a list.
sub _extract_sectionList_results {
    my ($self, $data, %args) = @_;

    eval { ref($data->{contents}) eq 'ARRAY' } or return;

    my @results;

    foreach my $entry (@{$data->{contents}}) {

        # Playlists
        if (eval { ref($entry->{shelfRenderer}{content}{verticalListRenderer}{items}) eq 'ARRAY' }) {
            my $res = {contents => $entry->{shelfRenderer}{content}{verticalListRenderer}{items}};
            push @results, $self->_parse_itemSection($res, %args);
            push @results, $self->_parse_itemSection_nextpage($res, %args);
            next;
        }

        # Playlist videos
        if (eval { ref($entry->{itemSectionRenderer}{contents}[0]{playlistVideoListRenderer}{contents}) eq 'ARRAY' }) {
            my $res = $entry->{itemSectionRenderer}{contents}[0]{playlistVideoListRenderer};
            push @results, $self->_parse_itemSection($res, %args);
            push @results, $self->_parse_itemSection_nextpage($res, %args);
            next;
        }

        # YouTube Mix
        if ($args{type} eq 'all' and exists $entry->{universalWatchCardRenderer}) {

            my $mix = $self->_extract_youtube_mix($entry->{universalWatchCardRenderer});

            if (defined($mix)) {
                push(@results, $mix);
            }
        }

        # Video results
        if (exists $entry->{itemSectionRenderer}) {
            my $res = $entry->{itemSectionRenderer};
            push @results, $self->_parse_itemSection($res, %args);
            push @results, $self->_parse_itemSection_nextpage($res, %args);
        }

        # Continuation page
        if (exists $entry->{continuationItemRenderer}) {

            my $info  = $entry->{continuationItemRenderer};
            my $token = eval { $info->{continuationEndpoint}{continuationCommand}{token} };

            if (defined($token)) {
                push @results,
                  scalar {
                          type  => 'nextpage',
                          token => "ytsearch:$args{type}:$token",
                         };
            }
        }
    }

    if (@results and exists $data->{continuations}) {
        push @results, $self->_parse_itemSection($data, %args);
    }

    return @results;
}

# Channel header structure: the tabbed header, or the channel metadata.
sub _extract_channel_header {
    my ($self, $data, %args) = @_;
    eval { $data->{header}{c4TabbedHeaderRenderer} } // eval { $data->{metadata}{channelMetadataRenderer} };
}

# Overwrite 'author' / 'authorId' of every hash result with the channel name
# and ID found in the header of $data (when they are defined). Returns 1.
sub _add_author_to_results {
    my ($self, $data, $results, %args) = @_;

    my $header = $self->_extract_channel_header($data, %args);

    my $channel_id   = eval { $header->{channelId} } // eval { $header->{externalId} };
    my $channel_name = eval { $header->{title} };

    foreach my $result (@$results) {
        if (ref($result) eq 'HASH') {
            $result->{author}   = $channel_name if defined($channel_name);
            $result->{authorId} = $channel_id   if defined($channel_id);
        }
    }

    return 1;
}

# Return the sectionListRenderer of the first tab that has section contents,
# or undef when there is none.
sub _find_sectionList {
    my ($self, $data) = @_;

    eval {
        (
         grep {
             eval { exists($_->{tabRenderer}{content}{sectionListRenderer}{contents}) }
         } @{$data->{contents}{singleColumnBrowseResultsRenderer}{tabs}}
        )[0]{tabRenderer}{content}{sectionListRenderer};
    } // undef;
}

# The next three methods share one implementation: extract the section-list
# results of $data and stamp them with the channel author from its header.
sub _extract_channel_uploads {
    my ($self, $data, %args) = @_;

    my @results = $self->_extract_sectionList_results($self->_find_sectionList($data), %args);
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}

sub _extract_channel_playlists {
    my ($self, $data, %args) = @_;

    my @results = $self->_extract_sectionList_results($self->_find_sectionList($data), %args);
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}

sub _extract_playlist_videos {
    my ($self, $data, %args) = @_;

    my @results = $self->_extract_sectionList_results($self->_find_sectionList($data), %args);
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}

# Fetch $url and return the decoded initial-data structure embedded in the
# page. Returns nothing when invidious is preferred, when the request fails,
# or when no initial data is found in the content.
sub _get_initial_data {
    my ($self, $url) = @_;

    $self->get_prefer_invidious() and return;

    my $content = $self->lwp_get($url) // return;

    # Form 1: a single-quoted, backslash-escaped JavaScript string.
    if ($content =~ m{var\s+ytInitialData\s*=\s*'(.*?)'}is) {
        my $json = $1;

        # Undo the \xHH, \uHHHH and \" / \& escapes before parsing.
        $json =~ s{\\x([[:xdigit:]]{2})}{chr(hex($1))}ge;
        $json =~ s{\\u([[:xdigit:]]{4})}{chr(hex($1))}ge;
        $json =~ s{\\(["&])}{$1}g;

        my $hash = $self->parse_utf8_json_string($json);
        return $hash;
    }

    # Form 2: JSON inside an HTML comment.
    if ($content =~ m{<div id="initial-data"><!--(.*?)--></div>}is) {
        my $json = $1;
        my $hash = $self->parse_utf8_json_string($json);
        return $hash;
    }

    return;
}

# Build the URL of a channel page ($args{type} selects the sub-page) and
# fetch its initial data. Returns the pair ($url, $result).
sub _channel_data {
    my ($self, $channel, %args) = @_;

    state $yv_utils = WWW::FairViewer::Utils->new();

    my $url = $self->get_m_youtube_url;

    if ($yv_utils->is_channelID($channel)) {
        $url .= "/channel/$channel/$args{type}";
    }
    else {
        $url .= "/c/$channel/$args{type}";
    }

    my %params = (hl => "en");

    if (defined(my $sort = $args{sort_by})) {
        if ($sort eq 'popular') {
            $params{sort} = 'p';
        }
        elsif ($sort eq 'old') {
            $params{sort} = 'da';
        }
    }

    if (exists($args{params}) and ref($args{params}) eq 'HASH') {
        %params = (%params, %{$args{params}});
    }

    $url = $self->_append_url_args($url, %params);
    my $result = $self->_get_initial_data($url);

    # When /c/ failed, try /user/
    if ((!defined($result) or !scalar(keys %$result)) and $url =~ s{/c/}{/user/}) {
        $result = $self->_get_initial_data($url);
    }

    ($url, $result);
}

# Wrap a list of results into the structure returned by the yt_* methods.
# A trailing 'nextpage' item is turned into a 'continuation' token. Returns
# nothing for an empty or invalid list.
sub _prepare_results_for_return {
    my ($self, $results, %args) = @_;

    (defined($results) and ref($results) eq 'ARRAY') || return;

    my @results = @$results;

    @results || return;

    if (@results and $results[-1]{type} eq 'nextpage') {

        my $nextpage = pop(@results);

        if (defined($nextpage->{token}) and @results) {

            if ($self->get_debug) {
                say STDERR ":: Returning results with a continuation page token...";
            }

            return {
                    url     => $args{url},
                    results => {
                                entries      => \@results,
                                continuation => $nextpage->{token},
                               },
                   };
        }
    }

    my $url = $args{url};

    # The mobile-site URL is not exposed when there is no continuation.
    if ($url =~ m{^https://m\.youtube\.com}) {
        $url = undef;
    }

    return {
            url     => $url,
            results => \@results,
           };
}

=head2 yt_search(q => $keyword, %args)

Search for videos given a keyword string (uri-escaped).

=cut

sub yt_search {
    my ($self, %args) = @_;

    my $url = $self->get_m_youtube_url . "/results?search_query=$args{q}";

    # Filter codes, joined with '+' into the "sp" URL parameter.
    my @sp;
    my %params = (hl => 'en',);

    $args{type} //= 'video';

    if ($args{type} eq 'video') {

        if (defined(my $duration = $self->get_videoDuration)) {
            if ($duration eq 'long') {
                push @sp, 'EgQQARgC';
            }
            elsif ($duration eq 'short') {
                push @sp, 'EgQQARgB';
            }
        }

        if (defined(my $date = $self->get_date)) {
            if ($date eq 'hour') {
                push @sp, 'EgQIARAB';
            }
            elsif ($date eq 'today') {
                push @sp, "EgQIAhAB";
            }
            elsif ($date eq 'week') {
                push @sp, "EgQIAxAB";
            }
            elsif ($date eq 'month') {
                push @sp, "EgQIBBAB";
            }
            elsif ($date eq 'year') {
                push @sp, "EgQIBRAB";
            }
        }

        if (defined(my $order = $self->get_order)) {
            if ($order eq 'upload_date') {
                push @sp, "CAISAhAB";
            }
            elsif ($order eq 'view_count') {
                push @sp, "CAMSAhAB";
            }
            elsif ($order eq 'rating') {
                push @sp, "CAESAhAB";
            }
        }

        if (defined(my $license = $self->get_videoLicense)) {
            if ($license eq 'creative_commons') {
                push @sp, "EgIwAQ%253D%253D";
            }
        }

        if (defined(my $vd = $self->get_videoDefinition)) {
            if ($vd eq 'high') {
                push @sp, "EgIgAQ%253D%253D";
            }
        }

        if (defined(my $vc = $self->get_videoCaption)) {
            if ($vc eq 'true' or $vc eq '1') {
                push @sp, "EgIoAQ%253D%253D";
            }
        }

        if (defined(my $vd = $self->get_videoDimension)) {
            if ($vd eq '3d') {
                push @sp, "EgI4AQ%253D%253D";
            }
        }
    }

    if ($args{type} eq 'video') {
        push @sp, "EgIQAQ%253D%253D";
    }
    elsif ($args{type} eq 'playlist') {
        push @sp, "EgIQAw%253D%253D";
    }
    elsif ($args{type} eq 'channel') {
        push @sp, "EgIQAg%253D%253D";
    }
    elsif ($args{type} eq 'movie') {    # TODO: implement support for movies
        push @sp, "EgIQBA%253D%253D";
    }

    $params{sp} = join('+', @sp);
    $url = $self->_append_url_args($url, %params);

    my $hash    = $self->_get_initial_data($url) // return;
    my @results = $self->_extract_sectionList_results(eval { $hash->{contents}{sectionListRenderer} } // undef, %args);

    $self->_prepare_results_for_return(\@results, %args, url => $url);
}

=head2 yt_channel_search($channel, q => $keyword, %args)

Search for videos given a keyword string (uri-escaped) from a given channel ID or username.

=cut

sub yt_channel_search {
    my ($self, $channel, %args) = @_;
    my ($url, $hash) = $self->_channel_data($channel, %args, type => 'search', params => {query => $args{q}});

    $hash // return;

    my @results = $self->_extract_sectionList_results($self->_find_sectionList($hash), %args, type => 'video');
    $self->_prepare_results_for_return(\@results, %args, url => $url);
}

=head2 yt_channel_uploads($channel, %args)

Latest uploads for a given channel ID or username.

=cut

sub yt_channel_uploads {
    my ($self, $channel, %args) = @_;
    my ($url, $hash) = $self->_channel_data($channel, %args, type => 'videos');

    $hash // return;

    my @results = $self->_extract_channel_uploads($hash, %args, type => 'video');
    $self->_prepare_results_for_return(\@results, %args, url => $url);
}

=head2 yt_channel_info($channel, %args)

Channel info (such as title) for a given channel ID or username.

=cut

sub yt_channel_info {
    my ($self, $channel, %args) = @_;
    my ($url, $hash) = $self->_channel_data($channel, %args, type => '');
    return $hash;
}

=head2 yt_channel_title($channel, %args)

Extract the channel title (as a string) for a given channel ID or username.

=cut

sub yt_channel_title {
    my ($self, $channel, %args) = @_;
    my ($url, $hash) = $self->_channel_data($channel, %args, type => '');
    $hash // return;
    my $header = $self->_extract_channel_header($hash, %args) // return;
    my $title  = eval { $header->{title} };
    return $title;
}

=head2 yt_channel_id($username, %args)

Extract the channel ID (as a string) for a given channel username.
=cut

sub yt_channel_id {
    my ($self, $username, %args) = @_;

    # Only the page data is needed here; the URL is discarded.
    my (undef, $page) = $self->_channel_data($username, %args, type => '');
    return if !defined($page);

    my $header = $self->_extract_channel_header($page, %args);
    return if !defined($header);

    # The ID is stored under a different key depending on the header kind.
    my $channel_id = eval { $header->{channelId} } // eval { $header->{externalId} };
    return $channel_id;
}

=head2 yt_channel_playlists($channel, %args)

Playlists for a given channel ID or username.

=cut

sub yt_channel_playlists {
    my ($self, $channel, %args) = @_;

    my ($page_url, $page) = $self->_channel_data($channel, %args, type => 'playlists');
    return if !defined($page);

    my @playlists = $self->_extract_channel_playlists($page, %args, type => 'playlist');
    $self->_prepare_results_for_return(\@playlists, %args, url => $page_url);
}

=head2 yt_playlist_videos($playlist_id, %args)

Videos from a given playlist ID.

=cut

sub yt_playlist_videos {
    my ($self, $playlist_id, %args) = @_;

    my $page_url = $self->_append_url_args($self->get_m_youtube_url . "/playlist", list => $playlist_id, hl => "en");

    my $page = $self->_get_initial_data($page_url);
    return if !defined($page);

    my $section_list = $self->_find_sectionList($page);
    my @videos       = $self->_extract_sectionList_results($section_list, %args, type => 'video');

    $self->_prepare_results_for_return(\@videos, %args, url => $page_url);
}

=head2 yt_playlist_next_page($url, $token, %args)

Load more items from a playlist, given a continuation token.
=cut

sub yt_playlist_next_page {
    my ($self, $url, $token, %args) = @_;

    my $page_url = $self->_append_url_args($url, ctoken => $token);

    my $data = $self->_get_initial_data($page_url);
    return if !defined($data);

    # First try the two item-level continuation shapes.
    my @items = $self->_parse_itemSection(
        eval { $data->{continuationContents}{playlistVideoListContinuation} }
          // eval { $data->{continuationContents}{itemSectionContinuation} },
        %args
    );

    # Otherwise fall back to a section-list continuation.
    unless (@items) {
        my $section_list = eval { $data->{continuationContents}{sectionListContinuation} } // undef;
        @items = $self->_extract_sectionList_results($section_list, %args);
    }

    $self->_add_author_to_results($data, \@items, %args);
    $self->_prepare_results_for_return(\@items, %args, url => $url);
}

sub yt_browse_next_page {
    my ($self, $url, $token, %args) = @_;

    # Client description sent along with the continuation token.
    my %client = (
                  browserName      => "Firefox",
                  browserVersion   => "83.0",
                  clientFormFactor => "LARGE_FORM_FACTOR",
                  clientName       => "MWEB",
                  clientVersion    => "2.20210308.03.00",
                  deviceMake       => "Generic",
                  deviceModel      => "Android 11.0",
                  hl               => "en",
                  mainAppWebInfo   => {
                                     graftUrl => $url,
                                    },
                  originalUrl        => $url,
                  osName             => "Android",
                  osVersion          => "11",
                  platform           => "TABLET",
                  playerType         => "UNIPLAYER",
                  screenDensityFloat => 1,
                  screenHeightPoints => 500,
                  screenPixelDensity => 1,
                  screenWidthPoints  => 1800,
                  timeZone           => "UTC",
                  userAgent          => "Mozilla/5.0 (Android 11; Tablet; rv:83.0) Gecko/83.0 Firefox/83.0,gzip(gfe)",
                  userInterfaceTheme => "USER_INTERFACE_THEME_LIGHT",
                  utcOffsetMinutes   => 0,
                 );

    my %payload = (
                   context => {
                               client  => \%client,
                               request => {
                                           consistencyTokenJars    => [],
                                           internalExperimentFlags => [],
                                          },
                               user => {},
                              },
                   continuation => $token,
                  );

    my $endpoint = $self->get_m_youtube_url . '/youtubei/v1/browse?key=' . _unscramble('1HUCiSlOalFEcYQSS8_9q1LW4y8JAwI2zT_qA_G');

    my $response = $self->post_as_json($endpoint, \%payload);
    return if !defined($response);

    my $data = $self->parse_json_string($response);

    # The continuation items live in one of three places.
    my $section =
         eval { $data->{continuationContents}{playlistVideoListContinuation} }
      // eval { $data->{continuationContents}{itemSectionContinuation} }
      // eval { {contents => $data->{onResponseReceivedActions}[0]{appendContinuationItemsAction}{continuationItems}} }
      // undef;

    my @items = $self->_parse_itemSection($section, %args);

    if (@items) {
        push @items, $self->_parse_itemSection_nextpage($section, %args);
    }
    else {
        my $section_list = eval { $data->{continuationContents}{sectionListContinuation} } // undef;
        @items = $self->_extract_sectionList_results($section_list, %args);
    }

    $self->_add_author_to_results($data, \@items, %args);
    $self->_prepare_results_for_return(\@items, %args, url => $url);
}

=head2 yt_search_next_page($url, $token, %args)

Load more search results, given a continuation token.

=cut

sub yt_search_next_page {
    my ($self, $url, $token, %args) = @_;

    # Client description sent along with the continuation token.
    my %client = (
                  "browserName"      => "Firefox",
                  "browserVersion"   => "83.0",
                  "clientFormFactor" => "LARGE_FORM_FACTOR",
                  "clientName"       => "MWEB",
                  "clientVersion"    => "2.20201030.01.00",
                  "deviceMake"       => "generic",
                  "deviceModel"      => "android 11.0",
                  "gl"               => "US",
                  "hl"               => "en",
                  "mainAppWebInfo"   => {
                                       "graftUrl" => "https://m.youtube.com/results?search_query=youtube"
                                      },
                  "osName"             => "Android",
                  "osVersion"          => "11",
                  "platform"           => "TABLET",
                  "playerType"         => "UNIPLAYER",
                  "screenDensityFloat" => 1,
                  "screenHeightPoints" => 420,
                  "screenPixelDensity" => 1,
                  "screenWidthPoints"  => 1442,
                  "userAgent"          => "Mozilla/5.0 (Android 11; Tablet; rv:83.0) Gecko/83.0 Firefox/83.0,gzip(gfe)",
                  "userInterfaceTheme" => "USER_INTERFACE_THEME_LIGHT",
                  "utcOffsetMinutes"   => 0,
                 );

    my %payload = (
                   "context" => {
                                 "client"  => \%client,
                                 "request" => {
                                               "consistencyTokenJars"    => [],
                                               "internalExperimentFlags" => [],
                                              },
                                 "user" => {}
                                },
                   "continuation" => $token,
                  );

    my $endpoint =
        $self->get_m_youtube_url
      . _unscramble('o/ebseky?u1ri//hvcuyta=e')
      . _unscramble('1HUCiSlOalFEcYQSS8_9q1LW4y8JAwI2zT_qA_G');

    my $response = $self->post_as_json($endpoint, \%payload);
    return if !defined($response);

    my $data = $self->parse_json_string($response);

    # Present the continuation items as the contents of a section list.
    my $items = eval { $data->{onResponseReceivedCommands}[0]{appendContinuationItemsAction}{continuationItems} } // undef;

    my @results = $self->_extract_sectionList_results({contents => $items}, %args);

    $self->_prepare_results_for_return(\@results, %args, url => $url);
}

=head1 AUTHOR

Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >>

Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >>


=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc WWW::FairViewer::InitialData


=head1 LICENSE AND COPYRIGHT

Copyright 2013-2015 Trizen.

Copyright 2020 Jesus E.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See L<http://dev.perl.org/licenses/> for more information.
+ +=cut + +1; # End of WWW::FairViewer::InitialData diff --git a/lib/WWW/FairViewer/Itags.pm b/lib/WWW/FairViewer/Itags.pm index 85473c2..856775c 100644 --- a/lib/WWW/FairViewer/Itags.pm +++ b/lib/WWW/FairViewer/Itags.pm @@ -41,90 +41,90 @@ Reference: http://en.wikipedia.org/wiki/YouTube#Quality_and_formats sub get_itags { scalar { - 'best' => [{value => 38, format => 'mp4'}, # mp4 (3072p) (v-a) - {value => 138, format => 'mp4', dash => 1}, # mp4 (2160p-4320p) (v) - {value => 266, format => 'mp4', dash => 1}, # mp4 (2160p-2304p) (v) + 'best' => [{value => 38, format => 'mp4'}, # mp4 (3072p) (v-a) + {value => 138, format => 'mp4', split => 1}, # mp4 (2160p-4320p) (v) + {value => 266, format => 'mp4', split => 1}, # mp4 (2160p-2304p) (v) ], - '2160' => [{value => 315, format => 'webm', dash => 1, hfr => 1}, # webm HFR (v) - {value => 272, format => 'webm', dash => 1}, # webm (v) - {value => 313, format => 'webm', dash => 1}, # webm (v) - {value => 401, format => 'av1', dash => 1}, # av1 (v) + '2160' => [{value => 315, format => 'webm', split => 1, hfr => 1}, # webm HFR (v) + {value => 272, format => 'webm', split => 1}, # webm (v) + {value => 313, format => 'webm', split => 1}, # webm (v) + {value => 401, format => 'av1', split => 1}, # av1 (v) ], - '1440' => [{value => 308, format => 'webm', dash => 1, hfr => 1}, # webm HFR (v) - {value => 271, format => 'webm', dash => 1}, # webm (v) - {value => 264, format => 'mp4', dash => 1}, # mp4 (v) - {value => 400, format => 'av1', dash => 1}, # av1 (v) + '1440' => [{value => 308, format => 'webm', split => 1, hfr => 1}, # webm HFR (v) + {value => 271, format => 'webm', split => 1}, # webm (v) + {value => 264, format => 'mp4', split => 1}, # mp4 (v) + {value => 400, format => 'av1', split => 1}, # av1 (v) ], - '1080' => [{value => 303, format => 'webm', dash => 1, hfr => 1}, # webm HFR (v) - {value => 299, format => 'mp4', dash => 1, hfr => 1}, # mp4 HFR (v) - {value => 248, format => 'webm', dash => 1}, # webm (v) - {value 
=> 137, format => 'mp4', dash => 1}, # mp4 (v) - {value => 399, format => 'av1', dash => 1, hfr => 1}, # av1 (v) - {value => 46, format => 'webm'}, # webm (v-a) - {value => 37, format => 'mp4'}, # mp4 (v-a) - {value => 301, format => 'mp4', live => 1}, # mp4 (live) (v-a) - {value => 96, format => 'ts', live => 1}, # ts (live) (v-a) + '1080' => [{value => 303, format => 'webm', split => 1, hfr => 1}, # webm HFR (v) + {value => 299, format => 'mp4', split => 1, hfr => 1}, # mp4 HFR (v) + {value => 248, format => 'webm', split => 1}, # webm (v) + {value => 137, format => 'mp4', split => 1}, # mp4 (v) + {value => 399, format => 'av1', split => 1, hfr => 1}, # av1 (v) + {value => 46, format => 'webm'}, # webm (v-a) + {value => 37, format => 'mp4'}, # mp4 (v-a) + {value => 301, format => 'mp4', live => 1}, # mp4 (live) (v-a) + {value => 96, format => 'ts', live => 1}, # ts (live) (v-a) ], - '720' => [{value => 302, format => 'webm', dash => 1, hfr => 1}, # webm HFR (v) - {value => 298, format => 'mp4', dash => 1, hfr => 1}, # mp4 HFR (v) - {value => 247, format => 'webm', dash => 1}, # webm (v) - {value => 136, format => 'mp4', dash => 1}, # mp4 (v) - {value => 398, format => 'av1', dash => 1, hfr => 1}, # av1 (v) - {value => 45, format => 'webm'}, # webm (v-a) - {value => 22, format => 'mp4'}, # mp4 (v-a) - {value => 300, format => 'mp4', live => 1}, # mp4 (live) (v-a) - {value => 120, format => 'flv', live => 1}, # flv (live) (v-a) - {value => 95, format => 'ts', live => 1}, # ts (live) (v-a) + '720' => [{value => 302, format => 'webm', split => 1, hfr => 1}, # webm HFR (v) + {value => 298, format => 'mp4', split => 1, hfr => 1}, # mp4 HFR (v) + {value => 247, format => 'webm', split => 1}, # webm (v) + {value => 136, format => 'mp4', split => 1}, # mp4 (v) + {value => 398, format => 'av1', split => 1, hfr => 1}, # av1 (v) + {value => 45, format => 'webm'}, # webm (v-a) + {value => 22, format => 'mp4'}, # mp4 (v-a) + {value => 300, format => 'mp4', live => 1}, # mp4 
(live) (v-a) + {value => 120, format => 'flv', live => 1}, # flv (live) (v-a) + {value => 95, format => 'ts', live => 1}, # ts (live) (v-a) ], - '480' => [{value => 244, format => 'webm', dash => 1}, # webm (v) - {value => 135, format => 'mp4', dash => 1}, # mp4 (v) - {value => 397, format => 'av1', dash => 1}, # av1 (v) - {value => 44, format => 'webm'}, # webm (v-a) - {value => 35, format => 'flv'}, # flv (v-a) - {value => 94, format => 'mp4', live => 1}, # mp4 (live) (v-a) + '480' => [{value => 244, format => 'webm', split => 1}, # webm (v) + {value => 135, format => 'mp4', split => 1}, # mp4 (v) + {value => 397, format => 'av1', split => 1}, # av1 (v) + {value => 44, format => 'webm'}, # webm (v-a) + {value => 35, format => 'flv'}, # flv (v-a) + {value => 94, format => 'mp4', live => 1}, # mp4 (live) (v-a) ], - '360' => [{value => 243, format => 'webm', dash => 1}, # webm (v) - {value => 134, format => 'mp4', dash => 1}, # mp4 (v) - {value => 396, format => 'av1', dash => 1}, # av1 (v) - {value => 43, format => 'webm'}, # webm (v-a) - {value => 34, format => 'flv'}, # flv (v-a) - {value => 93, format => 'mp4', live => 1}, # mp4 (live) (v-a) - {value => 18, format => 'mp4'}, # mp4 (v-a) + '360' => [{value => 243, format => 'webm', split => 1}, # webm (v) + {value => 134, format => 'mp4', split => 1}, # mp4 (v) + {value => 396, format => 'av1', split => 1}, # av1 (v) + {value => 43, format => 'webm'}, # webm (v-a) + {value => 34, format => 'flv'}, # flv (v-a) + {value => 93, format => 'mp4', live => 1}, # mp4 (live) (v-a) + {value => 18, format => 'mp4'}, # mp4 (v-a) ], - '240' => [{value => 242, format => 'webm', dash => 1}, # webm (v) - {value => 133, format => 'mp4', dash => 1}, # mp4 (v) - {value => 395, format => 'av1', dash => 1}, # av1 (v) - {value => 6, format => 'flv'}, # flv (270p) (v-a) - {value => 5, format => 'flv'}, # flv (v-a) - {value => 36, format => '3gp'}, # 3gp (v-a) - {value => 13, format => '3gp'}, # 3gp (v-a) - {value => 92, format => 
'mp4', live => 1}, # mp4 (live) (v-a) - {value => 132, format => 'ts', live => 1}, # ts (live) (v-a) + '240' => [{value => 242, format => 'webm', split => 1}, # webm (v) + {value => 133, format => 'mp4', split => 1}, # mp4 (v) + {value => 395, format => 'av1', split => 1}, # av1 (v) + {value => 6, format => 'flv'}, # flv (270p) (v-a) + {value => 5, format => 'flv'}, # flv (v-a) + {value => 36, format => '3gp'}, # 3gp (v-a) + {value => 13, format => '3gp'}, # 3gp (v-a) + {value => 92, format => 'mp4', live => 1}, # mp4 (live) (v-a) + {value => 132, format => 'ts', live => 1}, # ts (live) (v-a) ], - '144' => [{value => 278, format => 'webm', dash => 1}, # webm (v) - {value => 160, format => 'mp4', dash => 1}, # mp4 (v) - {value => 394, format => 'av1', dash => 1}, # av1 (v) - {value => 17, format => '3gp'}, # 3gp (v-a) - {value => 91, format => 'mp4'}, # mp4 (live) (v-a) - {value => 151, format => 'ts'}, # ts (live) (v-a) + '144' => [{value => 278, format => 'webm', split => 1}, # webm (v) + {value => 160, format => 'mp4', split => 1}, # mp4 (v) + {value => 394, format => 'av1', split => 1}, # av1 (v) + {value => 17, format => '3gp'}, # 3gp (v-a) + {value => 91, format => 'mp4'}, # mp4 (live) (v-a) + {value => 151, format => 'ts'}, # ts (live) (v-a) ], - 'audio' => [{value => 172, format => 'webm', kbps => 192}, # webm (192 kbps) - {value => 251, format => 'opus', kbps => 160}, # webm opus (128-160 kbps) - {value => 171, format => 'webm', kbps => 128}, # webm vorbis (92-128 kbps) - {value => 140, format => 'm4a', kbps => 128}, # mp4a (128 kbps) - {value => 141, format => 'm4a', kbps => 256}, # mp4a (256 kbps) - {value => 250, format => 'opus', kbps => 64}, # webm opus (64 kbps) - {value => 249, format => 'opus', kbps => 48}, # webm opus (48 kbps) - {value => 139, format => 'm4a', kbps => 48}, # mp4a (48 kbps) + 'audio' => [{value => 172, format => 'webm', kbps => 192}, # webm (192 kbps) + {value => 251, format => 'opus', kbps => 160}, # webm opus (128-160 kbps) + 
{value => 171, format => 'webm', kbps => 128}, # webm vorbis (92-128 kbps) + {value => 140, format => 'm4a', kbps => 128}, # mp4a (128 kbps) + {value => 141, format => 'm4a', kbps => 256}, # mp4a (256 kbps) + {value => 250, format => 'opus', kbps => 64}, # webm opus (64 kbps) + {value => 249, format => 'opus', kbps => 48}, # webm opus (48 kbps) + {value => 139, format => 'm4a', kbps => 48}, # mp4a (48 kbps) ], }; } @@ -176,12 +176,19 @@ sub _find_streaming_url { $args{ignore_av1} && next; # ignore videos in AV1 format } - if ($itag->{dash}) { + # Ignored video projections + if (ref($args{ignored_projections}) eq 'ARRAY') { + if (grep { lc($entry->{projectionType} // '') eq lc($_) } @{$args{ignored_projections}}) { + next; + } + } + + if ($itag->{split}) { - $args{dash} || next; + $args{split} || next; my $video_info = $stream->{$itag->{value}}; - my $audio_info = $self->_find_streaming_url(%args, resolution => 'audio', dash => 0); + my $audio_info = $self->_find_streaming_url(%args, resolution => 'audio', split => 0); if (defined($audio_info)) { $video_info->{__AUDIO__} = $audio_info; @@ -191,14 +198,14 @@ sub _find_streaming_url { next; } - if ($resolution eq 'audio' and not $args{dash_mp4_audio}) { - if ($itag->{format} eq 'm4a') { - next; # skip m4a audio URLs + if ($resolution eq 'audio' and $args{prefer_m4a}) { + if ($itag->{format} ne 'm4a') { + next; # skip non-M4A audio URLs } } # Ignore segmented DASH URLs (they load pretty slow in mpv) - if (not $args{dash_segmented}) { + if (not $args{dash}) { next if ($entry->{url} =~ m{/api/manifest/dash/}); } @@ -215,9 +222,12 @@ Return the streaming URL which corresponds with the specified resolution. 
( urls => \@streaming_urls, resolution => 'resolution_name', # from $obj->get_resolutions(), - dash => 1/0, # include or exclude DASH itags - dash_mp4_audio => 1/0, # include or exclude DASH videos with MP4 audio - dash_segmented => 1/0, # include or exclude segmented DASH videos + + hfr => 1/0, # include or exclude High Frame Rate videos + ignore_av1 => 1/0, # true to ignore videos in AV1 format + split => 1/0, # include or exclude split videos + m4a_audio => 1/0, # incldue or exclude M4A audio files + dash => 1/0, # include or exclude streams in DASH format ) =cut @@ -253,11 +263,50 @@ sub find_streaming_url { $found_resolution = $resolution; } - # Otherwise, find the best resolution available - if (not defined $streaming) { + state $resolutions = $self->get_resolutions(); + + # Find the nearest available resolution + if (defined($resolution) and not defined($streaming)) { + + my $end = $#{$resolutions} - 1; # -1 to ignore 'audio' + + foreach my $i (0 .. $end) { + if ($resolutions->[$i] eq $resolution) { + for (my $k = 1 ; ; ++$k) { + + if ($i + $k > $end and $i - $k < 0) { + last; + } + + if ($i + $k <= $end) { # nearest below - state $resolutions = $self->get_resolutions(); + my $res = $resolutions->[$i + $k]; + $streaming = $self->_find_streaming_url(%args, resolution => $res); + if (defined($streaming)) { + $found_resolution = $res; + last; + } + } + + if ($i - $k >= 0) { # nearest above + + my $res = $resolutions->[$i - $k]; + $streaming = $self->_find_streaming_url(%args, resolution => $res); + + if (defined($streaming)) { + $found_resolution = $res; + last; + } + } + } + last; + } + } + } + + # Otherwise, find the best resolution available + if (not defined $streaming) { foreach my $res (@{$resolutions}) { $streaming = $self->_find_streaming_url(%args, resolution => $res); diff --git a/lib/WWW/FairViewer/ParseJSON.pm b/lib/WWW/FairViewer/ParseJSON.pm index 4945a2a..6733eb0 100644 --- a/lib/WWW/FairViewer/ParseJSON.pm +++ b/lib/WWW/FairViewer/ParseJSON.pm 
@@ -23,6 +23,18 @@ Parse a JSON string and return a HASH ref. =cut +sub parse_utf8_json_string { + my ($self, $json) = @_; + + if (not defined($json) or $json eq '') { + return {}; + } + + require JSON; + my $hash = eval { JSON::from_json($json) }; + return $@ ? do { warn "[JSON]: $@\n"; {} } : $hash; +} + sub parse_json_string { my ($self, $json) = @_; diff --git a/lib/WWW/FairViewer/ParseXML.pm b/lib/WWW/FairViewer/ParseXML.pm index 733c2bc..9c4fa04 100644 --- a/lib/WWW/FairViewer/ParseXML.pm +++ b/lib/WWW/FairViewer/ParseXML.pm @@ -287,8 +287,11 @@ sub xml2hash { =head1 AUTHOR +Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >> + Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >> + =head1 SUPPORT You can find documentation for this module with the perldoc command. diff --git a/lib/WWW/FairViewer/PlaylistItems.pm b/lib/WWW/FairViewer/PlaylistItems.pm index 090a4b3..5555265 100644 --- a/lib/WWW/FairViewer/PlaylistItems.pm +++ b/lib/WWW/FairViewer/PlaylistItems.pm @@ -80,7 +80,13 @@ Get videos from a specific playlistID. sub videos_from_playlist_id { my ($self, $id) = @_; - $self->_get_results($self->_make_feed_url("playlists/$id")); + + if (my $results = $self->yt_playlist_videos($id)) { + return $results; + } + + my $url = $self->_make_feed_url("playlists/$id"); + $self->_get_results($url); } =head2 favorites($channel_id) diff --git a/lib/WWW/FairViewer/Playlists.pm b/lib/WWW/FairViewer/Playlists.pm index 01277c0..4294352 100644 --- a/lib/WWW/FairViewer/Playlists.pm +++ b/lib/WWW/FairViewer/Playlists.pm @@ -6,7 +6,7 @@ use warnings; =head1 NAME -WWW::FairViewer::Playlists - Fair playlists handle. +WWW::FairViewer::Playlists - YouTube playlists related mehods. 
=head1 SYNOPSIS @@ -25,7 +25,7 @@ sub _make_playlists_url { $opts{'part'} = 'snippet,contentDetails'; } - $self->_make_feed_url('playlists', %opts); + $self->_make_feed_url('playlists', %opts,); } sub get_playlist_id { @@ -60,7 +60,13 @@ Get and return playlists from a channel ID. sub playlists { my ($self, $channel_id) = @_; - $self->_get_results($self->_make_feed_url("channels/playlists/$channel_id")); + + if (my $results = $self->yt_channel_playlists($channel_id)) { + return $results; + } + + my $url = $self->_make_feed_url("channels/playlists/$channel_id"); + $self->_get_results($url); } =head2 playlists_from_username($username) diff --git a/lib/WWW/FairViewer/Search.pm b/lib/WWW/FairViewer/Search.pm index 7242637..68f4521 100644 --- a/lib/WWW/FairViewer/Search.pm +++ b/lib/WWW/FairViewer/Search.pm @@ -6,7 +6,7 @@ use warnings; =head1 NAME -WWW::FairViewer::Search - Search functions for Fair API v3 +WWW::FairViewer::Search - Search for stuff on YouTube =head1 SYNOPSIS @@ -85,16 +85,26 @@ sub search_for { # Search in a channel's videos if (defined(my $channel_id = $self->get_channelId)) { - my $url = $self->_make_feed_url("channels/search/$channel_id", q => $keywords,); + + $self->set_channelId(); # clear the channel ID + + if (my $results = $self->yt_channel_search($channel_id, q => $keywords, type => $type, %$args)) { + return $results; + } + + my $url = $self->_make_feed_url("channels/search/$channel_id", q => $keywords); return $self->_get_results($url); } + if (my $results = $self->yt_search(q => $keywords, type => $type, %$args)) { + return $results; + } + my $url = $self->_make_search_url( type => $type, q => $keywords, - %$args, + %$args ); - return $self->_get_results($url); } @@ -161,15 +171,12 @@ be set to a YouTube video ID. 
sub related_to_videoID { my ($self, $videoID) = @_; - my %info = $self->_get_video_info($videoID); - my $watch_next_response = $self->parse_json_string($info{watch_next_response}); + my $watch_next_response = $self->parse_json_string($self->_get_video_next_info($videoID) // return {results => []}); + my $related = eval { $watch_next_response->{contents}{twoColumnWatchNextResults}{secondaryResults}{secondaryResults}{results} } // return {results => []}; - #use Data::Dump qw(pp); - #pp $related; - my @results; foreach my $entry (@$related) { diff --git a/lib/WWW/FairViewer/Utils.pm b/lib/WWW/FairViewer/Utils.pm index 8cdcce3..1076af2 100644 --- a/lib/WWW/FairViewer/Utils.pm +++ b/lib/WWW/FairViewer/Utils.pm @@ -96,6 +96,9 @@ Returns time from seconds. sub format_time { my ($self, $sec) = @_; + + $sec //= 0; + $sec >= 3600 ? join q{:}, map { sprintf '%02d', $_ } $sec / 3600 % 24, $sec / 60 % 60, $sec % 60 : join q{:}, map { sprintf '%02d', $_ } $sec / 60 % 60, $sec % 60; @@ -133,6 +136,8 @@ Return string "04 May 2010" from "2010-05-04T00:25:55.000Z" sub format_date { my ($self, $date) = @_; + $date // return undef; + # 2010-05-04T00:25:55.000Z # to: 04 May 2010 @@ -158,6 +163,8 @@ Return the (approximated) age for a given date of the form "2010-05-04T00:25:55. 
sub date_to_age { my ($self, $date) = @_; + $date // return undef; + $date =~ m{^ (?<year>\d{4}) - @@ -177,6 +184,21 @@ sub date_to_age { $year += 1900; $month += 1; + my %month_days = ( + 1 => 31, + 2 => 28, + 3 => 31, + 4 => 30, + 5 => 31, + 6 => 30, + 7 => 31, + 8 => 31, + 9 => 30, + 10 => 31, + 11 => 30, + 12 => 31, + ); + my $lambda = sub { if ($year == $+{year}) { @@ -192,6 +214,14 @@ sub date_to_age { } return join(' ', $day - $+{day}, 'days'); } + + if ($month - $+{month} == 1) { + my $day_diff = $+{day} - $day; + if ($day_diff > 0 and $day_diff < $month_days{$+{month} + 0}) { + return join(' ', $month_days{$+{month} + 0} - $day_diff, 'days'); + } + } + return join(' ', $month - $+{month}, 'months'); } @@ -227,7 +257,7 @@ sub has_entries { if (ref($result->{results}) eq 'HASH') { - foreach my $type (qw(comments videos playlists)) { + foreach my $type (qw(comments videos playlists entries)) { if (exists $result->{results}{$type}) { ref($result->{results}{$type}) eq 'ARRAY' or return 0; return (@{$result->{results}{$type}} > 0); @@ -252,15 +282,21 @@ sub has_entries { return 1; # maybe? } -=head2 normalize_video_title($title, $fat32safe) +=head2 normalize_filename($title, $fat32safe) Replace file-unsafe characters and trim spaces. 
=cut -sub normalize_video_title { +sub normalize_filename { my ($self, $title, $fat32safe) = @_; + state $unix_like = $^O =~ /^(?:linux|freebsd|openbsd)\z/i; + + if (not $fat32safe and not $unix_like) { + $fat32safe = 1; + } + if ($fat32safe) { $title =~ s/: / - /g; $title =~ tr{:"*/?\\|}{;'+%!%%}; # " @@ -270,7 +306,9 @@ sub normalize_video_title { $title =~ tr{/}{%}; } - join(q{ }, split(q{ }, $title)); + my $basename = join(q{ }, split(q{ }, $title)); + $basename = substr($basename, 0, 200); # make sure the filename is not too long + return $basename; } =head2 format_text(%opt) @@ -299,20 +337,32 @@ sub format_text { my $fat32safe = $opt{fat32safe}; my %special_tokens = ( - ID => sub { $self->get_video_id($info) }, - AUTHOR => sub { $self->get_channel_title($info) }, - CHANNELID => sub { $self->get_channel_id($info) }, - DEFINITION => sub { $self->get_definition($info) }, - DIMENSION => sub { $self->get_dimension($info) }, + ID => sub { $self->get_video_id($info) }, + AUTHOR => sub { $self->get_channel_title($info) }, + CHANNELID => sub { $self->get_channel_id($info) }, + DEFINITION => sub { $self->get_definition($info) }, + DIMENSION => sub { $self->get_dimension($info) }, + VIEWS => sub { $self->get_views($info) }, VIEWS_SHORT => sub { $self->get_views_approx($info) }, - LIKES => sub { $self->get_likes($info) }, - DISLIKES => sub { $self->get_dislikes($info) }, + + VIDEOS => sub { $self->set_thousands($self->get_channel_video_count($info)) }, + VIDEOS_SHORT => sub { $self->short_human_number($self->get_channel_video_count($info)) }, + + SUBS => sub { $self->get_channel_subscriber_count($info) }, + SUBS_SHORT => sub { $self->short_human_number($self->get_channel_subscriber_count($info)) }, + + ITEMS => sub { $self->set_thousands($self->get_playlist_item_count($info)) }, + ITEMS_SHORT => sub { $self->short_human_number($self->get_playlist_item_count($info)) }, + + LIKES => sub { $self->get_likes($info) }, + DISLIKES => sub { $self->get_dislikes($info) }, + 
COMMENTS => sub { $self->get_comments($info) }, DURATION => sub { $self->get_duration($info) }, TIME => sub { $self->get_time($info) }, TITLE => sub { $self->get_title($info) }, - FTITLE => sub { $self->normalize_video_title($self->get_title($info), $fat32safe) }, + FTITLE => sub { $self->normalize_filename($self->get_title($info), $fat32safe) }, CAPTION => sub { $self->get_caption($info) }, PUBLISHED => sub { $self->get_publication_date($info) }, AGE => sub { $self->get_publication_age($info) }, @@ -386,8 +436,8 @@ sub format_text { $text =~ s/$escapes_re/$special_escapes{$1}/g; $escape - ? $text =~ s/$tokens_re/\Q${\$special_tokens{$1}()}\E/gr - : $text =~ s/$tokens_re/${\$special_tokens{$1}()}/gr; + ? $text =~ s<$tokens_re><\Q${\($special_tokens{$1}() // '')}\E>gr + : $text =~ s<$tokens_re><${\($special_tokens{$1}() // '')}>gr; } =head2 set_thousands($num) @@ -487,13 +537,112 @@ sub get_description { $desc = HTML::Entities::decode_entities($desc); $desc =~ s/^\s+//; - if (not $desc =~ /\S/) { + if (not $desc =~ /\S/ or length($desc) < length($info->{description} // '')) { $desc = $info->{description} // ''; } ($desc =~ /\S/) ? 
$desc : 'No description available...'; } +sub read_lines_from_file { + my ($self, $file, $mode) = @_; + + $mode //= '<'; + + open(my $fh, $mode, $file) or return; + chomp(my @lines = <$fh>); + close $fh; + + my %seen; + + # Keep the most recent ones + @lines = reverse(@lines); + @lines = grep { !$seen{$_}++ } @lines; + + return @lines; +} + +sub read_channels_from_file { + my ($self, $file, $mode) = @_; + + $mode //= '<:utf8'; + + # Read channels and remove duplicates + my %channels = map { split(/ /, $_, 2) } $self->read_lines_from_file($file, $mode); + + # Filter valid channels and pair with channel ID with title + my @channels = map { [$_, $channels{$_}] } grep { defined($channels{$_}) } keys %channels; + + # Sort channels by channel name + @channels = sort { CORE::fc($a->[1]) cmp CORE::fc($b->[1]) } @channels; + + return @channels; +} + +sub get_local_playlist_filenames { + my ($self, $dir) = @_; + require Encode; + grep { -f $_ } sort { CORE::fc($a) cmp CORE::fc($b) } map { Encode::decode_utf8($_) } glob("$dir/*.dat"); +} + +sub make_local_playlist_filename { + my ($self, $title, $playlistID) = @_; + my $basename = $title . ' -- ' . $playlistID . '.txt'; + $basename = $self->normalize_filename($basename); + return $basename; +} + +sub local_playlist_snippet { + my ($self, $id) = @_; + + require File::Basename; + my $title = File::Basename::basename($id); + + $title =~ s/\.dat\z//; + $title =~ s/ -- PL[-\w]+\z//; + $title =~ s/_/ /g; + $title = ucfirst($title); + + require Storable; + my $entries = eval { Storable::retrieve($id) } // []; + + if (ref($entries) ne 'ARRAY') { + $entries = []; + } + + my $video_count = 0; + my $video_id = undef; + + if (@$entries) { + $video_id = $self->get_video_id($entries->[0]); + $video_count = scalar(@$entries); + } + + scalar { + author => "local", + authorId => "local", + description => $title, + playlistId => $id, + playlistThumbnail => (defined($video_id) ? 
"https://i.ytimg.com/vi/$video_id/mqdefault.jpg" : undef), + title => $title, + type => "playlist", + videoCount => $video_count, + }; +} + +sub local_channel_snippet { + my ($self, $id, $title) = @_; + + scalar { + author => $title, + authorId => $id, + type => "channel", + description => "<local channel>", + subCount => undef, + videoCount => undef, + }; +} + =head2 get_title($info) Get title. @@ -545,7 +694,7 @@ sub get_thumbnail_url { $url = eval { $wanted[0]{url} } // return ''; } else { - warn "[!] Couldn't find thumbnail of type <<$type>>..."; + ## warn "[!] Couldn't find thumbnail of type <<$type>>..."; $url = eval { $thumbs[0]{url} } // return ''; } @@ -559,7 +708,7 @@ sub get_channel_title { my ($self, $info) = @_; #$info->{snippet}{channelTitle} || $self->get_channel_id($info); - $info->{author}; + $info->{author} // $info->{title}; } sub get_author { @@ -572,6 +721,31 @@ sub get_comment_id { $info->{commentId}; } +sub get_video_count { + my ($self, $info) = @_; + $info->{videoCount} // 0; +} + +sub get_subscriber_count { + my ($self, $info) = @_; + $info->{subCount} // 0; +} + +sub get_channel_subscriber_count { + my ($self, $info) = @_; + $info->{subCount} // 0; +} + +sub get_channel_video_count { + my ($self, $info) = @_; + $info->{videoCount} // 0; +} + +sub get_playlist_item_count { + my ($self, $info) = @_; + $info->{videoCount} // 0; +} + sub get_comment_content { my ($self, $info) = @_; $info->{content}; @@ -579,24 +753,23 @@ sub get_comment_content { sub get_id { my ($self, $info) = @_; - - #$info->{id}; $info->{videoId}; } -sub get_channel_id { +sub get_rating { my ($self, $info) = @_; + my $rating = $info->{rating} // return; + sprintf('%.2f', $rating); +} - #$info->{snippet}{resourceId}{channelId} // $info->{snippet}{channelId}; +sub get_channel_id { + my ($self, $info) = @_; $info->{authorId}; } sub get_category_id { my ($self, $info) = @_; - - #$info->{snippet}{resourceId}{categoryId} // $info->{snippet}{categoryId}; - #"unknown"; - 
$info->{genre} // 'Unknown'; + $info->{genre} // $info->{category} // 'Unknown'; } sub get_category_name { @@ -620,9 +793,7 @@ sub get_category_name { 29 => 'Nonprofits & Activism', }; - #$categories->{$self->get_category_id($info) // ''} // 'Unknown'; - - $info->{genre} // 'Unknown'; + $info->{genre} // $info->{category} // 'Unknown'; } sub get_publication_date { @@ -635,8 +806,80 @@ sub get_publication_date { require Encode; require Time::Piece; - my $time = Time::Piece->new($info->{published}); - Encode::decode_utf8($time->strftime("%d %B %Y")); + my $time; + + if (defined($info->{published})) { + $time = eval { Time::Piece->new($info->{published}) }; + } + elsif (defined($info->{publishDate})) { + if ($info->{publishDate} =~ /^[0-9]+\z/) { # time given as "%yyyy%mm%dd" (from hypervideo) + $time = eval { Time::Piece->strptime($info->{publishDate}, '%Y%m%d') }; + } + else { + $time = eval { Time::Piece->strptime($info->{publishDate}, '%Y-%m-%d') }; + } + } + + defined($time) ? Encode::decode_utf8($time->strftime("%d %B %Y")) : undef; +} + +sub get_publication_time { + my ($self, $info) = @_; + + require Time::Piece; + require Time::Seconds; + + if ($self->get_time($info) eq 'LIVE') { + my $time = $info->{timestamp} // Time::Piece->new(); + + if (ref($time) eq 'ARRAY') { + $time = bless($time, "Time::Piece"); + } + + return $time; + } + + if (defined($info->{publishedText})) { + + my $age = $info->{publishedText}; + my $t = $info->{timestamp} // Time::Piece->new(); + + if (ref($t) eq 'ARRAY') { + $t = bless($t, "Time::Piece"); + } + + if ($age =~ /^(\d+) sec/) { + $t -= $1; + } + + if ($age =~ /^(\d+) min/) { + $t -= $1 * Time::Seconds::ONE_MINUTE(); + } + + if ($age =~ /^(\d+) hour/) { + $t -= $1 * Time::Seconds::ONE_HOUR(); + } + + if ($age =~ /^(\d+) day/) { + $t -= $1 * Time::Seconds::ONE_DAY(); + } + + if ($age =~ /^(\d+) week/) { + $t -= $1 * Time::Seconds::ONE_WEEK(); + } + + if ($age =~ /^(\d+) month/) { + $t -= $1 * Time::Seconds::ONE_MONTH(); + } + + if 
($age =~ /^(\d+) year/) { + $t -= $1 * Time::Seconds::ONE_YEAR(); + } + + return $t; + } + + return $self->get_publication_date($info); # should not happen } sub get_publication_age { @@ -674,22 +917,17 @@ sub get_publication_age_approx { sub get_duration { my ($self, $info) = @_; - - #$self->format_duration($info->{contentDetails}{duration}); - #$self->format_duration($info->{lengthSeconds}); $info->{lengthSeconds}; } sub get_time { my ($self, $info) = @_; - if ($info->{liveNow}) { + if ($info->{liveNow} and ($self->get_duration($info) || 0) == 0) { return 'LIVE'; } $self->format_time($self->get_duration($info)); - - #$self->format_time($self->get_duration($info)); } sub get_definition { @@ -721,39 +959,44 @@ sub get_views { $info->{viewCount} // 0; } -sub get_views_approx { - my ($self, $info) = @_; - my $views = $self->get_views($info); +sub short_human_number { + my ($self, $int) = @_; - if ($views < 1000) { - return $views; + if ($int < 1000) { + return $int; } - if ($views >= 10 * 1e9) { # ten billions - return sprintf("%dB", int($views / 1e9)); + if ($int >= 10 * 1e9) { # ten billions + return sprintf("%dB", int($int / 1e9)); } - if ($views >= 1e9) { # billions - return sprintf("%.2gB", $views / 1e9); + if ($int >= 1e9) { # billions + return sprintf("%.2gB", $int / 1e9); } - if ($views >= 10 * 1e6) { # ten millions - return sprintf("%dM", int($views / 1e6)); + if ($int >= 10 * 1e6) { # ten millions + return sprintf("%dM", int($int / 1e6)); } - if ($views >= 1e6) { # millions - return sprintf("%.2gM", $views / 1e6); + if ($int >= 1e6) { # millions + return sprintf("%.2gM", $int / 1e6); } - if ($views >= 10 * 1e3) { # ten thousands - return sprintf("%dK", int($views / 1e3)); + if ($int >= 10 * 1e3) { # ten thousands + return sprintf("%dK", int($int / 1e3)); } - if ($views >= 1e3) { # thousands - return sprintf("%.2gK", $views / 1e3); + if ($int >= 1e3) { # thousands + return sprintf("%.2gK", $int / 1e3); } - return $views; + return $int; +} + +sub 
get_views_approx { + my ($self, $info) = @_; + my $views = $self->get_views($info); + $self->short_human_number($views); } sub get_likes { diff --git a/lib/WWW/FairViewer/Videos.pm b/lib/WWW/FairViewer/Videos.pm index 4acd866..72ad523 100644 --- a/lib/WWW/FairViewer/Videos.pm +++ b/lib/WWW/FairViewer/Videos.pm @@ -149,7 +149,7 @@ When C<$part> is C<undef>, it defaults to I<snippet>. =cut -sub video_details { +sub _invidious_video_details { my ($self, $id, $fields) = @_; $fields //= $self->basic_video_info_fields; @@ -159,24 +159,92 @@ sub video_details { return $info; } + return; +} + +sub _ytdl_video_details { + my ($self, $id) = @_; + $self->_info_from_ytdl($id); +} + +sub _fallback_video_details { + my ($self, $id, $fields) = @_; + + if ($self->get_debug) { + say STDERR ":: Extracting video info with hypervideo..."; + } + + my $info = $self->_ytdl_video_details($id); + + if (defined($info) and ref($info) eq 'HASH') { + return scalar { + + title => $info->{fulltitle} // $info->{title}, + videoId => $id, + + videoThumbnails => [ + map { + scalar { + quality => 'medium', + url => $_->{url}, + width => $_->{width}, + height => $_->{height}, + } + } @{$info->{thumbnails}} + ], + + liveNow => ($info->{is_live} ? 
1 : 0), + description => $info->{description}, + lengthSeconds => $info->{duration}, + + likeCount => $info->{like_count}, + dislikeCount => $info->{dislike_count}, + + category => eval { $info->{categories}[0] } // $info->{category}, + publishDate => $info->{upload_date}, + + keywords => $info->{tags}, + viewCount => $info->{view_count}, + + author => $info->{channel}, + authorId => $info->{channel_id} // $info->{uploader_id}, + rating => $info->{average_rating}, + }; + } + else { + #$info = $self->_invidious_video_details($id, $fields); # too slow + } + + return {}; +} + +sub video_details { + my ($self, $id, $fields) = @_; + if ($self->get_debug) { say STDERR ":: Extracting video info using the fallback method..."; } - # Fallback using the `get_video_info` URL my %video_info = $self->_get_video_info($id); - my $video = $self->parse_json_string($video_info{player_response} // return); + my $video = $self->parse_json_string($video_info{player_response} // return $self->_fallback_video_details($id, $fields)); + + my $videoDetails = {}; + my $microformat = {}; if (exists $video->{videoDetails}) { - $video = $video->{videoDetails}; + $videoDetails = $video->{videoDetails}; } else { - return; + return $self->_fallback_video_details($id, $fields); + } + + if (exists $video->{microformat}) { + $microformat = eval { $video->{microformat}{playerMicroformatRenderer} } // {}; } my %details = ( - title => $video->{title}, - videoId => $video->{videoId}, + title => eval { $microformat->{title}{simpleText} } // $videoDetails->{title}, + videoId => $videoDetails->{videoId}, videoThumbnails => [ map { @@ -186,19 +254,22 @@ sub video_details { width => $_->{width}, height => $_->{height}, } - } @{$video->{thumbnail}{thumbnails}} + } @{$videoDetails->{thumbnail}{thumbnails}} ], - liveNow => $video->{isLiveContent}, - description => $video->{shortDescription}, - lengthSeconds => $video->{lengthSeconds}, + liveNow => ($videoDetails->{isLiveContent} || 
(($videoDetails->{lengthSeconds} || 0) == 0)), + description => eval { $microformat->{description}{simpleText} } // $videoDetails->{shortDescription}, + lengthSeconds => $videoDetails->{lengthSeconds} // $microformat->{lengthSeconds}, - keywords => $video->{keywords}, - viewCount => $video->{viewCount}, + category => $microformat->{category}, + publishDate => $microformat->{publishDate}, - author => $video->{author}, - authorId => $video->{channelId}, - rating => $video->{averageRating}, + keywords => $videoDetails->{keywords}, + viewCount => $videoDetails->{viewCount} // $microformat->{viewCount}, + + author => $videoDetails->{author} // $microformat->{ownerChannelName}, + authorId => $videoDetails->{channelId} // $microformat->{externalChannelId}, + rating => $videoDetails->{averageRating}, ); return \%details; @@ -218,6 +289,8 @@ with a HASH ref for each result. An example of the item array's content are show =head1 AUTHOR +Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >> + Jesus, C<< <echo aGVja3llbEBoeXBlcmJvbGEuaW5mbw== | base64 -d> >> |