diff options
Diffstat (limited to 'lib/WWW/FairViewer/Utils.pm')
-rw-r--r-- | lib/WWW/FairViewer/Utils.pm | 351 |
1 files changed, 297 insertions, 54 deletions
diff --git a/lib/WWW/FairViewer/Utils.pm b/lib/WWW/FairViewer/Utils.pm index 8cdcce3..1076af2 100644 --- a/lib/WWW/FairViewer/Utils.pm +++ b/lib/WWW/FairViewer/Utils.pm @@ -96,6 +96,9 @@ Returns time from seconds. sub format_time { my ($self, $sec) = @_; + + $sec //= 0; + $sec >= 3600 ? join q{:}, map { sprintf '%02d', $_ } $sec / 3600 % 24, $sec / 60 % 60, $sec % 60 : join q{:}, map { sprintf '%02d', $_ } $sec / 60 % 60, $sec % 60; @@ -133,6 +136,8 @@ Return string "04 May 2010" from "2010-05-04T00:25:55.000Z" sub format_date { my ($self, $date) = @_; + $date // return undef; + # 2010-05-04T00:25:55.000Z # to: 04 May 2010 @@ -158,6 +163,8 @@ Return the (approximated) age for a given date of the form "2010-05-04T00:25:55. sub date_to_age { my ($self, $date) = @_; + $date // return undef; + $date =~ m{^ (?<year>\d{4}) - @@ -177,6 +184,21 @@ sub date_to_age { $year += 1900; $month += 1; + my %month_days = ( + 1 => 31, + 2 => 28, + 3 => 31, + 4 => 30, + 5 => 31, + 6 => 30, + 7 => 31, + 8 => 31, + 9 => 30, + 10 => 31, + 11 => 30, + 12 => 31, + ); + my $lambda = sub { if ($year == $+{year}) { @@ -192,6 +214,14 @@ sub date_to_age { } return join(' ', $day - $+{day}, 'days'); } + + if ($month - $+{month} == 1) { + my $day_diff = $+{day} - $day; + if ($day_diff > 0 and $day_diff < $month_days{$+{month} + 0}) { + return join(' ', $month_days{$+{month} + 0} - $day_diff, 'days'); + } + } + return join(' ', $month - $+{month}, 'months'); } @@ -227,7 +257,7 @@ sub has_entries { if (ref($result->{results}) eq 'HASH') { - foreach my $type (qw(comments videos playlists)) { + foreach my $type (qw(comments videos playlists entries)) { if (exists $result->{results}{$type}) { ref($result->{results}{$type}) eq 'ARRAY' or return 0; return (@{$result->{results}{$type}} > 0); @@ -252,15 +282,21 @@ sub has_entries { return 1; # maybe? } -=head2 normalize_video_title($title, $fat32safe) +=head2 normalize_filename($title, $fat32safe) Replace file-unsafe characters and trim spaces. =cut -sub normalize_video_title { +sub normalize_filename { my ($self, $title, $fat32safe) = @_; + state $unix_like = $^O =~ /^(?:linux|freebsd|openbsd)\z/i; + + if (not $fat32safe and not $unix_like) { + $fat32safe = 1; + } + if ($fat32safe) { $title =~ s/: / - /g; $title =~ tr{:"*/?\\|}{;'+%!%%}; # " @@ -270,7 +306,9 @@ sub normalize_video_title { $title =~ tr{/}{%}; } - join(q{ }, split(q{ }, $title)); + my $basename = join(q{ }, split(q{ }, $title)); + $basename = substr($basename, 0, 200); # make sure the filename is not too long + return $basename; } =head2 format_text(%opt) @@ -299,20 +337,32 @@ sub format_text { my $fat32safe = $opt{fat32safe}; my %special_tokens = ( - ID => sub { $self->get_video_id($info) }, - AUTHOR => sub { $self->get_channel_title($info) }, - CHANNELID => sub { $self->get_channel_id($info) }, - DEFINITION => sub { $self->get_definition($info) }, - DIMENSION => sub { $self->get_dimension($info) }, + ID => sub { $self->get_video_id($info) }, + AUTHOR => sub { $self->get_channel_title($info) }, + CHANNELID => sub { $self->get_channel_id($info) }, + DEFINITION => sub { $self->get_definition($info) }, + DIMENSION => sub { $self->get_dimension($info) }, + VIEWS => sub { $self->get_views($info) }, VIEWS_SHORT => sub { $self->get_views_approx($info) }, - LIKES => sub { $self->get_likes($info) }, - DISLIKES => sub { $self->get_dislikes($info) }, + + VIDEOS => sub { $self->set_thousands($self->get_channel_video_count($info)) }, + VIDEOS_SHORT => sub { $self->short_human_number($self->get_channel_video_count($info)) }, + + SUBS => sub { $self->get_channel_subscriber_count($info) }, + SUBS_SHORT => sub { $self->short_human_number($self->get_channel_subscriber_count($info)) }, + + ITEMS => sub { $self->set_thousands($self->get_playlist_item_count($info)) }, + ITEMS_SHORT => sub { $self->short_human_number($self->get_playlist_item_count($info)) }, + + LIKES => sub { $self->get_likes($info) }, + DISLIKES => sub { $self->get_dislikes($info) }, + COMMENTS => sub { $self->get_comments($info) }, DURATION => sub { $self->get_duration($info) }, TIME => sub { $self->get_time($info) }, TITLE => sub { $self->get_title($info) }, - FTITLE => sub { $self->normalize_video_title($self->get_title($info), $fat32safe) }, + FTITLE => sub { $self->normalize_filename($self->get_title($info), $fat32safe) }, CAPTION => sub { $self->get_caption($info) }, PUBLISHED => sub { $self->get_publication_date($info) }, AGE => sub { $self->get_publication_age($info) }, @@ -386,8 +436,8 @@ sub format_text { $text =~ s/$escapes_re/$special_escapes{$1}/g; $escape - ? $text =~ s/$tokens_re/\Q${\$special_tokens{$1}()}\E/gr - : $text =~ s/$tokens_re/${\$special_tokens{$1}()}/gr; + ? $text =~ s<$tokens_re><\Q${\($special_tokens{$1}() // '')}\E>gr + : $text =~ s<$tokens_re><${\($special_tokens{$1}() // '')}>gr; } =head2 set_thousands($num) @@ -487,13 +537,112 @@ sub get_description { $desc = HTML::Entities::decode_entities($desc); $desc =~ s/^\s+//; - if (not $desc =~ /\S/) { + if (not $desc =~ /\S/ or length($desc) < length($info->{description} // '')) { $desc = $info->{description} // ''; } ($desc =~ /\S/) ? $desc : 'No description available...'; } +sub read_lines_from_file { + my ($self, $file, $mode) = @_; + + $mode //= '<'; + + open(my $fh, $mode, $file) or return; + chomp(my @lines = <$fh>); + close $fh; + + my %seen; + + # Keep the most recent ones + @lines = reverse(@lines); + @lines = grep { !$seen{$_}++ } @lines; + + return @lines; +} + +sub read_channels_from_file { + my ($self, $file, $mode) = @_; + + $mode //= '<:utf8'; + + # Read channels and remove duplicates + my %channels = map { split(/ /, $_, 2) } $self->read_lines_from_file($file, $mode); + + # Filter valid channels and pair with channel ID with title + my @channels = map { [$_, $channels{$_}] } grep { defined($channels{$_}) } keys %channels; + + # Sort channels by channel name + @channels = sort { CORE::fc($a->[1]) cmp CORE::fc($b->[1]) } @channels; + + return @channels; +} + +sub get_local_playlist_filenames { + my ($self, $dir) = @_; + require Encode; + grep { -f $_ } sort { CORE::fc($a) cmp CORE::fc($b) } map { Encode::decode_utf8($_) } glob("$dir/*.dat"); +} + +sub make_local_playlist_filename { + my ($self, $title, $playlistID) = @_; + my $basename = $title . ' -- ' . $playlistID . '.txt'; + $basename = $self->normalize_filename($basename); + return $basename; +} + +sub local_playlist_snippet { + my ($self, $id) = @_; + + require File::Basename; + my $title = File::Basename::basename($id); + + $title =~ s/\.dat\z//; + $title =~ s/ -- PL[-\w]+\z//; + $title =~ s/_/ /g; + $title = ucfirst($title); + + require Storable; + my $entries = eval { Storable::retrieve($id) } // []; + + if (ref($entries) ne 'ARRAY') { + $entries = []; + } + + my $video_count = 0; + my $video_id = undef; + + if (@$entries) { + $video_id = $self->get_video_id($entries->[0]); + $video_count = scalar(@$entries); + } + + scalar { + author => "local", + authorId => "local", + description => $title, + playlistId => $id, + playlistThumbnail => (defined($video_id) ? "https://i.ytimg.com/vi/$video_id/mqdefault.jpg" : undef), + title => $title, + type => "playlist", + videoCount => $video_count, + }; +} + +sub local_channel_snippet { + my ($self, $id, $title) = @_; + + scalar { + author => $title, + authorId => $id, + type => "channel", + description => "<local channel>", + subCount => undef, + videoCount => undef, + }; +} + =head2 get_title($info) Get title. @@ -545,7 +694,7 @@ sub get_thumbnail_url { $url = eval { $wanted[0]{url} } // return ''; } else { - warn "[!] Couldn't find thumbnail of type <<$type>>..."; + ## warn "[!] Couldn't find thumbnail of type <<$type>>..."; $url = eval { $thumbs[0]{url} } // return ''; } @@ -559,7 +708,7 @@ sub get_channel_title { my ($self, $info) = @_; #$info->{snippet}{channelTitle} || $self->get_channel_id($info); - $info->{author}; + $info->{author} // $info->{title}; } sub get_author { @@ -572,6 +721,31 @@ sub get_comment_id { $info->{commentId}; } +sub get_video_count { + my ($self, $info) = @_; + $info->{videoCount} // 0; +} + +sub get_subscriber_count { + my ($self, $info) = @_; + $info->{subCount} // 0; +} + +sub get_channel_subscriber_count { + my ($self, $info) = @_; + $info->{subCount} // 0; +} + +sub get_channel_video_count { + my ($self, $info) = @_; + $info->{videoCount} // 0; +} + +sub get_playlist_item_count { + my ($self, $info) = @_; + $info->{videoCount} // 0; +} + sub get_comment_content { my ($self, $info) = @_; $info->{content}; @@ -579,24 +753,23 @@ sub get_comment_content { sub get_id { my ($self, $info) = @_; - - #$info->{id}; $info->{videoId}; } -sub get_channel_id { +sub get_rating { my ($self, $info) = @_; + my $rating = $info->{rating} // return; + sprintf('%.2f', $rating); +} - #$info->{snippet}{resourceId}{channelId} // $info->{snippet}{channelId}; +sub get_channel_id { + my ($self, $info) = @_; $info->{authorId}; } sub get_category_id { my ($self, $info) = @_; - - #$info->{snippet}{resourceId}{categoryId} // $info->{snippet}{categoryId}; - #"unknown"; - $info->{genre} // 'Unknown'; + $info->{genre} // $info->{category} // 'Unknown'; } sub get_category_name { @@ -620,9 +793,7 @@ sub get_category_name { 29 => 'Nonprofits & Activism', }; - #$categories->{$self->get_category_id($info) // ''} // 'Unknown'; - - $info->{genre} // 'Unknown'; + $info->{genre} // $info->{category} // 'Unknown'; } sub get_publication_date { @@ -635,8 +806,80 @@ sub get_publication_date { require Encode; require Time::Piece; - my $time = Time::Piece->new($info->{published}); - Encode::decode_utf8($time->strftime("%d %B %Y")); + my $time; + + if (defined($info->{published})) { + $time = eval { Time::Piece->new($info->{published}) }; + } + elsif (defined($info->{publishDate})) { + if ($info->{publishDate} =~ /^[0-9]+\z/) { # time given as "%yyyy%mm%dd" (from hypervideo) + $time = eval { Time::Piece->strptime($info->{publishDate}, '%Y%m%d') }; + } + else { + $time = eval { Time::Piece->strptime($info->{publishDate}, '%Y-%m-%d') }; + } + } + + defined($time) ? Encode::decode_utf8($time->strftime("%d %B %Y")) : undef; +} + +sub get_publication_time { + my ($self, $info) = @_; + + require Time::Piece; + require Time::Seconds; + + if ($self->get_time($info) eq 'LIVE') { + my $time = $info->{timestamp} // Time::Piece->new(); + + if (ref($time) eq 'ARRAY') { + $time = bless($time, "Time::Piece"); + } + + return $time; + } + + if (defined($info->{publishedText})) { + + my $age = $info->{publishedText}; + my $t = $info->{timestamp} // Time::Piece->new(); + + if (ref($t) eq 'ARRAY') { + $t = bless($t, "Time::Piece"); + } + + if ($age =~ /^(\d+) sec/) { + $t -= $1; + } + + if ($age =~ /^(\d+) min/) { + $t -= $1 * Time::Seconds::ONE_MINUTE(); + } + + if ($age =~ /^(\d+) hour/) { + $t -= $1 * Time::Seconds::ONE_HOUR(); + } + + if ($age =~ /^(\d+) day/) { + $t -= $1 * Time::Seconds::ONE_DAY(); + } + + if ($age =~ /^(\d+) week/) { + $t -= $1 * Time::Seconds::ONE_WEEK(); + } + + if ($age =~ /^(\d+) month/) { + $t -= $1 * Time::Seconds::ONE_MONTH(); + } + + if ($age =~ /^(\d+) year/) { + $t -= $1 * Time::Seconds::ONE_YEAR(); + } + + return $t; + } + + return $self->get_publication_date($info); # should not happen } sub get_publication_age { @@ -674,22 +917,17 @@ sub get_publication_age_approx { sub get_duration { my ($self, $info) = @_; - - #$self->format_duration($info->{contentDetails}{duration}); - #$self->format_duration($info->{lengthSeconds}); $info->{lengthSeconds}; } sub get_time { my ($self, $info) = @_; - if ($info->{liveNow}) { + if ($info->{liveNow} and ($self->get_duration($info) || 0) == 0) { return 'LIVE'; } $self->format_time($self->get_duration($info)); - - #$self->format_time($self->get_duration($info)); } sub get_definition { @@ -721,39 +959,44 @@ sub get_views { $info->{viewCount} // 0; } -sub get_views_approx { - my ($self, $info) = @_; - my $views = $self->get_views($info); +sub short_human_number { + my ($self, $int) = @_; - if ($views < 1000) { - return $views; + if ($int < 1000) { + return $int; } - if ($views >= 10 * 1e9) { # ten billions - return sprintf("%dB", int($views / 1e9)); + if ($int >= 10 * 1e9) { # ten billions + return sprintf("%dB", int($int / 1e9)); } - if ($views >= 1e9) { # billions - return sprintf("%.2gB", $views / 1e9); + if ($int >= 1e9) { # billions + return sprintf("%.2gB", $int / 1e9); } - if ($views >= 10 * 1e6) { # ten millions - return sprintf("%dM", int($views / 1e6)); + if ($int >= 10 * 1e6) { # ten millions + return sprintf("%dM", int($int / 1e6)); } - if ($views >= 1e6) { # millions - return sprintf("%.2gM", $views / 1e6); + if ($int >= 1e6) { # millions + return sprintf("%.2gM", $int / 1e6); } - if ($views >= 10 * 1e3) { # ten thousands - return sprintf("%dK", int($views / 1e3)); + if ($int >= 10 * 1e3) { # ten thousands + return sprintf("%dK", int($int / 1e3)); } - if ($views >= 1e3) { # thousands - return sprintf("%.2gK", $views / 1e3); + if ($int >= 1e3) { # thousands + return sprintf("%.2gK", $int / 1e3); } - return $views; + return $int; +} + +sub get_views_approx { + my ($self, $info) = @_; + my $views = $self->get_views($info); + $self->short_human_number($views); } sub get_likes { |