From d2c82222588f72f2313c2d1658b30ac611d69d06 Mon Sep 17 00:00:00 2001
From: trizen
Date: Fri, 13 Mar 2020 16:22:25 +0200
Subject: - Extract URLs in description from `descriptionHtml` field. (https://github.com/trizen/fair-viewer/issues/6)
 - Added the `--api-host=<...>` command-line option for changing the API host.
 - Fixed the `--info=` option.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jesús
---
 lib/WWW/FairViewer/Utils.pm | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'lib/WWW/FairViewer/Utils.pm')

diff --git a/lib/WWW/FairViewer/Utils.pm b/lib/WWW/FairViewer/Utils.pm
index 5ca1106..09c0174 100644
--- a/lib/WWW/FairViewer/Utils.pm
+++ b/lib/WWW/FairViewer/Utils.pm
@@ -452,8 +452,29 @@ Get description.
 
 sub get_description {
     my ($self, $info) = @_;
-    my $desc = $info->{description};
-    (defined($desc) and $desc =~ /\S/) ? $desc : 'No description available...';
+
+    my $desc = $info->{descriptionHtml} // '';
+
+    require URI::Escape;
+    require HTML::Entities;
+
+    $desc =~ s{<a href="/redirect\?(.*?)".*?>.*?</a>}{
+        my $url = $1;
+        if ($url =~ /(?:^|;)q=([^&]+)/) {
+            URI::Escape::uri_unescape($1);
+        }
+        else {
+            $url;
+        }
+    }segi;
+
+    $desc =~ s/<br\/?>/\n/gi;
+    $desc =~ s{<a href=".*?".*?>(.*?)</a>}{$1}sgi;
+    $desc =~ s/<.*?>//gs;
+
+    $desc = HTML::Entities::decode_entities($desc);
+
+    ($desc =~ /\S/) ? $desc : 'No description available...';
 }
 
 =head2 get_title($info)
-- 
cgit v1.2.3