diff options
author | Mahmoud Abdel-Fattah <accounts@abdel-fattah.net> | 2023-07-11 05:00:38 +0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-11 06:30:38 +0530 |
commit | 2af4eeb77246b8183aae75a0a8d19f18c08115b2 (patch) | |
tree | 1a0fadbd677635c9d6f3b80cf3d43f6fe214fa90 | |
parent | 325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc (diff) | |
download | hypervideo-pre-2af4eeb77246b8183aae75a0a8d19f18c08115b2.tar.lz hypervideo-pre-2af4eeb77246b8183aae75a0a8d19f18c08115b2.tar.xz hypervideo-pre-2af4eeb77246b8183aae75a0a8d19f18c08115b2.zip |
[utils] `clean_podcast_url`: Handle more trackers (#7556)
Authored by: mabdelfattah, bashonly
Closes #7544
-rw-r--r-- | test/test_utils.py | 2 | ||||
-rw-r--r-- | yt_dlp/utils/_utils.py | 10 |
2 files changed, 9 insertions, 3 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index a22f25d73..bdbd2d879 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1835,6 +1835,8 @@ Line 1
     def test_clean_podcast_url(self):
         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+        self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
+        self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')
 
     def test_LazyList(self):
         it = list(range(10))
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 82d9ba4d5..3023c33b2 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -5123,14 +5123,18 @@ def clean_podcast_url(url):
             (?:
                 chtbl\.com/track|
                 media\.blubrry\.com|  # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
-                play\.podtrac\.com
-            )/[^/]+|
+                play\.podtrac\.com|
+                chrt\.fm/track|
+                mgln\.ai/e
+            )(?:/[^/.]+)?|
             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}|  # http://analytics.podtrac.com/how-to-measure
             flex\.acast\.com|
             pd(?:
                 cn\.co|  # https://podcorn.com/analytics-prefix/
                 st\.fm  # https://podsights.com/docs/
-            )/e
+            )/e|
+            [0-9]\.gum\.fm|
+            pscrb\.fm/rss/p
         )/''', '', url)
     return re.sub(r'^\w+://(\w+://)', r'\1', url)
 