diff options
| author | Remita Amine <remitamine@gmail.com> | 2021-01-04 00:51:55 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2021-01-04 01:14:25 +0100 | 
| commit | e88c9ef62a4a26cc77370b741a4244d298c7d45a (patch) | |
| tree | 1a51cee08f0515853f9c5661a8098f89d5a43668 | |
| parent | 0889eb33e0d40d567be5b2f8431952a5517276fc (diff) | |
[utils] add a function to clean podcast URLs
| -rw-r--r-- | test/test_utils.py | 5 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 17 | 
2 files changed, 22 insertions, 0 deletions
```diff
diff --git a/test/test_utils.py b/test/test_utils.py
index d49d3239c..259c4763e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -21,6 +21,7 @@ from youtube_dl.utils import (
     encode_base_n,
     caesar,
     clean_html,
+    clean_podcast_url,
     date_from_str,
     DateRange,
     detect_exe_version,
@@ -1470,6 +1471,10 @@ Line 1
         self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
         self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
 
+    def test_clean_podcast_url(self):
+        self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
+        self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d5fb6fd24..8e4d144c9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -5706,3 +5706,20 @@ def random_birthday(year_field, month_field, day_field):
         month_field: str(random_date.month),
         day_field: str(random_date.day),
     }
+
+
+def clean_podcast_url(url):
+    return re.sub(r'''(?x)
+        (?:
+            (?:
+                chtbl\.com/track|
+                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
+                play\.podtrac\.com
+            )/[^/]+|
+            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
+            flex\.acast\.com|
+            pd(?:
+                cn\.co| # https://podcorn.com/analytics-prefix/
+                st\.fm # https://podsights.com/docs/
+            )/e
+        )/''', '', url)
```
