diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- | youtube_dl/extractor/generic.py | 45 |
1 files changed, 35 insertions, 10 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a62287e50..6df89f814 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -276,14 +276,6 @@ class GenericIE(InfoExtractor): 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, }, - # BBC iPlayer embeds - { - 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER', - 'info_dict': { - 'title': 'BBC - Blogs - Adam Curtis - BUGGER', - }, - 'playlist_mincount': 18, - }, # RUTV embed { 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', @@ -407,6 +399,26 @@ class GenericIE(InfoExtractor): 'skip_download': 'Requires rtmpdump' } }, + # francetv embed + { + 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero', + 'info_dict': { + 'id': 'EV_30231', + 'ext': 'mp4', + 'title': 'Alcaline, le concert avec Calogero', + 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff', + 'upload_date': '20150226', + 'timestamp': 1424989860, + 'duration': 5400, + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + 'expected_warnings': [ + 'Forbidden' + ] + }, # Condé Nast embed { 'url': 'http://www.wired.com/2014/04/honda-asimo/', @@ -1176,6 +1188,12 @@ class GenericIE(InfoExtractor): if vimeo_url is not None: return self.url_result(vimeo_url) + vid_me_embed_url = self._search_regex( + r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', + webpage, 'vid.me embed', default=None) + if vid_me_embed_url is not None: + return self.url_result(vid_me_embed_url, 'Vidme') + # Look for embedded YouTube player matches = re.findall(r'''(?x) (?: @@ -1431,6 +1449,13 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'ArteTVEmbed') + # Look for embedded francetv player + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for embedded smotri.com player smotri_url = SmotriIE._extract_url(webpage) if smotri_url: @@ -1630,7 +1655,7 @@ class GenericIE(InfoExtractor): if not found: # Broaden the findall a little bit: JWPlayer JS loader found = filter_video(re.findall( - r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) + r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) if not found: # Flow player found = filter_video(re.findall(r'''(?xs) @@ -1669,7 +1694,7 @@ class GenericIE(InfoExtractor): if refresh_header: found = re.search(REDIRECT_REGEX, refresh_header) if found: - new_url = compat_urlparse.urljoin(url, found.group(1)) + new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) self.report_following_redirect(new_url) return { '_type': 'url', |