diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 477 | 
1 files changed, 363 insertions, 114 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index cd7c47d6d..6d2efb22e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -8,7 +8,8 @@ import re  from .common import InfoExtractor  from .youtube import YoutubeIE  from ..compat import ( -    compat_urllib_parse, +    compat_urllib_parse_unquote, +    compat_urllib_request,      compat_urlparse,      compat_xml_parse_error,  ) @@ -32,11 +33,21 @@ from .brightcove import BrightcoveIE  from .nbc import NBCSportsVPlayerIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE +from .tvc import TVCIE +from .sportbox import SportBoxEmbedIE  from .smotri import SmotriIE +from .myvi import MyviIE  from .condenast import CondeNastIE  from .udn import UDNEmbedIE  from .senateisvp import SenateISVPIE  from .bliptv import BlipTVIE +from .svt import SVTIE +from .pornhub import PornHubIE +from .xhamster import XHamsterEmbedIE +from .vimeo import VimeoIE +from .dailymotion import DailymotionCloudIE +from .onionstudios import OnionStudiosIE +from .snagfilms import SnagFilmsEmbedIE  class GenericIE(InfoExtractor): @@ -44,6 +55,97 @@ class GenericIE(InfoExtractor):      _VALID_URL = r'.*'      IE_NAME = 'generic'      _TESTS = [ +        # Direct link to a video +        { +            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', +            'md5': '67d406c2bcb6af27fa886f31aa934bbe', +            'info_dict': { +                'id': 'trailer', +                'ext': 'mp4', +                'title': 'trailer', +                'upload_date': '20100513', +            } +        }, +        # Direct link to media delivered compressed (until Accept-Encoding is *) +        { +            'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', +            'md5': '128c42e68b13950268b648275386fc74', +            'info_dict': { +                'id': 'FictionJunction-Parallel_Hearts', +                'ext': 'flac', +                'title': 'FictionJunction-Parallel_Hearts', +                'upload_date': '20140522', +            }, +            'expected_warnings': [ +                'URL could be a direct video link, returning it as such.' +            ] +        }, +        # Direct download with broken HEAD +        { +            'url': 'http://ai-radio.org:8000/radio.opus', +            'info_dict': { +                'id': 'radio', +                'ext': 'opus', +                'title': 'radio', +            }, +            'params': { +                'skip_download': True,  # infinite live stream +            }, +            'expected_warnings': [ +                r'501.*Not Implemented' +            ], +        }, +        # Direct link with incorrect MIME type +        { +            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', +            'md5': '4ccbebe5f36706d85221f204d7eb5913', +            'info_dict': { +                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', +                'id': '5_Lennart_Poettering_-_Systemd', +                'ext': 'webm', +                'title': '5_Lennart_Poettering_-_Systemd', +                'upload_date': '20141120', +            }, +            'expected_warnings': [ +                'URL could be a direct video link, returning it as such.' +            ] +        }, +        # RSS feed +        { +            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', +            'info_dict': { +                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', +                'title': 'Zero Punctuation', +                'description': 're:.*groundbreaking video review series.*' +            }, +            'playlist_mincount': 11, +        }, +        # RSS feed with enclosure +        { +            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', +            'info_dict': { +                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', +                'ext': 'm4v', +                'upload_date': '20150228', +                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', +            } +        }, +        # google redirect +        { +            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', +            'info_dict': { +                'id': 'cmQHVoWB5FY', +                'ext': 'mp4', +                'upload_date': '20130224', +                'uploader_id': 'TheVerge', +                'description': 're:^Chris Ziegler takes a look at the\.*', +                'uploader': 'The Verge', +                'title': 'First Firefox OS phones side-by-side', +            }, +            'params': { +                'skip_download': False, +            } +        },          {              'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',              'md5': '85b90ccc9d73b4acd9138d3af4c27f89', @@ -123,17 +225,6 @@ class GenericIE(InfoExtractor):                  'skip_download': True,  # m3u8 download              },          }, -        # Direct link to a video -        { -            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', -            'md5': '67d406c2bcb6af27fa886f31aa934bbe', -            'info_dict': { -                'id': 'trailer', -                'ext': 'mp4', -                'title': 'trailer', -                'upload_date': '20100513', -            } -        },          # ooyala video          {              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', @@ -158,22 +249,6 @@ class GenericIE(InfoExtractor):              },              'add_ie': ['Ooyala'],          }, -        # google redirect -        { -            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', -            'info_dict': { -                'id': 'cmQHVoWB5FY', -                'ext': 'mp4', -                'upload_date': '20130224', -                'uploader_id': 'TheVerge', -                'description': 're:^Chris Ziegler takes a look at the\.*', -                'uploader': 'The Verge', -                'title': 'First Firefox OS phones side-by-side', -            }, -            'params': { -                'skip_download': False, -            } -        },          # embed.ly video          {              'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -223,6 +298,66 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, +        # TVC embed +        { +            'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/', +            'info_dict': { +                'id': '55304', +                'ext': 'mp4', +                'title': 'Дошкольное воспитание', +            }, +        }, +        # SportBox embed +        { +            'url': 'http://www.vestifinance.ru/articles/25753', +            'info_dict': { +                'id': '25753', +                'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', +            }, +            'playlist': [{ +                'info_dict': { +                    'id': '370908', +                    'title': 'Госзаказ. День 3', +                    'ext': 'mp4', +                } +            }, { +                'info_dict': { +                    'id': '370905', +                    'title': 'Госзаказ. День 2', +                    'ext': 'mp4', +                } +            }, { +                'info_dict': { +                    'id': '370902', +                    'title': 'Госзаказ. День 1', +                    'ext': 'mp4', +                } +            }], +            'params': { +                # m3u8 download +                'skip_download': True, +            }, +        }, +        # Myvi.ru embed +        { +            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1', +            'info_dict': { +                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e', +                'ext': 'mp4', +                'title': 'Ужастики, русский трейлер (2015)', +                'thumbnail': 're:^https?://.*\.jpg$', +                'duration': 153, +            } +        }, +        # XHamster embed +        { +            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8', +            'info_dict': { +                'id': 'showthread', +                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', +            }, +            'playlist_mincount': 7, +        },          # Embedded TED video          {              'url': 'http://en.support.wordpress.com/videos/ted-talks/', @@ -272,6 +407,26 @@ class GenericIE(InfoExtractor):                  'skip_download': 'Requires rtmpdump'              }          }, +        # francetv embed +        { +            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero', +            'info_dict': { +                'id': 'EV_30231', +                'ext': 'mp4', +                'title': 'Alcaline, le concert avec Calogero', +                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff', +                'upload_date': '20150226', +                'timestamp': 1424989860, +                'duration': 5400, +            }, +            'params': { +                # m3u8 downloads +                'skip_download': True, +            }, +            'expected_warnings': [ +                'Forbidden' +            ] +        },          # Condé Nast embed          {              'url': 'http://www.wired.com/2014/04/honda-asimo/', @@ -374,16 +529,6 @@ class GenericIE(InfoExtractor):                  'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',              }          }, -        # RSS feed -        { -            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', -            'info_dict': { -                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', -                'title': 'Zero Punctuation', -                'description': 're:.*groundbreaking video review series.*' -            }, -            'playlist_mincount': 11, -        },          # Multiple brightcove videos          # https://github.com/rg3/youtube-dl/issues/2283          { @@ -413,19 +558,6 @@ class GenericIE(InfoExtractor):                  'thumbnail': 're:^https?://.*\.jpg$',              },          }, -        # MLB articles -        { -            'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', -            'md5': 'b190e70141fb9a1552a85426b4da1b5d', -            'info_dict': { -                'id': '75609783', -                'ext': 'mp4', -                'title': 'Must C: Pillar climbs for catch', -                'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run', -                'timestamp': 1429124820, -                'upload_date': '20150415', -            } -        },          # Wistia embed          {              'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', @@ -450,21 +582,6 @@ class GenericIE(InfoExtractor):                  'uploader': 'thoughtworks.wistia.com',              },          }, -        # Direct download with broken HEAD -        { -            'url': 'http://ai-radio.org:8000/radio.opus', -            'info_dict': { -                'id': 'radio', -                'ext': 'opus', -                'title': 'radio', -            }, -            'params': { -                'skip_download': True,  # infinite live stream -            }, -            'expected_warnings': [ -                r'501.*Not Implemented' -            ], -        },          # Soundcloud embed          {              'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', @@ -496,21 +613,6 @@ class GenericIE(InfoExtractor):              },              'playlist_mincount': 2,          }, -        # Direct link with incorrect MIME type -        { -            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', -            'md5': '4ccbebe5f36706d85221f204d7eb5913', -            'info_dict': { -                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', -                'id': '5_Lennart_Poettering_-_Systemd', -                'ext': 'webm', -                'title': '5_Lennart_Poettering_-_Systemd', -                'upload_date': '20141120', -            }, -            'expected_warnings': [ -                'URL could be a direct video link, returning it as such.' -            ] -        },          # Cinchcast embed          {              'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', @@ -598,6 +700,18 @@ class GenericIE(InfoExtractor):                  'title': 'John Carlson Postgame 2/25/15',              },          }, +        # Kaltura embed (different embed code) +        { +            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014', +            'info_dict': { +                'id': '1_a52wc67y', +                'ext': 'flv', +                'upload_date': '20150127', +                'uploader_id': 'PremierMedia', +                'timestamp': int, +                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', +            }, +        },          # Eagle.Platform embed (generic URL)          {              'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -658,15 +772,16 @@ class GenericIE(InfoExtractor):                  'title': 'Facebook Creates "On This Day" | Crunch Report',              },          }, -        # RSS feed with enclosure +        # SVT embed          { -            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', +            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',              'info_dict': { -                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', -                'ext': 'm4v', -                'upload_date': '20150228', -                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', -            } +                'id': '2900353', +                'ext': 'flv', +                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', +                'duration': 27, +                'age_limit': 0, +            },          },          # Crooks and Liars embed          { @@ -742,6 +857,62 @@ class GenericIE(InfoExtractor):                  # rtmpe downloads                  'skip_download': True,              } +        }, +        # Brightcove URL in single quotes +        { +            'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', +            'md5': '4ae374f1f8b91c889c4b9203c8c752af', +            'info_dict': { +                'id': '4255764656001', +                'ext': 'mp4', +                'title': 'SN Presents: Russell Martin, World Citizen', +                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.', +                'uploader': 'Rogers Sportsnet', +            }, +        }, +        # Dailymotion Cloud video +        { +            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', +            'md5': '49444254273501a64675a7e68c502681', +            'info_dict': { +                'id': '5585de919473990de4bee11b', +                'ext': 'mp4', +                'title': 'Le débat', +                'thumbnail': 're:^https?://.*\.jpe?g$', +            } +        }, +        # OnionStudios embed +        { +            'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', +            'info_dict': { +                'id': '2855', +                'ext': 'mp4', +                'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', +                'thumbnail': 're:^https?://.*\.jpe?g$', +                'uploader': 'ClickHole', +                'uploader_id': 'clickhole', +            } +        }, +        # SnagFilms embed +        { +            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html', +            'info_dict': { +                'id': '74849a00-85a9-11e1-9660-123139220831', +                'ext': 'mp4', +                'title': '#whilewewatch', +            } +        }, +        # AdobeTVVideo embed +        { +            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners', +            'md5': '43662b577c018ad707a63766462b1e87', +            'info_dict': { +                'id': '2456', +                'ext': 'mp4', +                'title': 'New experience with Acrobat DC', +                'description': 'New experience with Acrobat DC', +                'duration': 248.667, +            },          }      ] @@ -863,7 +1034,7 @@ class GenericIE(InfoExtractor):              force_videoid = smuggled_data['force_videoid']              video_id = force_videoid          else: -            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] +            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])          self.to_screen('%s: Requesting header' % video_id) @@ -885,7 +1056,9 @@ class GenericIE(InfoExtractor):          full_response = None          if head_response is False: -            full_response = self._request_webpage(url, video_id) +            request = compat_urllib_request.Request(url) +            request.add_header('Accept-Encoding', '*') +            full_response = self._request_webpage(request, video_id)              head_response = full_response          # Check for direct link to a video @@ -896,7 +1069,7 @@ class GenericIE(InfoExtractor):                  head_response.headers.get('Last-Modified'))              return {                  'id': video_id, -                'title': os.path.splitext(url_basename(url))[0], +                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),                  'direct': True,                  'formats': [{                      'format_id': m.group('format_id'), @@ -907,10 +1080,22 @@ class GenericIE(InfoExtractor):              }          if not self._downloader.params.get('test', False) and not is_intentional: -            self._downloader.report_warning('Falling back on generic information extractor.') +            force = self._downloader.params.get('force_generic_extractor', False) +            self._downloader.report_warning( +                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))          if not full_response: -            full_response = self._request_webpage(url, video_id) +            request = compat_urllib_request.Request(url) +            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) +            # making it impossible to download only chunk of the file (yet we need only 512kB to +            # test whether it's HTML or not). According to youtube-dl default Accept-Encoding +            # that will always result in downloading the whole file that is not desirable. +            # Therefore for extraction pass we have to override Accept-Encoding to any in order +            # to accept raw bytes and being able to download only a chunk. +            # It may probably better to solve this by checking Content-Type for application/octet-stream +            # after HEAD request finishes, but not sure if we can rely on this. +            request.add_header('Accept-Encoding', '*') +            full_response = self._request_webpage(request, video_id)          # Maybe it's a direct link to a video?          # Be careful not to download the whole thing! @@ -922,7 +1107,7 @@ class GenericIE(InfoExtractor):                  head_response.headers.get('Last-Modified'))              return {                  'id': video_id, -                'title': os.path.splitext(url_basename(url))[0], +                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),                  'direct': True,                  'url': url,                  'upload_date': upload_date, @@ -949,7 +1134,7 @@ class GenericIE(InfoExtractor):          # Sometimes embedded video player is hidden behind percent encoding          # (e.g. https://github.com/rg3/youtube-dl/issues/2448)          # Unescaping the whole page allows to handle those cases in a generic way -        webpage = compat_urllib_parse.unquote(webpage) +        webpage = compat_urllib_parse_unquote(webpage)          # it's tempting to parse this further, but you would          # have to take into account all the variations like @@ -1002,23 +1187,20 @@ class GenericIE(InfoExtractor):          # Look for embedded rtl.nl player          matches = re.findall( -            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"', +            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',              webpage)          if matches:              return _playlist_from_matches(matches, ie='RtlNl') -        # Look for embedded (iframe) Vimeo player -        mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) -        if mobj: -            player_url = unescapeHTML(mobj.group('url')) -            surl = smuggle_url(player_url, {'Referer': url}) -            return self.url_result(surl) -        # Look for embedded (swf embed) Vimeo player -        mobj = re.search( -            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) -        if mobj: -            return self.url_result(mobj.group(1)) +        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) +        if vimeo_url is not None: +            return self.url_result(vimeo_url) + +        vid_me_embed_url = self._search_regex( +            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', +            webpage, 'vid.me embed', default=None) +        if vid_me_embed_url is not None: +            return self.url_result(vid_me_embed_url, 'Vidme')          # Look for embedded YouTube player          matches = re.findall(r'''(?x) @@ -1091,6 +1273,11 @@ class GenericIE(InfoExtractor):          if bliptv_url:              return self.url_result(bliptv_url, 'BlipTV') +        # Look for SVT player +        svt_url = SVTIE._extract_url(webpage) +        if svt_url: +            return self.url_result(svt_url, 'SVT') +          # Look for embedded condenast player          matches = re.findall(              r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")', @@ -1207,7 +1394,7 @@ class GenericIE(InfoExtractor):              return self.url_result(mobj.group('url'))          mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)          if mobj is not None: -            return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) +            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))          # Look for funnyordie embed          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) @@ -1225,6 +1412,32 @@ class GenericIE(InfoExtractor):          if rutv_url:              return self.url_result(rutv_url, 'RUTV') +        # Look for embedded TVC player +        tvc_url = TVCIE._extract_url(webpage) +        if tvc_url: +            return self.url_result(tvc_url, 'TVC') + +        # Look for embedded SportBox player +        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) +        if sportbox_urls: +            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + +        # Look for embedded PornHub player +        pornhub_url = PornHubIE._extract_url(webpage) +        if pornhub_url: +            return self.url_result(pornhub_url, 'PornHub') + +        # Look for embedded XHamster player +        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) +        if xhamster_urls: +            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed') + +        # Look for embedded Tvigle player +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url'), 'Tvigle') +          # Look for embedded TED player          mobj = re.search(              r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) @@ -1244,11 +1457,23 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'ArteTVEmbed') +        # Look for embedded francetv player +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1', +            webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url')) +          # Look for embedded smotri.com player          smotri_url = SmotriIE._extract_url(webpage)          if smotri_url:              return self.url_result(smotri_url, 'Smotri') +        # Look for embedded Myvi.ru player +        myvi_url = MyviIE._extract_url(webpage) +        if myvi_url: +            return self.url_result(myvi_url) +          # Look for embeded soundcloud player          mobj = re.search(              r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"', @@ -1328,8 +1553,8 @@ class GenericIE(InfoExtractor):              return self.url_result(mobj.group('url'), 'Zapiks')          # Look for Kaltura embeds -        mobj = re.search( -            r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) +        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or +                re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))          if mobj is not None:              return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') @@ -1384,7 +1609,31 @@ class GenericIE(InfoExtractor):          # Look for Senate ISVP iframe          senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)          if senate_isvp_url: -            return self.url_result(surl, 'SenateISVP') +            return self.url_result(senate_isvp_url, 'SenateISVP') + +        # Look for Dailymotion Cloud videos +        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) +        if dmcloud_url: +            return self.url_result(dmcloud_url, 'DailymotionCloud') + +        # Look for OnionStudios embeds +        onionstudios_url = OnionStudiosIE._extract_url(webpage) +        if onionstudios_url: +            return self.url_result(onionstudios_url) + +        # Look for SnagFilms embeds +        snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) +        if snagfilms_url: +            return self.url_result(snagfilms_url) + +        # Look for AdobeTVVideo embeds +        mobj = re.search( +            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', +            webpage) +        if mobj is not None: +            return self.url_result( +                self._proto_relative_url(unescapeHTML(mobj.group(1))), +                'AdobeTVVideo')          def check_video(vurl):              if YoutubeIE.suitable(vurl): @@ -1453,7 +1702,7 @@ class GenericIE(InfoExtractor):                  if refresh_header:                      found = re.search(REDIRECT_REGEX, refresh_header)              if found: -                new_url = compat_urlparse.urljoin(url, found.group(1)) +                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))                  self.report_following_redirect(new_url)                  return {                      '_type': 'url', @@ -1465,7 +1714,7 @@ class GenericIE(InfoExtractor):          entries = []          for video_url in found:              video_url = compat_urlparse.urljoin(url, video_url) -            video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) +            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))              # Sometimes, jwplayer extraction will result in a YouTube URL              if YoutubeIE.suitable(video_url):  | 
