diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 89 | 
1 files changed, 64 insertions, 25 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e7ee31877..cf245d9cd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -25,6 +25,7 @@ from ..utils import (  from .brightcove import BrightcoveIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE +from .smotri import SmotriIE  class GenericIE(InfoExtractor): @@ -81,6 +82,17 @@ class GenericIE(InfoExtractor):              },              'add_ie': ['Brightcove'],          }, +        { +            'url': 'http://www.championat.com/video/football/v/87/87499.html', +            'md5': 'fb973ecf6e4a78a67453647444222983', +            'info_dict': { +                'id': '3414141473001', +                'ext': 'mp4', +                'title': 'Видео. Удаление Дзагоева (ЦСКА)', +                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"', +                'uploader': 'Championat', +            }, +        },          # Direct link to a video          {              'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', @@ -102,20 +114,6 @@ class GenericIE(InfoExtractor):                  'title': '2cc213299525360.mov',  # that's what we get              },          }, -        # second style of embedded ooyala videos -        { -            'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html', -            'info_dict': { -                'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk', -                'ext': 'mp4', -                'title': 'Behind-the-scenes: Financial Review Sunday ', -                'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.', -            }, -            'params': { -                # m3u8 download -                'skip_download': True, -            }, -        },          # google redirect          {              'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -186,6 +184,17 @@ class GenericIE(InfoExtractor):                  'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',              }          }, +        # Embeded Ustream video +        { +            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', +            'md5': '27b99cdb639c9b12a79bca876a073417', +            'info_dict': { +                'id': '45734260', +                'ext': 'flv', +                'uploader': 'AU SPA:  The NSA and Privacy', +                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' +            } +        },          # nowvideo embed hidden behind percent encoding          {              'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', @@ -212,6 +221,21 @@ class GenericIE(InfoExtractor):                  'skip_download': 'Requires rtmpdump'              }          }, +        # smotri embed +        { +            'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml', +            'md5': 'ec40048448e9284c9a1de77bb188108b', +            'info_dict': { +                'id': 'v27008541fad', +                'ext': 'mp4', +                'title': 'Крым и Севастополь вошли в состав России', +                'description': 'md5:fae01b61f68984c7bd2fa741e11c3175', +                'duration': 900, +                'upload_date': '20140318', +                'uploader': 'rbctv_2012_4', +                'uploader_id': 'rbctv_2012_4', +            }, +        },      ]      def report_download_webpage(self, video_id): @@ -300,13 +324,16 @@ class GenericIE(InfoExtractor):          if not parsed_url.scheme:              default_search = self._downloader.params.get('default_search')              if default_search is None: -                default_search = 'auto' +                default_search = 'auto_warning' -            if default_search == 'auto': +            if default_search in ('auto', 'auto_warning'):                  if '/' in url:                      self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')                      return self.url_result('http://' + url)                  else: +                    if default_search == 'auto_warning': +                        self._downloader.report_warning( +                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)                      return self.url_result('ytsearch:' + url)              else:                  assert ':' in default_search @@ -484,17 +511,18 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group(1), 'Mpora') -        # Look for embedded NovaMov player +        # Look for embedded NovaMov-based player          mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) +            r'''(?x)<iframe[^>]+?src=(["\']) +                    (?P<url>http://(?:(?:embed|www)\.)? +                        (?:novamov\.com| +                           nowvideo\.(?:ch|sx|eu|at|ag|co)| +                           videoweed\.(?:es|com)| +                           movshare\.(?:net|sx|ag)| +                           divxstage\.(?:eu|net|ch|co|at|ag)) +                        /embed\.php.+?)\1''', webpage)          if mobj is not None: -            return self.url_result(mobj.group('url'), 'NovaMov') - -        # Look for embedded NowVideo player -        mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) -        if mobj is not None: -            return self.url_result(mobj.group('url'), 'NowVideo') +            return self.url_result(mobj.group('url'))          # Look for embedded Facebook player          mobj = re.search( @@ -540,6 +568,12 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'TED') +        # Look for embedded Ustream videos +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url'), 'Ustream') +          # Look for embedded arte.tv player          mobj = re.search(              r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"', @@ -547,6 +581,11 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'ArteTVEmbed') +        # Look for embedded smotri.com player +        smotri_url = SmotriIE._extract_url(webpage) +        if smotri_url: +            return self.url_result(smotri_url, 'Smotri') +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None: | 
