diff options
| author | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-05-18 19:17:19 +0200 | 
|---|---|---|
| committer | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-05-18 19:17:19 +0200 | 
| commit | d4f76f1674c5fd6d5714a7500bc119b4b230f2b2 (patch) | |
| tree | 44c35d7bb8285d25a37c90f98cf7df46f8ae1500 /youtube_dl/InfoExtractors.py | |
| parent | 340fa21198b214d97e73c114fcb27a9d2b04012f (diff) | |
Add support for Howcast.com - closes #835
Diffstat (limited to 'youtube_dl/InfoExtractors.py')
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 39 | 
1 files changed, 38 insertions, 1 deletions
```diff
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 112d97a86..938d2d805 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -4041,7 +4041,7 @@ class RedTubeIE(InfoExtractor):
 
 class InaIE(InfoExtractor):
     """Information Extractor for Ina.fr"""
-    _VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
 
     def _real_extract(self,url):
         mobj = re.match(self._VALID_URL, url)
@@ -4068,6 +4068,42 @@ class InaIE(InfoExtractor):
             'title':    video_title,
         }]
 
+class HowcastIE(InfoExtractor):
+    """Information Extractor for Ina.fr"""
+    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>[\d]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        webpage_url = 'http://www.howcast.com/videos/' + video_id
+        webpage = self._download_webpage(webpage_url, video_id)
+
+        mobj = re.search(r'\'file\': "(http://mobile-media\.howcast\.com/\d+\.mp4)"', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract video URL')
+        video_url = mobj.group(1)
+
+        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = mobj.group(1) or mobj.group(2)
+
+        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
+        if mobj is None:
+            self._downloader.report_warning(u'unable to extract description')
+            video_description = None
+        else:
+            video_description = mobj.group(1) or mobj.group(2)
+
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      'mp4',
+            'title':    video_title,
+            'description': video_description,
+        }]
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
@@ -4125,6 +4161,7 @@ def gen_extractors():
         BandcampIE(),
         RedTubeIE(),
         InaIE(),
+        HowcastIE(),
         GenericIE()
     ]
 
```
