diff options
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/howcast.py | 37 | 
2 files changed, 38 insertions, 33 deletions
| diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 4bb4aa84a..cc685e274 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -34,6 +34,7 @@ from .extractor.gametrailers import GametrailersIE  from .extractor.generic import GenericIE  from .extractor.googleplus import GooglePlusIE  from .extractor.googlesearch import GoogleSearchIE +from .extractor.howcast import HowcastIE  from .extractor.hypem import HypemIE  from .extractor.ina import InaIE  from .extractor.infoq import InfoQIE @@ -106,39 +107,6 @@ from .extractor.zdf import ZDFIE -class HowcastIE(InfoExtractor): -    """Information Extractor for Howcast.com""" -    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)' - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) - -        video_id = mobj.group('id') -        webpage_url = 'http://www.howcast.com/videos/' + video_id -        webpage = self._download_webpage(webpage_url, video_id) - -        self.report_extraction(video_id) - -        video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)', -            webpage, u'video URL') - -        video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', -            webpage, u'title') - -        video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', -            webpage, u'description', fatal=False) - -        thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'', -            webpage, u'thumbnail', fatal=False) - -        return [{ -            'id':       video_id, -            'url':      video_url, -            'ext':      'mp4', -            'title':    video_title, -            'description': video_description, -            'thumbnail': thumbnail, -        }]  class FlickrIE(InfoExtractor): diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py new file mode 100644 index 000000000..7b94f85ad --- /dev/null +++ b/youtube_dl/extractor/howcast.py @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor + + +class HowcastIE(InfoExtractor): +    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) + +        video_id = mobj.group('id') +        webpage_url = 'http://www.howcast.com/videos/' + video_id +        webpage = self._download_webpage(webpage_url, video_id) + +        self.report_extraction(video_id) + +        video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)', +            webpage, u'video URL') + +        video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', +            webpage, u'title') + +        video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', +            webpage, u'description', fatal=False) + +        thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'', +            webpage, u'thumbnail', fatal=False) + +        return [{ +            'id':       video_id, +            'url':      video_url, +            'ext':      'mp4', +            'title':    video_title, +            'description': video_description, +            'thumbnail': thumbnail, +        }] | 
