aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Filippo Valsorda <filippo.valsorda@gmail.com> 2013-05-18 19:17:19 +0200
committer Filippo Valsorda <filippo.valsorda@gmail.com> 2013-05-18 19:17:19 +0200
commit d4f76f1674c5fd6d5714a7500bc119b4b230f2b2 (patch)
tree 44c35d7bb8285d25a37c90f98cf7df46f8ae1500
parent 340fa21198b214d97e73c114fcb27a9d2b04012f (diff)
Add support for Howcast.com - closes #835
-rw-r--r-- test/tests.json 10
-rwxr-xr-x youtube_dl/InfoExtractors.py 39
2 files changed, 48 insertions, 1 deletions
diff --git a/test/tests.json b/test/tests.json
index 0a9052f9d..740f7756c 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -441,5 +441,15 @@
"title": "Obama Celebrates Iraq Victory"
},
"skip": "Requires rtmpdump"
+ },
+ {
+ "name": "Howcast",
+ "url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
+ "file": "390161.mp4",
+ "md5": "1d7ba54e2c9d7dc6935ef39e00529138",
+ "info_dict":{
+ "title":"How to Tie a Square Knot Properly",
+ "description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
+ }
}
]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 112d97a86..938d2d805 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -4041,7 +4041,7 @@ class RedTubeIE(InfoExtractor):
class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr"""
- _VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+ _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
@@ -4068,6 +4068,42 @@ class InaIE(InfoExtractor):
'title': video_title,
}]
+class HowcastIE(InfoExtractor):
+    """Information Extractor for Howcast.com"""
+    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>[\d]+)'
+
+    def _real_extract(self, url):
+        """Extract the media URL, title and description of a Howcast video.
+
+        Returns a one-element list holding the info dict for the video.
+        Raises ExtractorError if the URL does not match _VALID_URL, or if
+        the video URL or the title cannot be found in the downloaded page.
+        A missing description only produces a warning.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            # Fail with the same error type as the other extraction failures
+            # below instead of an AttributeError on mobj.group().
+            raise ExtractorError(u'Invalid URL: %s' % url)
+
+        video_id = mobj.group('id')
+        # Rebuild the page URL from the numeric id alone, so the request does
+        # not depend on the scheme or title slug of the URL that was passed in.
+        webpage_url = 'http://www.howcast.com/videos/' + video_id
+        webpage = self._download_webpage(webpage_url, video_id)
+
+        # The regex looks for the direct mp4 link given as the 'file' entry.
+        mobj = re.search(r'\'file\': "(http://mobile-media\.howcast\.com/\d+\.mp4)"', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract video URL')
+        video_url = mobj.group(1)
+
+        # The content attribute may be double- or single-quoted; exactly one
+        # of the two capture groups is set on a match.
+        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = mobj.group(1) or mobj.group(2)
+
+        # The description is optional: warn and continue when it is absent.
+        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
+        if mobj is None:
+            self._downloader.report_warning(u'unable to extract description')
+            video_description = None
+        else:
+            video_description = mobj.group(1) or mobj.group(2)
+
+        return [{
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': video_title,
+            'description': video_description,
+        }]
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
@@ -4125,6 +4161,7 @@ def gen_extractors():
BandcampIE(),
RedTubeIE(),
InaIE(),
+ HowcastIE(),
GenericIE()
]