about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- test/tests.json 23
-rwxr-xr-x youtube_dl/InfoExtractors.py 111
2 files changed, 126 insertions, 8 deletions
diff --git a/test/tests.json b/test/tests.json
index f0e5cd8bf..1ff9ad7ee 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -152,7 +152,8 @@
"file": "20274954.flv",
"md5": "088f151799e8f572f84eb62f17d73e5c",
"info_dict": {
- "title": "Young Americans for Liberty February 7, 2012 2:28 AM"
+ "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
+ "uploader": "Young Americans for Liberty"
}
},
{
@@ -440,5 +441,25 @@
"title": "Obama Celebrates Iraq Victory"
},
"skip": "Requires rtmpdump"
+ },
+ {
+ "name": "Howcast",
+ "url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
+ "file": "390161.mp4",
+ "md5": "1d7ba54e2c9d7dc6935ef39e00529138",
+ "info_dict":{
+ "title":"How to Tie a Square Knot Properly",
+ "description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
+ }
+ },
+ {
+ "name": "Vine",
+ "url": "https://vine.co/v/b9KOOWX7HUx",
+ "file": "b9KOOWX7HUx.mp4",
+ "md5": "2f36fed6235b16da96ce9b4dc890940d",
+ "info_dict":{
+ "title": "Chicken.",
+ "uploader": "Jack Dorsey"
+ }
}
]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index cd7923c52..2dd1c49f6 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3427,18 +3427,26 @@ class UstreamIE(InfoExtractor):
video_id = m.group('videoID')
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
- m = re.search(r'data-title="(?P<title>.+)"',webpage)
- title = m.group('title')
- m = re.search(r'<a class="state" data-content-type="channel" data-content-id="(?P<uploader>\d+)"',webpage)
- uploader = m.group('uploader')
+ self.report_extraction(video_id)
+ try:
+ m = re.search(r'data-title="(?P<title>.+)"',webpage)
+ title = m.group('title')
+ m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
+ webpage, re.DOTALL)
+ uploader = unescapeHTML(m.group('uploader').strip())
+ m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
+ thumb = m.group('thumb')
+ except AttributeError:
+ raise ExtractorError(u'Unable to extract info')
info = {
'id':video_id,
'url':video_url,
'ext': 'flv',
'title': title,
- 'uploader': uploader
+ 'uploader': uploader,
+ 'thumbnail': thumb,
}
- return [info]
+ return info
class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
@@ -4159,7 +4167,7 @@ class RedTubeIE(InfoExtractor):
class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr"""
- _VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+ _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
@@ -4186,6 +4194,93 @@ class InaIE(InfoExtractor):
'title': video_title,
}]
+class HowcastIE(InfoExtractor):
+"""Information Extractor for Howcast.com"""
+_VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'  # scheme and "www." are optional; "id" captures the numeric video id
+
+def _real_extract(self, url):  # returns a one-element list holding the info dict; raises ExtractorError on a missing video URL, title or thumbnail
+mobj = re.match(self._VALID_URL, url)  # NOTE(review): result is not checked for None; presumably callers only pass URLs that already match _VALID_URL -- confirm
+
+video_id = mobj.group('id')
+webpage_url = 'http://www.howcast.com/videos/' + video_id  # page URL rebuilt from the id alone, so anything after the digits in the input URL is dropped
+webpage = self._download_webpage(webpage_url, video_id)  # searched as text by the regexes below
+
+self.report_extraction(video_id)
+
+mobj = re.search(r'\'file\': "(http://mobile-media\.howcast\.com/\d+\.mp4)"', webpage)  # direct MP4 link taken from the page's 'file' entry
+if mobj is None:
+raise ExtractorError(u'Unable to extract video URL')  # the media URL is mandatory
+video_url = mobj.group(1)
+
+mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)  # og:title meta tag; content may be double- or single-quoted
+if mobj is None:
+raise ExtractorError(u'Unable to extract title')
+video_title = mobj.group(1) or mobj.group(2)  # only one alternative captures, depending on the quote style
+
+mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)  # same quoting alternatives as the title pattern
+if mobj is None:
+self._downloader.report_warning(u'unable to extract description')  # description is optional: warn and carry on
+video_description = None
+else:
+video_description = mobj.group(1) or mobj.group(2)
+
+mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)  # og:image meta tag; only the single-quoted form is matched here
+if mobj is None:
+raise ExtractorError(u'Unable to extract thumbnail')  # NOTE(review): a missing thumbnail aborts extraction, unlike the description above
+thumbnail = mobj.group(1)
+
+return [{  # single-entry list wrapping the info dict
+'id': video_id,
+'url': video_url,
+'ext': 'mp4',  # fixed: the URL pattern above only accepts ".mp4" links
+'title': video_title,
+'description': video_description,  # None when the description meta tag was not found
+'thumbnail': thumbnail,
+}]
+
+class VineIE(InfoExtractor):
+"""Information Extractor for Vine.co"""
+_VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'  # scheme and "www." are optional; "id" captures the word characters after /v/
+
+def _real_extract(self, url):  # returns a one-element list holding the info dict; raises ExtractorError when any of the four fields below is missing
+
+mobj = re.match(self._VALID_URL, url)  # NOTE(review): result is not checked for None; presumably callers only pass URLs that already match _VALID_URL -- confirm
+
+video_id = mobj.group('id')
+webpage_url = 'https://vine.co/v/' + video_id  # page URL rebuilt from the id, always https and without "www."
+webpage = self._download_webpage(webpage_url, video_id)  # searched as text by the regexes below
+
+self.report_extraction(video_id)
+
+mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)  # media URL comes from the twitter:player:stream meta tag
+if mobj is None:
+raise ExtractorError(u'Unable to extract video URL')
+video_url = mobj.group(1)
+
+mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)  # og:title meta tag, double-quoted form only
+if mobj is None:
+raise ExtractorError(u'Unable to extract title')
+video_title = mobj.group(1)
+
+mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)  # group 1 is the image URL; optional group 2 soaks up a trailing "?..." query
+if mobj is None:
+raise ExtractorError(u'Unable to extract thumbnail')  # NOTE(review): thumbnail is treated as mandatory here
+thumbnail = mobj.group(1)  # query string, if any, is left out
+
+mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)  # first <h2> after the user div; DOTALL lets ".*?" span newlines
+if mobj is None:
+raise ExtractorError(u'Unable to extract uploader')  # NOTE(review): uploader is treated as mandatory here
+uploader = mobj.group(1)
+
+return [{  # single-entry list wrapping the info dict
+'id': video_id,
+'url': video_url,
+'ext': 'mp4',  # hard-coded; the stream URL itself is not inspected
+'title': video_title,
+'thumbnail': thumbnail,
+'uploader': uploader,
+}]
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
@@ -4243,6 +4338,8 @@ def gen_extractors():
BandcampIE(),
RedTubeIE(),
InaIE(),
+ HowcastIE(),
+ VineIE(),
GenericIE()
]