diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-07-23 11:41:05 +0200 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-07-23 11:41:05 +0200 |
commit | 67ae7b4760be259d82013fcabe3e0e4cd8f0a9df (patch) | |
tree | 2173ebfb9c4018cb8b32a73055dba36d60deb6b1 | |
parent | de48addae2ae61eded2992cae61d405647847f8e (diff) |
Fix BreakIE
Also detect videos that come from Youtube
-rw-r--r-- | youtube_dl/extractor/breakcom.py | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 34f555e89..53a898de3 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -1,6 +1,8 @@ import re +import json from .common import InfoExtractor +from ..utils import determine_ext class BreakIE(InfoExtractor): @@ -17,17 +19,20 @@ class BreakIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group(1).split("-")[-1] - webpage = self._download_webpage(url, video_id) - video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1) - key = re.search(r"icon: '(.+?)',",webpage).group(1) - final_url = str(video_url)+"?"+str(key) - thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1) - title = re.search(r"sVidTitle: '(.+)',",webpage).group(1) - ext = video_url.split('.')[-1] + embed_url = 'http://www.break.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) + info_json = self._search_regex(r'var embedVars = ({.*?});', webpage, + u'info json', flags=re.DOTALL) + info = json.loads(info_json) + video_url = info['videoUri'] + m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) + if m_youtube is not None: + return self.url_result(m_youtube.group(1), 'Youtube') + final_url = video_url + '?' + info['AuthToken'] return [{ 'id': video_id, 'url': final_url, - 'ext': ext, - 'title': title, - 'thumbnail': thumbnail_url, + 'ext': determine_ext(final_url), + 'title': info['contentName'], + 'thumbnail': info['thumbUri'], }] |