diff options
Diffstat (limited to 'youtube_dl/extractor/huffpost.py')
-rw-r--r-- | youtube_dl/extractor/huffpost.py | 70 |
1 files changed, 70 insertions, 0 deletions
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    unified_strdate,
)


class HuffPostIE(InfoExtractor):
    """Information extractor for segments hosted on live.huffingtonpost.com.

    Both the regular segment pages (``/r/segment/<slug>/<id>``) and the
    embedded player URLs (``/HPLEmbedPlayer/?segmentId=<id>``) are matched;
    the hexadecimal segment id is captured as the ``id`` group.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # TODO: 'md5', 'title' and 'description' below are still placeholders
    # and have to be replaced with the real expected values.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by *url*.

        The segment id is taken from *url* and used to query the JSON
        segment API; everything else is read from the ``data`` object of
        that response.

        ``title`` and ``sources.live`` are required (a missing key raises
        ``KeyError``). Duration, upload date, description and thumbnails
        are optional: when the corresponding field is absent the value is
        ``None`` (or an empty list for thumbnails) instead of aborting the
        extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']

        # Optional metadata: only hand a value to the parsing helpers when
        # the API actually supplied one.
        running_time = data.get('running_time')
        duration = (
            parse_duration(running_time) if running_time is not None else None)

        started_at = (data.get('schedule') or {}).get('started_at')
        upload_date = unified_strdate(started_at) if started_at else None

        # Only image URLs that embed a "<width>x<height>" token right before
        # a dot are kept; that token is reported as the resolution.
        thumbnails = []
        for thumbnail_url in (data.get('images') or {}).values():
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumbnail_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'resolution': m.group(1),
            })

        # The keys of sources.live double as the format name; '/' is swapped
        # for '.' to form the format id, and keys starting with 'audio/'
        # are flagged as carrying no video stream.
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': format_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, format_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            # _TEST expects a description; NOTE(review): assumes the API
            # exposes it under the 'description' key - confirm against a
            # live response. Falls back to None when absent.
            'description': data.get('description'),
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }