diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/hbo.py | 63 | 
1 files changed, 49 insertions, 14 deletions
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py
index dad0f3994..3606d64fd 100644
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@@ -12,17 +12,7 @@ from ..utils import (
 )
 
 
-class HBOIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
-        'md5': '1c33253f0c7782142c993c0ba62a8753',
-        'info_dict': {
-            'id': '1437839',
-            'ext': 'mp4',
-            'title': 'Ep. 64 Clip: Encryption',
-        }
-    }
+class HBOBaseIE(InfoExtractor):
     _FORMATS_INFO = {
         '1920': {
             'width': 1280,
@@ -50,8 +40,7 @@ class HBOIE(InfoExtractor):
         },
     }
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
+    def _extract_from_id(self, video_id):
         video_data = self._download_xml(
             'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
         title = xpath_text(video_data, 'title', 'title', True)
@@ -116,7 +105,53 @@ class HBOIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
+            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
             'formats': formats,
             'thumbnails': thumbnails,
         }
+
+
+class HBOIE(HBOBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'info_dict': {
+            'id': '1437839',
+            'ext': 'mp4',
+            'title': 'Ep. 64 Clip: Encryption',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 1072,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self._extract_from_id(video_id)
+
+
+class HBOEpisodeIE(HBOBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
+        'md5': '689132b253cc0ab7434237fc3a293210',
+        'info_dict': {
+            'id': '1439518',
+            'ext': 'mp4',
+            'title': 'Ep. 52: Inside the Episode',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 240,
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
+            webpage, 'video ID', group='video_id')
+
+        return self._extract_from_id(video_id)
