diff options
Diffstat (limited to 'youtube_dl/extractor/livestream.py')
| -rw-r--r-- | youtube_dl/extractor/livestream.py | 104 | 
1 files changed, 90 insertions, 14 deletions
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 1ea1bbab4..281a0ce40 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -5,11 +5,14 @@ import json  from .common import InfoExtractor  from ..utils import ( +    compat_str,      compat_urllib_parse_urlparse,      compat_urlparse, -    xpath_with_ns, -    compat_str, +    ExtractorError, +    find_xpath_attr, +    int_or_none,      orderedSet, +    xpath_with_ns,  ) @@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):              'ext': 'mp4',              'title': 'Live from Webster Hall NYC',              'upload_date': '20121012', +            'like_count': int, +            'view_count': int, +            'thumbnail': 're:^http://.*\.jpg$'          }      } +    def _parse_smil(self, video_id, smil_url): +        formats = [] +        _SWITCH_XPATH = ( +            './/{http://www.w3.org/2001/SMIL20/Language}body/' +            '{http://www.w3.org/2001/SMIL20/Language}switch') +        smil_doc = self._download_xml( +            smil_url, video_id, +            note='Downloading SMIL information', +            errnote='Unable to download SMIL information', +            fatal=False) +        if smil_doc is False:  # Download failed +            return formats +        title_node = find_xpath_attr( +            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta', +            'name', 'title') +        if title_node is None: +            self.report_warning('Cannot find SMIL id') +            switch_node = smil_doc.find(_SWITCH_XPATH) +        else: +            title_id = title_node.attrib['content'] +            switch_node = find_xpath_attr( +                smil_doc, _SWITCH_XPATH, 'id', title_id) +        if switch_node is None: +            raise ExtractorError('Cannot find switch node') +        video_nodes = switch_node.findall( +            '{http://www.w3.org/2001/SMIL20/Language}video') + +        for vn in video_nodes: +            tbr = int_or_none(vn.attrib.get('system-bitrate')) +            furl = ( +                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' % +                (vn.attrib['src'])) +            if 'clipBegin' in vn.attrib: +                furl += '&ssek=' + vn.attrib['clipBegin'] +            formats.append({ +                'url': furl, +                'format_id': 'smil_%d' % tbr, +                'ext': 'flv', +                'tbr': tbr, +                'preference': -1000, +            }) +        return formats +      def _extract_video_info(self, video_data): -        video_url = ( -            video_data.get('progressive_url_hd') or -            video_data.get('progressive_url') +        video_id = compat_str(video_data['id']) + +        FORMAT_KEYS = ( +            ('sd', 'progressive_url'), +            ('hd', 'progressive_url_hd'),          ) +        formats = [{ +            'format_id': format_id, +            'url': video_data[key], +            'quality': i + 1, +        } for i, (format_id, key) in enumerate(FORMAT_KEYS) +            if video_data.get(key)] + +        smil_url = video_data.get('smil_url') +        if smil_url: +            formats.extend(self._parse_smil(video_id, smil_url)) +        self._sort_formats(formats) +          return { -            'id': compat_str(video_data['id']), -            'url': video_url, +            'id': video_id, +            'formats': formats,              'title': video_data['caption'], -            'thumbnail': video_data['thumbnail_url'], +            'thumbnail': video_data.get('thumbnail_url'),              'upload_date': video_data['updated_at'].replace('-', '')[:8], +            'like_count': video_data.get('likes', {}).get('total'), +            'view_count': video_data.get('views'),          }      def _real_extract(self, url): @@ -46,17 +111,28 @@ class LivestreamIE(InfoExtractor):          event_name = mobj.group('event_name')          webpage = self._download_webpage(url, video_id or event_name) -        if video_id is None: -            # This is an event page: +        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None) +        if og_video is None:              config_json = self._search_regex(                  r'window.config = ({.*?});', webpage, 'window config')              info = json.loads(config_json)['event'] + +            def is_relevant(vdata, vid): +                result = vdata['type'] == 'video' +                if video_id is not None: +                    result = result and compat_str(vdata['data']['id']) == vid +                return result +              videos = [self._extract_video_info(video_data['data']) -                for video_data in info['feed']['data'] -                if video_data['type'] == 'video'] -            return self.playlist_result(videos, info['id'], info['full_name']) +                      for video_data in info['feed']['data'] +                      if is_relevant(video_data, video_id)] +            if video_id is None: +                # This is an event page: +                return self.playlist_result(videos, info['id'], info['full_name']) +            else: +                if videos: +                    return videos[0]          else: -            og_video = self._og_search_video_url(webpage, 'player url')              query_str = compat_urllib_parse_urlparse(og_video).query              query = compat_urlparse.parse_qs(query_str)              api_url = query['play_url'][0].replace('.smil', '')  | 
