diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-01-05 04:30:00 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-01-05 04:30:19 +0100 | 
| commit | ca9e79225323a2c18ebc2e8cbbaaba1bd11a704c (patch) | |
| tree | b48d320bb78fae05d3006891c569212c3427176f | |
| parent | aff24732b96b5ec89cb41a05fd132e12c5990caf (diff) | |
[cspan] Use HTTP download (Fixes #2098)
| -rw-r--r-- | youtube_dl/extractor/cspan.py | 65 | 
1 files changed, 31 insertions, 34 deletions
```diff
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index d5730684d..a2cbd4d8d 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -1,20 +1,25 @@
+from __future__ import unicode_literals
+
+import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_urllib_parse,
+    unescapeHTML,
 )
 
+
 class CSpanIE(InfoExtractor):
     _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
+    IE_DESC = 'C-SPAN'
     _TEST = {
-        u'url': u'http://www.c-spanvideo.org/program/HolderonV',
-        u'file': u'315139.flv',
-        u'md5': u'74a623266956f69e4df0068ab6c80fe4',
-        u'info_dict': {
-            u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
+        'url': 'http://www.c-spanvideo.org/program/HolderonV',
+        'file': '315139.mp4',
+        'md5': '8e44ce11f0f725527daccc453f553eb0',
+        'info_dict': {
+            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
+            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
         },
-        u'skip': u'Requires rtmpdump'
     }
 
     def _real_extract(self, url):
@@ -22,30 +27,22 @@ class CSpanIE(InfoExtractor):
         prog_name = mobj.group(1)
         webpage = self._download_webpage(url, prog_name)
         video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
-        data = compat_urllib_parse.urlencode({'programid': video_id,
-                                              'dynamic':'1'})
-        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
-        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
-
-        self.report_extraction(video_id)
-
-        title = self._html_search_regex(r'<string name="title">(.*?)</string>',
-                                        video_info, 'title')
-        description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
-                                              webpage, 'description',
-                                              flags=re.MULTILINE|re.DOTALL)
-
-        url = self._search_regex(r'<string name="URL">(.*?)</string>',
-                                 video_info, 'video url')
-        url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
-        path = self._search_regex(r'<string name="path">(.*?)</string>',
-                            video_info, 'rtmp play path')
-
-        return {'id': video_id,
-                'title': title,
-                'ext': 'flv',
-                'url': url,
-                'play_path': path,
-                'description': description,
-                'thumbnail': self._og_search_thumbnail(webpage),
-                }
+
+        title = self._html_search_regex(
+            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
+        description = self._og_search_description(webpage)
+
+        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
+        data_json = self._download_webpage(
+            info_url, video_id, 'Downloading video info')
+        data = json.loads(data_json)
+
+        url = unescapeHTML(data['video']['files'][0]['path']['#text'])
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'description': description,
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
```
