diff options
| author | Jaime Marquínez Ferrándiz <jaimemf93@gmail.com> | 2013-02-17 17:13:06 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaimemf93@gmail.com> | 2013-02-17 17:13:06 +0100 | 
| commit | 3a468f2d8b0261d4f45a7c5837f54edc33acdd8f (patch) | |
| tree | b76942ae479fea79231b0f5e461ffc19d1028501 | |
| parent | 906417c7c586218c5a61f9966ce7d67528f97b69 (diff) | |
Basic support for TED
| -rw-r--r-- | test/tests.json | 9 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 25 | 
2 files changed, 34 insertions, 0 deletions
```diff
diff --git a/test/tests.json b/test/tests.json
index 5c46af2c8..a6782ed4c 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -286,5 +286,14 @@
       "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
     }
+  },
+  {
+    "name": "TED",
+    "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
+    "file": "102.mp4",
+    "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
+    "info_dict": {
+        "title": "Dan Dennett: The illusion of consciousness"
+    }
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index ac69f82fe..742b036d3 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3967,6 +3967,30 @@ class KeekIE(InfoExtractor):
                 'uploader': uploader
         }
         return [info]
+        
+class TEDIE(InfoExtractor):
+    _VALID_URL=r'http://www.ted.com/talks/(?P<videoName>\w+)'
+    def _real_extract(self, url):
+        m=re.match(self._VALID_URL, url)
+        videoName=m.group('videoName')
+        webpage=self._download_webpage(url, 0, 'Downloading \"%s\" page' % videoName)
+        #If the url includes the language we get the title translated
+        title_RE=r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-]*)</span></h1>'
+        title=re.search(title_RE, webpage).group('title')
+        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
+                        "id":(?P<videoID>[\d]+).*? 
+                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
+        info_match=re.search(info_RE,webpage,re.VERBOSE)
+        video_id=info_match.group('videoID')
+        mediaSlug=info_match.group('mediaSlug')
+        video_url='http://download.ted.com/talks/%s.mp4' % mediaSlug
+        info = {
+                'id':video_id,
+                'url':video_url,
+                'ext': 'mp4',
+                'title': title
+        }
+        return [info]
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4015,6 +4039,7 @@ def gen_extractors():
         RBMARadioIE(),
         EightTracksIE(),
         KeekIE(),
+        TEDIE(),
         GenericIE()
     ]
```
