Basic support for TED

author: Jaime Marquínez Ferrándiz <jaimemf93@gmail.com> 2013-02-17 17:13:06 +0100
committer: Jaime Marquínez Ferrándiz <jaimemf93@gmail.com> 2013-02-17 17:13:06 +0100
commit: 3a468f2d8b0261d4f45a7c5837f54edc33acdd8f (patch)
tree: b76942ae479fea79231b0f5e461ffc19d1028501
parent: 906417c7c586218c5a61f9966ce7d67528f97b69 (diff)
2 files changed, 34 insertions, 0 deletions
diff --git a/test/tests.json b/test/tests.json
index 5c46af2c8..a6782ed4c 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -286,5 +286,14 @@
       "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
     }
 
+  },
+  {
+    "name": "TED",
+    "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
+    "file": "102.mp4",
+    "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
+    "info_dict": {
+        "title": "Dan Dennett: The illusion of consciousness"
+    }
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index ac69f82fe..742b036d3 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3967,6 +3967,30 @@ class KeekIE(InfoExtractor):
                 'uploader': uploader
         }
         return [info]
+        
+class TEDIE(InfoExtractor):
+    """Information extractor for talk pages on www.ted.com."""
+    _VALID_URL = r'http://www\.ted\.com/talks/(?P<videoName>\w+)'
+
+    def _real_extract(self, url):
+        """Return a one-item list with the info dict (id, url, ext, title) for the talk at url."""
+        video_name = re.match(self._VALID_URL, url).group('videoName')
+        webpage = self._download_webpage(url, 0, 'Downloading \"%s\" page' % video_name)
+        # NOTE(review): the re.search results below are not checked; a changed page layout surfaces as AttributeError.
+        # If the url includes the language we get the title translated
+        title_re = r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-]*)</span></h1>'
+        title = re.search(title_re, webpage).group('title')
+        # Verbose pattern (literal spaces escaped): the id and media slug come from the inline talkDetails script.
+        info_re = r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
+                        "id":(?P<videoID>[\d]+).*?
+                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
+        details = re.search(info_re, webpage, re.VERBOSE)
+        return [{
+            'id': details.group('videoID'),
+            'url': 'http://download.ted.com/talks/%s.mp4' % details.group('mediaSlug'),
+            'ext': 'mp4',
+            'title': title,
+        }]
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4015,6 +4039,7 @@ def gen_extractors():
         RBMARadioIE(),
         EightTracksIE(),
         KeekIE(),
+        TEDIE(),
         GenericIE()
     ]
author	Jaime Marquínez Ferrándiz <jaimemf93@gmail.com>	2013-02-17 17:13:06 +0100
committer	Jaime Marquínez Ferrándiz <jaimemf93@gmail.com>	2013-02-17 17:13:06 +0100
commit	3a468f2d8b0261d4f45a7c5837f54edc33acdd8f (patch)
tree	b76942ae479fea79231b0f5e461ffc19d1028501
parent	906417c7c586218c5a61f9966ce7d67528f97b69 (diff)