diff options
| -rw-r--r-- | test/tests.json | 18 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 82 | 
2 files changed, 100 insertions, 0 deletions
diff --git a/test/tests.json b/test/tests.json
index 5c46af2c8..a3c31ae51 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -286,5 +286,23 @@
       "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
     }
+  },
+  {
+    "name": "TED",
+    "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
+    "file": "102.mp4",
+    "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
+    "info_dict": {
+        "title": "Dan Dennett: The illusion of consciousness"
+    }
+  },
+  {
+    "name": "MySpass",
+    "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/",
+    "file": "11741.mp4",
+    "md5": "0b49f4844a068f8b33f4b7c88405862b",
+    "info_dict": {
+        "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
+    }
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index ac69f82fe..fe9bd97d0 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3968,6 +3968,86 @@ class KeekIE(InfoExtractor):
         }
         return [info]
 
+class TEDIE(InfoExtractor):
+    _VALID_URL=r'http://www.ted.com/talks/(?P<videoName>\w+)'
+    def _real_extract(self, url):
+        m=re.match(self._VALID_URL, url)
+        videoName=m.group('videoName')
+        webpage=self._download_webpage(url, 0, 'Downloading \"%s\" page' % videoName)
+        #If the url includes the language we get the title translated
+        title_RE=r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-\\\']*)</span></h1>'
+        title=re.search(title_RE, webpage).group('title')
+        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
+                        "id":(?P<videoID>[\d]+).*?
+                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
+        info_match=re.search(info_RE,webpage,re.VERBOSE)
+        video_id=info_match.group('videoID')
+        mediaSlug=info_match.group('mediaSlug')
+        video_url='http://download.ted.com/talks/%s.mp4' % mediaSlug
+        info = {
+                'id':video_id,
+                'url':video_url,
+                'ext': 'mp4',
+                'title': title
+        }
+        return [info]
+
+class MySpassIE(InfoExtractor):
+    _VALID_URL = r'http://www.myspass.de/.*'
+    
+    def _real_extract(self, url):
+        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
+
+        # video id is the last path element of the URL
+        # usually there is a trailing slash, so also try the second but last
+        url_path = compat_urllib_parse_urlparse(url).path
+        url_parent_path, video_id = os.path.split(url_path)
+        if not video_id:
+            _, video_id = os.path.split(url_parent_path)
+        
+        # get metadata
+        metadata_url = META_DATA_URL_TEMPLATE % video_id
+        metadata_text = self._download_webpage(metadata_url, video_id)
+        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
+        
+        # extract values from metadata
+        url_flv_el = metadata.find('url_flv')
+        if url_flv_el is None:
+            self._downloader.trouble(u'ERROR: unable to extract download url')
+            return
+        video_url = url_flv_el.text
+        extension = os.path.splitext(video_url)[1][1:]
+        title_el = metadata.find('title')
+        if title_el is None:
+            self._downloader.trouble(u'ERROR: unable to extract title')
+            return
+        title = title_el.text
+        format_id_el = metadata.find('format_id')
+        if format_id_el is None:
+            format = extension  # no <format_id> element: fall back to the file extension
+        else:
+            format = format_id_el.text
+        description_el = metadata.find('description')
+        if description_el is not None:
+            description = description_el.text
+        else:
+            description = None
+        imagePreview_el = metadata.find('imagePreview')
+        if imagePreview_el is not None:
+            thumbnail = imagePreview_el.text
+        else:
+            thumbnail = None
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': extension,
+            'format': format,
+            'thumbnail': thumbnail,
+            'description': description
+        }
+        return [info]
+
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
@@ -4015,6 +4095,8 @@ def gen_extractors():
         RBMARadioIE(),
         EightTracksIE(),
         KeekIE(),
+        TEDIE(),
+        MySpassIE(),
         GenericIE()
     ]
 
