diff options
| -rw-r--r-- | README.md | 6 | ||||
| -rw-r--r-- | youtube-dl.plugin.zsh | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/collegehumor.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/ro220.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/spiegel.py | 37 | ||||
| -rw-r--r-- | youtube_dl/extractor/ustream.py | 48 | ||||
| -rw-r--r-- | youtube_dl/extractor/vevo.py | 39 | 
7 files changed, 83 insertions, 75 deletions
| @@ -325,7 +325,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz  To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). -# BUILD INSTRUCTIONS +# DEVELOPER INSTRUCTIONS  Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. @@ -347,6 +347,10 @@ If you want to create a build of youtube-dl yourself, you'll need  * zip  * nosetests +### Adding support for a new site + +If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor` and add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). +  # BUGS  Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. 
diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh index 2a1d7527e..4edab5214 100644 --- a/youtube-dl.plugin.zsh +++ b/youtube-dl.plugin.zsh @@ -18,6 +18,7 @@  # code is documented here:  # https://github.com/zsh-users/antigen#notes-on-writing-plugins -# This specific script just adds the downloaded folder to the end of the $PATH, -# which allows the contained youtube-dl executable to be found. -export PATH=${PATH}:$(dirname $0) +# This specific script just aliases youtube-dl to the python script that this +# library provides. This requires updating the PYTHONPATH to ensure that the +# full set of code can be located. +alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl" diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 026d023a0..1db27026a 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):              'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',              'uploader': 'Funnyplox TV',              'uploader_id': 'funnyploxtv', -            'description': 'md5:b20fc87608e2837596bbc8df85a3c34d', +            'description': 'md5:7e8899d3f749db50fa089eb243cba17f',              'upload_date': '20140128',          },          'params': { diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py index c32f64d99..4678f62df 100644 --- a/youtube_dl/extractor/ro220.py +++ b/youtube_dl/extractor/ro220.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):      IE_NAME = '220.ro'      _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'      _TEST = { -        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", -        u'file': u'LYV6doKo7f.mp4', -        u'md5': 
u'03af18b73a07b4088753930db7a34add', -        u'info_dict': { -            u"title": u"Luati-le Banii sez 4 ep 1", -            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", +        "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", +        'file': 'LYV6doKo7f.mp4', +        'md5': '03af18b73a07b4088753930db7a34add', +        'info_dict': { +            "title": "Luati-le Banii sez 4 ep 1", +            "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",          }      } @@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          flashVars_str = self._search_regex(              r'<param name="flashVars" value="([^"]+)"', -            webpage, u'flashVars') +            webpage, 'flashVars')          flashVars = compat_parse_qs(flashVars_str) -        info = { +        return {              '_type': 'video',              'id': video_id,              'ext': 'mp4', @@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):              'description': clean_html(flashVars['desc'][0]),              'thumbnail': flashVars['preview'][0],          } -        return info diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 051a34d5b..9156d7faf 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -6,20 +8,20 @@ from .common import InfoExtractor  class SpiegelIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'      _TESTS = [{ -        u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', -        u'file': u'1259285.mp4', -        u'md5': u'2c2754212136f35fb4b19767d242f66e', -        
u'info_dict': { -            u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" -        } +        'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', +        'file': '1259285.mp4', +        'md5': '2c2754212136f35fb4b19767d242f66e', +        'info_dict': { +            'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', +        },      },      { -        u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', -        u'file': u'1309159.mp4', -        u'md5': u'f2cdf638d7aa47654e251e1aee360af1', -        u'info_dict': { -            u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' -        } +        'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', +        'file': '1309159.mp4', +        'md5': 'f2cdf638d7aa47654e251e1aee360af1', +        'info_dict': { +            'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', +        },      }]      def _real_extract(self, url): @@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          video_title = self._html_search_regex( -            r'<div class="module-title">(.*?)</div>', webpage, u'title') +            r'<div class="module-title">(.*?)</div>', webpage, 'title') -        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' +        xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'          idoc = self._download_xml(              xml_url, video_id, -            note=u'Downloading XML', errnote=u'Failed to download XML') +            note='Downloading XML', errnote='Failed to download XML')          formats = [              {                  'format_id': n.tag.rpartition('type')[2], -                'url': u'http://video2.spiegel.de/flash/' + 
n.find('./filename').text, +                'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,                  'width': int(n.find('./width').text),                  'height': int(n.find('./height').text),                  'abr': int(n.find('./audiobitrate').text), @@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):          self._sort_formats(formats) -        info = { +        return {              'id': video_id,              'title': video_title,              'duration': duration,              'formats': formats,          } -        return info diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 74c82587f..7fa2b9e15 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import json  import re @@ -10,48 +12,48 @@ from ..utils import (  class UstreamIE(InfoExtractor):      _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' -    IE_NAME = u'ustream' +    IE_NAME = 'ustream'      _TEST = { -        u'url': u'http://www.ustream.tv/recorded/20274954', -        u'file': u'20274954.flv', -        u'md5': u'088f151799e8f572f84eb62f17d73e5c', -        u'info_dict': { -            u"uploader": u"Young Americans for Liberty",  -            u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM" -        } +        'url': 'http://www.ustream.tv/recorded/20274954', +        'file': '20274954.flv', +        'md5': '088f151799e8f572f84eb62f17d73e5c', +        'info_dict': { +            "uploader": "Young Americans for Liberty", +            "title": "Young Americans for Liberty February 7, 2012 2:28 AM", +        },      }      def _real_extract(self, url):          m = re.match(self._VALID_URL, url)          video_id = m.group('videoID') -        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id +        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id          webpage = 
self._download_webpage(url, video_id)          self.report_extraction(video_id)          video_title = self._html_search_regex(r'data-title="(?P<title>.+)"', -            webpage, u'title') +            webpage, 'title')          uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', -            webpage, u'uploader', fatal=False, flags=re.DOTALL) +            webpage, 'uploader', fatal=False, flags=re.DOTALL)          thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"', -            webpage, u'thumbnail', fatal=False) - -        info = { -                'id': video_id, -                'url': video_url, -                'ext': 'flv', -                'title': video_title, -                'uploader': uploader, -                'thumbnail': thumbnail, -               } -        return info +            webpage, 'thumbnail', fatal=False) + +        return { +            'id': video_id, +            'url': video_url, +            'ext': 'flv', +            'title': video_title, +            'uploader': uploader, +            'thumbnail': thumbnail, +        } +  class UstreamChannelIE(InfoExtractor):      _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' -    IE_NAME = u'ustream:channel' +    IE_NAME = 'ustream:channel'      def _real_extract(self, url):          m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index a4b26a26f..f0673972c 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  import json  import xml.etree.ElementTree @@ -22,16 +24,16 @@ class VevoIE(InfoExtractor):             vevo:)          (?P<id>[^&?#]+)'''      _TESTS = [{ -        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', -        u'file': u'GB1101300280.mp4', -        u"md5": u"06bea460acb744eab74a9d7dcb4bfd61", -        
u'info_dict': { -            u"upload_date": u"20130624", -            u"uploader": u"Hurts", -            u"title": u"Somebody to Die For", -            u"duration": 230.12, -            u"width": 1920, -            u"height": 1080, +        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', +        'file': 'GB1101300280.mp4', +        "md5": "06bea460acb744eab74a9d7dcb4bfd61", +        'info_dict': { +            "upload_date": "20130624", +            "uploader": "Hurts", +            "title": "Somebody to Die For", +            "duration": 230.12, +            "width": 1920, +            "height": 1080,          }      }]      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' @@ -44,7 +46,7 @@ class VevoIE(InfoExtractor):                  if version['version'] > last_version['version']:                      last_version = version          if last_version['version'] == -1: -            raise ExtractorError(u'Unable to extract last version of the video') +            raise ExtractorError('Unable to extract last version of the video')          renditions = xml.etree.ElementTree.fromstring(last_version['data'])          formats = [] @@ -85,7 +87,7 @@ class VevoIE(InfoExtractor):              format_url = self._SMIL_BASE_URL + m.group('path')              formats.append({                  'url': format_url, -                'format_id': u'SMIL_' + m.group('cbr'), +                'format_id': 'SMIL_' + m.group('cbr'),                  'vcodec': m.group('vcodec'),                  'acodec': m.group('acodec'),                  'vbr': int(m.group('vbr')), @@ -101,26 +103,25 @@ class VevoIE(InfoExtractor):          video_id = mobj.group('id')          json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id -        info_json = self._download_webpage(json_url, video_id, u'Downloading json info') -        video_info = json.loads(info_json)['video'] +        video_info = self._download_json(json_url, 
video_id)['video']          formats = self._formats_from_json(video_info)          try:              smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (                  self._SMIL_BASE_URL, video_id, video_id.lower())              smil_xml = self._download_webpage(smil_url, video_id, -                                              u'Downloading SMIL info') +                                              'Downloading SMIL info')              formats.extend(self._formats_from_smil(smil_xml))          except ExtractorError as ee:              if not isinstance(ee.cause, compat_HTTPError):                  raise              self._downloader.report_warning( -                u'Cannot download SMIL information, falling back to JSON ..') +                'Cannot download SMIL information, falling back to JSON ..')          timestamp_ms = int(self._search_regex( -            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) +            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))          upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) -        info = { +        return {              'id': video_id,              'title': video_info['title'],              'formats': formats, @@ -129,5 +130,3 @@ class VevoIE(InfoExtractor):              'uploader': video_info['mainArtists'][0]['artistName'],              'duration': video_info['duration'],          } - -        return info | 
