-rw-r--r--  devscripts/youtube_genalgo.py        |  4
-rw-r--r--  test/test_playlists.py               | 10
-rw-r--r--  youtube_dl/YoutubeDL.py              |  2
-rw-r--r--  youtube_dl/extractor/__init__.py     |  1
-rw-r--r--  youtube_dl/extractor/canalplus.py    | 22
-rw-r--r--  youtube_dl/extractor/gamespot.py     |  2
-rw-r--r--  youtube_dl/extractor/kickstarter.py  | 37
-rw-r--r--  youtube_dl/extractor/ustream.py      | 22
-rw-r--r--  youtube_dl/extractor/youtube.py      |  2
9 files changed, 77 insertions, 25 deletions
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 6e3595366..b390c7e2e 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -32,9 +32,9 @@ tests = [
     # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
-    # 82 - vflZK4ZYR 2013/08/23
+    # 82 - vflGNjMhJ 2013/09/12
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
+     ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
     # 81 - vflLC8JvQ 2013/07/25
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 65de3a55c..4a2e00b01 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -8,7 +8,7 @@ import json
 import os
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -34,5 +34,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Vimeo Tributes')
         self.assertTrue(len(result['entries']) > 24)
 
+    def test_ustream_channel(self):
+        dl = FakeYDL()
+        ie = UstreamChannelIE(dl)
+        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'5124905')
+        self.assertTrue(len(result['entries']) >= 11)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b289bd9e2..c2f992b8e 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -492,6 +492,8 @@ class YoutubeDL(object):
                 self.report_writedescription(descfn)
                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                     descfile.write(info_dict['description'])
+            except (KeyError, TypeError):
+                self.report_warning(u'There\'s no description to write.')
             except (OSError, IOError):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a7cddef73..06f9542d2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -52,6 +52,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
+from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 1f02519a0..1db9b24cf 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
 import re
 import xml.etree.ElementTree
 
@@ -5,24 +6,29 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 
 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'
 
     _TEST = {
-        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
-        u'file': u'889861.flv',
-        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
+        u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
+        u'file': u'922470.flv',
         u'info_dict': {
-            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
-            u'upload_date': u'20130620',
+            u'title': u'Zapping - 26/08/13',
+            u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
+            u'upload_date': u'20130826',
+        },
+        u'params': {
+            u'skip_download': True,
         },
-        u'skip': u'Requires rtmpdump'
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        if video_id is None:
+            webpage = self._download_webpage(url, mobj.group('path'))
+            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
         info_url = self._VIDEO_INFO_TEMPLATE % video_id
         info_page = self._download_webpage(info_url,video_id,
                                            u'Downloading video info')
@@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor):
                 'ext': 'flv',
                 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
                 'thumbnail': media.find('IMAGES/GRAND').text,
+                'description': infos.find('DESCRIPTION').text,
+                'view_count': int(infos.find('NB_VUES').text),
                 }
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 7585b7061..cd3bbe65f 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor):
         u"file": u"6410818.mp4",
         u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
         u"info_dict": {
-            u"title": u"Arma III - Community Guide: SITREP I",
+            u"title": u"Arma 3 - Community Guide: SITREP I",
             u"upload_date": u"20130627",
         }
     }
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
new file mode 100644
index 000000000..50bc883ef
--- /dev/null
+++ b/youtube_dl/extractor/kickstarter.py
@@ -0,0 +1,37 @@
+import re
+
+from .common import InfoExtractor
+
+
+class KickStarterIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _TEST = {
+        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
+        u"file": u"1404461844.mp4",
+        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
+        u"info_dict": {
+            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+        webpage_src = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(r'data-video="(.*?)">',
+            webpage_src, u'video URL')
+        if 'mp4' in video_url:
+            ext = 'mp4'
+        else:
+            ext = 'flv'
+        video_title = self._html_search_regex(r"<title>(.*?)</title>",
+            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
+
+        results = [{
+                    'id': video_id,
+                    'url': video_url,
+                    'title': video_title,
+                    'ext': ext,
+                    }]
+        return results
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 16cdcc765..f69b27d44 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -1,9 +1,11 @@
-from HTMLParser import HTMLParser
 import json
 import re
-from urlparse import urljoin
 
 from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    compat_html_parser,
+)
 
 
 class UstreamIE(InfoExtractor):
@@ -49,7 +51,7 @@
 
 
 # More robust than regular expressions
-class ChannelParser(HTMLParser):
+class ChannelParser(compat_html_parser.HTMLParser):
     """
     <meta name="ustream:channel_id" content="1234">
     """
@@ -65,13 +67,13 @@
         if value.isdigit():
             self.channel_id = value
 
-class SocialstreamParser(HTMLParser):
+class SocialstreamParser(compat_html_parser.HTMLParser):
     """
     <li class="content123 video" data-content-id="123" data-length="1452"
         data-href="/recorded/123" data-og-url="/recorded/123">
     """
     def __init__(self):
-        HTMLParser.__init__(self)
+        compat_html_parser.HTMLParser.__init__(self)
         self.content_ids = []
 
     def handle_starttag(self, tag, attrs):
@@ -88,8 +90,6 @@ class UstreamChannelIE(InfoExtractor):
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         slug = m.group('slug')
-        # Slugs can be non-ascii, but youtube-dl can't handle non-ascii command lines,
-        # so if we got this far it's probably percent encoded and we needn't worry.
 
         p = ChannelParser()
         p.feed(self._download_webpage(url, slug))
@@ -100,16 +100,12 @@
         BASE = 'http://www.ustream.tv'
         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
         while next_url:
-            reply = json.loads(self._download_webpage(urljoin(BASE, next_url), channel_id))
+            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
             p.feed(reply['data'])
             next_url = reply['nextUrl']
         p.close()
         video_ids = p.content_ids
 
-        # From YoutubeChannelIE
-
-        self._downloader.to_screen(u'[ustream] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
         urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
         url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
-        return [self.playlist_result(url_entries, channel_id)]
+        return self.playlist_result(url_entries, channel_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2e0d70eaf..f49665925 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -434,7 +434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         elif len(s) == 83:
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
+            return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
         elif len(s) == 81:
             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
         elif len(s) == 80:
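Not part of the commit itself: a minimal sketch of how the reworked UstreamChannelIE result could be consumed from the Python API, assuming this revision of youtube-dl is importable. The channel URL is the one used by the new test_ustream_channel test above; the expected values in the comments come from that test, and resolving the entries hits the network once per recorded video.

    # Illustrative only, not part of the diff. UstreamChannelIE now returns the
    # playlist dict directly (it is no longer wrapped in a one-element list), so
    # the result can be inspected like any other info_dict.
    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'skip_download': True})
    ydl.add_default_info_extractors()

    result = ydl.extract_info(
        'http://www.ustream.tv/channel/young-americans-for-liberty',
        download=False)

    print(result['id'])            # channel id, u'5124905' per the new test
    print(len(result['entries']))  # one entry per recorded video on the channel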
