 setup.py                              |  24
 test/test_youtube_lists.py            |   2
 youtube_dl/YoutubeDL.py               |   6
 youtube_dl/__init__.py                |   1
 youtube_dl/downloader/__init__.py     |   7
 youtube_dl/downloader/common.py       |   1
 youtube_dl/downloader/http.py         |   6
 youtube_dl/downloader/mplayer.py      |   4
 youtube_dl/extractor/__init__.py      |   5
 youtube_dl/extractor/common.py        |   7
 youtube_dl/extractor/condenast.py     |   2
 youtube_dl/extractor/freespeech.py    |  37
 youtube_dl/extractor/generic.py       |   8
 youtube_dl/extractor/hotnewhiphop.py  |   2
 youtube_dl/extractor/huffpost.py      |  82
 youtube_dl/extractor/la7.py           |  62
 youtube_dl/extractor/malemotion.py    |  58
 youtube_dl/extractor/rtlnow.py        | 150
 youtube_dl/extractor/tumblr.py        |  24
 youtube_dl/extractor/vimeo.py         |  10
 youtube_dl/extractor/websurg.py       |  59
 youtube_dl/extractor/youtube.py       |  19
 youtube_dl/version.py                 |   2
 23 files changed, 398 insertions(+), 180 deletions(-)
| @@ -3,7 +3,9 @@  from __future__ import print_function +import os.path  import pkg_resources +import warnings  import sys  try: @@ -44,12 +46,24 @@ py2exe_params = {  if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':      params = py2exe_params  else: +    files_spec = [ +        ('etc/bash_completion.d', ['youtube-dl.bash-completion']), +        ('share/doc/youtube_dl', ['README.txt']), +        ('share/man/man1', ['youtube-dl.1']) +    ] +    root = os.path.dirname(os.path.abspath(__file__)) +    data_files = [] +    for dirname, files in files_spec: +        resfiles = [] +        for fn in files: +            if not os.path.exists(fn): +                warnings.warn('Skipping file %s since it is not present. Type  make  to build all automatically generated files.' % fn) +            else: +                resfiles.append(fn) +        data_files.append((dirname, resfiles)) +      params = { -        'data_files': [  # Installing system-wide would require sudo... -            ('etc/bash_completion.d', ['youtube-dl.bash-completion']), -            ('share/doc/youtube_dl', ['README.txt']), -            ('share/man/man1', ['youtube-dl.1']) -        ] +        'data_files': data_files,      }      if setuptools_available:          params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index d9fe5af4e..de157f657 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -120,7 +120,7 @@ class TestYoutubeLists(unittest.TestCase):      def test_youtube_toplist(self):          dl = FakeYDL()          ie = YoutubeTopListIE(dl) -        result = ie.extract('yttoplist:music:Top Tracks') +        result = ie.extract('yttoplist:music:Trending')          entries = result['entries']          self.assertTrue(len(entries) >= 5) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1e94d8ac6..42cbcf699 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -331,7 +331,7 @@ class YoutubeDL(object):      def __exit__(self, *args):          self.restore_console_title() -     +          if self.params.get('cookiefile') is not None:              self.cookiejar.save() @@ -710,10 +710,10 @@ class YoutubeDL(object):          # TODO Central sorting goes here -        if formats[0] is not info_dict:  +        if formats[0] is not info_dict:              # only set the 'formats' fields if the original info_dict list them              # otherwise we end up with a circular reference, the first (and unique) -            # element in the 'formats' field in info_dict is info_dict itself,  +            # element in the 'formats' field in info_dict is info_dict itself,              # wich can't be exported to json              info_dict['formats'] = formats          if self.params.get('listformats', None): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 294fccb44..08cf2f934 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -40,6 +40,7 @@ __authors__  = (      'Michael Orlitzky',      'Chris Gahan',      'Saimadhav Heblikar', +    'Mike Col',  )  __license__ = 'Public Domain' diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f19b490f1..aaa92bc75 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  from .common import FileDownloader  from .hls import HlsFD  from .http import HttpFD @@ -8,16 +10,17 @@ from ..utils 
import (      determine_ext,  ) +  def get_suitable_downloader(info_dict):      """Get the downloader class that can handle the info dict."""      url = info_dict['url'] +    protocol = info_dict.get('protocol')      if url.startswith('rtmp'):          return RtmpFD -    if determine_ext(url) == u'm3u8': +    if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):          return HlsFD      if url.startswith('mms') or url.startswith('rtsp'):          return MplayerFD      else:          return HttpFD - diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 10143d56a..5a068aa8b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -314,4 +314,3 @@ class FileDownloader(object):          if the download is successful.          """          self._progress_hooks.append(ph) - diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 8407727ba..748f9f3ad 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -27,7 +27,7 @@ class HttpFD(FileDownloader):          request = compat_urllib_request.Request(url, None, headers)          if self.params.get('test', False): -            request.add_header('Range','bytes=0-10240') +            request.add_header('Range', 'bytes=0-10240')          # Establish possible resume length          if os.path.isfile(encodeFilename(tmpfilename)): @@ -39,7 +39,7 @@ class HttpFD(FileDownloader):          if resume_len != 0:              if self.params.get('continuedl', False):                  self.report_resuming_byte(resume_len) -                request.add_header('Range','bytes=%d-' % resume_len) +                request.add_header('Range', 'bytes=%d-' % resume_len)                  open_mode = 'ab'              else:                  resume_len = 0 @@ -100,7 +100,7 @@ class HttpFD(FileDownloader):          if data_len is not None:              data_len = int(data_len) + resume_len              min_data_len = self.params.get("min_filesize", None) -            max_data_len =  self.params.get("max_filesize", None) +            max_data_len = self.params.get("max_filesize", None)              if min_data_len is not None and data_len < min_data_len:                  self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))                  return False diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 67e0e4189..4de7f15f4 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -18,10 +18,10 @@ class MplayerFD(FileDownloader):          try:              subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)          except (OSError, IOError): -            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) +            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])              return False -        # Download using mplayer.  +        # Download using mplayer.          
retval = subprocess.call(args)          if retval == 0:              fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 80fc1f6ae..b8d635c30 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -73,6 +73,7 @@ from .francetv import (      CultureboxIE,  )  from .freesound import FreesoundIE +from .freespeech import FreespeechIE  from .funnyordie import FunnyOrDieIE  from .gamekings import GamekingsIE  from .gamespot import GameSpotIE @@ -83,6 +84,7 @@ from .googlesearch import GoogleSearchIE  from .hark import HarkIE  from .hotnewhiphop import HotNewHipHopIE  from .howcast import HowcastIE +from .huffpost import HuffPostIE  from .hypem import HypemIE  from .ign import IGNIE, OneUPIE  from .imdb import ( @@ -106,6 +108,7 @@ from .keezmovies import KeezMoviesIE  from .khanacademy import KhanAcademyIE  from .kickstarter import KickStarterIE  from .keek import KeekIE +from .la7 import LA7IE  from .liveleak import LiveLeakIE  from .livestream import LivestreamIE, LivestreamOriginalIE  from .lynda import ( @@ -113,6 +116,7 @@ from .lynda import (      LyndaCourseIE  )  from .macgamestore import MacGameStoreIE +from .malemotion import MalemotionIE  from .mdr import MDRIE  from .metacafe import MetacafeIE  from .metacritic import MetacriticIE @@ -219,7 +223,6 @@ from .vine import VineIE  from .viki import VikiIE  from .vk import VKIE  from .wat import WatIE -from .websurg import WeBSurgIE  from .weibo import WeiboIE  from .wimp import WimpIE  from .wistia import WistiaIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index aa48bd4e6..f7478d459 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -71,7 +71,7 @@ class InfoExtractor(object):                      * player_url SWF Player URL (used for rtmpdump).                      * protocol   The protocol that will be used for the actual                                   download, lower-case. -                                 "http", "https", "rtsp", "rtmp" or so. +                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.                      * preference Order number of this format. If this field is                                   present and not None, the formats get sorted                                   by this field. 
@@ -240,7 +240,7 @@ class InfoExtractor(object):              except AttributeError:                  url = url_or_request              if len(url) > 200: -                h = u'___' + hashlib.md5(url).hexdigest() +                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()                  url = url[:200 - len(h)] + h              raw_filename = ('%s_%s.dump' % (video_id, url))              filename = sanitize_filename(raw_filename, restricted=True) @@ -466,6 +466,9 @@ class InfoExtractor(object):          return RATING_TABLE.get(rating.lower(), None)      def _sort_formats(self, formats): +        if not formats: +            raise ExtractorError(u'No video formats found') +          def _formats_key(f):              # TODO remove the following workaround              from ..utils import determine_ext diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 03b75b80d..91c1c1348 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -30,7 +30,7 @@ class CondeNastIE(InfoExtractor):          'vanityfair': 'Vanity Fair',      } -    _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) +    _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())      IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))      _TEST = { diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py new file mode 100644 index 000000000..c210177f7 --- /dev/null +++ b/youtube_dl/extractor/freespeech.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class FreespeechIE(InfoExtractor): +    IE_NAME = 'freespeech.org' +    _VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)' +    _TEST = { +        'add_ie': ['Youtube'], +        'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', +        'info_dict': { +            'id': 'poKsVCZ64uU', +            'ext': 'mp4', +            'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', +            'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', +            'uploader': 'freespeechtv', +            'uploader_id': 'freespeechtv', +            'upload_date': '20121002', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        title = mobj.group('title') +        webpage = self._download_webpage(url, title) +        info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') +        info = json.loads(info_json) + +        return { +            '_type': 'url', +            'url': info['jw_player']['basic_video_node_player']['file'], +            'ie_key': 'Youtube', +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e1933837d..829e5894f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -332,10 +332,16 @@ class GenericIE(InfoExtractor):          # Look for embedded Facebook player          mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage) +            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)          if mobj is not None:              return self.url_result(mobj.group('url'), 'Facebook') +   
     # Look for embedded Huffington Post player +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url'), 'HuffPost') +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None: diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index a106f81d2..80b48b1b3 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -13,7 +13,7 @@ from ..utils import (  class HotNewHipHopIE(InfoExtractor): -    _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html' +    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'      _TEST = {          'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',          'file': '1435540.mp3', diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py new file mode 100644 index 000000000..0d1ea6802 --- /dev/null +++ b/youtube_dl/extractor/huffpost.py @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    parse_duration, +    unified_strdate, +) + + +class HuffPostIE(InfoExtractor): +    IE_DESC = 'Huffington Post' +    _VALID_URL = r'''(?x) +        https?://(embed\.)?live\.huffingtonpost\.com/ +        (?: +            r/segment/[^/]+/| +            HPLEmbedPlayer/\?segmentId= +        ) +        (?P<id>[0-9a-f]+)''' + +    _TEST = { +        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677', +        'file': '52dd3e4b02a7602131000677.mp4', +        'md5': '55f5e8981c1c80a64706a44b74833de8', +        'info_dict': { +            'title': 'Legalese It! with @MikeSacksHP', +            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  
', +            'duration': 1549, +            'upload_date': '20140124', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id +        data = self._download_json(api_url, video_id)['data'] + +        video_title = data['title'] +        duration = parse_duration(data['running_time']) +        upload_date = unified_strdate(data['schedule']['starts_at']) +        description = data.get('description') + +        thumbnails = [] +        for url in data['images'].values(): +            m = re.match('.*-([0-9]+x[0-9]+)\.', url) +            if not m: +                continue +            thumbnails.append({ +                'url': url, +                'resolution': m.group(1), +            }) + +        formats = [{ +            'format': key, +            'format_id': key.replace('/', '.'), +            'ext': 'mp4', +            'url': url, +            'vcodec': 'none' if key.startswith('audio/') else None, +        } for key, url in data['sources']['live'].items()] +        if data.get('fivemin_id'): +            fid = data['fivemin_id'] +            fcat = str(int(fid) // 100 + 1) +            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4' +            formats.append({ +                'format': 'fivemin', +                'url': furl, +                'preference': 1, +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': video_title, +            'description': description, +            'formats': formats, +            'duration': duration, +            'upload_date': upload_date, +            'thumbnails': thumbnails, +        } diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py new file mode 100644 index 000000000..6d61f9a90 --- /dev/null +++ b/youtube_dl/extractor/la7.py @@ -0,0 +1,62 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    parse_duration, +) + + +class LA7IE(InfoExtractor): +    IE_NAME = 'la7.tv' +    _VALID_URL = r'''(?x) +        https?://(?:www\.)?la7\.tv/ +        (?: +            richplayer/\?assetid=| +            \?contentId= +        ) +        (?P<id>[0-9]+)''' + +    _TEST = { +        'url': 'http://www.la7.tv/richplayer/?assetid=50355319', +        'file': '50355319.mp4', +        'md5': 'ec7d1f0224d20ba293ab56cf2259651f', +        'info_dict': { +            'title': 'IL DIVO', +            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci', +            'duration': 6254, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id +        doc = self._download_xml(xml_url, video_id) + +        video_title = doc.find('title').text +        description = doc.find('description').text +        duration = parse_duration(doc.find('duration').text) +        thumbnail = doc.find('img').text +        view_count = int(doc.find('views').text) + +        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') + +        formats = [{ +            'format': vnode.find('quality').text, +            'tbr': int(vnode.find('quality').text), +            'url': 
vnode.find('fms').text.strip().replace('mp4:', prefix), +        } for vnode in doc.findall('.//videos/video')] +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': video_title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'formats': formats, +            'view_count': view_count, +        } diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py new file mode 100644 index 000000000..62e99091d --- /dev/null +++ b/youtube_dl/extractor/malemotion.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse, +) + +class MalemotionIE(InfoExtractor): +    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)' +    _TEST = { +        'url': 'http://malemotion.com/video/bien-dur.10ew', +        'file': '10ew.mp4', +        'md5': 'b3cc49f953b107e4a363cdff07d100ce', +        'info_dict': { +            "title": "Bien dur", +            "age_limit": 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group("id") + +        webpage = self._download_webpage(url, video_id) + +        self.report_extraction(video_id) + +        # Extract video URL +        video_url = compat_urllib_parse.unquote( +            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')) + +        # Extract title +        video_title = self._html_search_regex( +            r'<title>(.*?)</title', webpage, 'title') + +        # Extract video thumbnail +        video_thumbnail = self._search_regex( +            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False) + +        formats = [{ +            'url': video_url, +            'ext': 'mp4', +            'format_id': 'mp4', +            'preference': 1, +        }] + +        return { +            'id': video_id, +            'formats': formats, +            'uploader': None, +            'upload_date': None, +            'title': video_title, +            'thumbnail': video_thumbnail, +            'description': None, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index a43d6ced5..cd50f708d 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -1,4 +1,7 @@  # encoding: utf-8 + +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -12,78 +15,77 @@ class RTLnowIE(InfoExtractor):      """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""      _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'      _TESTS = [{ -        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', -        u'file': u'90419.flv', -        u'info_dict': { -            u'upload_date': u'20070416', -            u'title': u'Ahornallee - Folge 1 - Der Einzug', -            u'description': u'Folge 1 - Der Einzug', +        'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', +        'file': '90419.flv', +        'info_dict': { 
+            'upload_date': '20070416', +            'title': 'Ahornallee - Folge 1 - Der Einzug', +            'description': 'Folge 1 - Der Einzug',          }, -        u'params': { -            u'skip_download': True, +        'params': { +            'skip_download': True,          }, -        u'skip': u'Only works from Germany', +        'skip': 'Only works from Germany',      },      { -        u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', -        u'file': u'69756.flv', -        u'info_dict': { -            u'upload_date': u'20120519',  -            u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', -            u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', -            u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', +        'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', +        'file': '69756.flv', +        'info_dict': { +            'upload_date': '20120519', +            'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', +            'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', +            'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',          }, -        u'params': { -            u'skip_download': True, +        'params': { +            'skip_download': True,          }, -        u'skip': u'Only works from Germany', +        'skip': 'Only works from Germany',      },      { -        u'url': u'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', -        u'file': u'13883.flv', -        u'info_dict': { -            u'upload_date': u'20090627',  -            u'title': u'Voxtours - Südafrika-Reporter II', -            u'description': u'Südafrika-Reporter II', +        'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', +        'file': '13883.flv', +        'info_dict': { +            'upload_date': '20090627', +            'title': 'Voxtours - Südafrika-Reporter II', +            'description': 'Südafrika-Reporter II',          }, -        u'params': { -            u'skip_download': True, +        'params': { +            'skip_download': True,          },      },      { -        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', -        u'file': u'99205.flv', -        u'info_dict': { -            u'upload_date': u'20080928',  -            u'title': u'Medicopter 117 - Angst!', -            u'description': u'Angst!', -            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' +        'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', +        'file': '99205.flv', +        'info_dict': { +            'upload_date': '20080928',  +            'title': 'Medicopter 117 - Angst!', +            'description': 'Angst!', +            'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'          }, -        u'params': { -            u'skip_download': True, +        'params': { +            'skip_download': True,          },      },      { -        u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', -        u'file': u'124903.flv', -        u'info_dict': { -            u'upload_date': 
u'20130101',  -            u'title': u'Top Gear vom 01.01.2013', -            u'description': u'Episode 1', +        'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', +        'file': '124903.flv', +        'info_dict': { +            'upload_date': '20130101', +            'title': 'Top Gear vom 01.01.2013', +            'description': 'Episode 1',          }, -        u'params': { -            u'skip_download': True, +        'params': { +            'skip_download': True,          }, -        u'skip': u'Only works from Germany', +        'skip': 'Only works from Germany',      }] - -    def _real_extract(self,url): +    def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        webpage_url = u'http://' + mobj.group('url') -        video_page_url = u'http://' + mobj.group('domain') + u'/' -        video_id = mobj.group(u'video_id') +        webpage_url = 'http://' + mobj.group('url') +        video_page_url = 'http://' + mobj.group('domain') + '/' +        video_id = mobj.group('video_id')          webpage = self._download_webpage(webpage_url, video_id) @@ -94,51 +96,53 @@ class RTLnowIE(InfoExtractor):              msg = clean_html(note_m.group(1))              raise ExtractorError(msg) -        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', -            webpage, u'title') -        playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', -            webpage, u'playerdata_url') +        video_title = self._html_search_regex( +            r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', +            webpage, 'title') +        playerdata_url = self._html_search_regex( +            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', +            webpage, 'playerdata_url')          playerdata = self._download_webpage(playerdata_url, video_id)          mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)          if mobj: -            video_description = mobj.group(u'description') +            video_description = mobj.group('description')              if mobj.group('upload_date_Y'):                  video_upload_date = mobj.group('upload_date_Y')              elif mobj.group('upload_date_y'): -                video_upload_date = u'20' + mobj.group('upload_date_y') +                video_upload_date = '20' + mobj.group('upload_date_y')              else:                  video_upload_date = None              if video_upload_date: -                video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') +                video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d')          else:              video_description = None              video_upload_date = None -            self._downloader.report_warning(u'Unable to extract description and upload date') +            self._downloader.report_warning('Unable to extract description and upload date')          # Thumbnail: not every video has an thumbnail          mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)          if mobj: -            video_thumbnail = mobj.group(u'thumbnail') +            video_thumbnail = mobj.group('thumbnail')          else:              video_thumbnail = None          mobj = 
re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)          if mobj is None: -            raise ExtractorError(u'Unable to extract media URL') -        video_url = mobj.group(u'url') -        video_play_path = u'mp4:' + mobj.group(u'play_path') -        video_player_url = video_page_url + u'includes/vodplayer.swf' +            raise ExtractorError('Unable to extract media URL') +        video_url = mobj.group('url') +        video_play_path = 'mp4:' + mobj.group('play_path') +        video_player_url = video_page_url + 'includes/vodplayer.swf' -        return [{ -            'id':          video_id, -            'url':         video_url, -            'play_path':   video_play_path, -            'page_url':    video_page_url, -            'player_url':  video_player_url, -            'ext':         'flv', -            'title':       video_title, +        return { +            'id': video_id, +            'url': video_url, +            'play_path': video_play_path, +            'page_url': video_page_url, +            'player_url': video_player_url, +            'ext': 'flv', +            'title': video_title,              'description': video_description,              'upload_date': video_upload_date, -            'thumbnail':   video_thumbnail, -        }] +            'thumbnail': video_thumbnail, +        } diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index ad5840ca2..f7bc77c48 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -9,11 +11,11 @@ from ..utils import (  class TumblrIE(InfoExtractor):      _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'      _TEST = { -        u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', -        u'file': u'54196191430.mp4', -        u'md5': u'479bb068e5b16462f5176a6828829767', -        u'info_dict': { -            u"title": u"tatiana maslany news" +        'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', +        'file': '54196191430.mp4', +        'md5': '479bb068e5b16462f5176a6828829767', +        'info_dict': { +            "title": "tatiana maslany news"          }      } @@ -28,18 +30,20 @@ class TumblrIE(InfoExtractor):          re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)          video = re.search(re_video, webpage)          if video is None: -           raise ExtractorError(u'Unable to extract video') +            raise ExtractorError('Unable to extract video')          video_url = video.group('video_url')          ext = video.group('ext') -        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22', -            webpage, u'thumbnail', fatal=False)  # We pick the first poster -        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '') +        video_thumbnail = self._search_regex( +            r'posters.*?\[\\x22(.*?)\\x22', +            webpage, 'thumbnail', fatal=False)  # We pick the first poster +        if video_thumbnail: +            video_thumbnail = video_thumbnail.replace('\\\\/', '/')          # The only place where you can get a title, it's not complete,          # but searching in other places doesn't 
work for all videos          video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>', -            webpage, u'title', flags=re.DOTALL) +            webpage, 'title', flags=re.DOTALL)          return [{'id': video_id,                   'url': video_url, diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 193675549..a50170ce7 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -291,7 +291,7 @@ class VimeoIE(InfoExtractor):  class VimeoChannelIE(InfoExtractor):      IE_NAME = 'vimeo:channel' -    _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)' +    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'      _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'      _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' @@ -327,7 +327,7 @@ class VimeoChannelIE(InfoExtractor):  class VimeoUserIE(VimeoChannelIE):      IE_NAME = 'vimeo:user' -    _VALID_URL = r'(?:https?://)?vimeo.\com/(?P<name>[^/]+)(?:/videos|[#?]|$)' +    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)'      _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'      @classmethod @@ -344,7 +344,7 @@ class VimeoUserIE(VimeoChannelIE):  class VimeoAlbumIE(VimeoChannelIE):      IE_NAME = 'vimeo:album' -    _VALID_URL = r'(?:https?://)?vimeo.\com/album/(?P<id>\d+)' +    _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'      _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'      def _page_url(self, base_url, pagenum): @@ -358,7 +358,7 @@ class VimeoAlbumIE(VimeoChannelIE):  class VimeoGroupsIE(VimeoAlbumIE):      IE_NAME = 'vimeo:group' -    _VALID_URL = r'(?:https?://)?vimeo.\com/groups/(?P<name>[^/]+)' +    _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'      def _extract_list_title(self, webpage):          return self._og_search_title(webpage) @@ -372,7 +372,7 @@ class VimeoGroupsIE(VimeoAlbumIE):  class VimeoReviewIE(InfoExtractor):      IE_NAME = 'vimeo:review'      IE_DESC = 'Review pages on vimeo' -    _VALID_URL = r'(?:https?://)?vimeo.\com/[^/]+/review/(?P<id>[^/]+)' +    _VALID_URL = r'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)'      _TEST = {          'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',          'file': '75524534.mp4', diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py deleted file mode 100644 index 43953bfdd..000000000 --- a/youtube_dl/extractor/websurg.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 - -import re - -from ..utils import ( -    compat_urllib_request, -    compat_urllib_parse -) - -from .common import InfoExtractor - -class WeBSurgIE(InfoExtractor): -    IE_NAME = u'websurg.com' -    _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)' - -    _TEST = { -        u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012', -        u'file': u'vd01en4012.mp4', -        u'params': { -            u'skip_download': True, -        }, -        u'skip': u'Requires login information', -    } -     -    _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' - -    def _real_initialize(self): - -        login_form = { -            'username': self._downloader.params['username'], -            'password': self._downloader.params['password'], -            'Submit': 1 -        } -         -        request = compat_urllib_request.Request( -            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) -        request.add_header( -            
'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') -        compat_urllib_request.urlopen(request).info() -        webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in') -         -        if webpage != 'OK': -            self._downloader.report_error( -                u'Unable to log in: bad username/password') -         -    def _real_extract(self, url): -        video_id = re.match(self._VALID_URL, url).group(1) -         -        webpage = self._download_webpage(url, video_id) -         -        url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) -         -        return {'id': video_id, -                'title': self._og_search_title(webpage), -                'description': self._og_search_description(webpage), -                'ext' : 'mp4', -                'url' : url_info.group(1) + '/' + url_info.group(2), -                'thumbnail': self._og_search_thumbnail(webpage) -                } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1bc2dc22b..87a5a452e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -40,7 +40,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      """Provide base functions for Youtube extractors"""      _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'      _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' -    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' +    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'      _NETRC_MACHINE = 'youtube'      # If True it will raise an error if no login info is provided      _LOGIN_REQUIRED = False @@ -111,7 +111,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              'next_url': '/',              'action_confirm': 'Confirm',          } -        req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) +        req = compat_urllib_request.Request(self._AGE_URL, +            compat_urllib_parse.urlencode(age_form).encode('ascii'))          self._download_webpage(              req, None, @@ -1014,7 +1015,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def _get_available_subtitles(self, video_id, webpage):          try:              sub_list = self._download_webpage( -                'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, +                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,                  video_id, note=False)          except ExtractorError as err:              self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) @@ -1030,7 +1031,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'fmt': self._downloader.params.get('subtitlesformat', 'srt'),                  'name': unescapeHTML(l[0]).encode('utf-8'),              }) -            url = u'http://www.youtube.com/api/timedtext?' + params +            url = u'https://www.youtube.com/api/timedtext?' 
+ params              sub_lang_list[lang] = url          if not sub_lang_list:              self._downloader.report_warning(u'video doesn\'t have subtitles') @@ -1529,7 +1530,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):          channel = mobj.group('chann')          title = mobj.group('title')          query = compat_urllib_parse.urlencode({'title': title}) -        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query) +        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)          channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)          link = self._html_search_regex(playlist_re, channel_page, u'list')          url = compat_urlparse.urljoin('https://www.youtube.com/', link) @@ -1554,7 +1555,7 @@ class YoutubeChannelIE(InfoExtractor):      IE_DESC = u'YouTube.com channels'      _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"      _MORE_PAGES_INDICATOR = 'yt-uix-load-more' -    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' +    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'      IE_NAME = u'youtube:channel'      def extract_videos_from_page(self, page): @@ -1610,9 +1611,9 @@ class YoutubeChannelIE(InfoExtractor):  class YoutubeUserIE(InfoExtractor):      IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'      _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' -    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' +    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'      _GDATA_PAGE_SIZE = 50 -    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' +    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'      IE_NAME = u'youtube:user'      @classmethod @@ -1743,7 +1744,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):          action = 'action_load_system_feed'          if self._PERSONAL_FEED:              action = 'action_load_personal_feed' -        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME) +        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)      @property      def IE_NAME(self): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0701961a5..dd3c37007 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.01.23.4' +__version__ = '2014.01.27.1' | 
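
For readers skimming the diff, the behavioral core of the downloader change above is that get_suitable_downloader now honors an explicit 'protocol' field supplied by the extractor before falling back to sniffing the URL extension, so HLS streams whose URLs do not end in .m3u8 are still routed to the HLS downloader. Below is a minimal, self-contained sketch of that selection logic; the *FD classes are stand-ins for youtube-dl's real downloader classes, and determine_ext is a simplified stand-in for youtube_dl.utils.determine_ext.

# Sketch of the new selection order in youtube_dl/downloader/__init__.py:
# an explicit 'protocol' from the extractor wins over URL-extension sniffing.

def determine_ext(url, default_ext='unknown_video'):
    # Simplified: take the text after the last dot, ignoring any query string.
    guess = url.partition('?')[0].rpartition('.')[2]
    return guess if guess.isalnum() else default_ext

class RtmpFD(object): pass
class HlsFD(object): pass
class MplayerFD(object): pass
class HttpFD(object): pass

def get_suitable_downloader(info_dict):
    """Pick the downloader class that can handle the info dict."""
    url = info_dict['url']
    protocol = info_dict.get('protocol')
    if url.startswith('rtmp'):
        return RtmpFD
    if protocol == 'm3u8' or (protocol is None and determine_ext(url) == 'm3u8'):
        return HlsFD
    if url.startswith('mms') or url.startswith('rtsp'):
        return MplayerFD
    return HttpFD

# The 'protocol' hint matters when the manifest URL carries no .m3u8 extension:
assert get_suitable_downloader({'url': 'http://example.com/video.m3u8'}) is HlsFD
assert get_suitable_downloader({'url': 'http://example.com/master', 'protocol': 'm3u8'}) is HlsFD
assert get_suitable_downloader({'url': 'http://example.com/video.mp4'}) is HttpFD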
