diff options
| -rw-r--r-- | youtube_dl/downloader/hls.py | 2 | ||||
| -rw-r--r-- | youtube_dl/downloader/rtmp.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/ard.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/blinkx.py | 14 | ||||
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 50 | ||||
| -rw-r--r-- | youtube_dl/extractor/dreisat.py | 30 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 36 | ||||
| -rw-r--r-- | youtube_dl/extractor/hypem.py | 69 | ||||
| -rw-r--r-- | youtube_dl/extractor/livestream.py | 63 | ||||
| -rw-r--r-- | youtube_dl/extractor/ndtv.py | 46 | ||||
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/nuvid.py | 53 | ||||
| -rw-r--r-- | youtube_dl/extractor/rai.py | 121 | ||||
| -rw-r--r-- | youtube_dl/extractor/teachertube.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/ted.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/tube8.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/veoh.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/vulture.py | 69 | ||||
| -rw-r--r-- | youtube_dl/extractor/wrzuta.py | 81 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 4 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
22 files changed, 533 insertions, 149 deletions
| diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 9d407fe6e..9f29e2f81 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -25,7 +25,7 @@ class HlsFD(FileDownloader):              except (OSError, IOError):                  pass          else: -            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found') +            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')          cmd = [program] + args          retval = subprocess.call(cmd) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index cc6a84106..68646709a 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -106,7 +106,7 @@ class RtmpFD(FileDownloader):          try:              subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)          except (OSError, IOError): -            self.report_error('RTMP download detected but "rtmpdump" could not be run') +            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')              return False          # Download using rtmpdump. rtmpdump returns exit code 2 when diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6c9a7593a..dcf64d034 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -217,6 +217,7 @@ from .pornotube import PornotubeIE  from .prosiebensat1 import ProSiebenSat1IE  from .pyvideo import PyvideoIE  from .radiofrance import RadioFranceIE +from .rai import RaiIE  from .rbmaradio import RBMARadioIE  from .redtube import RedTubeIE  from .ringtv import RingTVIE @@ -333,6 +334,7 @@ from .viki import VikiIE  from .vk import VKIE  from .vube import VubeIE  from .vuclip import VuClipIE +from .vulture import VultureIE  from .washingtonpost import WashingtonPostIE  from .wat import WatIE  from .wdr import ( @@ -344,6 +346,7 @@ from .weibo import WeiboIE  from .wimp import WimpIE  from .wistia import WistiaIE  from .worldstarhiphop import WorldStarHipHopIE +from .wrzuta import WrzutaIE  from .xbef import XBefIE  from .xhamster import XHamsterIE  from .xnxx import XNXXIE diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index c6d22c029..b36a4d46a 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -56,7 +56,18 @@ class ARDIE(InfoExtractor):                  raise ExtractorError('This video is only available after 20:00')          formats = [] +          for s in streams: +            if type(s['_stream']) == list: +                for index, url in enumerate(s['_stream'][::-1]): +                    quality = s['_quality'] + index +                    formats.append({ +                        'quality': quality, +                        'url': url, +                        'format_id': '%s-%s' % (determine_ext(url), quality) +                        }) +                continue +              format = {                  'quality': s['_quality'],                  'url': s['_stream'], diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 38ccd957f..7d558e262 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -4,9 +4,7 @@ import json  import re  from .common import InfoExtractor -from ..utils import ( -    remove_start, -) +from ..utils import remove_start  class BlinkxIE(InfoExtractor): @@ -15,9 +13,10 @@ class BlinkxIE(InfoExtractor):      _TEST = {          'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', -        'file': '8aQUy7GV.mp4',          'md5': '2e9a07364af40163a908edbf10bb2492',          'info_dict': { +            'id': '8aQUy7GV', +            'ext': 'mp4',              'title': 'Police Car Rolls Away',              'uploader': 'stupidvideos.com',              'upload_date': '20131215', @@ -27,6 +26,7 @@ class BlinkxIE(InfoExtractor):              'thumbnails': [{                  'width': 100,                  'height': 76, +                'resolution': '100x76',                  'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',              }],          }, @@ -37,7 +37,7 @@ class BlinkxIE(InfoExtractor):          video_id = m.group('id')          display_id = video_id[:8] -        api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' + +        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +                     'video=%s' % video_id)          data_json = self._download_webpage(api_url, display_id)          data = json.loads(data_json)['api']['results'][0] @@ -55,13 +55,13 @@ class BlinkxIE(InfoExtractor):                  duration = m['d']              elif m['type'] == 'youtube':                  yt_id = m['link'] -                self.to_screen(u'Youtube video detected: %s' % yt_id) +                self.to_screen('Youtube video detected: %s' % yt_id)                  return self.url_result(yt_id, 'Youtube', video_id=yt_id)              elif m['type'] in ('flv', 'mp4'):                  vcodec = remove_start(m['vcodec'], 'ff')                  acodec = remove_start(m['acodec'], 'ff')                  tbr = (int(m['vbr']) + int(m['abr'])) // 1000 -                format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w']) +                format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])                  formats.append({                      'format_id': format_id,                      'url': m['link'], diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 3c02c297a..419951b62 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -15,6 +15,7 @@ from ..utils import (      compat_urllib_request,      compat_parse_qs, +    determine_ext,      ExtractorError,      unsmuggle_url,      unescapeHTML, @@ -29,10 +30,11 @@ class BrightcoveIE(InfoExtractor):          {              # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/              'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', -            'file': '2371591881001.mp4',              'md5': '5423e113865d26e40624dce2e4b45d95',              'note': 'Test Brightcove downloads and detection in GenericIE',              'info_dict': { +                'id': '2371591881001', +                'ext': 'mp4',                  'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',                  'uploader': '8TV',                  'description': 'md5:a950cc4285c43e44d763d036710cd9cd', @@ -41,8 +43,9 @@ class BrightcoveIE(InfoExtractor):          {              # From http://medianetwork.oracle.com/video/player/1785452137001              'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', -            'file': '1785452137001.flv',              'info_dict': { +                'id': '1785452137001', +                'ext': 'flv',                  'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',                  'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',                  'uploader': 'Oracle', @@ -70,7 +73,20 @@ class BrightcoveIE(InfoExtractor):                  'description': 'md5:363109c02998fee92ec02211bd8000df',                  'uploader': 'National Ballet of Canada',              }, -        } +        }, +        { +            # test flv videos served by akamaihd.net +            # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william +            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', +            # The md5 checksum changes on each download +            'info_dict': { +                'id': '2996102916001', +                'ext': 'flv', +                'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', +                'uploader': 'Red Bull TV', +                'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', +            }, +        },      ]      @classmethod @@ -187,7 +203,7 @@ class BrightcoveIE(InfoExtractor):          webpage = self._download_webpage(req, video_id)          self.report_extraction(video_id) -        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') +        info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')          info = json.loads(info)['data']          video_info = info['programmedContent']['videoPlayer']['mediaDTO']          video_info['_youtubedl_adServerURL'] = info.get('adServerURL') @@ -219,12 +235,26 @@ class BrightcoveIE(InfoExtractor):          renditions = video_info.get('renditions')          if renditions: -            renditions = sorted(renditions, key=lambda r: r['size']) -            info['formats'] = [{ -                'url': rend['defaultURL'], -                'height': rend.get('frameHeight'), -                'width': rend.get('frameWidth'), -            } for rend in renditions] +            formats = [] +            for rend in renditions: +                url = rend['defaultURL'] +                if rend['remote']: +                    # This type of renditions are served through akamaihd.net, +                    # but they don't use f4m manifests +                    url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' +                    ext = 'flv' +                else: +                    ext = determine_ext(url) +                size = rend.get('size') +                formats.append({ +                    'url': url, +                    'ext': ext, +                    'height': rend.get('frameHeight'), +                    'width': rend.get('frameWidth'), +                    'filesize': size if size != 0 else None, +                }) +            self._sort_formats(formats) +            info['formats'] = formats          elif video_info.get('FLVFullLengthURL') is not None:              info.update({                  'url': video_info['FLVFullLengthURL'], diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 0b11d1f10..69ca75423 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -1,39 +1,37 @@ -# coding: utf-8 +from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    unified_strdate, -) +from ..utils import unified_strdate  class DreiSatIE(InfoExtractor):      IE_NAME = '3sat'      _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'      _TEST = { -        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", -        u'file': u'36983.mp4', -        u'md5': u'9dcfe344732808dbfcc901537973c922', -        u'info_dict': { -            u"title": u"Kaffeeland Schweiz", -            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",  -            u"uploader": u"3sat", -            u"upload_date": u"20130622" +        'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', +        'md5': '9dcfe344732808dbfcc901537973c922', +        'info_dict': { +            'id': '36983', +            'ext': 'mp4', +            'title': 'Kaffeeland Schweiz', +            'description': 'md5:cc4424b18b75ae9948b13929a0814033', +            'uploader': '3sat', +            'upload_date': '20130622'          }      } -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id')          details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id -        details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details') +        details_doc = self._download_xml(details_url, video_id, 'Downloading video details')          thumbnail_els = details_doc.findall('.//teaserimage')          thumbnails = [{ -            'width': te.attrib['key'].partition('x')[0], -            'height': te.attrib['key'].partition('x')[2], +            'width': int(te.attrib['key'].partition('x')[0]), +            'height': int(te.attrib['key'].partition('x')[2]),              'url': te.text,          } for te in thumbnail_els] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 38a357d3b..3105b47ab 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -260,7 +260,24 @@ class GenericIE(InfoExtractor):                  'uploader': 'Spi0n',              },              'add_ie': ['Dailymotion'], -        } +        }, +        # YouTube embed +        { +            'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', +            'info_dict': { +                'id': 'FXRb4ykk4S0', +                'ext': 'mp4', +                'title': 'The NBL Auction 2014', +                'uploader': 'BADMINTON England', +                'uploader_id': 'BADMINTONEvents', +                'upload_date': '20140603', +                'description': 'md5:9ef128a69f1e262a700ed83edb163a73', +            }, +            'add_ie': ['Youtube'], +            'params': { +                'skip_download': True, +            } +        },      ]      def report_download_webpage(self, video_id): @@ -478,8 +495,13 @@ class GenericIE(InfoExtractor):          # Look for embedded YouTube player          matches = re.findall(r'''(?x) -            (?:<iframe[^>]+?src=|embedSWF\(\s*) -            (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ +            (?: +                <iframe[^>]+?src=| +                <embed[^>]+?src=| +                embedSWF\(?:\s* +            ) +            (["\']) +                (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/                  (?:embed|v)/.+?)              \1''', webpage)          if matches: @@ -646,6 +668,14 @@ class GenericIE(InfoExtractor):              url = unescapeHTML(mobj.group('url'))              return self.url_result(url) +        # Look for embedded vulture.com player +        mobj = re.search( +            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', +            webpage) +        if mobj is not None: +            url = unescapeHTML(mobj.group('url')) +            return self.url_result(url, ie='Vulture') +          # Start with something easy: JW Player in SWFObject          found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if not found: diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 9bd06e7c7..6d0d847c6 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -1,10 +1,11 @@ +from __future__ import unicode_literals +  import json  import re  import time  from .common import InfoExtractor  from ..utils import ( -    compat_str,      compat_urllib_parse,      compat_urllib_request, @@ -13,59 +14,55 @@ from ..utils import (  class HypemIE(InfoExtractor): -    """Information Extractor for hypem""" -    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' +    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'      _TEST = { -        u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', -        u'file': u'1v6ga.mp3', -        u'md5': u'b9cc91b5af8995e9f0c1cee04c575828', -        u'info_dict': { -            u"title": u"Tame" +        'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', +        'md5': 'b9cc91b5af8995e9f0c1cee04c575828', +        'info_dict': { +            'id': '1v6ga', +            'ext': 'mp3', +            'title': 'Tame', +            'uploader': 'BODYWORK',          }      }      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url)          track_id = mobj.group(1)          data = {'ax': 1, 'ts': time.time()}          data_encoded = compat_urllib_parse.urlencode(data)          complete_url = url + "?" + data_encoded          request = compat_urllib_request.Request(complete_url) -        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url') +        response, urlh = self._download_webpage_handle( +            request, track_id, 'Downloading webpage with the url')          cookie = urlh.headers.get('Set-Cookie', '') -        self.report_extraction(track_id) - -        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>', -            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip() +        html_tracks = self._html_search_regex( +            r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>', +            response, 'tracks')          try:              track_list = json.loads(html_tracks) -            track = track_list[u'tracks'][0] +            track = track_list['tracks'][0]          except ValueError: -            raise ExtractorError(u'Hypemachine contained invalid JSON.') +            raise ExtractorError('Hypemachine contained invalid JSON.') -        key = track[u"key"] -        track_id = track[u"id"] -        artist = track[u"artist"] -        title = track[u"song"] +        key = track['key'] +        track_id = track['id'] +        artist = track['artist'] +        title = track['song'] -        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key)) -        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) +        serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key) +        request = compat_urllib_request.Request( +            serve_url, '', {'Content-Type': 'application/json'})          request.add_header('cookie', cookie) -        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata') -        try: -            song_data = json.loads(song_data_json) -        except ValueError: -            raise ExtractorError(u'Hypemachine contained invalid JSON.') -        final_url = song_data[u"url"] +        song_data = self._download_json(request, track_id, 'Downloading metadata') +        final_url = song_data["url"] -        return [{ -            'id':       track_id, -            'url':      final_url, -            'ext':      "mp3", -            'title':    title, -            'artist':   artist, -        }] +        return { +            'id': track_id, +            'url': final_url, +            'ext': 'mp3', +            'title': title, +            'uploader': artist, +        } diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 1dcd1fb2d..5c71f4f09 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  import json @@ -6,31 +8,34 @@ from ..utils import (      compat_urllib_parse_urlparse,      compat_urlparse,      xpath_with_ns, +    compat_str,  )  class LivestreamIE(InfoExtractor): -    IE_NAME = u'livestream' +    IE_NAME = 'livestream'      _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'      _TEST = { -        u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', -        u'file': u'4719370.mp4', -        u'md5': u'0d2186e3187d185a04b3cdd02b828836', -        u'info_dict': { -            u'title': u'Live from Webster Hall NYC', -            u'upload_date': u'20121012', +        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', +        'md5': '53274c76ba7754fb0e8d072716f2292b', +        'info_dict': { +            'id': '4719370', +            'ext': 'mp4', +            'title': 'Live from Webster Hall NYC', +            'upload_date': '20121012',          }      }      def _extract_video_info(self, video_data):          video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url') -        return {'id': video_data['id'], -                'url': video_url, -                'ext': 'mp4', -                'title': video_data['caption'], -                'thumbnail': video_data['thumbnail_url'], -                'upload_date': video_data['updated_at'].replace('-','')[:8], -                } +        return { +            'id': compat_str(video_data['id']), +            'url': video_url, +            'ext': 'mp4', +            'title': video_data['caption'], +            'thumbnail': video_data['thumbnail_url'], +            'upload_date': video_data['updated_at'].replace('-', '')[:8], +        }      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -40,36 +45,36 @@ class LivestreamIE(InfoExtractor):          if video_id is None:              # This is an event page: -            config_json = self._search_regex(r'window.config = ({.*?});', -                webpage, u'window config') +            config_json = self._search_regex( +                r'window.config = ({.*?});', webpage, 'window config')              info = json.loads(config_json)['event']              videos = [self._extract_video_info(video_data['data']) -                for video_data in info['feed']['data'] if video_data['type'] == u'video'] +                for video_data in info['feed']['data'] if video_data['type'] == 'video']              return self.playlist_result(videos, info['id'], info['full_name'])          else: -            og_video = self._og_search_video_url(webpage, name=u'player url') +            og_video = self._og_search_video_url(webpage, 'player url')              query_str = compat_urllib_parse_urlparse(og_video).query              query = compat_urlparse.parse_qs(query_str)              api_url = query['play_url'][0].replace('.smil', '') -            info = json.loads(self._download_webpage(api_url, video_id, -                                                     u'Downloading video info')) +            info = json.loads(self._download_webpage( +                api_url, video_id, 'Downloading video info'))              return self._extract_video_info(info)  # The original version of Livestream uses a different system  class LivestreamOriginalIE(InfoExtractor): -    IE_NAME = u'livestream:original' +    IE_NAME = 'livestream:original'      _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'      _TEST = { -        u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', -        u'info_dict': { -            u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', -            u'ext': u'flv', -            u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', +        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', +        'info_dict': { +            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', +            'ext': 'flv', +            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',          }, -        u'params': { +        'params': {              # rtmp -            u'skip_download': True, +            'skip_download': True,          },      } @@ -84,7 +89,7 @@ class LivestreamOriginalIE(InfoExtractor):          ns = {'media': 'http://search.yahoo.com/mrss'}          thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']          # Remove the extension and number from the path (like 1.jpg) -        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path') +        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')          return {              'id': video_id, diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py index d81df3c10..95e7d63aa 100644 --- a/youtube_dl/extractor/ndtv.py +++ b/youtube_dl/extractor/ndtv.py @@ -1,22 +1,28 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor -from ..utils import month_by_name +from ..utils import ( +    month_by_name, +    int_or_none, +)  class NDTVIE(InfoExtractor):      _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'      _TEST = { -        u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710", -        u"file": u"300710.mp4", -        u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88", -        u"info_dict": { -            u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal", -            u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.", -            u"upload_date": u"20131208", -            u"duration": 1327, -            u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg", +        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710', +        'md5': '39f992dbe5fb531c395d8bbedb1e5e88', +        'info_dict': { +            'id': '300710', +            'ext': 'mp4', +            'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal", +            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', +            'upload_date': '20131208', +            'duration': 1327, +            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',          },      } @@ -27,13 +33,12 @@ class NDTVIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          filename = self._search_regex( -            r"__filename='([^']+)'", webpage, u'video filename') -        video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % +            r"__filename='([^']+)'", webpage, 'video filename') +        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %                       filename) -        duration_str = filename = self._search_regex( -            r"__duration='([^']+)'", webpage, u'duration', fatal=False) -        duration = None if duration_str is None else int(duration_str) +        duration = int_or_none(self._search_regex( +            r"__duration='([^']+)'", webpage, 'duration', fatal=False))          date_m = re.search(r'''(?x)              <p\s+class="vod_dateline">\s* @@ -41,7 +46,7 @@ class NDTVIE(InfoExtractor):                  (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)              ''', webpage)          upload_date = None -        assert date_m +          if date_m is not None:              month = month_by_name(date_m.group('monthname'))              if month is not None: @@ -49,14 +54,19 @@ class NDTVIE(InfoExtractor):                      date_m.group('year'), month, int(date_m.group('day')))          description = self._og_search_description(webpage) -        READ_MORE = u' (Read more)' +        READ_MORE = ' (Read more)'          if description.endswith(READ_MORE):              description = description[:-len(READ_MORE)] +        title = self._og_search_title(webpage) +        TITLE_SUFFIX = ' - NDTV' +        if title.endswith(TITLE_SUFFIX): +            title = title[:-len(TITLE_SUFFIX)] +          return {              'id': video_id,              'url': video_url, -            'title': self._og_search_title(webpage), +            'title': title,              'description': description,              'thumbnail': self._og_search_thumbnail(webpage),              'duration': duration, diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1f066cf05..96f0ae1eb 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -72,14 +72,14 @@ class NRKIE(InfoExtractor):  class NRKTVIE(InfoExtractor): -    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})' +    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'      _TESTS = [          { -            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014', +            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',              'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',              'info_dict': { -                'id': 'muhh48000314', +                'id': 'MUHH48000314',                  'ext': 'flv',                  'title': '20 spørsmål',                  'description': 'md5:bdea103bc35494c143c6a9acdd84887a', @@ -141,4 +141,4 @@ class NRKTVIE(InfoExtractor):              'upload_date': upload_date,              'duration': duration,              'formats': formats, -        }
\ No newline at end of file +        } diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index e3db9fe8c..280328b78 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -3,6 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..utils import ( +    parse_duration, +    unified_strdate, +    compat_urllib_request, +)  class NuvidIE(InfoExtractor): @@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor):          'info_dict': {              'id': '1310741',              'ext': 'mp4', -            "title": "Horny babes show their awesome bodeis and", -            "age_limit": 18, +            'title': 'Horny babes show their awesome bodeis and', +            'duration': 129, +            'upload_date': '20140508', +            'age_limit': 18,          }      } @@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        murl = url.replace('://www.', '://m.') -        webpage = self._download_webpage(murl, video_id) - -        title = self._html_search_regex( -            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', -            webpage, 'title').strip() +        formats = [] -        url_end = self._html_search_regex( -            r'href="(/[^"]+)"[^>]*data-link_type="mp4"', -            webpage, 'video_url') -        video_url = 'http://m.nuvid.com' + url_end +        for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]: +            request = compat_urllib_request.Request( +                'http://m.nuvid.com/play/%s' % video_id) +            request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed) +            webpage = self._download_webpage( +                request, video_id, 'Downloading %s page' % format_id) +            video_url = self._html_search_regex( +                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False) +            if not video_url: +                continue +            formats.append({ +                'url': video_url, +                'format_id': format_id, +            }) +        webpage = self._download_webpage( +            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page') +        title = self._html_search_regex( +            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()          thumbnail = self._html_search_regex(              r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',              webpage, 'thumbnail URL', fatal=False) +        duration = parse_duration(self._html_search_regex( +            r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False)) +        upload_date = unified_strdate(self._html_search_regex( +            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))          return {              'id': video_id, -            'url': video_url, -            'ext': 'mp4',              'title': title, -            'thumbnail': thumbnail, +            'thumbnail': 'http://m.nuvid.com%s' % thumbnail, +            'duration': duration, +            'upload_date': upload_date,              'age_limit': 18, -        } +            'formats': formats, +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py new file mode 100644 index 000000000..cb4305349 --- /dev/null +++ b/youtube_dl/extractor/rai.py @@ -0,0 +1,121 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( +    parse_duration, +    unified_strdate, +    compat_urllib_parse, +) + + +class RaiIE(SubtitlesInfoExtractor): +    _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' +    _TESTS = [ +        { +            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', +            'md5': 'c064c0b2d09c278fb293116ef5d0a32d', +            'info_dict': { +                'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', +                'ext': 'mp4', +                'title': 'Report del 07/04/2014', +                'description': 'md5:f27c544694cacb46a078db84ec35d2d9', +                'upload_date': '20140407', +                'duration': 6160, +            } +        }, +        { +            'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', +            'md5': '8bb9c151924ce241b74dd52ef29ceafa', +            'info_dict': { +                'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', +                'ext': 'mp4', +                'title': 'TG PRIMO TEMPO', +                'description': '', +                'upload_date': '20140612', +                'duration': 1758, +            } +        }, +        { +            'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', +            'md5': '35cf7c229f22eeef43e48b5cf923bef0', +            'info_dict': { +                'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', +                'ext': 'mp4', +                'title': 'State of the Net, Antonella La Carpia: regole virali', +                'description': 'md5:b0ba04a324126903e3da7763272ae63c', +                'upload_date': '20140613', +            }, +            'skip': 'Error 404', +        }, +        { +            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', +            'md5': '35694f062977fe6619943f08ed935730', +            'info_dict': { +                'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', +                'ext': 'mp4', +                'title': 'Alluvione in Sardegna e dissesto idrogeologico', +                'description': 'Edizione delle ore 20:30 ', +            } +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON') + +        title = media.get('name') +        description = media.get('desc') +        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') +        duration = parse_duration(media.get('length')) +        uploader = media.get('author') +        upload_date = unified_strdate(media.get('date')) + +        formats = [] + +        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']: +            media_url = media.get(format_id) +            if not media_url: +                continue +            formats.append({ +                'url': media_url, +                'format_id': format_id, +                'ext': 'mp4', +            }) + +        if self._downloader.params.get('listsubtitles', False): +            page = self._download_webpage(url, video_id) +            self._list_available_subtitles(video_id, page) +            return + +        subtitles = {} +        if self._have_to_download_any_subtitles: +            page = self._download_webpage(url, video_id) +            subtitles = self.extract_subtitles(video_id, page) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'upload_date': upload_date, +            'duration': duration, +            'formats': formats, +            'subtitles': subtitles, +        } + +    def _get_available_subtitles(self, video_id, webpage): +        subtitles = {} +        m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) +        if m: +            captions = m.group('captions') +            STL_EXT = '.stl' +            SRT_EXT = '.srt' +            if captions.endswith(STL_EXT): +                captions = captions[:-len(STL_EXT)] + SRT_EXT +            subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) +        return subtitles
\ No newline at end of file diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 6d52763f9..4d9666c6b 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -55,11 +55,13 @@ class TeacherTubeIE(InfoExtractor):          quality = qualities(['mp3', 'flv', 'mp4']) +        _, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage)) +          formats = [              {                  'url': media_url,                  'quality': quality(determine_ext(media_url)) -            } for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1]) +            } for media_url in set(media_urls)          ]          self._sort_formats(formats) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d260c91c2..bce32a873 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor):          '''      _TESTS = [{          'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', -        'md5': '4ea1dada91e4174b53dac2bb8ace429d', +        'md5': 'fc94ac279feebbce69f21c0c6ee82810',          'info_dict': {              'id': '102',              'ext': 'mp4', diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 36bc36ad8..08a48c05a 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -17,9 +17,10 @@ class Tube8IE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'      _TEST = {          'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', -        'file': '229795.mp4', -        'md5': 'e9e0b0c86734e5e3766e653509475db0', +        'md5': '44bf12b98313827dd52d35b8706a4ea0',          'info_dict': { +            'id': '229795', +            'ext': 'mp4',              'description': 'hot teen Kasia grinding',              'uploader': 'unknown',              'title': 'Kasia music video', diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index d16993daf..fb132aef6 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -7,6 +7,7 @@ from .common import InfoExtractor  from ..utils import (      compat_urllib_request,      int_or_none, +    ExtractorError,  ) @@ -94,8 +95,12 @@ class VeohIE(InfoExtractor):          if video_id.startswith('v'):              rsp = self._download_xml(                  r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML') -            if rsp.get('stat') == 'ok': +            stat = rsp.get('stat') +            if stat == 'ok':                  return self._extract_video(rsp.find('./videoList/video')) +            elif stat == 'fail': +                raise ExtractorError( +                    '%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)          webpage = self._download_webpage(url, video_id)          age_limit = 0 diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py new file mode 100644 index 000000000..1eb24a3d6 --- /dev/null +++ b/youtube_dl/extractor/vulture.py @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import json +import os.path +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    parse_iso8601, +) + + +class VultureIE(InfoExtractor): +    IE_NAME = 'vulture.com' +    _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' +    _TEST = { +        'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', +        'md5': '8d997845642a2b5152820f7257871bc8', +        'info_dict': { +            'id': '6GHRQL3RV7MSD1H4', +            'ext': 'mp4', +            'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', +            'uploader_id': 'Sarah', +            'thumbnail': 're:^http://.*\.jpg$', +            'timestamp': 1401288564, +            'upload_date': '20140528', +            'description': 'Uplifting and witty, as predicted.', +            'duration': 1015, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        display_id = mobj.group('display_id') + +        webpage = self._download_webpage(url, display_id) +        query_string = self._search_regex( +            r"queryString\s*=\s*'([^']+)'", webpage, 'query string') +        video_id = self._search_regex( +            r'content=([^&]+)', query_string, 'video ID') +        query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string + +        query_webpage = self._download_webpage( +            query_url, display_id, note='Downloading query page') +        params_json = self._search_regex( +            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n', +            query_webpage, +            'player params') +        params = json.loads(params_json) + +        upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T')) +        uploader_id = params.get('user', {}).get('handle') + +        media_item = params['media_item'] +        title = os.path.splitext(media_item['title'])[0] +        duration = int_or_none(media_item.get('duration_seconds')) + +        return { +            'id': video_id, +            'display_id': display_id, +            'url': media_item['pipeline_xid'], +            'title': title, +            'timestamp': upload_timestamp, +            'thumbnail': params.get('thumbnail_url'), +            'uploader_id': uploader_id, +            'description': params.get('description'), +            'duration': duration, +        } diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py new file mode 100644 index 000000000..34dd6d952 --- /dev/null +++ b/youtube_dl/extractor/wrzuta.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    qualities, +) + + +class WrzutaIE(InfoExtractor): +    IE_NAME = 'wrzuta.pl' + +    _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)' + +    _TESTS = [{ +        'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game', +        'md5': '9e67e05bed7c03b82488d87233a9efe7', +        'info_dict': { +            'id': 'aq4hIZWrkBu', +            'ext': 'mp4', +            'title': 'Nike Football: The Last Game', +            'duration': 307, +            'uploader_id': 'laboratoriumdextera', +            'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', +        }, +    }, { +        'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad', +        'md5': '1e546a18e1c22ac6e9adce17b8961ff5', +        'info_dict': { +            'id': '9oXJqdcndqv', +            'ext': 'ogg', +            'title': 'David Guetta & Showtek ft. Vassy - Bad', +            'duration': 270, +            'uploader_id': 'w729', +            'description': 'md5:4628f01c666bbaaecefa83476cfa794a', +        }, +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        typ = mobj.group('typ') +        uploader = mobj.group('uploader') + +        webpage = self._download_webpage(url, video_id) + +        quality = qualities(['SD', 'MQ', 'HQ', 'HD']) + +        audio_table = {'flv': 'mp3', 'webm': 'ogg'} + +        embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id) + +        formats = [] +        for media in embedpage['url']: +            if typ == 'audio': +                ext = audio_table[media['type'].split('@')[0]] +            else: +                ext = media['type'].split('@')[0] + +            formats.append({ +                'format_id': '%s_%s' % (ext, media['quality'].lower()), +                'url': media['url'], +                'ext': ext, +                'quality': quality(media['quality']), +            }) + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': self._og_search_title(webpage), +            'thumbnail': self._og_search_thumbnail(webpage), +            'formats': formats, +            'duration': int_or_none(embedpage['duration']), +            'uploader_id': uploader, +            'description': self._og_search_description(webpage), +            'age_limit': embedpage.get('minimalAge', 0), +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7c50881c4..ece62dfce 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1386,13 +1386,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):                          |  p/                          )                          ( -                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} +                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}                              # Top tracks, they can also include dots                               |(?:MC)[\w\.]*                          )                          .*                       | -                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) +                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})                       )"""      _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'      _MORE_PAGES_INDICATOR = r'data-link-type="next"' diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6fe7c7b25..56d5d0f2c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.07' +__version__ = '2014.06.16' | 
