diff options
27 files changed, 822 insertions, 281 deletions
| diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 1fa99f88b..410f9edc2 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,7 +10,6 @@ from test.helper import FakeYDL  from youtube_dl.extractor import ( -    YoutubeUserIE,      YoutubePlaylistIE,      YoutubeIE,      YoutubeChannelIE, @@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):          self.assertEqual(len(entries), 25)          self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') -    def test_youtube_channel(self): -        dl = FakeYDL() -        ie = YoutubeChannelIE(dl) -        #test paginated channel -        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') -        self.assertTrue(len(result['entries']) > 90) -        #test autogenerated channel -        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') -        self.assertTrue(len(result['entries']) >= 18) - -    def test_youtube_user(self): -        dl = FakeYDL() -        ie = YoutubeUserIE(dl) -        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') -        self.assertTrue(len(result['entries']) >= 320) - -    def test_youtube_show(self): -        dl = FakeYDL() -        ie = YoutubeShowIE(dl) -        result = ie.extract('http://www.youtube.com/show/airdisasters') -        self.assertTrue(len(result) >= 3) -      def test_youtube_mix(self):          dl = FakeYDL()          ie = YoutubePlaylistIE(dl) @@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):          entries = result['entries']          self.assertEqual(len(entries), 100) -    def test_youtube_toplist(self): -        dl = FakeYDL() -        ie = YoutubeTopListIE(dl) -        result = ie.extract('yttoplist:music:Trending') -        entries = result['entries'] -        self.assertTrue(len(entries) >= 5) - -    def test_youtube_search_url(self): -        dl = FakeYDL() -        ie = YoutubeSearchURLIE(dl) -        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') -        entries = result['entries'] -        self.assertIsPlaylist(result) -        self.assertEqual(result['title'], 'youtube-dl test video') -        self.assertTrue(len(entries) >= 5) -  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 4ea5811a5..3f941596e 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals  from .common import FileDownloader  from .hls import HlsFD +from .hls import NativeHlsFD  from .http import HttpFD  from .mplayer import MplayerFD  from .rtmp import RtmpFD @@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):      if url.startswith('rtmp'):          return RtmpFD +    if protocol == 'm3u8_native': +        return NativeHlsFD      if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):          return HlsFD      if url.startswith('mms') or url.startswith('rtsp'): diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 9ce97f5fe..f85f0c94e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -42,6 +42,7 @@ class FileDownloader(object):      Subclasses of this one must re-define the real_download method.      """ +    _TEST_FILE_SIZE = 10241      params = None      def __init__(self, ydl, params): diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 71353f607..b3be16ff1 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -16,6 +16,7 @@ from ..utils import (      format_bytes,      encodeFilename,      sanitize_open, +    xpath_text,  ) @@ -251,6 +252,8 @@ class F4mFD(FileDownloader):              # We only download the first fragment              fragments_list = fragments_list[:1]          total_frags = len(fragments_list) +        # For some akamai manifests we'll need to add a query to the fragment url +        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))          tmpfilename = self.temp_name(filename)          (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') @@ -290,6 +293,8 @@ class F4mFD(FileDownloader):          for (seg_i, frag_i) in fragments_list:              name = 'Seg%d-Frag%d' % (seg_i, frag_i)              url = base_url + name +            if akamai_pv: +                url += '?' + akamai_pv.strip(';')              frag_filename = '%s-%s' % (tmpfilename, name)              success = http_dl.download(frag_filename, {'url': url})              if not success: diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 32852f333..56cce2811 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -1,8 +1,13 @@ +from __future__ import unicode_literals +  import os +import re  import subprocess  from .common import FileDownloader  from ..utils import ( +    compat_urlparse, +    compat_urllib_request,      check_executable,      encodeFilename,  ) @@ -43,3 +48,57 @@ class HlsFD(FileDownloader):              self.to_stderr(u"\n")              self.report_error(u'%s exited with code %d' % (program, retval))              return False + + +class NativeHlsFD(FileDownloader): +    """ A more limited implementation that does not require ffmpeg """ + +    def real_download(self, filename, info_dict): +        url = info_dict['url'] +        self.report_destination(filename) +        tmpfilename = self.temp_name(filename) + +        self.to_screen( +            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) +        data = self.ydl.urlopen(url).read() +        s = data.decode('utf-8', 'ignore') +        segment_urls = [] +        for line in s.splitlines(): +            line = line.strip() +            if line and not line.startswith('#'): +                segment_url = ( +                    line +                    if re.match(r'^https?://', line) +                    else compat_urlparse.urljoin(url, line)) +                segment_urls.append(segment_url) + +        is_test = self.params.get('test', False) +        remaining_bytes = self._TEST_FILE_SIZE if is_test else None +        byte_counter = 0 +        with open(tmpfilename, 'wb') as outf: +            for i, segurl in enumerate(segment_urls): +                self.to_screen( +                    '[hlsnative] %s: Downloading segment %d / %d' % +                    (info_dict['id'], i + 1, len(segment_urls))) +                seg_req = compat_urllib_request.Request(segurl) +                if remaining_bytes: +                    seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + +                segment = self.ydl.urlopen(seg_req).read() +                if remaining_bytes: +                    segment = segment[:remaining_bytes] +                    remaining_bytes -= len(segment) +                outf.write(segment) +                byte_counter += len(segment) +                if remaining_bytes <= 0: +                    break + +        self._hook_progress({ +            'downloaded_bytes': byte_counter, +            'total_bytes': byte_counter, +            'filename': filename, +            'status': 'finished', +        }) +        self.try_rename(tmpfilename, filename) +        return True + diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 6caf7451e..f62555ce0 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -14,8 +14,6 @@ from ..utils import (  class HttpFD(FileDownloader): -    _TEST_FILE_SIZE = 10241 -      def real_download(self, filename, info_dict):          url = info_dict['url']          tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 625666acb..1f1fc0eb2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -199,6 +199,7 @@ from .malemotion import MalemotionIE  from .mdr import MDRIE  from .metacafe import MetacafeIE  from .metacritic import MetacriticIE +from .mgoon import MgoonIE  from .ministrygrid import MinistryGridIE  from .mit import TechTVMITIE, MITIE, OCWMITIE  from .mitele import MiTeleIE @@ -239,6 +240,7 @@ from .ndtv import NDTVIE  from .newgrounds import NewgroundsIE  from .newstube import NewstubeIE  from .nfb import NFBIE +from .nfl import NFLIE  from .nhl import NHLIE, NHLVideocenterIE  from .niconico import NiconicoIE  from .ninegag import NineGagIE @@ -248,7 +250,10 @@ from .nosvideo import NosVideoIE  from .novamov import NovaMovIE  from .nowness import NownessIE  from .nowvideo import NowVideoIE -from .npo import NPOIE +from .npo import ( +    NPOIE, +    TegenlichtVproIE, +)  from .nrk import (      NRKIE,      NRKTVIE, @@ -361,6 +366,7 @@ from .thisav import ThisAVIE  from .tinypic import TinyPicIE  from .tlc import TlcIE, TlcDeIE  from .tnaflix import TNAFlixIE +from .thvideo import THVideoIE  from .toutv import TouTvIE  from .toypics import ToypicsUserIE, ToypicsIE  from .traileraddict import TrailerAddictIE @@ -447,6 +453,7 @@ from .yahoo import (  from .youjizz import YouJizzIE  from .youku import YoukuIE  from .youporn import YouPornIE +from .yourupload import YourUploadIE  from .youtube import (      YoutubeIE,      YoutubeChannelIE, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c30a1d33..60cab6f4e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -15,6 +15,7 @@ from ..utils import (      compat_http_client,      compat_urllib_error,      compat_urllib_parse_urlparse, +    compat_urlparse,      compat_str,      clean_html, @@ -640,7 +641,9 @@ class InfoExtractor(object):          return formats -    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): +    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, +                              entry_protocol='m3u8', preference=None): +          formats = [{              'format_id': 'm3u8-meta',              'url': m3u8_url, @@ -651,6 +654,11 @@ class InfoExtractor(object):              'format_note': 'Quality selection URL',          }] +        format_url = lambda u: ( +            u +            if re.match(r'^https?://', u) +            else compat_urlparse.urljoin(m3u8_url, u)) +          m3u8_doc = self._download_webpage(m3u8_url, video_id)          last_info = None          kv_rex = re.compile( @@ -667,15 +675,17 @@ class InfoExtractor(object):                  continue              else:                  if last_info is None: -                    formats.append({'url': line}) +                    formats.append({'url': format_url(line)})                      continue                  tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)                  f = {                      'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), -                    'url': line.strip(), +                    'url': format_url(line.strip()),                      'tbr': tbr,                      'ext': ext, +                    'protocol': entry_protocol, +                    'preference': preference,                  }                  codecs = last_info.get('CODECS')                  if codecs: diff --git a/youtube_dl/extractor/divxstage.py b/youtube_dl/extractor/divxstage.py index 4ca3f37a2..b88379e06 100644 --- a/youtube_dl/extractor/divxstage.py +++ b/youtube_dl/extractor/divxstage.py @@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE):      IE_NAME = 'divxstage'      IE_DESC = 'DivxStage' -    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'} +    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}      _HOST = 'www.divxstage.eu' @@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE):              'title': 'youtubedl test video',              'description': 'This is a test video for youtubedl.',          } -    }
\ No newline at end of file +    } diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py index 1e1763abf..817a9bd61 100644 --- a/youtube_dl/extractor/dropbox.py +++ b/youtube_dl/extractor/dropbox.py @@ -5,24 +5,29 @@ import os.path  import re  from .common import InfoExtractor -from ..utils import compat_urllib_parse_unquote +from ..utils import compat_urllib_parse_unquote, url_basename  class DropboxIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*' +    _TESTS = [{          'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',          'info_dict': {              'id': 'nelirfsxnmcfbfh',              'ext': 'mp4',              'title': 'youtube-dl test video \'ä"BaW_jenozKc'          } -    } +    }, +    { +        'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', +        'only_matching': True, +    }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        fn = compat_urllib_parse_unquote(mobj.group('title')) +        fn = compat_urllib_parse_unquote(url_basename(url))          title = os.path.splitext(fn)[0]          video_url = (              re.sub(r'[?&]dl=0', '', url) + diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 21ea5ec2b..e09982e88 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -10,13 +10,13 @@ from ..utils import (  class FlickrIE(InfoExtractor): -    """Information Extractor for Flickr videos""" -    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' +    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'      _TEST = {          'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', -        'file': '5645318632.mp4',          'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',          'info_dict': { +            'id': '5645318632', +            'ext': 'mp4',              "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",               "uploader_id": "forestwander-nature-pictures",               "title": "Dark Hollow Waterfalls" @@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):              raise ExtractorError('Unable to extract video url')          video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) -        return [{ -            'id':          video_id, -            'url':         video_url, -            'ext':         'mp4', -            'title':       self._og_search_title(webpage), +        return { +            'id': video_id, +            'url': video_url, +            'ext': 'mp4', +            'title': self._og_search_title(webpage),              'description': self._og_search_description(webpage), -            'thumbnail':   self._og_search_thumbnail(webpage), +            'thumbnail': self._og_search_thumbnail(webpage),              'uploader_id': video_uploader_id, -        }] +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2d77f604a..367f930dd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -597,7 +597,9 @@ class GenericIE(InfoExtractor):          # Helper method          def _playlist_from_matches(matches, getter, ie=None): -            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches) +            urlrs = orderedSet( +                self.url_result(self._proto_relative_url(getter(m)), ie) +                for m in matches)              return self.playlist_result(                  urlrs, playlist_id=video_id, playlist_title=video_title) @@ -642,11 +644,11 @@ class GenericIE(InfoExtractor):              )              (["\'])                  (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ -                (?:embed|v)/.+?) +                (?:embed|v|p)/.+?)              \1''', webpage)          if matches:              return _playlist_from_matches( -                matches, lambda m: unescapeHTML(m[1]), ie='Youtube') +                matches, lambda m: unescapeHTML(m[1]))          # Look for embedded Dailymotion player          matches = re.findall( diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py new file mode 100644 index 000000000..94bc87b00 --- /dev/null +++ b/youtube_dl/extractor/mgoon.py @@ -0,0 +1,87 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    qualities, +    unified_strdate, +) + + +class MgoonIE(InfoExtractor): +    _VALID_URL = r'''(?x)https?://(?:www\.)? +    (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| +        video\.mgoon\.com)/(?P<id>[0-9]+)''' +    _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}' +    _TESTS = [ +        { +            'url': 'http://m.mgoon.com/ch/hi6618/v/5582148', +            'md5': 'dd46bb66ab35cf6d51cc812fd82da79d', +            'info_dict': { +                'id': '5582148', +                'uploader_id': 'hi6618', +                'duration': 240.419, +                'upload_date': '20131220', +                'ext': 'mp4', +                'title': 'md5:543aa4c27a4931d371c3f433e8cebebc', +                'thumbnail': 're:^https?://.*\.jpg$', +            } +        }, +        { +            'url': 'http://www.mgoon.com/play/view/5582148', +            'only_matching': True, +        }, +        { +            'url': 'http://video.mgoon.com/5582148', +            'only_matching': True, +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        data = self._download_json(self._API_URL.format(video_id), video_id) + +        if data.get('errorInfo', {}).get('code') != 'NONE': +            raise ExtractorError('%s encountered an error: %s' % ( +                self.IE_NAME, data['errorInfo']['message']), expected=True) + +        v_info = data['videoInfo'] +        title = v_info.get('v_title') +        thumbnail = v_info.get('v_thumbnail') +        duration = v_info.get('v_duration') +        upload_date = unified_strdate(v_info.get('v_reg_date')) +        uploader_id = data.get('userInfo', {}).get('u_alias') +        if duration: +            duration /= 1000.0 + +        age_limit = None +        if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT': +            age_limit = 18 + +        formats = [] +        get_quality = qualities(['360p', '480p', '720p', '1080p']) +        for fmt in data['videoFiles']: +            formats.append({ +                'format_id': fmt['label'], +                'quality': get_quality(fmt['label']), +                'url': fmt['url'], +                'ext': fmt['format'], + +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'formats': formats, +            'thumbnail': thumbnail, +            'duration': duration, +            'upload_date': upload_date, +            'uploader_id': uploader_id, +            'age_limit': age_limit, +        } diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2e4acbad..e75ab7c39 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):      _TEST = {          'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', -        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', +        # md5 checksum is not stable          'info_dict': { -            'id': 'u1RInQZRN7QJ', +            'id': 'bTmnLCvIbaaH',              'ext': 'flv',              'title': 'I Am a Firefighter',              'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py new file mode 100644 index 000000000..963c4587c --- /dev/null +++ b/youtube_dl/extractor/nfl.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    int_or_none, +    remove_end, +) + + +class NFLIE(InfoExtractor): +    IE_NAME = 'nfl.com' +    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)' +    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json' +    _TEST = { +        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', +        # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5',  # md5 checksum fluctuates +        'info_dict': { +            'id': '0ap3000000398478', +            'ext': 'mp4', +            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights', +            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', +            'upload_date': '20140921', +            'timestamp': 1411337580, +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        config = self._download_json(self._PLAYER_CONFIG_URL, video_id, +                                     note='Downloading player config') +        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config) +        video_data = self._download_json(url_template.format(id=video_id), video_id) + +        cdns = config.get('cdns') +        if not cdns: +            raise ExtractorError('Failed to get CDN data', expected=True) + +        formats = [] +        streams = video_data.get('cdnData', {}).get('bitrateInfo', []) +        for name, cdn in cdns.items(): +            # LimeLight streams don't seem to work +            if cdn.get('name') == 'LIMELIGHT': +                continue + +            protocol = cdn.get('protocol') +            host = remove_end(cdn.get('host', ''), '/') +            if not (protocol and host): +                continue + +            path_prefix = cdn.get('pathprefix', '') +            if path_prefix and not path_prefix.endswith('/'): +                path_prefix = '%s/' % path_prefix + +            get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format( +                protocol=protocol, +                host=host, +                prefix=path_prefix, +                path=p, +            ) + +            if protocol == 'rtmp': +                preference = -2 +            elif 'prog' in name.lower(): +                preference = -1 +            else: +                preference = 0 + +            for stream in streams: +                path = stream.get('path') +                if not path: +                    continue + +                formats.append({ +                    'url': get_url(path), +                    'vbr': int_or_none(stream.get('rate', 0), 1000), +                    'preference': preference, +                    'format_note': name, +                }) + +        self._sort_formats(formats) + +        thumbnail = None +        for q in ('xl', 'l', 'm', 's', 'xs'): +            thumbnail = video_data.get('imagePaths', {}).get(q) +            if thumbnail: +                break + +        return { +            'id': video_id, +            'title': video_data.get('storyHeadline'), +            'formats': formats, +            'description': video_data.get('caption'), +            'duration': video_data.get('duration'), +            'thumbnail': thumbnail, +            'timestamp': int_or_none(video_data.get('posted'), 1000), +        } diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 959fdf590..7f1bc6377 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -2,6 +2,8 @@  from __future__ import unicode_literals  import re +import time +import hashlib  from .common import InfoExtractor  from ..utils import ( @@ -17,6 +19,7 @@ from ..utils import (  class NocoIE(InfoExtractor):      _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'      _LOGIN_URL = 'http://noco.tv/do.php' +    _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'      _NETRC_MACHINE = 'noco'      _TEST = { @@ -55,33 +58,52 @@ class NocoIE(InfoExtractor):          login = self._download_json(request, None, 'Logging in as %s' % username)          if 'erreur' in login: -            raise  ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) +            raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) + +    def _call_api(self, path, video_id, note): +        ts = compat_str(int(time.time() * 1000)) +        tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() +        url = self._API_URL_TEMPLATE % (path, ts, tk) + +        resp = self._download_json(url, video_id, note) + +        if isinstance(resp, dict) and resp.get('error'): +            self._raise_error(resp['error'], resp['description']) + +        return resp + +    def _raise_error(self, error, description): +        raise ExtractorError( +            '%s returned error: %s - %s' % (self.IE_NAME, error, description), +            expected=True)      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        medias = self._download_json( -            'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') +        medias = self._call_api( +            'shows/%s/medias' % video_id, +            video_id, 'Downloading video JSON') + +        qualities = self._call_api( +            'qualities', +            video_id, 'Downloading qualities JSON')          formats = [] -        for fmt in medias['fr']['video_list']['default']['quality_list']: -            format_id = fmt['quality_key'] +        for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items(): -            file = self._download_json( -                'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), +            video = self._call_api( +                'shows/%s/video/%s/fr' % (video_id, format_id.lower()),                  video_id, 'Downloading %s video JSON' % format_id) -            file_url = file['file'] +            file_url = video['file']              if not file_url:                  continue -            if file_url == 'forbidden': -                raise ExtractorError( -                    '%s returned error: %s - %s' % ( -                        self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']), -                    expected=True) +            if file_url in ['forbidden', 'not found']: +                popmessage = video['popmessage'] +                self._raise_error(popmessage['title'], popmessage['message'])              formats.append({                  'url': file_url, @@ -91,20 +113,31 @@ class NocoIE(InfoExtractor):                  'abr': fmt['audiobitrate'],                  'vbr': fmt['videobitrate'],                  'filesize': fmt['filesize'], -                'format_note': fmt['quality_name'], -                'preference': fmt['priority'], +                'format_note': qualities[format_id]['quality_name'], +                'preference': qualities[format_id]['priority'],              })          self._sort_formats(formats) -        show = self._download_json( -            'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] +        show = self._call_api( +            'shows/by_id/%s' % video_id, +            video_id, 'Downloading show JSON')[0] -        upload_date = unified_strdate(show['indexed']) +        upload_date = unified_strdate(show['online_date_start_utc'])          uploader = show['partner_name']          uploader_id = show['partner_key']          duration = show['duration_ms'] / 1000.0 -        thumbnail = show['screenshot'] + +        thumbnails = [] +        for thumbnail_key, thumbnail_url in show.items(): +            m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key) +            if not m: +                continue +            thumbnails.append({ +                'url': thumbnail_url, +                'width': int(m.group('width')), +                'height': int(m.group('height')), +            })          episode = show.get('show_TT') or show.get('show_OT')          family = show.get('family_TT') or show.get('family_OT') @@ -124,7 +157,7 @@ class NocoIE(InfoExtractor):              'id': video_id,              'title': title,              'description': description, -            'thumbnail': thumbnail, +            'thumbnails': thumbnails,              'upload_date': upload_date,              'uploader': uploader,              'uploader_id': uploader_id, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7a154e94a..f36d446d2 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -7,6 +7,7 @@ from ..utils import (      unified_strdate,      parse_duration,      qualities, +    url_basename,  ) @@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') +        return self._get_info(video_id) +    def _get_info(self, video_id):          metadata = self._download_json(              'http://e.omroep.nl/metadata/aflevering/%s' % video_id,              video_id, @@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):              'duration': parse_duration(metadata.get('tijdsduur')),              'formats': formats,          } + + +class TegenlichtVproIE(NPOIE): +    IE_NAME = 'tegenlicht.vpro.nl' +    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' + +    _TESTS = [ +        { +            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html', +            'md5': 'f8065e4e5a7824068ed3c7e783178f2c', +            'info_dict': { +                'id': 'VPWON_1169289', +                'ext': 'm4v', +                'title': 'Tegenlicht', +                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', +                'upload_date': '20130225', +            }, +        }, +    ] + +    def _real_extract(self, url): +        name = url_basename(url) +        webpage = self._download_webpage(url, name) +        urn = self._html_search_meta('mediaurn', webpage) +        info_page = self._download_json( +            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) +        return self._get_info(info_page['mid']) diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index 72df4d842..ebc046804 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -10,6 +10,7 @@ from ..utils import (      ExtractorError,      float_or_none,      int_or_none, +    str_to_int,  ) @@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor):              'duration': 5627.428,              'upload_date': '20140712',              'view_count': int, +            'comment_count': int,              'thumbnail': 're:^https?://.*\.jpg$',          },      } @@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor):          recording = rec_doc.find('./recording')          title = recording.find('./title').text -        view_count = int_or_none(recording.find('./stats/playcount').text) +        view_count = str_to_int(recording.find('./stats/playcount').text) +        comment_count = str_to_int(recording.find('./stats/comments').text)          duration = float_or_none(recording.find('./duration').text, scale=1000)          thumbnail = recording.find('./image').text @@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor):              'title': title,              'upload_date': upload_date,              'view_count': view_count, +            'comment_count': comment_count,              'duration': duration,              'thumbnail': thumbnail,              'uploader': uploader, diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 34058fd4b..409f8540a 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -12,7 +12,7 @@ from ..utils import (  class SBSIE(InfoExtractor):      IE_DESC = 'sbs.com.au' -    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/' +    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'      _TESTS = [{          # Original URL is handled by the generic IE which finds the iframe: @@ -21,12 +21,16 @@ class SBSIE(InfoExtractor):          'md5': '3150cf278965eeabb5b4cea1c963fe0a',          'info_dict': {              'id': '320403011771', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Dingo Conservation',              'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',              'thumbnail': 're:http://.*\.jpg',          },          'add_ies': ['generic'], +    }, +    { +        'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index b6b2dba9c..0be793b1c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -5,6 +5,7 @@ import json  from .common import InfoExtractor  from ..utils import ( +    compat_str,      ExtractorError,      xpath_with_ns,  ) @@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):          body = meta.find(_x('smil:body'))          f4m_node = body.find(_x('smil:seq//smil:video')) -        if f4m_node is not None: +        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:              f4m_url = f4m_node.attrib['src']              if 'manifest.f4m?' not in f4m_url:                  f4m_url += '?'              # the parameters are from syfy.com, other sites may use others,              # they also work for nbc.com              f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' -            formats = [{ -                'ext': 'flv', -                'url': f4m_url, -            }] +            formats = self._extract_f4m_formats(f4m_url, video_id)          else: -            base_url = head.find(_x('smil:meta')).attrib['base'] -            switch = body.find(_x('smil:switch'))              formats = [] -            for f in switch.findall(_x('smil:video')): -                attr = f.attrib -                width = int(attr['width']) -                height = int(attr['height']) -                vbr = int(attr['system-bitrate']) // 1000 -                format_id = '%dx%d_%dk' % (width, height, vbr) -                formats.append({ -                    'format_id': format_id, -                    'url': base_url, -                    'play_path': 'mp4:' + attr['src'], -                    'ext': 'flv', -                    'width': width, -                    'height': height, -                    'vbr': vbr, -                }) +            switch = body.find(_x('smil:switch')) +            if switch is not None: +                base_url = head.find(_x('smil:meta')).attrib['base'] +                for f in switch.findall(_x('smil:video')): +                    attr = f.attrib +                    width = int(attr['width']) +                    height = int(attr['height']) +                    vbr = int(attr['system-bitrate']) // 1000 +                    format_id = '%dx%d_%dk' % (width, height, vbr) +                    formats.append({ +                        'format_id': format_id, +                        'url': base_url, +                        'play_path': 'mp4:' + attr['src'], +                        'ext': 'flv', +                        'width': width, +                        'height': height, +                        'vbr': vbr, +                    }) +            else: +                switch = body.find(_x('smil:seq//smil:switch')) +                for f in switch.findall(_x('smil:video')): +                    attr = f.attrib +                    vbr = int(attr['system-bitrate']) // 1000 +                    formats.append({ +                        'format_id': compat_str(vbr), +                        'url': attr['src'], +                        'vbr': vbr, +                    })              self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py new file mode 100644 index 000000000..607e947bb --- /dev/null +++ b/youtube_dl/extractor/thvideo.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    unified_strdate +) + + +class THVideoIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://thvideo.tv/v/th1987/', +        'md5': 'fa107b1f73817e325e9433505a70db50', +        'info_dict': { +            'id': '1987', +            'ext': 'mp4', +            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', +            'display_id': 'th1987', +            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', +            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...', +            'upload_date': '20140722' +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        # extract download link from mobile player page +        webpage_player = self._download_webpage( +            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id), +            video_id, note='Downloading video source page') +        video_url = self._html_search_regex( +            r'<source src="(.*?)" type', webpage_player, 'video url') + +        # extract video info from main page +        webpage = self._download_webpage( +            'http://thvideo.tv/v/th%s' % (video_id), video_id) +        title = self._og_search_title(webpage) +        display_id = 'th%s' % video_id +        thumbnail = self._og_search_thumbnail(webpage) +        description = self._og_search_description(webpage) +        upload_date = unified_strdate(self._html_search_regex( +            r'span itemprop="datePublished" content="(.*?)">', webpage, +            'upload date', fatal=False)) + +        return { +            'id': video_id, +            'ext': 'mp4', +            'url': video_url, +            'title': title, +            'display_id': display_id, +            'thumbnail': thumbnail, +            'description': description, +            'upload_date': upload_date +        } diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 08a48c05a..64a1e9030 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text  class Tube8IE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)' -    _TEST = { -        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', -        'md5': '44bf12b98313827dd52d35b8706a4ea0', -        'info_dict': { -            'id': '229795', -            'ext': 'mp4', -            'description': 'hot teen Kasia grinding', -            'uploader': 'unknown', -            'title': 'Kasia music video', -            'age_limit': 18, -        } -    } +    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)' +    _TESTS = [ +        { +            'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', +            'md5': '44bf12b98313827dd52d35b8706a4ea0', +            'info_dict': { +                'id': '229795', +                'display_id': 'kasia-music-video', +                'ext': 'mp4', +                'description': 'hot teen Kasia grinding', +                'uploader': 'unknown', +                'title': 'Kasia music video', +                'age_limit': 18, +            } +        }, +        { +            'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', +            'only_matching': True, +        }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') +        display_id = mobj.group('display_id')          req = compat_urllib_request.Request(url)          req.add_header('Cookie', 'age_verified=1') -        webpage = self._download_webpage(req, video_id) +        webpage = self._download_webpage(req, display_id)          flashvars = json.loads(self._html_search_regex(              r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars')) @@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor):          return {              'id': video_id, +            'display_id': display_id,              'url': video_url,              'title': title,              'description': description, diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d2ffd1b6b..ebab8b86c 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -6,6 +6,7 @@ import xml.etree.ElementTree  from .common import InfoExtractor  from ..utils import (      compat_HTTPError, +    compat_urllib_request,      ExtractorError,  ) @@ -24,7 +25,7 @@ class VevoIE(InfoExtractor):      _TESTS = [{          'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', -        "md5": "06bea460acb744eab74a9d7dcb4bfd61", +        "md5": "95ee28ee45e70130e3ab02b0f579ae23",          'info_dict': {              'id': 'GB1101300280',              'ext': 'mp4', @@ -40,7 +41,7 @@ class VevoIE(InfoExtractor):      }, {          'note': 'v3 SMIL format',          'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', -        'md5': '893ec0e0d4426a1d96c01de8f2bdff58', +        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',          'info_dict': {              'id': 'USUV71302923',              'ext': 'mp4', @@ -69,6 +70,21 @@ class VevoIE(InfoExtractor):      }]      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' +    def _real_initialize(self): +        req = compat_urllib_request.Request( +            'http://www.vevo.com/auth', data=b'') +        webpage = self._download_webpage( +            req, None, +            note='Retrieving oauth token', +            errnote='Unable to retrieve oauth token', +            fatal=False) +        if webpage is False: +            self._oauth_token = None +        else: +            self._oauth_token = self._search_regex( +                r'access_token":\s*"([^"]+)"', +                webpage, 'access token', fatal=False) +      def _formats_from_json(self, video_info):          last_version = {'version': -1}          for version in video_info['videoVersions']: @@ -129,6 +145,26 @@ class VevoIE(InfoExtractor):              })          return formats +    def _download_api_formats(self, video_id): +        if not self._oauth_token: +            self._downloader.report_warning( +                'No oauth token available, skipping API HLS download') +            return [] + +        api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( +            video_id, self._oauth_token) +        api_data = self._download_json( +            api_url, video_id, +            note='Downloading HLS formats', +            errnote='Failed to download HLS format list', fatal=False) +        if api_data is None: +            return [] + +        m3u8_url = api_data[0]['url'] +        return self._extract_m3u8_formats( +            m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', +            preference=0) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') @@ -152,30 +188,8 @@ class VevoIE(InfoExtractor):          else:              age_limit = None -        # Download SMIL -        smil_blocks = sorted(( -            f for f in video_info['videoVersions'] -            if f['sourceType'] == 13), -            key=lambda f: f['version']) - -        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( -            self._SMIL_BASE_URL, video_id, video_id.lower()) -        if smil_blocks: -            smil_url_m = self._search_regex( -                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', -                fatal=False) -            if smil_url_m is not None: -                smil_url = smil_url_m - -        try: -            smil_xml = self._download_webpage(smil_url, video_id, -                                              'Downloading SMIL info') -            formats.extend(self._formats_from_smil(smil_xml)) -        except ExtractorError as ee: -            if not isinstance(ee.cause, compat_HTTPError): -                raise -            self._downloader.report_warning( -                'Cannot download SMIL information, falling back to JSON ..') +        # Download via HLS API +        formats.extend(self._download_api_formats(video_id))          self._sort_formats(formats)          timestamp_ms = int(self._search_regex( diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 46b4d9133..268e2f618 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -5,7 +5,10 @@ import re  import hashlib  from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( +    ExtractorError, +    unified_strdate, +)  class WatIE(InfoExtractor): @@ -57,6 +60,11 @@ class WatIE(InfoExtractor):          video_info = self.download_video_info(real_id) +        error_desc = video_info.get('error_desc') +        if error_desc: +            raise ExtractorError( +                '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) +          geo_list = video_info.get('geoList')          country = geo_list[0] if geo_list else '' diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py new file mode 100644 index 000000000..40fc4165f --- /dev/null +++ b/youtube_dl/extractor/yourupload.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class YourUploadIE(InfoExtractor): +    _VALID_URL = r'''(?x)https?://(?:www\.)? +        (?:yourupload\.com/watch| +           embed\.yourupload\.com| +           embed\.yucache\.net +        )/(?P<id>[A-Za-z0-9]+) +        ''' +    _TESTS = [ +        { +            'url': 'http://yourupload.com/watch/14i14h', +            'md5': 'bf5c2f95c4c917536e80936af7bc51e1', +            'info_dict': { +                'id': '14i14h', +                'ext': 'mp4', +                'title': 'BigBuckBunny_320x180.mp4', +                'thumbnail': 're:^https?://.*\.jpe?g', +            } +        }, +        { +            'url': 'http://embed.yourupload.com/14i14h', +            'only_matching': True, +        }, +        { +            'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', +            'only_matching': True, +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        url = 'http://embed.yucache.net/{0:}'.format(video_id) +        webpage = self._download_webpage(url, video_id) + +        title = self._og_search_title(webpage) +        thumbnail = self._og_search_thumbnail(webpage) +        url = self._og_search_video_url(webpage) + +        formats = [{ +            'format_id': 'sd', +            'url': url, +        }] + +        return { +            'id': video_id, +            'title': title, +            'formats': formats, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b54c69122..99198e380 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      def _set_language(self):          return bool(self._download_webpage(              self._LANG_URL, None, -            note=u'Setting language', errnote='unable to set language', +            note='Setting language', errnote='unable to set language',              fatal=False))      def _login(self): @@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          # No authentication to be performed          if username is None:              if self._LOGIN_REQUIRED: -                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) +                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)              return True          login_page = self._download_webpage(              self._LOGIN_URL, None, -            note=u'Downloading login page', -            errnote=u'unable to fetch login page', fatal=False) +            note='Downloading login page', +            errnote='unable to fetch login page', fatal=False)          if login_page is False:              return @@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          req = compat_urllib_request.Request(self._LOGIN_URL, login_data)          login_results = self._download_webpage(              req, None, -            note=u'Logging in', errnote=u'unable to log in', fatal=False) +            note='Logging in', errnote='unable to log in', fatal=False)          if login_results is False:              return False          if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: -            raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True) +            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)          # Two-Factor          # TODO add SMS and phone call support - these require making a request and then prompting the user @@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              tfa_code = self._get_tfa_info()              if tfa_code is None: -                self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>') -                self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)') +                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>') +                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')                  return False              # Unlike the first login form, secTok and timeStmp are both required for the TFA form              match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)              if match is None: -                self._downloader.report_warning(u'Failed to get secTok - did the page structure change?') +                self._downloader.report_warning('Failed to get secTok - did the page structure change?')              secTok = match.group(1)              match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)              if match is None: -                self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?') +                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')              timeStmp = match.group(1)              tfa_form_strs = { @@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)              tfa_results = self._download_webpage(                  tfa_req, None, -                note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False) +                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)              if tfa_results is False:                  return False              if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None: -                self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.') +                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')                  return False              if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: -                self._downloader.report_warning(u'unable to log in - did the page structure change?') +                self._downloader.report_warning('unable to log in - did the page structure change?')                  return False              if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: -                self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') +                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')                  return False          if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: -            self._downloader.report_warning(u'unable to log in: bad username or password') +            self._downloader.report_warning('unable to log in: bad username or password')              return False          return True @@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          self._download_webpage(              req, None, -            note=u'Confirming age', errnote=u'Unable to confirm age') +            note='Confirming age', errnote='Unable to confirm age')          return True      def _real_initialize(self): @@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                              youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls                           (?:                                                  # the various things that can precede the ID: -                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ +                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/                               |(?:                                             # or the v= param in all its forms                                   (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #! @@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      IE_NAME = 'youtube'      _TESTS = [          { -            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc", -            u"file":  u"BaW_jenozKc.mp4", -            u"info_dict": { -                u"title": u"youtube-dl test video \"'/\\ä↭𝕐", -                u"uploader": u"Philipp Hagemeister", -                u"uploader_id": u"phihag", -                u"upload_date": u"20121002", -                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", -                u"categories": [u'Science & Technology'], +            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc', +            'info_dict': { +                'id': 'BaW_jenozKc', +                'ext': 'mp4', +                'title': 'youtube-dl test video "\'/\\ä↭𝕐', +                'uploader': 'Philipp Hagemeister', +                'uploader_id': 'phihag', +                'upload_date': '20121002', +                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', +                'categories': ['Science & Technology'],                  'like_count': int,                  'dislike_count': int,              }          },          { -            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY", -            u"file":  u"UxxajLWwzqY.mp4", -            u"note": u"Test generic use_cipher_signature video (#897)", -            u"info_dict": { -                u"upload_date": u"20120506", -                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", -                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f", -                u"uploader": u"Icona Pop", -                u"uploader_id": u"IconaPop" +            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', +            'note': 'Test generic use_cipher_signature video (#897)', +            'info_dict': { +                'id': 'UxxajLWwzqY', +                'ext': 'mp4', +                'upload_date': '20120506', +                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', +                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', +                'uploader': 'Icona Pop', +                'uploader_id': 'IconaPop',              }          },          { -            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ", -            u"file":  u"07FYdnEawAQ.mp4", -            u"note": u"Test VEVO video with age protection (#956)", -            u"info_dict": { -                u"upload_date": u"20130703", -                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", -                u"description": u"md5:64249768eec3bc4276236606ea996373", -                u"uploader": u"justintimberlakeVEVO", -                u"uploader_id": u"justintimberlakeVEVO" +            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ', +            'note': 'Test VEVO video with age protection (#956)', +            'info_dict': { +                'id': '07FYdnEawAQ', +                'ext': 'mp4', +                'upload_date': '20130703', +                'title': 'Justin Timberlake - Tunnel Vision (Explicit)', +                'description': 'md5:64249768eec3bc4276236606ea996373', +                'uploader': 'justintimberlakeVEVO', +                'uploader_id': 'justintimberlakeVEVO',              }          },          { -            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ", -            u"file":  u"yZIXLfi8CZQ.mp4", -            u"note": u"Embed-only video (#1746)", -            u"info_dict": { -                u"upload_date": u"20120608", -                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", -                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7", -                u"uploader": u"SET India", -                u"uploader_id": u"setindia" +            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', +            'note': 'Embed-only video (#1746)', +            'info_dict': { +                'id': 'yZIXLfi8CZQ', +                'ext': 'mp4', +                'upload_date': '20120608', +                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', +                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', +                'uploader': 'SET India', +                'uploader_id': 'setindia'              }          },          { -            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I", -            u"file": u"a9LDPn-MO4I.m4a", -            u"note": u"256k DASH audio (format 141) via DASH manifest", -            u"info_dict": { -                u"upload_date": "20121002", -                u"uploader_id": "8KVIDEO", -                u"description": '', -                u"uploader": "8KVIDEO", -                u"title": "UHDTV TEST 8K VIDEO.mp4" +            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', +            'note': '256k DASH audio (format 141) via DASH manifest', +            'info_dict': { +                'id': 'a9LDPn-MO4I', +                'ext': 'm4a', +                'upload_date': '20121002', +                'uploader_id': '8KVIDEO', +                'description': '', +                'uploader': '8KVIDEO', +                'title': 'UHDTV TEST 8K VIDEO.mp4'              }, -            u"params": { -                u"youtube_include_dash_manifest": True, -                u"format": "141", +            'params': { +                'youtube_include_dash_manifest': True, +                'format': '141',              },          },          # DASH manifest with encrypted signature @@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'uploader_id': 'AfrojackVEVO',                  'upload_date': '20131011',              }, -            u"params": { +            'params': {                  'youtube_include_dash_manifest': True,                  'format': '141',              }, @@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def report_video_info_webpage_download(self, video_id):          """Report attempt to download video info webpage.""" -        self.to_screen(u'%s: Downloading video info webpage' % video_id) +        self.to_screen('%s: Downloading video info webpage' % video_id)      def report_information_extraction(self, video_id):          """Report attempt to extract video information.""" -        self.to_screen(u'%s: Extracting video information' % video_id) +        self.to_screen('%s: Extracting video information' % video_id)      def report_unavailable_format(self, video_id, format):          """Report extracted video URL.""" -        self.to_screen(u'%s: Format %s not available' % (video_id, format)) +        self.to_screen('%s: Format %s not available' % (video_id, format))      def report_rtmp_download(self):          """Indicate the download will use the RTMP protocol.""" -        self.to_screen(u'RTMP download detected') +        self.to_screen('RTMP download detected')      def _signature_cache_id(self, example_sig):          """ Return a string representation of a signature """ @@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              player_type, player_id, self._signature_cache_id(example_sig))          assert os.path.basename(func_id) == func_id -        cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) +        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)          if cache_spec is not None:              return lambda s: ''.join(s[i] for i in cache_spec)          if player_type == 'js':              code = self._download_webpage(                  player_url, video_id, -                note=u'Downloading %s player %s' % (player_type, player_id), -                errnote=u'Download of %s failed' % player_url) +                note='Downloading %s player %s' % (player_type, player_id), +                errnote='Download of %s failed' % player_url)              res = self._parse_sig_js(code)          elif player_type == 'swf':              urlh = self._request_webpage(                  player_url, video_id, -                note=u'Downloading %s player %s' % (player_type, player_id), -                errnote=u'Download of %s failed' % player_url) +                note='Downloading %s player %s' % (player_type, player_id), +                errnote='Download of %s failed' % player_url)              code = urlh.read()              res = self._parse_sig_swf(code)          else: @@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              cache_res = res(test_string)              cache_spec = [ord(c) for c in cache_res] -        self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) +        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)          return res      def _print_sig_code(self, func, example_sig):          def gen_sig_code(idxs):              def _genslice(start, end, step):                  starts = '' if start == 0 else str(start) -                ends = (u':%d' % (end+step)) if end + step >= 0 else ':' -                steps = '' if step == 1 else (u':%d' % step) +                ends = (':%d' % (end+step)) if end + step >= 0 else ':' +                steps = '' if step == 1 else (':%d' % step)                  return 's[%s%s%s]' % (starts, ends, steps)              step = None @@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          expr_code = ' + '.join(gen_sig_code(cache_spec))          signature_id_tuple = '(%s)' % (              ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) -        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' +        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'                  '    return %s\n') % (signature_id_tuple, expr_code) -        self.to_screen(u'Extracted signature function:\n' + code) +        self.to_screen('Extracted signature function:\n' + code)      def _parse_sig_js(self, jscode):          funcname = self._search_regex( @@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          """Turn the encrypted s field into a working signature"""          if player_url is None: -            raise ExtractorError(u'Cannot decrypt signature without player_url') +            raise ExtractorError('Cannot decrypt signature without player_url') -        if player_url.startswith(u'//'): +        if player_url.startswith('//'):              player_url = 'https:' + player_url          try:              player_id = (player_url, self._signature_cache_id(s)) @@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,                  video_id, note=False)          except ExtractorError as err: -            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) +            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))              return {}          lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) @@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              url = 'https://www.youtube.com/api/timedtext?' + params              sub_lang_list[lang] = url          if not sub_lang_list: -            self._downloader.report_warning(u'video doesn\'t have subtitles') +            self._downloader.report_warning('video doesn\'t have subtitles')              return {}          return sub_lang_list @@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          """We need the webpage for getting the captions url, pass it as an             argument to speed up the process."""          sub_format = self._downloader.params.get('subtitlesformat', 'srt') -        self.to_screen(u'%s: Looking for automatic captions' % video_id) +        self.to_screen('%s: Looking for automatic captions' % video_id)          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)          err_msg = 'Couldn\'t find automatic captions for %s' % video_id          if mobj is None: @@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              caption_list = self._download_xml(list_url, video_id)              original_lang_node = caption_list.find('track')              if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : -                self._downloader.report_warning(u'Video doesn\'t have automatic captions') +                self._downloader.report_warning('Video doesn\'t have automatic captions')                  return {}              original_lang = original_lang_node.attrib['lang_code'] @@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def extract_id(cls, url):          mobj = re.match(cls._VALID_URL, url, re.VERBOSE)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          video_id = mobj.group(2)          return video_id @@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def _extract_annotations(self, video_id):          url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id -        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') +        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')      def _real_extract(self, url):          proto = ( @@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          # Check for "rental" videos          if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: -            raise ExtractorError(u'"rental" videos not supported') +            raise ExtractorError('"rental" videos not supported')          # Start extracting information          self.report_information_extraction(video_id)          # uploader          if 'author' not in video_info: -            raise ExtractorError(u'Unable to extract uploader name') +            raise ExtractorError('Unable to extract uploader name')          video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])          # uploader_id @@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          if mobj is not None:              video_uploader_id = mobj.group(1)          else: -            self._downloader.report_warning(u'unable to extract uploader nickname') +            self._downloader.report_warning('unable to extract uploader nickname')          # title          if 'title' in video_info:              video_title = video_info['title'][0]          else: -            self._downloader.report_warning(u'Unable to extract video title') +            self._downloader.report_warning('Unable to extract video title')              video_title = '_'          # thumbnail image @@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          if m_thumb is not None:              video_thumbnail = m_thumb.group(1)          elif 'thumbnail_url' not in video_info: -            self._downloader.report_warning(u'unable to extract video thumbnail') +            self._downloader.report_warning('unable to extract video thumbnail')              video_thumbnail = None          else:   # don't panic if we can't find it              video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) @@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              if count is not None:                  return int(count.replace(',', ''))              return None -        like_count = _extract_count(u'like') -        dislike_count = _extract_count(u'dislike') +        like_count = _extract_count('like') +        dislike_count = _extract_count('dislike')          # subtitles          video_subtitles = self.extract_subtitles(video_id, video_webpage) @@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              return          if 'length_seconds' not in video_info: -            self._downloader.report_warning(u'unable to extract video duration') +            self._downloader.report_warning('unable to extract video duration')              video_duration = None          else:              video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])) @@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              # Easy way to know if the 's' value is in url_encoded_fmt_stream_map              # this signatures are encrypted              if 'url_encoded_fmt_stream_map' not in args: -                raise ValueError(u'No stream_map present')  # caught below +                raise ValueError('No stream_map present')  # caught below              re_signature = re.compile(r'[&,]s=')              m_s = re_signature.search(args['url_encoded_fmt_stream_map'])              if m_s is not None: -                self.to_screen(u'%s: Encrypted signatures detected.' % video_id) +                self.to_screen('%s: Encrypted signatures detected.' % video_id)                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]              m_s = re_signature.search(args.get('adaptive_fmts', ''))              if m_s is not None: @@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                                  player_desc = 'html5 player %s' % player_version                          parts_sizes = self._signature_cache_id(encrypted_sig) -                        self.to_screen(u'{%s} signature length %s, %s' % +                        self.to_screen('{%s} signature length %s, %s' %                              (format_id, parts_sizes, player_desc))                      signature = self._decrypt_signature( @@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              url_map = self._extract_from_m3u8(manifest_url, video_id)              formats = _map_to_format_list(url_map)          else: -            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') +            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')          # Look for the DASH manifest          if (self._downloader.params.get('youtube_include_dash_manifest', False)): @@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)                  dash_doc = self._download_xml(                      dash_manifest_url, video_id, -                    note=u'Downloading DASH manifest', -                    errnote=u'Could not download DASH manifest') -                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): +                    note='Downloading DASH manifest', +                    errnote='Could not download DASH manifest') +                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):                      url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')                      if url_el is None:                          continue @@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                          existing_format.update(f)              except (ExtractorError, KeyError) as e: -                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) +                self.report_warning('Skipping DASH manifest: %s' % e, video_id)          self._sort_formats(formats) @@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):                          (?:\w+\.)?                          youtube\.com/                          (?: -                           (?:course|view_play_list|my_playlists|artist|playlist|watch) +                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)                             \? (?:.*?&)*? (?:p|a|list)=                          |  p/                          ) @@ -1056,6 +1061,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):              'title': 'YDL_safe_search',          },          'playlist_count': 2, +    }, { +        'note': 'embedded', +        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', +        'playlist_count': 4, +        'info_dict': { +            'title': 'JODA15', +        } +    }, { +        'note': 'Embedded SWF player', +        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', +        'playlist_count': 4, +        'info_dict': { +            'title': 'JODA7', +        }      }]      def _real_initialize(self): @@ -1090,7 +1109,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          # Extract playlist id          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          playlist_id = mobj.group(1) or mobj.group(2)          # Check if it's a video-specific URL @@ -1098,16 +1117,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          if 'v' in query_dict:              video_id = query_dict['v'][0]              if self._downloader.params.get('noplaylist'): -                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) +                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)                  return self.url_result(video_id, 'Youtube', video_id=video_id)              else: -                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) +                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))          if playlist_id.startswith('RD'):              # Mixes require a custom extraction process              return self._extract_mix(playlist_id)          if playlist_id.startswith('TL'): -            raise ExtractorError(u'For downloading YouTube.com top lists, use ' +            raise ExtractorError('For downloading YouTube.com top lists, use '                  'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)          url = self._TEMPLATE_URL % playlist_id @@ -1152,19 +1171,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):  class YoutubeTopListIE(YoutubePlaylistIE):      IE_NAME = 'youtube:toplist' -    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' +    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'          ' (Example: "yttoplist:music:Top Tracks")')      _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' -    _TESTS = [] +    _TESTS = [{ +        'url': 'yttoplist:music:Trending', +        'playlist_mincount': 5, +        'skip': 'Only works for logged-in users', +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          channel = mobj.group('chann')          title = mobj.group('title')          query = compat_urllib_parse.urlencode({'title': title}) -        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) -        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) -        link = self._html_search_regex(playlist_re, channel_page, 'list') +        channel_page = self._download_webpage( +            'https://www.youtube.com/%s' % channel, title) +        link = self._html_search_regex( +            r'''(?x) +                <a\s+href="([^"]+)".*?>\s* +                <span\s+class="branded-page-module-title-text">\s* +                <span[^>]*>.*?%s.*?</span>''' % re.escape(query), +            channel_page, 'list')          url = compat_urlparse.urljoin('https://www.youtube.com/', link)          video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' @@ -1190,6 +1218,11 @@ class YoutubeChannelIE(InfoExtractor):      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'      _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'      IE_NAME = 'youtube:channel' +    _TESTS = [{ +        'note': 'paginated channel', +        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', +        'playlist_mincount': 91, +    }]      def extract_videos_from_page(self, page):          ids_in_page = [] @@ -1202,7 +1235,7 @@ class YoutubeChannelIE(InfoExtractor):          # Extract channel id          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          # Download channel page          channel_id = mobj.group(1) @@ -1224,7 +1257,7 @@ class YoutubeChannelIE(InfoExtractor):              for pagenum in itertools.count(1):                  url = self._MORE_PAGES_URL % (pagenum, channel_id)                  page = self._download_json( -                    url, channel_id, note=u'Downloading page #%s' % pagenum, +                    url, channel_id, note='Downloading page #%s' % pagenum,                      transform_source=uppercase_escape)                  ids_in_page = self.extract_videos_from_page(page['content_html']) @@ -1233,7 +1266,7 @@ class YoutubeChannelIE(InfoExtractor):                  if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:                      break -        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) +        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))          url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)                         for video_id in video_ids] @@ -1248,6 +1281,17 @@ class YoutubeUserIE(InfoExtractor):      _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'      IE_NAME = 'youtube:user' +    _TESTS = [{ +        'url': 'https://www.youtube.com/user/TheLinuxFoundation', +        'playlist_mincount': 320, +        'info_dict': { +            'title': 'TheLinuxFoundation', +        } +    }, { +        'url': 'ytuser:phihag', +        'only_matching': True, +    }] +      @classmethod      def suitable(cls, url):          # Don't return True if the url can be extracted with other youtube @@ -1260,7 +1304,7 @@ class YoutubeUserIE(InfoExtractor):          # Extract username          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          username = mobj.group(1) @@ -1281,7 +1325,7 @@ class YoutubeUserIE(InfoExtractor):              try:                  response = json.loads(page)              except ValueError as err: -                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) +                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))              if 'entry' not in response['feed']:                  return @@ -1322,9 +1366,9 @@ class YoutubeSearchIE(SearchInfoExtractor):                  compat_urllib_parse.quote_plus(query.encode('utf-8')),                  (PAGE_SIZE * pagenum) + 1)              data_json = self._download_webpage( -                result_url, video_id=u'query "%s"' % query, -                note=u'Downloading page %s' % (pagenum + 1), -                errnote=u'Unable to download API page') +                result_url, video_id='query "%s"' % query, +                note='Downloading page %s' % (pagenum + 1), +                errnote='Unable to download API page')              data = json.loads(data_json)              api_response = data['data'] @@ -1356,6 +1400,13 @@ class YoutubeSearchURLIE(InfoExtractor):      IE_DESC = 'YouTube.com search URLs'      IE_NAME = 'youtube:search_url'      _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' +    _TESTS = [{ +        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', +        'playlist_mincount': 5, +        'info_dict': { +            'title': 'youtube-dl test video', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -1390,17 +1441,38 @@ class YoutubeSearchURLIE(InfoExtractor):  class YoutubeShowIE(InfoExtractor):      IE_DESC = 'YouTube.com (multi-season) shows' -    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' +    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'      IE_NAME = 'youtube:show' +    _TESTS = [{ +        'url': 'http://www.youtube.com/show/airdisasters', +        'playlist_mincount': 3, +        'info_dict': { +            'id': 'airdisasters', +            'title': 'Air Disasters', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        show_name = mobj.group(1) -        webpage = self._download_webpage(url, show_name, 'Downloading show webpage') +        playlist_id = mobj.group('id') +        webpage = self._download_webpage( +            url, playlist_id, 'Downloading show webpage')          # There's one playlist for each season of the show          m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) -        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) -        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] +        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) +        entries = [ +            self.url_result( +                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') +            for season in m_seasons +        ] +        title = self._og_search_title(webpage, fatal=False) + +        return { +            '_type': 'playlist', +            'id': playlist_id, +            'title': title, +            'entries': entries, +        }  class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 940e9c8cf..c17701d6a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.19' +__version__ = '2014.09.25' | 
