diff options
29 files changed, 693 insertions, 297 deletions
| diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 1fa99f88b..410f9edc2 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,7 +10,6 @@ from test.helper import FakeYDL  from youtube_dl.extractor import ( -    YoutubeUserIE,      YoutubePlaylistIE,      YoutubeIE,      YoutubeChannelIE, @@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):          self.assertEqual(len(entries), 25)          self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') -    def test_youtube_channel(self): -        dl = FakeYDL() -        ie = YoutubeChannelIE(dl) -        #test paginated channel -        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') -        self.assertTrue(len(result['entries']) > 90) -        #test autogenerated channel -        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') -        self.assertTrue(len(result['entries']) >= 18) - -    def test_youtube_user(self): -        dl = FakeYDL() -        ie = YoutubeUserIE(dl) -        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') -        self.assertTrue(len(result['entries']) >= 320) - -    def test_youtube_show(self): -        dl = FakeYDL() -        ie = YoutubeShowIE(dl) -        result = ie.extract('http://www.youtube.com/show/airdisasters') -        self.assertTrue(len(result) >= 3) -      def test_youtube_mix(self):          dl = FakeYDL()          ie = YoutubePlaylistIE(dl) @@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):          entries = result['entries']          self.assertEqual(len(entries), 100) -    def test_youtube_toplist(self): -        dl = FakeYDL() -        ie = YoutubeTopListIE(dl) -        result = ie.extract('yttoplist:music:Trending') -        entries = result['entries'] -        self.assertTrue(len(entries) >= 5) - -    def test_youtube_search_url(self): -        dl = FakeYDL() -        ie = YoutubeSearchURLIE(dl) -        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') -        entries = result['entries'] -        self.assertIsPlaylist(result) -        self.assertEqual(result['title'], 'youtube-dl test video') -        self.assertTrue(len(entries) >= 5) -  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a1713dc5a..4a9610355 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1250,12 +1250,13 @@ class YoutubeDL(object):          # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)          # To work around aforementioned issue we will replace request's original URL with          # percent-encoded one -        url = req if isinstance(req, compat_str) else req.get_full_url() +        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) +        url = req if req_is_string else req.get_full_url()          url_escaped = escape_url(url)          # Substitute URL if any change after escaping          if url != url_escaped: -            if isinstance(req, compat_str): +            if req_is_string:                  req = url_escaped              else:                  req = compat_urllib_request.Request( diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 20d7a57ce..7f2b4dfcc 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -78,6 +78,7 @@ __authors__  = (      'Hari Padmanaban',      'Carlos Ramos',      '5moufl', +    'lenaten',  )  __license__ = 'Public Domain' diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 4ea5811a5..3f941596e 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals  from .common import FileDownloader  from .hls import HlsFD +from .hls import NativeHlsFD  from .http import HttpFD  from .mplayer import MplayerFD  from .rtmp import RtmpFD @@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):      if url.startswith('rtmp'):          return RtmpFD +    if protocol == 'm3u8_native': +        return NativeHlsFD      if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):          return HlsFD      if url.startswith('mms') or url.startswith('rtsp'): diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 9ce97f5fe..f85f0c94e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -42,6 +42,7 @@ class FileDownloader(object):      Subclasses of this one must re-define the real_download method.      """ +    _TEST_FILE_SIZE = 10241      params = None      def __init__(self, ydl, params): diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 32852f333..68eafa403 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -1,8 +1,13 @@ +from __future__ import unicode_literals +  import os +import re  import subprocess  from .common import FileDownloader  from ..utils import ( +    compat_urlparse, +    compat_urllib_request,      check_executable,      encodeFilename,  ) @@ -43,3 +48,57 @@ class HlsFD(FileDownloader):              self.to_stderr(u"\n")              self.report_error(u'%s exited with code %d' % (program, retval))              return False + + +class NativeHlsFD(FileDownloader): +    """ A more limited implementation that does not require ffmpeg """ + +    def real_download(self, filename, info_dict): +        url = info_dict['url'] +        self.report_destination(filename) +        tmpfilename = self.temp_name(filename) + +        self.to_screen( +            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) +        data = self.ydl.urlopen(url).read() +        s = data.decode('utf-8', 'ignore') +        segment_urls = [] +        for line in s.splitlines(): +            line = line.strip() +            if line and not line.startswith('#'): +                segment_url = ( +                    line +                    if re.match(r'^https?://', line) +                    else compat_urlparse.urljoin(url, line)) +                segment_urls.append(segment_url) + +        is_test = self.params.get('test', False) +        remaining_bytes = self._TEST_FILE_SIZE if is_test else None +        byte_counter = 0 +        with open(tmpfilename, 'wb') as outf: +            for i, segurl in enumerate(segment_urls): +                self.to_screen( +                    '[hlsnative] %s: Downloading segment %d / %d' % +                    (info_dict['id'], i + 1, len(segment_urls))) +                seg_req = compat_urllib_request.Request(segurl) +                if remaining_bytes is not None: +                    seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + +                segment = self.ydl.urlopen(seg_req).read() +                if remaining_bytes is not None: +                    segment = segment[:remaining_bytes] +                    remaining_bytes -= len(segment) +                outf.write(segment) +                byte_counter += len(segment) +                if remaining_bytes is not None and remaining_bytes <= 0: +                    break + +        self._hook_progress({ +            'downloaded_bytes': byte_counter, +            'total_bytes': byte_counter, +            'filename': filename, +            'status': 'finished', +        }) +        self.try_rename(tmpfilename, filename) +        return True + diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 6caf7451e..f62555ce0 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -14,8 +14,6 @@ from ..utils import (  class HttpFD(FileDownloader): -    _TEST_FILE_SIZE = 10241 -      def real_download(self, filename, info_dict):          url = info_dict['url']          tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 71fe38ca0..f815e6b91 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -262,6 +262,7 @@ from .nrk import (  from .ntv import NTVIE  from .nytimes import NYTimesIE  from .nuvid import NuvidIE +from .oktoberfesttv import OktoberfestTVIE  from .ooyala import OoyalaIE  from .orf import (      ORFTVthekIE, @@ -340,6 +341,7 @@ from .spankwire import SpankwireIE  from .spiegel import SpiegelIE, SpiegelArticleIE  from .spiegeltv import SpiegeltvIE  from .spike import SpikeIE +from .sport5 import Sport5IE  from .sportdeutschland import SportDeutschlandIE  from .stanfordoc import StanfordOpenClassroomIE  from .steam import SteamIE @@ -451,6 +453,7 @@ from .yahoo import (      YahooNewsIE,      YahooSearchIE,  ) +from .ynet import YnetIE  from .youjizz import YouJizzIE  from .youku import YoukuIE  from .youporn import YouPornIE diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 7d89f44ee..69f89320c 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -22,8 +22,7 @@ class ABCIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          urls_info_json = self._search_regex( diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py index bc64423a3..ad86d6e58 100644 --- a/youtube_dl/extractor/anysex.py +++ b/youtube_dl/extractor/anysex.py @@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor):          title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')          description = self._html_search_regex( -            r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) +            r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)          thumbnail = self._html_search_regex(              r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False) @@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor):              r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)          duration = parse_duration(self._search_regex( -            r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False)) +            r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))          view_count = int_or_none(self._html_search_regex(              r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False)) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c30a1d33..8d6a6f601 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,6 +1,7 @@  from __future__ import unicode_literals  import base64 +import datetime  import hashlib  import json  import netrc @@ -15,6 +16,7 @@ from ..utils import (      compat_http_client,      compat_urllib_error,      compat_urllib_parse_urlparse, +    compat_urlparse,      compat_str,      clean_html, @@ -164,6 +166,14 @@ class InfoExtractor(object):          return cls._VALID_URL_RE.match(url) is not None      @classmethod +    def _match_id(cls, url): +        if '_VALID_URL_RE' not in cls.__dict__: +            cls._VALID_URL_RE = re.compile(cls._VALID_URL) +        m = cls._VALID_URL_RE.match(url) +        assert m +        return m.group('id') + +    @classmethod      def working(cls):          """Getter method for _WORKING."""          return cls._WORKING @@ -640,7 +650,9 @@ class InfoExtractor(object):          return formats -    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): +    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, +                              entry_protocol='m3u8', preference=None): +          formats = [{              'format_id': 'm3u8-meta',              'url': m3u8_url, @@ -651,6 +663,11 @@ class InfoExtractor(object):              'format_note': 'Quality selection URL',          }] +        format_url = lambda u: ( +            u +            if re.match(r'^https?://', u) +            else compat_urlparse.urljoin(m3u8_url, u)) +          m3u8_doc = self._download_webpage(m3u8_url, video_id)          last_info = None          kv_rex = re.compile( @@ -667,15 +684,17 @@ class InfoExtractor(object):                  continue              else:                  if last_info is None: -                    formats.append({'url': line}) +                    formats.append({'url': format_url(line)})                      continue                  tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)                  f = {                      'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), -                    'url': line.strip(), +                    'url': format_url(line.strip()),                      'tbr': tbr,                      'ext': ext, +                    'protocol': entry_protocol, +                    'preference': preference,                  }                  codecs = last_info.get('CODECS')                  if codecs: @@ -695,6 +714,12 @@ class InfoExtractor(object):          self._sort_formats(formats)          return formats +    def _live_title(self, name): +        """ Generate the title for a live video """ +        now = datetime.datetime.now() +        now_str = now.strftime("%Y-%m-%d %H:%M") +        return name + ' ' + now_str +  class SearchInfoExtractor(InfoExtractor):      """ diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4903764f7..f99888ecc 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -9,7 +9,7 @@ import xml.etree.ElementTree  from hashlib import sha1  from math import pow, sqrt, floor -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor  from ..utils import (      ExtractorError,      compat_urllib_parse, @@ -26,7 +26,7 @@ from ..aes import (  ) -class CrunchyrollIE(InfoExtractor): +class CrunchyrollIE(SubtitlesInfoExtractor):      _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'      _TEST = {          'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', @@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text              else:                  subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) +        if self._downloader.params.get('listsubtitles', False): +            self._list_available_subtitles(video_id, subtitles) +            return +          return {              'id':          video_id,              'title':       video_title, diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py index 4ba323148..2cba82532 100644 --- a/youtube_dl/extractor/eitb.py +++ b/youtube_dl/extractor/eitb.py @@ -1,4 +1,6 @@  # encoding: utf-8 +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -7,20 +9,20 @@ from ..utils import ExtractorError  class EitbIE(InfoExtractor): -    IE_NAME = u'eitb.tv' +    IE_NAME = 'eitb.tv'      _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'      _TEST = { -        u'add_ie': ['Brightcove'], -        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', -        u'md5': u'edf4436247185adee3ea18ce64c47998', -        u'info_dict': { -            u'id': u'2743577154001', -            u'ext': u'mp4', -            u'title': u'60 minutos (Lasa y Zabala, 30 años)', +        'add_ie': ['Brightcove'], +        'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', +        'md5': 'edf4436247185adee3ea18ce64c47998', +        'info_dict': { +            'id': '2743577154001', +            'ext': 'mp4', +            'title': '60 minutos (Lasa y Zabala, 30 años)',              # All videos from eitb has this description in the brightcove info -            u'description': u'.', -            u'uploader': u'Euskal Telebista', +            'description': '.', +            'uploader': 'Euskal Telebista',          },      } @@ -30,7 +32,7 @@ class EitbIE(InfoExtractor):          webpage = self._download_webpage(url, chapter_id)          bc_url = BrightcoveIE._extract_brightcove_url(webpage)          if bc_url is None: -            raise ExtractorError(u'Could not extract the Brightcove url') +            raise ExtractorError('Could not extract the Brightcove url')          # The BrightcoveExperience object doesn't contain the video id, we set          # it manually          bc_url += '&%40videoPlayer={0}'.format(chapter_id) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 14a196ffc..aacbf1414 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -7,6 +7,7 @@ from ..utils import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urllib_parse, +    str_to_int,  ) @@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor):              'ext': 'mp4',              'title': 'Music Video 14 british euro brit european cumshots swallow',              'uploader': 'unknown', +            'view_count': int,              'age_limit': 18,          }      }, { @@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor):          video_title = self._html_search_regex(              r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')          uploader = self._html_search_regex( -            r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', -            fatal=False) +            r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>', +            webpage, 'uploader', fatal=False) +        view_count = str_to_int(self._html_search_regex( +            r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>', +            webpage, 'view count', fatal=False)) +          video_url = compat_urllib_parse.unquote(self._html_search_regex(              r'video_url=(.+?)&', webpage, 'video_url'))          path = compat_urllib_parse_urlparse(video_url).path @@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor):              'id': video_id,              'title': video_title,              'uploader': uploader, +            'view_count': view_count,              'url': video_url,              'format': format,              'format_id': format, diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 21ea5ec2b..e09982e88 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -10,13 +10,13 @@ from ..utils import (  class FlickrIE(InfoExtractor): -    """Information Extractor for Flickr videos""" -    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' +    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'      _TEST = {          'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', -        'file': '5645318632.mp4',          'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',          'info_dict': { +            'id': '5645318632', +            'ext': 'mp4',              "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",               "uploader_id": "forestwander-nature-pictures",               "title": "Dark Hollow Waterfalls" @@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):              raise ExtractorError('Unable to extract video url')          video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) -        return [{ -            'id':          video_id, -            'url':         video_url, -            'ext':         'mp4', -            'title':       self._og_search_title(webpage), +        return { +            'id': video_id, +            'url': video_url, +            'ext': 'mp4', +            'title': self._og_search_title(webpage),              'description': self._og_search_description(webpage), -            'thumbnail':   self._og_search_thumbnail(webpage), +            'thumbnail': self._og_search_thumbnail(webpage),              'uploader_id': video_uploader_id, -        }] +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40eeaad16..367f930dd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -382,6 +382,19 @@ class GenericIE(InfoExtractor):                  'thumbnail': 're:^https?://.*\.jpg$',              },          }, +        # Wistia embed +        { +            'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', +            'md5': '8788b683c777a5cf25621eaf286d0c23', +            'info_dict': { +                'id': '1cfaf6b7ea', +                'ext': 'mov', +                'title': 'md5:51364a8d3d009997ba99656004b5e20d', +                'duration': 643.0, +                'filesize': 182808282, +                'uploader': 'education-portal.com', +            }, +        },      ]      def report_download_webpage(self, video_id): @@ -584,7 +597,9 @@ class GenericIE(InfoExtractor):          # Helper method          def _playlist_from_matches(matches, getter, ie=None): -            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches) +            urlrs = orderedSet( +                self.url_result(self._proto_relative_url(getter(m)), ie) +                for m in matches)              return self.playlist_result(                  urlrs, playlist_id=video_id, playlist_title=video_title) @@ -629,11 +644,11 @@ class GenericIE(InfoExtractor):              )              (["\'])                  (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ -                (?:embed|v)/.+?) +                (?:embed|v|p)/.+?)              \1''', webpage)          if matches:              return _playlist_from_matches( -                matches, lambda m: unescapeHTML(m[1]), ie='Youtube') +                matches, lambda m: unescapeHTML(m[1]))          # Look for embedded Dailymotion player          matches = re.findall( @@ -654,6 +669,16 @@ class GenericIE(InfoExtractor):                  'title': video_title,                  'id': video_id,              } +        match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) +        if match: +            return { +                '_type': 'url_transparent', +                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), +                'ie_key': 'Wistia', +                'uploader': video_uploader, +                'title': video_title, +                'id': match.group('id') +            }          # Look for embedded blip.tv player          mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py index 3a938861b..7cb6749be 100644 --- a/youtube_dl/extractor/muenchentv.py +++ b/youtube_dl/extractor/muenchentv.py @@ -1,7 +1,6 @@  # coding: utf-8  from __future__ import unicode_literals -import datetime  import json  from .common import InfoExtractor @@ -33,9 +32,7 @@ class MuenchenTVIE(InfoExtractor):          display_id = 'live'          webpage = self._download_webpage(url, display_id) -        now = datetime.datetime.now() -        now_str = now.strftime("%Y-%m-%d %H:%M") -        title = self._og_search_title(webpage) + ' ' + now_str +        title = self._live_title(self._og_search_title(webpage))          data_js = self._search_regex(              r'(?s)\nplaylist:\s*(\[.*?}\]),related:', diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index f53596f5e..963c4587c 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -17,7 +17,7 @@ class NFLIE(InfoExtractor):      _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'      _TEST = {          'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', -        'skip_download': True,  # md5 sum fluctuates +        # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5',  # md5 checksum fluctuates          'info_dict': {              'id': '0ap3000000398478',              'ext': 'mp4', diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py new file mode 100644 index 000000000..4a41c0542 --- /dev/null +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -0,0 +1,47 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class OktoberfestTVIE(InfoExtractor): +    _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' + +    _TEST = { +        'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', +        'info_dict': { +            'id': 'hb-zelt', +            'ext': 'mp4', +            'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +            'thumbnail': 're:^https?://.*\.jpg$', +            'is_live': True, +        }, +        'params': { +            'skip_download': True, +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        title = self._live_title(self._html_search_regex( +            r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title')) + +        clip = self._search_regex( +            r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip') +        ncurl = self._search_regex( +            r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base') +        video_url = ncurl + clip +        thumbnail = self._search_regex( +            r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage, +            'thumbnail', fatal=False) + +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +            'ext': 'mp4', +            'is_live': True, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py new file mode 100644 index 000000000..3f680bfc6 --- /dev/null +++ b/youtube_dl/extractor/sport5.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class Sport5IE(InfoExtractor): +    _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' +    _TESTS = [ +        { +            'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', +            'info_dict': { +                'id': 's5-Y59xx1-GUh2', +                'ext': 'mp4', +                'title': 'ולנסיה-קורדובה 0:3', +                'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה', +                'duration': 228, +                'categories': list, +            }, +            'skip': 'Blocked outside of Israel', +        }, { +            'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE', +            'info_dict': { +                'id': 's5-SiXxx1-hKh2', +                'ext': 'mp4', +                'title': 'GOALS_CELTIC_270914.mp4', +                'description': '', +                'duration': 87, +                'categories': list, +            }, +            'skip': 'Blocked outside of Israel', +        } +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        media_id = mobj.group('id') + +        webpage = self._download_webpage(url, media_id) + +        video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id') + +        metadata = self._download_xml( +            'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, +            video_id) + +        error = metadata.find('./Error') +        if error is not None: +            raise ExtractorError( +                '%s returned error: %s - %s' % ( +                    self.IE_NAME, +                    error.find('./Name').text, +                    error.find('./Description').text), +                expected=True) + +        title = metadata.find('./Title').text +        description = metadata.find('./Description').text +        duration = int(metadata.find('./Duration').text) + +        posters_el = metadata.find('./PosterLinks') +        thumbnails = [{ +            'url': thumbnail.text, +            'width': int(thumbnail.get('width')), +            'height': int(thumbnail.get('height')), +        } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else [] + +        categories_el = metadata.find('./Categories') +        categories = [ +            cat.get('name') for cat in categories_el.findall('./Category') +        ] if categories_el is not None else [] + +        formats = [{ +            'url': fmt.text, +            'ext': 'mp4', +            'vbr': int(fmt.get('bitrate')), +            'width': int(fmt.get('width')), +            'height': int(fmt.get('height')), +        } for fmt in metadata.findall('./PlaybackLinks/FileURL')] +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnails': thumbnails, +            'duration': duration, +            'categories': categories, +            'formats': formats, +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index df115d251..ebd64f0f5 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -19,7 +19,7 @@ class Vbox7IE(InfoExtractor):          'md5': '99f65c0c9ef9b682b97313e052734c3f',          'info_dict': {              'id': '249bb972c2', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Смях! Чудо - чист за секунди - Скрита камера',          },      } @@ -50,7 +50,6 @@ class Vbox7IE(InfoExtractor):          return {              'id': video_id,              'url': final_url, -            'ext': 'flv',              'title': title,              'thumbnail': thumbnail_url,          } diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d2ffd1b6b..ebab8b86c 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -6,6 +6,7 @@ import xml.etree.ElementTree  from .common import InfoExtractor  from ..utils import (      compat_HTTPError, +    compat_urllib_request,      ExtractorError,  ) @@ -24,7 +25,7 @@ class VevoIE(InfoExtractor):      _TESTS = [{          'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', -        "md5": "06bea460acb744eab74a9d7dcb4bfd61", +        "md5": "95ee28ee45e70130e3ab02b0f579ae23",          'info_dict': {              'id': 'GB1101300280',              'ext': 'mp4', @@ -40,7 +41,7 @@ class VevoIE(InfoExtractor):      }, {          'note': 'v3 SMIL format',          'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', -        'md5': '893ec0e0d4426a1d96c01de8f2bdff58', +        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',          'info_dict': {              'id': 'USUV71302923',              'ext': 'mp4', @@ -69,6 +70,21 @@ class VevoIE(InfoExtractor):      }]      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' +    def _real_initialize(self): +        req = compat_urllib_request.Request( +            'http://www.vevo.com/auth', data=b'') +        webpage = self._download_webpage( +            req, None, +            note='Retrieving oauth token', +            errnote='Unable to retrieve oauth token', +            fatal=False) +        if webpage is False: +            self._oauth_token = None +        else: +            self._oauth_token = self._search_regex( +                r'access_token":\s*"([^"]+)"', +                webpage, 'access token', fatal=False) +      def _formats_from_json(self, video_info):          last_version = {'version': -1}          for version in video_info['videoVersions']: @@ -129,6 +145,26 @@ class VevoIE(InfoExtractor):              })          return formats +    def _download_api_formats(self, video_id): +        if not self._oauth_token: +            self._downloader.report_warning( +                'No oauth token available, skipping API HLS download') +            return [] + +        api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( +            video_id, self._oauth_token) +        api_data = self._download_json( +            api_url, video_id, +            note='Downloading HLS formats', +            errnote='Failed to download HLS format list', fatal=False) +        if api_data is None: +            return [] + +        m3u8_url = api_data[0]['url'] +        return self._extract_m3u8_formats( +            m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', +            preference=0) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') @@ -152,30 +188,8 @@ class VevoIE(InfoExtractor):          else:              age_limit = None -        # Download SMIL -        smil_blocks = sorted(( -            f for f in video_info['videoVersions'] -            if f['sourceType'] == 13), -            key=lambda f: f['version']) - -        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( -            self._SMIL_BASE_URL, video_id, video_id.lower()) -        if smil_blocks: -            smil_url_m = self._search_regex( -                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', -                fatal=False) -            if smil_url_m is not None: -                smil_url = smil_url_m - -        try: -            smil_xml = self._download_webpage(smil_url, video_id, -                                              'Downloading SMIL info') -            formats.extend(self._formats_from_smil(smil_xml)) -        except ExtractorError as ee: -            if not isinstance(ee.cause, compat_HTTPError): -                raise -            self._downloader.report_warning( -                'Cannot download SMIL information, falling back to JSON ..') +        # Download via HLS API +        formats.extend(self._download_api_formats(video_id))          self._sort_formats(formats)          timestamp_ms = int(self._search_regex( diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 2544c24bd..1b2f731e9 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -6,6 +6,7 @@ from .common import InfoExtractor  from ..utils import (      int_or_none,      compat_str, +    ExtractorError,  ) @@ -16,6 +17,24 @@ class VubeIE(InfoExtractor):      _TESTS = [          { +            'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s', +            'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42', +            'info_dict': { +                'id': 'Y8NUZ69Tf7', +                'ext': 'mp4', +                'title': 'Best Drummer Ever [HD]', +                'description': 'md5:2d63c4b277b85c2277761c2cf7337d71', +                'thumbnail': 're:^https?://.*\.jpg', +                'uploader': 'William', +                'timestamp': 1406876915, +                'upload_date': '20140801', +                'duration': 258.051, +                'like_count': int, +                'dislike_count': int, +                'comment_count': int, +                'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'], +            }, +        }, {              'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',              'md5': 'db7aba89d4603dadd627e9d1973946fe',              'info_dict': { @@ -32,7 +51,8 @@ class VubeIE(InfoExtractor):                  'dislike_count': int,                  'comment_count': int,                  'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'], -            } +            }, +            'skip': 'Removed due to DMCA',          },          {              'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1', @@ -51,7 +71,8 @@ class VubeIE(InfoExtractor):                  'dislike_count': int,                  'comment_count': int,                  'categories': ['seraina', 'jessica', 'krewella', 'alive'], -            } +            }, +            'skip': 'Removed due to DMCA',          }, {              'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',              'md5': '0584fc13b50f887127d9d1007589d27f', @@ -69,7 +90,8 @@ class VubeIE(InfoExtractor):                  'dislike_count': int,                  'comment_count': int,                  'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'], -            } +            }, +            'skip': 'Removed due to DMCA',          }      ] @@ -102,6 +124,11 @@ class VubeIE(InfoExtractor):          self._sort_formats(formats) +        if not formats and video.get('vst') == 'dmca': +            raise ExtractorError( +                'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.', +                expected=True) +          title = video['title']          description = video.get('description')          thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:') diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 268e2f618..bf9e40bad 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -40,6 +40,7 @@ class WatIE(InfoExtractor):                  'upload_date': '20140816',                  'duration': 2910,              }, +            'skip': "Ce contenu n'est pas disponible pour l'instant.",          },      ] diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index e6bfa9e14..748443f81 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,13 +1,14 @@  from __future__ import unicode_literals -import json  import re  from .common import InfoExtractor +from ..utils import ExtractorError, compat_urllib_request  class WistiaIE(InfoExtractor):      _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' +    _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'      _TEST = {          'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', @@ -24,11 +25,13 @@ class WistiaIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        webpage = self._download_webpage(url, video_id) -        data_json = self._html_search_regex( -            r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data') - -        data = json.loads(data_json) +        request = compat_urllib_request.Request(self._API_URL.format(video_id)) +        request.add_header('Referer', url)  # Some videos require this. +        data_json = self._download_json(request, video_id) +        if data_json.get('error'): +            raise ExtractorError('Error while getting the playlist', +                                 expected=True) +        data = data_json['media']          formats = []          thumbnails = [] diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py new file mode 100644 index 000000000..24872861a --- /dev/null +++ b/youtube_dl/extractor/ynet.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import compat_urllib_parse + + +class YnetIE(InfoExtractor): +    _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' +    _TESTS = [ +        { +            'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', +            'md5': '002b44ee2f33d50363a1c153bed524cf', +            'info_dict': { +                'id': 'L-11659-99244', +                'ext': 'flv', +                'title': 'איש לא יודע מאיפה באנו', +                'thumbnail': 're:^https?://.*\.jpg', +            } +        }, { +            'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', +            'md5': '6455046ae1b48cf7e2b7cae285e53a16', +            'info_dict': { +                'id': 'L-8859-84418', +                'ext': 'flv', +                'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין", +                'thumbnail': 're:^https?://.*\.jpg', +            } +        } +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +         +        webpage = self._download_webpage(url, video_id) + +        content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage)) +        config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) +        f4m_url = config['clip']['url'] +        title = self._og_search_title(webpage) +        m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title) +        if m: +            title = m.group('title') + +        return { +            'id': video_id, +            'title': title, +            'formats': self._extract_f4m_formats(f4m_url, video_id), +            'thumbnail': self._og_search_thumbnail(webpage), +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index a8fd40c83..07ed7cbd1 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,5 +1,7 @@  # coding: utf-8 +from __future__ import unicode_literals +  import json  import math  import random @@ -13,18 +15,25 @@ from ..utils import (  class YoukuIE(InfoExtractor): -    _VALID_URL =  r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)' -    _TEST =   { -        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", -        u"file": u"XNDgyMDQ2NTQw_part00.flv", -        u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b", -        u"params": {u"test": False}, -        u"info_dict": { -            u"title": u"youtube-dl test video \"'/\\ä↭𝕐" +    _VALID_URL = r'''(?x) +        (?: +            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| +            youku:) +        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) +    ''' +    _TEST = { +        'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html', +        'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b', +        'params': { +            'test': False +        }, +        'info_dict': { +            'id': 'XNDgyMDQ2NTQw_part00', +            'ext': 'flv', +            'title': 'youtube-dl test video "\'/\\ä↭𝕐'          }      } -      def _gen_sid(self):          nowTime = int(time.time() * 1000)          random1 = random.randint(1000,1998) @@ -55,49 +64,42 @@ class YoukuIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) -        video_id = mobj.group('ID') +        video_id = mobj.group('id')          info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id -        jsondata = self._download_webpage(info_url, video_id) - -        self.report_extraction(video_id) -        try: -            config = json.loads(jsondata) -            error_code = config['data'][0].get('error_code') -            if error_code: -                # -8 means blocked outside China. -                error = config['data'][0].get('error')  # Chinese and English, separated by newline. -                raise ExtractorError(error or u'Server reported error %i' % error_code, -                    expected=True) - -            video_title =  config['data'][0]['title'] -            seed = config['data'][0]['seed'] - -            format = self._downloader.params.get('format', None) -            supported_format = list(config['data'][0]['streamfileids'].keys()) - -            if format is None or format == 'best': -                if 'hd2' in supported_format: -                    format = 'hd2' -                else: -                    format = 'flv' -                ext = u'flv' -            elif format == 'worst': -                format = 'mp4' -                ext = u'mp4' -            else: -                format = 'flv' -                ext = u'flv' +        config = self._download_json(info_url, video_id) + +        error_code = config['data'][0].get('error_code') +        if error_code: +            # -8 means blocked outside China. +            error = config['data'][0].get('error')  # Chinese and English, separated by newline. +            raise ExtractorError(error or 'Server reported error %i' % error_code, +                expected=True) +        video_title = config['data'][0]['title'] +        seed = config['data'][0]['seed'] -            fileid = config['data'][0]['streamfileids'][format] -            keys = [s['k'] for s in config['data'][0]['segs'][format]] -            # segs is usually a dictionary, but an empty *list* if an error occured. -        except (UnicodeDecodeError, ValueError, KeyError): -            raise ExtractorError(u'Unable to extract info section') +        format = self._downloader.params.get('format', None) +        supported_format = list(config['data'][0]['streamfileids'].keys()) + +        # TODO proper format selection +        if format is None or format == 'best': +            if 'hd2' in supported_format: +                format = 'hd2' +            else: +                format = 'flv' +            ext = 'flv' +        elif format == 'worst': +            format = 'mp4' +            ext = 'mp4' +        else: +            format = 'flv' +            ext = 'flv' + +        fileid = config['data'][0]['streamfileids'][format] +        keys = [s['k'] for s in config['data'][0]['segs'][format]] +        # segs is usually a dictionary, but an empty *list* if an error occured.          files_info=[]          sid = self._gen_sid() @@ -106,9 +108,8 @@ class YoukuIE(InfoExtractor):          #column 8,9 of fileid represent the segment number          #fileid[7:9] should be changed          for index, key in enumerate(keys): -              temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) -            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) +            download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)              info = {                  'id': '%s_part%02d' % (video_id, index), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b54c69122..99198e380 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      def _set_language(self):          return bool(self._download_webpage(              self._LANG_URL, None, -            note=u'Setting language', errnote='unable to set language', +            note='Setting language', errnote='unable to set language',              fatal=False))      def _login(self): @@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          # No authentication to be performed          if username is None:              if self._LOGIN_REQUIRED: -                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) +                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)              return True          login_page = self._download_webpage(              self._LOGIN_URL, None, -            note=u'Downloading login page', -            errnote=u'unable to fetch login page', fatal=False) +            note='Downloading login page', +            errnote='unable to fetch login page', fatal=False)          if login_page is False:              return @@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          req = compat_urllib_request.Request(self._LOGIN_URL, login_data)          login_results = self._download_webpage(              req, None, -            note=u'Logging in', errnote=u'unable to log in', fatal=False) +            note='Logging in', errnote='unable to log in', fatal=False)          if login_results is False:              return False          if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: -            raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True) +            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)          # Two-Factor          # TODO add SMS and phone call support - these require making a request and then prompting the user @@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              tfa_code = self._get_tfa_info()              if tfa_code is None: -                self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>') -                self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)') +                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>') +                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')                  return False              # Unlike the first login form, secTok and timeStmp are both required for the TFA form              match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)              if match is None: -                self._downloader.report_warning(u'Failed to get secTok - did the page structure change?') +                self._downloader.report_warning('Failed to get secTok - did the page structure change?')              secTok = match.group(1)              match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)              if match is None: -                self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?') +                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')              timeStmp = match.group(1)              tfa_form_strs = { @@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)              tfa_results = self._download_webpage(                  tfa_req, None, -                note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False) +                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)              if tfa_results is False:                  return False              if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None: -                self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.') +                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')                  return False              if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: -                self._downloader.report_warning(u'unable to log in - did the page structure change?') +                self._downloader.report_warning('unable to log in - did the page structure change?')                  return False              if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: -                self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') +                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')                  return False          if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: -            self._downloader.report_warning(u'unable to log in: bad username or password') +            self._downloader.report_warning('unable to log in: bad username or password')              return False          return True @@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          self._download_webpage(              req, None, -            note=u'Confirming age', errnote=u'Unable to confirm age') +            note='Confirming age', errnote='Unable to confirm age')          return True      def _real_initialize(self): @@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                              youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls                           (?:                                                  # the various things that can precede the ID: -                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ +                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/                               |(?:                                             # or the v= param in all its forms                                   (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #! @@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      IE_NAME = 'youtube'      _TESTS = [          { -            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc", -            u"file":  u"BaW_jenozKc.mp4", -            u"info_dict": { -                u"title": u"youtube-dl test video \"'/\\ä↭𝕐", -                u"uploader": u"Philipp Hagemeister", -                u"uploader_id": u"phihag", -                u"upload_date": u"20121002", -                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", -                u"categories": [u'Science & Technology'], +            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc', +            'info_dict': { +                'id': 'BaW_jenozKc', +                'ext': 'mp4', +                'title': 'youtube-dl test video "\'/\\ä↭𝕐', +                'uploader': 'Philipp Hagemeister', +                'uploader_id': 'phihag', +                'upload_date': '20121002', +                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', +                'categories': ['Science & Technology'],                  'like_count': int,                  'dislike_count': int,              }          },          { -            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY", -            u"file":  u"UxxajLWwzqY.mp4", -            u"note": u"Test generic use_cipher_signature video (#897)", -            u"info_dict": { -                u"upload_date": u"20120506", -                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", -                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f", -                u"uploader": u"Icona Pop", -                u"uploader_id": u"IconaPop" +            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', +            'note': 'Test generic use_cipher_signature video (#897)', +            'info_dict': { +                'id': 'UxxajLWwzqY', +                'ext': 'mp4', +                'upload_date': '20120506', +                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', +                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', +                'uploader': 'Icona Pop', +                'uploader_id': 'IconaPop',              }          },          { -            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ", -            u"file":  u"07FYdnEawAQ.mp4", -            u"note": u"Test VEVO video with age protection (#956)", -            u"info_dict": { -                u"upload_date": u"20130703", -                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", -                u"description": u"md5:64249768eec3bc4276236606ea996373", -                u"uploader": u"justintimberlakeVEVO", -                u"uploader_id": u"justintimberlakeVEVO" +            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ', +            'note': 'Test VEVO video with age protection (#956)', +            'info_dict': { +                'id': '07FYdnEawAQ', +                'ext': 'mp4', +                'upload_date': '20130703', +                'title': 'Justin Timberlake - Tunnel Vision (Explicit)', +                'description': 'md5:64249768eec3bc4276236606ea996373', +                'uploader': 'justintimberlakeVEVO', +                'uploader_id': 'justintimberlakeVEVO',              }          },          { -            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ", -            u"file":  u"yZIXLfi8CZQ.mp4", -            u"note": u"Embed-only video (#1746)", -            u"info_dict": { -                u"upload_date": u"20120608", -                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", -                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7", -                u"uploader": u"SET India", -                u"uploader_id": u"setindia" +            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', +            'note': 'Embed-only video (#1746)', +            'info_dict': { +                'id': 'yZIXLfi8CZQ', +                'ext': 'mp4', +                'upload_date': '20120608', +                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', +                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', +                'uploader': 'SET India', +                'uploader_id': 'setindia'              }          },          { -            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I", -            u"file": u"a9LDPn-MO4I.m4a", -            u"note": u"256k DASH audio (format 141) via DASH manifest", -            u"info_dict": { -                u"upload_date": "20121002", -                u"uploader_id": "8KVIDEO", -                u"description": '', -                u"uploader": "8KVIDEO", -                u"title": "UHDTV TEST 8K VIDEO.mp4" +            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', +            'note': '256k DASH audio (format 141) via DASH manifest', +            'info_dict': { +                'id': 'a9LDPn-MO4I', +                'ext': 'm4a', +                'upload_date': '20121002', +                'uploader_id': '8KVIDEO', +                'description': '', +                'uploader': '8KVIDEO', +                'title': 'UHDTV TEST 8K VIDEO.mp4'              }, -            u"params": { -                u"youtube_include_dash_manifest": True, -                u"format": "141", +            'params': { +                'youtube_include_dash_manifest': True, +                'format': '141',              },          },          # DASH manifest with encrypted signature @@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'uploader_id': 'AfrojackVEVO',                  'upload_date': '20131011',              }, -            u"params": { +            'params': {                  'youtube_include_dash_manifest': True,                  'format': '141',              }, @@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def report_video_info_webpage_download(self, video_id):          """Report attempt to download video info webpage.""" -        self.to_screen(u'%s: Downloading video info webpage' % video_id) +        self.to_screen('%s: Downloading video info webpage' % video_id)      def report_information_extraction(self, video_id):          """Report attempt to extract video information.""" -        self.to_screen(u'%s: Extracting video information' % video_id) +        self.to_screen('%s: Extracting video information' % video_id)      def report_unavailable_format(self, video_id, format):          """Report extracted video URL.""" -        self.to_screen(u'%s: Format %s not available' % (video_id, format)) +        self.to_screen('%s: Format %s not available' % (video_id, format))      def report_rtmp_download(self):          """Indicate the download will use the RTMP protocol.""" -        self.to_screen(u'RTMP download detected') +        self.to_screen('RTMP download detected')      def _signature_cache_id(self, example_sig):          """ Return a string representation of a signature """ @@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              player_type, player_id, self._signature_cache_id(example_sig))          assert os.path.basename(func_id) == func_id -        cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) +        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)          if cache_spec is not None:              return lambda s: ''.join(s[i] for i in cache_spec)          if player_type == 'js':              code = self._download_webpage(                  player_url, video_id, -                note=u'Downloading %s player %s' % (player_type, player_id), -                errnote=u'Download of %s failed' % player_url) +                note='Downloading %s player %s' % (player_type, player_id), +                errnote='Download of %s failed' % player_url)              res = self._parse_sig_js(code)          elif player_type == 'swf':              urlh = self._request_webpage(                  player_url, video_id, -                note=u'Downloading %s player %s' % (player_type, player_id), -                errnote=u'Download of %s failed' % player_url) +                note='Downloading %s player %s' % (player_type, player_id), +                errnote='Download of %s failed' % player_url)              code = urlh.read()              res = self._parse_sig_swf(code)          else: @@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              cache_res = res(test_string)              cache_spec = [ord(c) for c in cache_res] -        self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) +        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)          return res      def _print_sig_code(self, func, example_sig):          def gen_sig_code(idxs):              def _genslice(start, end, step):                  starts = '' if start == 0 else str(start) -                ends = (u':%d' % (end+step)) if end + step >= 0 else ':' -                steps = '' if step == 1 else (u':%d' % step) +                ends = (':%d' % (end+step)) if end + step >= 0 else ':' +                steps = '' if step == 1 else (':%d' % step)                  return 's[%s%s%s]' % (starts, ends, steps)              step = None @@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          expr_code = ' + '.join(gen_sig_code(cache_spec))          signature_id_tuple = '(%s)' % (              ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) -        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' +        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'                  '    return %s\n') % (signature_id_tuple, expr_code) -        self.to_screen(u'Extracted signature function:\n' + code) +        self.to_screen('Extracted signature function:\n' + code)      def _parse_sig_js(self, jscode):          funcname = self._search_regex( @@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          """Turn the encrypted s field into a working signature"""          if player_url is None: -            raise ExtractorError(u'Cannot decrypt signature without player_url') +            raise ExtractorError('Cannot decrypt signature without player_url') -        if player_url.startswith(u'//'): +        if player_url.startswith('//'):              player_url = 'https:' + player_url          try:              player_id = (player_url, self._signature_cache_id(s)) @@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,                  video_id, note=False)          except ExtractorError as err: -            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) +            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))              return {}          lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) @@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              url = 'https://www.youtube.com/api/timedtext?' + params              sub_lang_list[lang] = url          if not sub_lang_list: -            self._downloader.report_warning(u'video doesn\'t have subtitles') +            self._downloader.report_warning('video doesn\'t have subtitles')              return {}          return sub_lang_list @@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          """We need the webpage for getting the captions url, pass it as an             argument to speed up the process."""          sub_format = self._downloader.params.get('subtitlesformat', 'srt') -        self.to_screen(u'%s: Looking for automatic captions' % video_id) +        self.to_screen('%s: Looking for automatic captions' % video_id)          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)          err_msg = 'Couldn\'t find automatic captions for %s' % video_id          if mobj is None: @@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              caption_list = self._download_xml(list_url, video_id)              original_lang_node = caption_list.find('track')              if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : -                self._downloader.report_warning(u'Video doesn\'t have automatic captions') +                self._downloader.report_warning('Video doesn\'t have automatic captions')                  return {}              original_lang = original_lang_node.attrib['lang_code'] @@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def extract_id(cls, url):          mobj = re.match(cls._VALID_URL, url, re.VERBOSE)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          video_id = mobj.group(2)          return video_id @@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def _extract_annotations(self, video_id):          url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id -        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') +        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')      def _real_extract(self, url):          proto = ( @@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          # Check for "rental" videos          if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: -            raise ExtractorError(u'"rental" videos not supported') +            raise ExtractorError('"rental" videos not supported')          # Start extracting information          self.report_information_extraction(video_id)          # uploader          if 'author' not in video_info: -            raise ExtractorError(u'Unable to extract uploader name') +            raise ExtractorError('Unable to extract uploader name')          video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])          # uploader_id @@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          if mobj is not None:              video_uploader_id = mobj.group(1)          else: -            self._downloader.report_warning(u'unable to extract uploader nickname') +            self._downloader.report_warning('unable to extract uploader nickname')          # title          if 'title' in video_info:              video_title = video_info['title'][0]          else: -            self._downloader.report_warning(u'Unable to extract video title') +            self._downloader.report_warning('Unable to extract video title')              video_title = '_'          # thumbnail image @@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          if m_thumb is not None:              video_thumbnail = m_thumb.group(1)          elif 'thumbnail_url' not in video_info: -            self._downloader.report_warning(u'unable to extract video thumbnail') +            self._downloader.report_warning('unable to extract video thumbnail')              video_thumbnail = None          else:   # don't panic if we can't find it              video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) @@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              if count is not None:                  return int(count.replace(',', ''))              return None -        like_count = _extract_count(u'like') -        dislike_count = _extract_count(u'dislike') +        like_count = _extract_count('like') +        dislike_count = _extract_count('dislike')          # subtitles          video_subtitles = self.extract_subtitles(video_id, video_webpage) @@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              return          if 'length_seconds' not in video_info: -            self._downloader.report_warning(u'unable to extract video duration') +            self._downloader.report_warning('unable to extract video duration')              video_duration = None          else:              video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])) @@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              # Easy way to know if the 's' value is in url_encoded_fmt_stream_map              # this signatures are encrypted              if 'url_encoded_fmt_stream_map' not in args: -                raise ValueError(u'No stream_map present')  # caught below +                raise ValueError('No stream_map present')  # caught below              re_signature = re.compile(r'[&,]s=')              m_s = re_signature.search(args['url_encoded_fmt_stream_map'])              if m_s is not None: -                self.to_screen(u'%s: Encrypted signatures detected.' % video_id) +                self.to_screen('%s: Encrypted signatures detected.' % video_id)                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]              m_s = re_signature.search(args.get('adaptive_fmts', ''))              if m_s is not None: @@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                                  player_desc = 'html5 player %s' % player_version                          parts_sizes = self._signature_cache_id(encrypted_sig) -                        self.to_screen(u'{%s} signature length %s, %s' % +                        self.to_screen('{%s} signature length %s, %s' %                              (format_id, parts_sizes, player_desc))                      signature = self._decrypt_signature( @@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              url_map = self._extract_from_m3u8(manifest_url, video_id)              formats = _map_to_format_list(url_map)          else: -            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') +            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')          # Look for the DASH manifest          if (self._downloader.params.get('youtube_include_dash_manifest', False)): @@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)                  dash_doc = self._download_xml(                      dash_manifest_url, video_id, -                    note=u'Downloading DASH manifest', -                    errnote=u'Could not download DASH manifest') -                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): +                    note='Downloading DASH manifest', +                    errnote='Could not download DASH manifest') +                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):                      url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')                      if url_el is None:                          continue @@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                          existing_format.update(f)              except (ExtractorError, KeyError) as e: -                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) +                self.report_warning('Skipping DASH manifest: %s' % e, video_id)          self._sort_formats(formats) @@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):                          (?:\w+\.)?                          youtube\.com/                          (?: -                           (?:course|view_play_list|my_playlists|artist|playlist|watch) +                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)                             \? (?:.*?&)*? (?:p|a|list)=                          |  p/                          ) @@ -1056,6 +1061,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):              'title': 'YDL_safe_search',          },          'playlist_count': 2, +    }, { +        'note': 'embedded', +        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', +        'playlist_count': 4, +        'info_dict': { +            'title': 'JODA15', +        } +    }, { +        'note': 'Embedded SWF player', +        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', +        'playlist_count': 4, +        'info_dict': { +            'title': 'JODA7', +        }      }]      def _real_initialize(self): @@ -1090,7 +1109,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          # Extract playlist id          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          playlist_id = mobj.group(1) or mobj.group(2)          # Check if it's a video-specific URL @@ -1098,16 +1117,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          if 'v' in query_dict:              video_id = query_dict['v'][0]              if self._downloader.params.get('noplaylist'): -                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) +                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)                  return self.url_result(video_id, 'Youtube', video_id=video_id)              else: -                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) +                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))          if playlist_id.startswith('RD'):              # Mixes require a custom extraction process              return self._extract_mix(playlist_id)          if playlist_id.startswith('TL'): -            raise ExtractorError(u'For downloading YouTube.com top lists, use ' +            raise ExtractorError('For downloading YouTube.com top lists, use '                  'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)          url = self._TEMPLATE_URL % playlist_id @@ -1152,19 +1171,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):  class YoutubeTopListIE(YoutubePlaylistIE):      IE_NAME = 'youtube:toplist' -    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' +    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'          ' (Example: "yttoplist:music:Top Tracks")')      _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' -    _TESTS = [] +    _TESTS = [{ +        'url': 'yttoplist:music:Trending', +        'playlist_mincount': 5, +        'skip': 'Only works for logged-in users', +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          channel = mobj.group('chann')          title = mobj.group('title')          query = compat_urllib_parse.urlencode({'title': title}) -        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) -        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) -        link = self._html_search_regex(playlist_re, channel_page, 'list') +        channel_page = self._download_webpage( +            'https://www.youtube.com/%s' % channel, title) +        link = self._html_search_regex( +            r'''(?x) +                <a\s+href="([^"]+)".*?>\s* +                <span\s+class="branded-page-module-title-text">\s* +                <span[^>]*>.*?%s.*?</span>''' % re.escape(query), +            channel_page, 'list')          url = compat_urlparse.urljoin('https://www.youtube.com/', link)          video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' @@ -1190,6 +1218,11 @@ class YoutubeChannelIE(InfoExtractor):      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'      _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'      IE_NAME = 'youtube:channel' +    _TESTS = [{ +        'note': 'paginated channel', +        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', +        'playlist_mincount': 91, +    }]      def extract_videos_from_page(self, page):          ids_in_page = [] @@ -1202,7 +1235,7 @@ class YoutubeChannelIE(InfoExtractor):          # Extract channel id          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          # Download channel page          channel_id = mobj.group(1) @@ -1224,7 +1257,7 @@ class YoutubeChannelIE(InfoExtractor):              for pagenum in itertools.count(1):                  url = self._MORE_PAGES_URL % (pagenum, channel_id)                  page = self._download_json( -                    url, channel_id, note=u'Downloading page #%s' % pagenum, +                    url, channel_id, note='Downloading page #%s' % pagenum,                      transform_source=uppercase_escape)                  ids_in_page = self.extract_videos_from_page(page['content_html']) @@ -1233,7 +1266,7 @@ class YoutubeChannelIE(InfoExtractor):                  if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:                      break -        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) +        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))          url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)                         for video_id in video_ids] @@ -1248,6 +1281,17 @@ class YoutubeUserIE(InfoExtractor):      _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'      IE_NAME = 'youtube:user' +    _TESTS = [{ +        'url': 'https://www.youtube.com/user/TheLinuxFoundation', +        'playlist_mincount': 320, +        'info_dict': { +            'title': 'TheLinuxFoundation', +        } +    }, { +        'url': 'ytuser:phihag', +        'only_matching': True, +    }] +      @classmethod      def suitable(cls, url):          # Don't return True if the url can be extracted with other youtube @@ -1260,7 +1304,7 @@ class YoutubeUserIE(InfoExtractor):          # Extract username          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          username = mobj.group(1) @@ -1281,7 +1325,7 @@ class YoutubeUserIE(InfoExtractor):              try:                  response = json.loads(page)              except ValueError as err: -                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) +                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))              if 'entry' not in response['feed']:                  return @@ -1322,9 +1366,9 @@ class YoutubeSearchIE(SearchInfoExtractor):                  compat_urllib_parse.quote_plus(query.encode('utf-8')),                  (PAGE_SIZE * pagenum) + 1)              data_json = self._download_webpage( -                result_url, video_id=u'query "%s"' % query, -                note=u'Downloading page %s' % (pagenum + 1), -                errnote=u'Unable to download API page') +                result_url, video_id='query "%s"' % query, +                note='Downloading page %s' % (pagenum + 1), +                errnote='Unable to download API page')              data = json.loads(data_json)              api_response = data['data'] @@ -1356,6 +1400,13 @@ class YoutubeSearchURLIE(InfoExtractor):      IE_DESC = 'YouTube.com search URLs'      IE_NAME = 'youtube:search_url'      _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' +    _TESTS = [{ +        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', +        'playlist_mincount': 5, +        'info_dict': { +            'title': 'youtube-dl test video', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -1390,17 +1441,38 @@ class YoutubeSearchURLIE(InfoExtractor):  class YoutubeShowIE(InfoExtractor):      IE_DESC = 'YouTube.com (multi-season) shows' -    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' +    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'      IE_NAME = 'youtube:show' +    _TESTS = [{ +        'url': 'http://www.youtube.com/show/airdisasters', +        'playlist_mincount': 3, +        'info_dict': { +            'id': 'airdisasters', +            'title': 'Air Disasters', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        show_name = mobj.group(1) -        webpage = self._download_webpage(url, show_name, 'Downloading show webpage') +        playlist_id = mobj.group('id') +        webpage = self._download_webpage( +            url, playlist_id, 'Downloading show webpage')          # There's one playlist for each season of the show          m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) -        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) -        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] +        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) +        entries = [ +            self.url_result( +                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') +            for season in m_seasons +        ] +        title = self._og_search_title(webpage, fatal=False) + +        return { +            '_type': 'playlist', +            'id': playlist_id, +            'title': title, +            'entries': entries, +        }  class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2853c79c9..e62bef2cf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.22.1' +__version__ = '2014.09.28' | 
