 docs/supportedsites.md                     |   5
 youtube_dl/YoutubeDL.py                    |   9
 youtube_dl/extractor/__init__.py           |   6
 youtube_dl/extractor/brightcove.py         |   9
 youtube_dl/extractor/comedycentral.py      |   2
 youtube_dl/extractor/common.py             |  38
 youtube_dl/extractor/generic.py            |  79
 youtube_dl/extractor/miomio.py             |  17
 youtube_dl/extractor/mixcloud.py           |  72
 youtube_dl/extractor/mtv.py                |   8
 youtube_dl/extractor/pladform.py           |   2
 youtube_dl/extractor/qqmusic.py            | 170
 youtube_dl/extractor/soundcloud.py         |  12
 youtube_dl/extractor/spike.py              |   2
 youtube_dl/extractor/srf.py                | 104
 youtube_dl/extractor/teamcoco.py           |  31
 youtube_dl/extractor/tumblr.py             |   4
 youtube_dl/extractor/udn.py                |  18
 youtube_dl/extractor/vimple.py             |  71
 youtube_dl/postprocessor/atomicparsley.py  |   9
 youtube_dl/postprocessor/ffmpeg.py         |  15
 youtube_dl/utils.py                        |  33
 youtube_dl/version.py                      |   2
 23 files changed, 530 insertions(+), 188 deletions(-)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index c85a39918..80e86c1b6 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -98,6 +98,7 @@
  - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
  - **Cracked**
  - **Criterion**
+ - **CrooksAndLiars**
  - **Crunchyroll**
  - **crunchyroll:playlist**
  - **CSpan**: C-SPAN
@@ -359,6 +360,9 @@
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **Puls4**
  - **Pyvideo**
+ - **QQMusic**
+ - **QQMusicAlbum**
+ - **QQMusicSinger**
  - **QuickVid**
  - **R7**
  - **radio.de**
@@ -434,6 +438,7 @@
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**
+ - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
  - **SSA**
  - **stanfordoc**: Stanford Open ClassRoom
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a68b24ab4..6ac85f4e7 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1486,16 +1486,9 @@ class YoutubeDL(object):
             pps_chain.extend(ie_info['__postprocessors'])
         pps_chain.extend(self._pps)
         for pp in pps_chain:
-            keep_video = None
             old_filename = info['filepath']
             try:
-                keep_video_wish, info = pp.run(info)
-                if keep_video_wish is not None:
-                    if keep_video_wish:
-                        keep_video = keep_video_wish
-                    elif keep_video is None:
-                        # No clear decision yet, let IE decide
-                        keep_video = keep_video_wish
+                keep_video, info = pp.run(info)
             except PostProcessingError as e:
                 self.report_error(e.msg)
             if keep_video is False and not self.params.get('keepvideo', False):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d32f1cbd2..9e9e20589 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -397,6 +397,11 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+)
 from .quickvid import QuickVidIE
 from .r7 import R7IE
 from .radiode import RadioDeIE
@@ -481,6 +486,7 @@ from .spike import SpikeIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .srf import SrfIE
 from .srmediathek import SRMediathekIE
 from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0733bece7..4f60d5366 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)

-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return

         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -183,9 +186,9 @@ class BrightcoveIE(InfoExtractor):
             (?:
                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
-            ).+?</object>''',
+            ).+?>\s*</object>''',
             webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))

     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index e5edcc84b..91ebb0ce5 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
         uri = mMovieParams[0][1]
         # Correct cc.com in uri
-        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)

         index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
         idoc = self._download_xml(
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8ed97f8dd..7757bf950 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -23,6 +23,7 @@ from ..compat import (
 )
 from ..utils import (
     age_restricted,
+    bug_reports_message,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -324,7 +325,7 @@ class InfoExtractor(object):
                 self._downloader.report_warning(errmsg)
                 return False

-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
@@ -334,14 +335,11 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
         return (content, urlh)

-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
-        content_type = urlh.headers.get('Content-Type', '')
-        webpage_bytes = urlh.read()
-        if prefix is not None:
-            webpage_bytes = prefix + webpage_bytes
+    @staticmethod
+    def _guess_encoding_from_content(content_type, webpage_bytes):
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -354,6 +352,16 @@ class InfoExtractor(object):
                 encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
+
+        return encoding
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+        content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
+        if not encoding:
+            encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
@@ -410,13 +418,13 @@ class InfoExtractor(object):

         return content

-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
@@ -431,10 +439,10 @@

     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True):
+                      transform_source=None, fatal=True, encoding=None):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
         if xml_string is False:
             return xml_string
         if transform_source:
@@ -445,9 +453,10 @@
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True):
+                       fatal=True, encoding=None):
         json_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding)
         if (not fatal) and json_string is False:
             return None
         return self._parse_json(
@@ -548,8 +557,7 @@
         elif fatal:
             raise RegexNotFoundError('Unable to extract %s' % _name)
         else:
-            self._downloader.report_warning('unable to extract %s; '
-                                            'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
             return None

     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7ad555e9f..e645d1bb3 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -615,13 +615,24 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '100183293',
                 'ext': 'mp4',
-                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 694,
                 'age_limit': 0,
             },
         },
+        # Playwire embed
+        {
+            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+            'info_dict': {
+                'id': '3519514',
+                'ext': 'mp4',
+                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+                'thumbnail': 're:^https?://.*\.png$',
+                'duration': 45.115,
+            },
+        },
         # 5min embed
         {
             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
@@ -681,13 +692,41 @@ class GenericIE(InfoExtractor):
         # UDN embed
         {
             'url': 'http://www.udn.com/news/story/7314/822787',
-            'md5': 'de06b4c90b042c128395a88f0384817e',
+            'md5': 'fd2060e988c326991037b9aff9df21a6',
             'info_dict': {
-                'id': '300040',
+                'id': '300346',
                 'ext': 'mp4',
-                'title': '生物老師男變女 全校挺"做自己"',
+                'title': '中一中男師變性 全校師生力挺',
                 'thumbnail': 're:^https?://.*\.jpg$',
             }
+        },
+        # Ooyala embed
+        {
+            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+            'info_dict': {
+                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+                'ext': 'mp4',
+                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+                'title': 'This is what separates the Excel masters from the wannabes',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # Contains a SMIL manifest
+        {
+            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+            'info_dict': {
+                'id': 'file',
+                'ext': 'flv',
+                'title': '+ Football: Lottery Champions League Europe',
+                'uploader': 'www.telewebion.com',
+            },
+            'params': {
+                # rtmpe downloads
+                'skip_download': True,
+            }
         }
     ]

@@ -1092,7 +1131,8 @@ class GenericIE(InfoExtractor):
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))

@@ -1295,6 +1335,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Pladform')

+        # Look for Playwire embeds
+        mobj = re.search(
+            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for 5min embeds
         mobj = re.search(
             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
@@ -1408,13 +1454,22 @@ class GenericIE(InfoExtractor):
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]

-            entries.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'title': video_title,
-                'age_limit': age_limit,
-            })
+            if determine_ext(video_url) == 'smil':
+                entries.append({
+                    'id': video_id,
+                    'formats': self._extract_smil_formats(video_url, video_id),
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
+            else:
+                entries.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })

         if len(entries) == 1:
             return entries[0]
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py
index cc3f27194..d41195a96 100644
--- a/youtube_dl/extractor/miomio.py
+++ b/youtube_dl/extractor/miomio.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     xpath_text,
     int_or_none,
+    ExtractorError,
 )

@@ -14,13 +15,14 @@ class MioMioIE(InfoExtractor):
     IE_NAME = 'miomio.tv'
     _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.miomio.tv/watch/cc179734/',
-        'md5': '48de02137d0739c15b440a224ad364b9',
+        # "type=video" in flashvars
+        'url': 'http://www.miomio.tv/watch/cc88912/',
+        'md5': '317a5f7f6b544ce8419b784ca8edae65',
         'info_dict': {
-            'id': '179734',
+            'id': '88912',
             'ext': 'flv',
-            'title': '手绘动漫鬼泣但丁全程画法',
-            'duration': 354,
+            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+            'duration': 5923,
         },
     }, {
         'url': 'http://www.miomio.tv/watch/cc184024/',
@@ -42,7 +44,7 @@ class MioMioIE(InfoExtractor):
             r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')

         xml_config = self._search_regex(
-            r'flashvars="type=sina&(.+?)&',
+            r'flashvars="type=(?:sina|video)&(.+?)&',
             webpage, 'xml config')

         # skipping the following page causes lags and eventually connection drop-outs
@@ -59,6 +61,9 @@ class MioMioIE(InfoExtractor):
             'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
         }

+        if not int_or_none(xpath_text(vid_config, 'timelength')):
+            raise ExtractorError('Unable to load videos!', expected=True)
+
         entries = []
         for f in vid_config.findall('./durl'):
             segment_url = xpath_text(f, 'url', 'video url')
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 84f291558..425a4ccf1 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals

 import re
-import itertools

 from .common import InfoExtractor
 from ..compat import (
@@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor):
         },
     }]

-    def _get_url(self, track_id, template_url, server_number):
-        boundaries = (1, 30)
-        for nr in server_numbers(server_number, boundaries):
-            url = template_url % nr
-            try:
-                # We only want to know if the request succeed
-                # don't download the whole file
-                self._request_webpage(
-                    HEADRequest(url), track_id,
-                    'Checking URL %d/%d ...' % (nr, boundaries[-1]))
-                return url
-            except ExtractorError:
-                pass
-        return None
+    def _check_url(self, url, track_id, ext):
+        try:
+            # We only want to know if the request succeed
+            # don't download the whole file
+            self._request_webpage(
+                HEADRequest(url), track_id,
+                'Trying %s URL' % ext)
+            return True
+        except ExtractorError:
+            return False

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor):
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
-        server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
-        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            self.to_screen('Trying with m4a extension')
-            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            raise ExtractorError('Unable to extract track url')
+        if not self._check_url(song_url, track_id, 'mp3'):
+            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+            if not self._check_url(song_url, track_id, 'm4a'):
+                raise ExtractorError('Unable to extract track url')

         PREFIX = (
             r'm-play-on-spacebar[^>]+'
@@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor):
         return {
             'id': track_id,
             'title': title,
-            'url': final_song_url,
+            'url': song_url,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
@@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
         }
-
-
-def server_numbers(first, boundaries):
-    """ Server numbers to try in descending order of probable availability.
-    Starting from first (i.e. the number of the server hosting the preview file)
-    and going further and further up to the higher boundary and down to the
-    lower one in an alternating fashion. Namely:
-
-        server_numbers(2, (1, 5))
-
-        # Where the preview server is 2, min number is 1 and max is 5.
-        # Yields: 2, 3, 1, 4, 5
-
-    Why not random numbers or increasing sequences? Since from what I've seen,
-    full length files seem to be hosted on servers whose number is closer to
-    that of the preview; to be confirmed.
-    """
-    zip_longest = getattr(itertools, 'zip_longest', None)
-    if zip_longest is None:
-        # python 2.x
-        zip_longest = itertools.izip_longest
-
-    if len(boundaries) != 2:
-        raise ValueError("boundaries should be a two-element tuple")
-    min, max = boundaries
-    highs = range(first + 1, max + 1)
-    lows = range(first - 1, min - 1, -1)
-    rest = filter(
-        None, itertools.chain.from_iterable(zip_longest(highs, lows)))
-    yield first
-    for n in rest:
-        yield n
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index c11de1cb6..4430b3416 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -118,6 +118,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
         mediagen_doc = self._download_xml(mediagen_url, video_id,
                                           'Downloading video urls')

+        item = mediagen_doc.find('./video/item')
+        if item is not None and item.get('type') == 'text':
+            message = '%s returned error: ' % self.IE_NAME
+            if item.get('code') is not None:
+                message += '%s - ' % item.get('code')
+            message += item.text
+            raise ExtractorError(message, expected=True)
+
         description_node = itemdoc.find('description')
         if description_node is not None:
             description = description_node.text.strip()
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
index abde34b94..551c8c9f0 100644
--- a/youtube_dl/extractor/pladform.py
+++ b/youtube_dl/extractor/pladform.py
@@ -30,7 +30,7 @@ class PladformIE(InfoExtractor):
         'info_dict': {
             'id': '100183293',
             'ext': 'mp4',
-            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
             'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 694,
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
new file mode 100644
index 000000000..174c8e0ae
--- /dev/null
+++ b/youtube_dl/extractor/qqmusic.py
@@ -0,0 +1,170 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import time
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    strip_jsonp,
+    unescapeHTML,
+)
+from ..compat import compat_urllib_request
+
+
+class QQMusicIE(InfoExtractor):
+    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
+        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+        'info_dict': {
+            'id': '004295Et37taLD',
+            'ext': 'm4a',
+            'title': '可惜没如果',
+            'upload_date': '20141227',
+            'creator': '林俊杰',
+            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+        }
+    }]
+
+    # Reference: m_r_GetRUin() in top_player.js
+    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+    @staticmethod
+    def m_r_get_ruin():
+        curMs = int(time.time() * 1000) % 1000
+        return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        detail_info_page = self._download_webpage(
+            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+            mid, note='Download song detail info',
+            errnote='Unable to get song detail info', encoding='gbk')
+
+        song_name = self._html_search_regex(
+            r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+        publish_time = self._html_search_regex(
+            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+            'publish time', default=None)
+        if publish_time:
+            publish_time = publish_time.replace('-', '')
+
+        singer = self._html_search_regex(
+            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+        lrc_content = self._html_search_regex(
+            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+            detail_info_page, 'LRC lyrics', default=None)
+
+        guid = self.m_r_get_ruin()
+
+        vkey = self._download_json(
+            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+            mid, note='Retrieve vkey', errnote='Unable to get vkey',
+            transform_source=strip_jsonp)['key']
+        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+        return {
+            'id': mid,
+            'url': song_url,
+            'title': song_name,
+            'upload_date': publish_time,
+            'creator': singer,
+            'description': lrc_content,
+        }
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+    @staticmethod
+    def qq_static_url(category, mid):
+        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+    @classmethod
+    def get_entries_from_page(cls, page):
+        entries = []
+
+        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
+            song_mid = unescapeHTML(item).split('|')[-5]
+            entries.append(cls.url_result(
+                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                song_mid))
+
+        return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _TEST = {
+        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'info_dict': {
+            'id': '001BLpXF2DyJe2',
+            'title': '林俊杰',
+            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+        },
+        'playlist_count': 12,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        singer_page = self._download_webpage(
+            self.qq_static_url('singer', mid), mid, 'Download singer page')
+
+        entries = self.get_entries_from_page(singer_page)
+
+        singer_name = self._html_search_regex(
+            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
+            default=None)
+
+        singer_id = self._html_search_regex(
+            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
+            default=None)
+
+        singer_desc = None
+
+        if singer_id:
+            req = compat_urllib_request.Request(
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
+            req.add_header(
+                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+            singer_desc_page = self._download_xml(
+                req, mid, 'Donwload singer description XML')
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+
+    _TEST = {
+        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album_page = self._download_webpage(
+            self.qq_static_url('album', mid), mid, 'Download album page')
+
+        entries = self.get_entries_from_page(album_page)
+
+        album_name = self._html_search_regex(
+            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
+            default=None)
+
+        album_detail = self._html_search_regex(
+            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
+            album_page, 'album details', default=None)
+
+        return self.playlist_result(entries, mid, album_name, album_detail)
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 316b2c90f..183ff50f4 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -221,7 +221,12 @@ class SoundcloudIE(InfoExtractor):
                 info_json_url += "&secret_token=" + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-            return self.url_result(query['url'][0])
+            real_url = query['url'][0]
+            # If the token is in the query of the original url we have to
+            # manually add it
+            if 'secret_token' in query:
+                real_url += '?secret_token=' + query['secret_token'][0]
+            return self.url_result(real_url)
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
@@ -274,9 +279,8 @@ class SoundcloudSetIE(SoundcloudIE):
         info = self._download_json(resolv_url, full_title)

         if 'errors' in info:
-            for err in info['errors']:
-                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
-            return
+            msgs = (compat_str(err['error_message']) for err in info['errors'])
+            raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))

         return {
             '_type': 'playlist',
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index e529bb55c..182f286df 100644
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor

 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+        (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
         m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py
new file mode 100644
index 000000000..77eec0bc7
--- /dev/null
+++ b/youtube_dl/extractor/srf.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class SrfIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+    _TESTS = [{
+        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'md5': '4cd93523723beff51bb4bee974ee238d',
+        'info_dict': {
+            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+            'display_id': 'snowden-beantragt-asyl-in-russland',
+            'ext': 'm4v',
+            'upload_date': '20130701',
+            'title': 'Snowden beantragt Asyl in Russland',
+            'timestamp': 1372713995,
+        }
+    }, {
+        # No Speichern (Save) button
+        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+        'md5': 'd97e236e80d1d24729e5d0953d276a4f',
+        'info_dict': {
+            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+            'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
+            'ext': 'flv',
+            'upload_date': '20130710',
+            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+            'timestamp': 1373493600,
+        },
+    }, {
+        'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }, {
+        'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+
+        video_data = self._download_xml(
+            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+            display_id)
+
+        title = xpath_text(
+            video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
+        thumbnails = [{
+            'url': s.text
+        } for s in video_data.findall('.//ImageRepresentation/url')]
+        timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
+        # The <duration> field in XML is different from the exact duration, skipping
+
+        formats = []
+        for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'):
+            for url_node in item.findall('url'):
+                quality = url_node.attrib['quality']
+                full_url = url_node.text
+                original_ext = determine_ext(full_url)
+                format_id = '%s-%s' % (quality, item.attrib['protocol'])
+                if original_ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
+                elif original_ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        full_url, display_id, 'mp4', m3u8_id=format_id))
+                else:
+                    formats.append({
+                        'url': full_url,
+                        'ext': original_ext,
+                        'format_id': format_id,
+                        'quality': 0 if 'HD' in quality else -1,
+                        'preference': 1,
+                    })
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitles_data = video_data.find('Subtitles')
+        if subtitles_data is not None:
+            subtitles_list = [{
+                'url': sub.text,
+                'ext': determine_ext(sub.text),
+            } for sub in subtitles_data]
+            if subtitles_list:
+                subtitles['de'] = subtitles_list
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 1caf08cb7..2381676b4 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals

 import base64
@@ -35,6 +36,17 @@ class TeamcocoIE(InfoExtractor):
                 'duration': 288,
                 'age_limit': 0,
             }
+        }, {
+            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+            'info_dict': {
+                'id': '88748',
+                'ext': 'mp4',
+                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+                'description': 'md5:15501f23f020e793aeca761205e42c24',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 downloads
+            }
         }
     ]
     _VIDEO_ID_REGEXES = (
@@ -54,10 +66,23 @@ class TeamcocoIE(InfoExtractor):
             video_id = self._html_search_regex(
                 self._VIDEO_ID_REGEXES, webpage, 'video id')

+        preload = None
         preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
-        if not preloads:
-            raise ExtractorError('Preload information could not be extracted')
-        preload = max([(len(p), p) for p in preloads])[1]
+        if preloads:
+            preload = max([(len(p), p) for p in preloads])[1]
+
+        if not preload:
+            preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage))
+
+        if not preload:
+            preload = self._html_search_regex([
+                r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)'
+            ], webpage.replace('","', ''), 'preload data', default=None)
+
+        if not preload:
+            raise ExtractorError(
+                'Preload information could not be extracted', expected=True)
+
         data = self._parse_json(
             base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 2a1ae5a71..828c808a6 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -56,6 +56,6 @@ class TumblrIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'title': video_title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
         }
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index bba25bb58..c08428acf 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
 import json

 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    js_to_json,
+    ExtractorError,
+)
 from ..compat import compat_urlparse


 class UDNEmbedIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://video.udn.com/embed/news/300040',
         'md5': 'de06b4c90b042c128395a88f0384817e',
@@ -19,7 +22,11 @@ class UDNEmbedIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }, {
-        'url': '//video.udn.com/embed/news/300040',
+        'url': 'https://video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }, {
+        # From https://video.udn.com/news/303776
+        'url': 'https://video.udn.com/play/news/303776',
         'only_matching': True,
     }]
@@ -47,7 +54,10 @@ class UDNEmbedIE(InfoExtractor):
                 'retrieve url for %s video' % video_type),
             'format_id': video_type,
             'preference': 0 if video_type == 'mp4' else -1,
-        } for video_type, api_url in video_urls.items()]
+        } for video_type, api_url in video_urls.items() if api_url]
+
+        if not formats:
+            raise ExtractorError('No videos found', expected=True)

         self._sort_formats(formats)
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index ee3d86117..aa3d6ddfd 100644
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dl/extractor/vimple.py
@@ -1,75 +1,54 @@
-# coding: utf-8
 from __future__ import unicode_literals

-import base64
-import re
-import xml.etree.ElementTree
-import zlib
-
 from .common import InfoExtractor
 from ..utils import int_or_none


 class VimpleIE(InfoExtractor):
-    IE_DESC = 'Vimple.ru'
-    _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
+    IE_DESC = 'Vimple - one-click video hosting'
+    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
     _TESTS = [
         {
             'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
             'md5': '2e750a330ed211d3fd41821c6ad9a279',
             'info_dict': {
-                'id': 'c0f6b1687dcd4000a97ebe70068039cf',
+                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
                 'ext': 'mp4',
                 'title': 'Sunset',
                 'duration': 20,
                 'thumbnail': 're:https?://.*?\.jpg',
             },
-        },
+        }, {
+            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+            'only_matching': True,
+        }
     ]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
-
-        iframe = self._download_webpage(
-            iframe_url, video_id,
-            note='Downloading iframe', errnote='unable to fetch iframe')
-        player_url = self._html_search_regex(
-            r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url')
+        video_id = self._match_id(url)

-        player = self._request_webpage(
-            player_url, video_id, note='Downloading swf player').read()
+        webpage = self._download_webpage(
+            'http://player.vimple.ru/iframe/%s' % video_id, video_id)

-        player = zlib.decompress(player[8:])
+        playlist = self._parse_json(
+            self._search_regex(
+                r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
+            video_id)['playlist'][0]

-        xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
-        xml_pieces = [piece[1:-1] for piece in xml_pieces]
+        title = playlist['title']
+        video_id = playlist.get('videoId') or video_id
+        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+        duration = int_or_none(playlist.get('duration'))

-        xml_data = b''.join(xml_pieces)
-        xml_data = base64.b64decode(xml_data)
-
-        xml_data = xml.etree.ElementTree.fromstring(xml_data)
-
-        video = xml_data.find('Video')
-        quality = video.get('quality')
-        q_tag = video.find(quality.capitalize())
-
-        formats = [
-            {
-                'url': q_tag.get('url'),
-                'tbr': int(q_tag.get('bitrate')),
-                'filesize': int(q_tag.get('filesize')),
-                'format_id': quality,
-            },
-        ]
+        formats = [{
+            'url': f['url'],
+        } for f in playlist['video']]
+        self._sort_formats(formats)

         return {
             'id': video_id,
-            'title': video.find('Title').text,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
-            'thumbnail': video.find('Poster').get('url'),
-            'duration': int_or_none(video.get('duration')),
-            'webpage_url': video.find('Share').get('videoPageUrl'),
         }
diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py
index 448ccc5f3..a5dfc136a 100644
--- a/youtube_dl/postprocessor/atomicparsley.py
+++ b/youtube_dl/postprocessor/atomicparsley.py
@@ -50,8 +50,13 @@ class AtomicParsleyPP(PostProcessor):
             msg = stderr.decode('utf-8', 'replace').strip()
             raise AtomicParsleyPPError(msg)

-        os.remove(encodeFilename(filename))
         os.remove(encodeFilename(temp_thumbnail))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        # for formats that don't support thumbnails (like 3gp) AtomicParsley
+        # won't create to the temporary file
+        if b'No changes' in stdout:
+            self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+        else:
+            os.remove(encodeFilename(filename))
+            os.rename(encodeFilename(temp_filename), encodeFilename(filename))

         return True, info
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8e99a3c2c..4c4a038f9 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -264,15 +264,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         new_path = prefix + sep + extension

         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
-        if new_path == path:
-            self._nopostoverwrites = True
+        if (new_path == path or
+                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+            self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+            return True, information

         try:
-            if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
-            else:
-                self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
-                self.run_ffmpeg(path, new_path, acodec, more_opts)
+            self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
+            self.run_ffmpeg(path, new_path, acodec, more_opts)
         except AudioConversionError as e:
             raise PostProcessingError(
                 'audio conversion failed: ' + e.msg)
@@ -286,7 +285,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 errnote='Cannot update utime of audio file')

         information['filepath'] = new_path
-        return self._nopostoverwrites, information
+        return False, information


 class FFmpegVideoConvertorPP(FFmpegPostProcessor):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 52f0dd09a..edeee1853 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -312,17 +312,17 @@ def sanitize_path(s):
     """Sanitizes and normalizes path on Windows"""
     if sys.platform != 'win32':
         return s
-    drive, _ = os.path.splitdrive(s)
-    unc, _ = os.path.splitunc(s)
-    unc_or_drive = unc or drive
-    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
-    if unc_or_drive:
+    drive_or_unc, _ = os.path.splitdrive(s)
+    if sys.version_info < (2, 7) and not drive_or_unc:
+        drive_or_unc, _ = os.path.splitunc(s)
+    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+    if drive_or_unc:
         norm_path.pop(0)
     sanitized_path = [
         path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
         for path_part in norm_path]
-    if unc_or_drive:
-        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    if drive_or_unc:
+        sanitized_path.insert(0, drive_or_unc + os.path.sep)
     return os.path.join(*sanitized_path)
@@ -452,6 +452,17 @@ def make_HTTPS_handler(params, **kwargs):
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


+def bug_reports_message():
+    if ytdl_is_updateable():
+        update_cmd = 'type  youtube-dl -U  to update'
+    else:
+        update_cmd = 'see  https://yt-dl.org/update  on how to update'
+    msg = '; please report this issue on https://yt-dl.org/bug .'
+    msg += ' Make sure you are using the latest version; %s.' % update_cmd
+    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+    return msg
+
+
 class ExtractorError(Exception):
     """Error during info extraction."""
@@ -467,13 +478,7 @@
         if cause:
             msg += ' (caused by %r)' % cause
         if not expected:
-            if ytdl_is_updateable():
-                update_cmd = 'type  youtube-dl -U  to update'
-            else:
-                update_cmd = 'see  https://yt-dl.org/update  on how to update'
-            msg += '; please report this issue on https://yt-dl.org/bug .'
-            msg += ' Make sure you are using the latest version; %s.' % update_cmd
-            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+            msg += bug_reports_message()
         super(ExtractorError, self).__init__(msg)

         self.traceback = tb
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 1095fea2f..3fd0e7e56 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.04.09'
+__version__ = '2015.04.17'