diff options
| -rw-r--r-- | .github/ISSUE_TEMPLATE.md | 6 | ||||
| -rw-r--r-- | ChangeLog | 20 | ||||
| -rw-r--r-- | docs/supportedsites.md | 2 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 6 | ||||
| -rw-r--r-- | youtube_dl/downloader/http.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/amcnetworks.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/byutv.py | 90 | ||||
| -rw-r--r-- | youtube_dl/extractor/clubic.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/criterion.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/jwplatform.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/periscope.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/rudo.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/twitch.py | 1 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 30 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
17 files changed, 152 insertions, 68 deletions
| diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 273eb8c0b..e813e4c59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@  --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02**  ### Before submitting an *issue* make sure you have:  - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>  [debug] User config: []  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.27 +[debug] youtube-dl version 2016.10.02  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4  [debug] Proxy map: {} @@ -1,8 +1,26 @@ -version <unreleased> +version 2016.10.02 + +Core +* Fix possibly lost extended attributes during post-processing ++ Support pyxattr as well as python-xattr for --xattrs and +  --xattr-set-filesize (#9054)  Extractors ++ [jwplatform] Support DASH streams in JWPlayer ++ [jwplatform] Support old-style JWPlayer playlists ++ [byutv:event] Add extractor +* [periscope:user] Fix extraction (#10820)  * [dctp] Fix extraction (#10734) ++ [instagram] Extract video dimensions (#10790) ++ [tvland] Extend URL regular expression (#10812) ++ [vgtv] Add support for tv.aftonbladet.se (#10800) +- [aftonbladet] Remove extractor +* [vk] Fix timestamp and view count extraction (#10760) ++ [vk] Add support for running and finished live streams (#10799)  + [leeco] Recognize more Le Sports URLs (#10794) ++ [instagram] Extract comments (#10788) ++ [ketnet] Extract mzsource formats (#10770) +* [limelight:media] Improve HTTP formats extraction  version 2016.09.27 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 26f275577..828ed0ba9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -34,7 +34,6 @@   - **AdultSwim**   - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network   - **AfreecaTV**: afreecatv.com - - **Aftonbladet**   - **AirMozilla**   - **AlJazeera**   - **Allocine** @@ -112,6 +111,7 @@   - **bt:vestlendingen**: Bergens Tidende - Vestlendingen   - **BuzzFeed**   - **BYUtv** + - **BYUtvEvent**   - **Camdemy**   - **CamdemyFolder**   - **CamWithHer** diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 72141b983..f84b866df 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -266,8 +266,6 @@ def _real_main(argv=None):          postprocessors.append({              'key': 'FFmpegEmbedSubtitle',          }) -    if opts.xattrs: -        postprocessors.append({'key': 'XAttrMetadata'})      if opts.embedthumbnail:          already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails          postprocessors.append({ @@ -276,6 +274,10 @@ def _real_main(argv=None):          })          if not already_have_thumbnail:              opts.writethumbnail = True +    # XAttrMetadataPP should be run after post-processors that may change file +    # contents +    if opts.xattrs: +        postprocessors.append({'key': 'XAttrMetadata'})      # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.      # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.      if opts.exec_cmd: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 11294d106..af405b950 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -182,7 +182,7 @@ class HttpFD(FileDownloader):                  if self.params.get('xattr_set_filesize', False) and data_len is not None:                      try: -                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) +                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))                      except (XAttrUnavailableError, XAttrMetadataError) as err:                          self.report_error('unable to set filesize xattr: %s' % str(err)) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index c739d2c99..d2b03b177 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):              # m3u8 download              'skip_download': True,          }, +        'skip': 'Requires TV provider accounts',      }, {          'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',          'only_matching': True, diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3aec601f8..4be175d70 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -1,6 +1,5 @@  from __future__ import unicode_literals -import json  import re  from .common import InfoExtractor @@ -8,15 +7,15 @@ from ..utils import ExtractorError  class BYUtvIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' +    _TESTS = [{          'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', -        'md5': '05850eb8c749e2ee05ad5a1c34668493',          'info_dict': { -            'id': 'studio-c-season-5-episode-5', +            'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', +            'display_id': 'studio-c-season-5-episode-5',              'ext': 'mp4', -            'description': 'md5:e07269172baff037f8e8bf9956bc9747',              'title': 'Season 5 Episode 5', +            'description': 'md5:e07269172baff037f8e8bf9956bc9747',              'thumbnail': 're:^https?://.*\.jpg$',              'duration': 1486.486,          }, @@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):              'skip_download': True,          },          'add_ie': ['Ooyala'], -    } +    }, { +        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', +        'only_matching': True, +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('video_id') +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') or video_id -        webpage = self._download_webpage(url, video_id) +        webpage = self._download_webpage(url, display_id)          episode_code = self._search_regex(              r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') -        episode_json = re.sub( -            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) -        ep = json.loads(episode_json) - -        if ep['providerType'] == 'Ooyala': -            return { -                '_type': 'url_transparent', -                'ie_key': 'Ooyala', -                'url': 'ooyala:%s' % ep['providerId'], -                'id': video_id, -                'title': ep['title'], -                'description': ep.get('description'), -                'thumbnail': ep.get('imageThumbnail'), -            } -        else: + +        ep = self._parse_json( +            episode_code, display_id, transform_source=lambda s: +            re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) + +        if ep['providerType'] != 'Ooyala':              raise ExtractorError('Unsupported provider %s' % ep['provider']) + +        return { +            '_type': 'url_transparent', +            'ie_key': 'Ooyala', +            'url': 'ooyala:%s' % ep['providerId'], +            'id': video_id, +            'display_id': display_id, +            'title': ep['title'], +            'description': ep.get('description'), +            'thumbnail': ep.get('imageThumbnail'), +        } + + +class BYUtvEventIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)' +    _TEST = { +        'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', +        'info_dict': { +            'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', +            'ext': 'mp4', +            'title': 'Toledo vs. BYU (9/30/16)', +        }, +        'params': { +            'skip_download': True, +        }, +        'add_ie': ['Ooyala'], +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        ooyala_id = self._search_regex( +            r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', +            webpage, 'ooyala id', group='id') + +        title = self._search_regex( +            r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage, +            'title').strip() + +        return { +            '_type': 'url_transparent', +            'ie_key': 'Ooyala', +            'url': 'ooyala:%s' % ooyala_id, +            'id': video_id, +            'title': title, +        } diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 2fba93543..f7ee3a8f8 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -1,9 +1,6 @@  # coding: utf-8  from __future__ import unicode_literals -import json -import re -  from .common import InfoExtractor  from ..utils import (      clean_html, @@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id          player_page = self._download_webpage(player_url, video_id) -        config_json = self._search_regex( +        config = self._parse_json(self._search_regex(              r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, -            'configuration') -        config = json.loads(config_json) +            'configuration'), video_id)          video_info = config['videoInfo']          sources = config['sources'] diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index ad32673a8..cf6a5d6cb 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor @@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):              'ext': 'mp4',              'title': 'Le Samouraï',              'description': 'md5:a2b4b116326558149bef81f76dcbb93f', +            'thumbnail': 're:^https?://.*\.jpg$',          }      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          final_url = self._search_regex( -            r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') +            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')          title = self._og_search_title(webpage)          description = self._html_search_meta('description', webpage)          thumbnail = self._search_regex( -            r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', +            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',              webpage, 'thumbnail url')          return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09b3b4942..e8928307c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -116,7 +116,10 @@ from .brightcove import (      BrightcoveNewIE,  )  from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE +from .byutv import ( +    BYUtvIE, +    BYUtvEventIE, +)  from .c56 import C56IE  from .camdemy import (      CamdemyIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1792c534..489b3c7c1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2332,12 +2332,23 @@ class GenericIE(InfoExtractor):              info_dict.update(json_ld)              return info_dict +        # Look for HTML5 media +        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') +        if entries: +            for entry in entries: +                entry.update({ +                    'id': video_id, +                    'title': video_title, +                }) +                self._sort_formats(entry['formats']) +            return self.playlist_result(entries) +          def check_video(vurl):              if YoutubeIE.suitable(vurl):                  return True              vpath = compat_urlparse.urlparse(vurl).path              vext = determine_ext(vpath) -            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') +            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')          def filter_video(urls):              return list(filter(check_video, urls)) @@ -2388,9 +2399,6 @@ class GenericIE(InfoExtractor):              if m_video_type is not None:                  found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))          if not found: -            # HTML5 video -            found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage) -        if not found:              REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'              found = re.search(                  r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 38199fcd0..5d56e0a28 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -32,13 +32,20 @@ class JWPlatformBaseIE(InfoExtractor):          return self._parse_jwplayer_data(              jwplayer_data, video_id, *args, **kwargs) -    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): +    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, +                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):          # JWPlayer backward compatibility: flattened playlists          # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96          if 'playlist' not in jwplayer_data:              jwplayer_data = {'playlist': [jwplayer_data]}          entries = [] + +        # JWPlayer backward compatibility: single playlist item +        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 +        if not isinstance(jwplayer_data['playlist'], list): +            jwplayer_data['playlist'] = [jwplayer_data['playlist']] +          for video_data in jwplayer_data['playlist']:              # JWPlayer backward compatibility: flattened sources              # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 @@ -57,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):                  if source_type == 'hls' or ext == 'm3u8':                      formats.extend(self._extract_m3u8_formats(                          source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) +                elif ext == 'mpd': +                    formats.extend(self._extract_mpd_formats( +                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))                  # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67                  elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):                      formats.append({ diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 61043cad5..0e3623024 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE):          user = list(data_store['UserCache']['users'].values())[0]['user']          user_id = user['id'] -        session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] +        session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']          broadcasts = self._call_api(              'getUserBroadcastsPublic', diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 38366b784..9a330c196 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE):              transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))          info_dict = self._parse_jwplayer_data( -            jwplayer_data, video_id, require_title=False, m3u8_id='hls') +            jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')          info_dict.update({              'title': self._og_search_title(webpage), diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bc352391e..46c2cfe7b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE):              # m3u8 download              'skip_download': True,          }, +        'skip': 'HTTP Error 404: Not Found',      }]      def _real_extract(self, url): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2dfa8013..044520037 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -782,6 +782,7 @@ class XAttrMetadataError(Exception):      def __init__(self, code=None, msg='Unknown error'):          super(XAttrMetadataError, self).__init__(msg)          self.code = code +        self.msg = msg          # Parsing code and msg          if (self.code in (errno.ENOSPC, errno.EDQUOT) or @@ -3161,20 +3162,25 @@ def write_xattr(path, key, value):          # try the pyxattr module...          import xattr -        # Unicode arguments are not supported in python-pyxattr until -        # version 0.5.0 -        # See https://github.com/rg3/youtube-dl/issues/5498 -        pyxattr_required_version = '0.5.0' -        if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): -            # TODO: fallback to CLI tools -            raise XAttrUnavailableError( -                'python-pyxattr is detected but is too old. ' -                'youtube-dl requires %s or above while your version is %s. ' -                'Falling back to other xattr implementations' % ( -                    pyxattr_required_version, xattr.__version__)) +        if hasattr(xattr, 'set'):  # pyxattr +            # Unicode arguments are not supported in python-pyxattr until +            # version 0.5.0 +            # See https://github.com/rg3/youtube-dl/issues/5498 +            pyxattr_required_version = '0.5.0' +            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): +                # TODO: fallback to CLI tools +                raise XAttrUnavailableError( +                    'python-pyxattr is detected but is too old. ' +                    'youtube-dl requires %s or above while your version is %s. ' +                    'Falling back to other xattr implementations' % ( +                        pyxattr_required_version, xattr.__version__)) + +            setxattr = xattr.set +        else:  # xattr +            setxattr = xattr.setxattr          try: -            xattr.set(path, key, value) +            setxattr(path, key, value)          except EnvironmentError as e:              raise XAttrMetadataError(e.errno, e.strerror) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index af0c2cfc4..161ba4391 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2016.09.27' +__version__ = '2016.10.02' | 
