diff options
| -rw-r--r-- | README.md | 2 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 11 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/arte.py | 38 | ||||
| -rw-r--r-- | youtube_dl/extractor/cinemassacre.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/cnn.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/francetv.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/funnyordie.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/mitele.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 20 | ||||
| -rw-r--r-- | youtube_dl/extractor/telecinco.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 5 | ||||
| -rw-r--r-- | youtube_dl/options.py | 5 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
17 files changed, 125 insertions, 47 deletions
@@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.                                       configuration in ~/.config/youtube-dl.conf                                       (%APPDATA%/youtube-dl/config.txt on                                       Windows) +    --flat-playlist                  Do not extract the videos of a playlist, +                                     only list them.  ## Video Selection:      --playlist-start NUMBER          playlist video to start at (default is 1) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dec0e20e7..623f9d6fe 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -165,6 +165,8 @@ class YoutubeDL(object):                         'auto' for elaborate guessing      encoding:          Use this encoding instead of the system-specified.      extract_flat:      Do not resolve URLs, return the immediate result. +                       Pass in 'in_playlist' to only show this behavior for +                       playlist items.      
The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -568,8 +570,13 @@ class YoutubeDL(object):          result_type = ie_result.get('_type', 'video') -        if self.params.get('extract_flat', False): -            if result_type in ('url', 'url_transparent'): +        if result_type in ('url', 'url_transparent'): +            extract_flat = self.params.get('extract_flat', False) +            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or +                    extract_flat is True): +                self.add_extra_info(ie_result, extra_info) +                if self.params.get('forcejson', False): +                    self.to_stdout(json.dumps(ie_result))                  return ie_result          if result_type == 'video': diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7f2b4dfcc..c4e1d32db 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -255,8 +255,6 @@ def _real_main(argv=None):          date = DateRange.day(opts.date)      else:          date = DateRange(opts.dateafter, opts.datebefore) -    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search: -        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')      # Do not download videos when there are audio-only formats      if opts.extractaudio and not opts.keepvideo and opts.format is None: @@ -369,6 +367,7 @@ def _real_main(argv=None):          'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,          'encoding': opts.encoding,          'exec_cmd': opts.exec_cmd, +        'extract_flat': opts.extract_flat,      }      with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e0957987c..3023c3095 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -368,6 +368,7 @@ from .teachingchannel import 
TeachingChannelIE  from .teamcoco import TeamcocoIE  from .techtalks import TechTalksIE  from .ted import TEDIE +from .telecinco import TelecincoIE  from .telemb import TeleMBIE  from .tenplay import TenPlayIE  from .testurl import TestURLIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 3a34d1ecc..b9a9440c0 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -10,8 +10,8 @@ from ..utils import (      unified_strdate,      determine_ext,      get_element_by_id, -    compat_str,      get_element_by_attribute, +    int_or_none,  )  # There are different sources of video in arte.tv, the extraction process  @@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):          if not upload_date_str:              upload_date_str = player_info.get('VDA', '').split(' ')[0] +        title = player_info['VTI'].strip() +        subtitle = player_info.get('VSU', '').strip() +        if subtitle: +            title += ' - %s' % subtitle +          info_dict = {              'id': player_info['VID'], -            'title': player_info['VTI'], +            'title': title,              'description': player_info.get('VDE'),              'upload_date': unified_strdate(upload_date_str),              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),          } -        all_formats = player_info['VSR'].values() +        all_formats = [] +        for format_id, format_dict in player_info['VSR'].items(): +            fmt = dict(format_dict) +            fmt['format_id'] = format_id +            all_formats.append(fmt)          # Some formats use the m3u8 protocol          all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))          def _match_lang(f): @@ -149,25 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):                  )          formats = sorted(formats, key=sort_key)          def _format(format_info): -            quality = '' -            height = 
format_info.get('height') -            if height is not None: -                quality = compat_str(height) -            bitrate = format_info.get('bitrate') -            if bitrate is not None: -                quality += '-%d' % bitrate -            if format_info.get('versionCode') is not None: -                format_id = '%s-%s' % (quality, format_info['versionCode']) -            else: -                format_id = quality -            media_type = format_info.get('mediaType') -            if media_type is not None: -                format_id += '-%s' % media_type              info = { -                'format_id': format_id, -                'format_note': format_info.get('versionLibelle'), -                'width': format_info.get('width'), -                'height': height, +                'format_id': format_info['format_id'], +                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), +                'width': int_or_none(format_info.get('width')), +                'height': int_or_none(format_info.get('height')), +                'tbr': int_or_none(format_info.get('bitrate')),              }              if format_info['mediaType'] == 'rtmp':                  info['url'] = format_info['streamer'] diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 496271be4..d064a28f9 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') -        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) +        mobj = 
re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)          if not mobj:              raise ExtractorError('Can\'t extract embed url and video id')          playerdata_url = mobj.group('embed_url') @@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):          video_description = self._html_search_regex(              r'<div class="entry-content">(?P<description>.+?)</div>',              webpage, 'description', flags=re.DOTALL, fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage)          playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') -        video_thumbnail = self._search_regex( -            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) -        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') -        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url') +        vidurl = self._search_regex( +            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') +        vidid = self._search_regex( +            r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid') +        videoserver = self._html_search_regex( +            r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver') + +        videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)          videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')          formats = [] -        baseurl = sd_url[:sd_url.rfind('/')+1] +        baseurl = vidurl[:vidurl.rfind('/')+1]          for video in videolist.findall('.//video'):              src = video.get('src')              if not src: diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index dae40c136..78877b1cf 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 
@@ from ..utils import (  class CNNIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/ -        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' +        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''      _TESTS = [{          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f99888ecc..e3057d900 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -39,6 +39,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):              'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',              'uploader': 'Yomiuri Telecasting Corporation (YTV)',              'upload_date': '20131013', +            'url': 're:(?!.*&)',          },          'params': {              # rtmp @@ -237,12 +238,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text              streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format              streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')              streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) -            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format) -            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url') -            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path') +            streamdata = self._download_xml( +                streamdata_req, video_id, +                note='Downloading media info for %s' % video_format) +            video_url = streamdata.find('.//host').text +            video_play_path = streamdata.find('.//file').text 
             formats.append({                  'url': video_url, -                'play_path':   video_play_path, +                'play_path': video_play_path,                  'ext': 'flv',                  'format': video_format,                  'format_id': video_format, diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 0b3374d97..566e20d76 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                          f4m_format['preference'] = 1                      formats.extend(f4m_formats)              elif video_url.endswith('.m3u8'): -                formats.extend(self._extract_m3u8_formats(video_url, video_id)) +                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))              elif video_url.startswith('rtmp'):                  formats.append({                      'url': video_url, @@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                  formats.append({                      'url': video_url,                      'format_id': format_id, -                    'preference': 2, +                    'preference': -1,                  })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index d966e8403..ec6d96ada 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):          video_id = mobj.group('id')          webpage = self._download_webpage(url, video_id) -        links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage) +        links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)          if not links:              raise ExtractorError('No media links available for %s' % video_id) diff --git a/youtube_dl/extractor/generic.py 
b/youtube_dl/extractor/generic.py index 9057a6beb..9b6498894 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -380,6 +380,17 @@ class GenericIE(InfoExtractor):                  'uploader': 'education-portal.com',              },          }, +        { +            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz', +            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4', +            'info_dict': { +                'id': 'uxjb0lwrcz', +                'ext': 'mp4', +                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', +                'duration': 1715.0, +                'uploader': 'thoughtworks.wistia.com', +            },    +        },      ]      def report_following_redirect(self, new_url): @@ -476,7 +487,8 @@ class GenericIE(InfoExtractor):                       'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'                      ) % (url, url), expected=True)              else: -                assert ':' in default_search +                if ':' not in default_search: +                    default_search += ':'                  return self.url_result(default_search + url)          url, smuggled_data = unsmuggle_url(url) @@ -652,7 +664,7 @@ class GenericIE(InfoExtractor):          # Look for embedded Wistia player          match = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) +            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)          if match:              embed_url = self._proto_relative_url(                  unescapeHTML(match.group('url'))) @@ -664,6 +676,7 @@ class GenericIE(InfoExtractor):                  'title': video_title,                  'id': video_id,              } +                      match = 
re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)          if match:              return { diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 979f3d692..6691521e5 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -6,6 +6,7 @@ import json  from .common import InfoExtractor  from ..utils import (      compat_urllib_parse, +    compat_urlparse,      get_element_by_attribute,      parse_duration,      strip_jsonp, @@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):          ).replace('\'', '"')          embed_data = json.loads(embed_data_json) -        info_url = embed_data['flashvars']['host'] +        domain = embed_data['mediaUrl'] +        if not domain.startswith('http'): +            # only happens in telecinco.es videos +            domain = 'http://' + domain +        info_url = compat_urlparse.urljoin( +            domain, +            compat_urllib_parse.unquote(embed_data['flashvars']['host']) +        )          info_el = self._download_xml(info_url, episode).find('./video/info')          video_link = info_el.find('videoUrl/link').text          token_query = compat_urllib_parse.urlencode({'id': video_link})          token_info = self._download_json( -            'http://token.mitele.es/?' + token_query, episode, +            embed_data['flashvars']['ov_tk'] + '?' 
+ token_query, +            episode,              transform_source=strip_jsonp          ) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8f140d626..6118ed5c2 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):                  'thumbnail': 're:^https?://.*\.jpg$',                  'upload_date': '20140122',              } +        }, +        { +            'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/', +            'info_dict': { +                'id': 'united-states-of-secrets', +            }, +            'playlist_count': 2,          } -      ]      def _extract_webpage(self, url): @@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):                  r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',                  webpage, 'upload date', default=None)) +            # tabbed frontline videos +            tabbed_videos = re.findall( +                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage) +            if tabbed_videos: +                return tabbed_videos, presumptive_id, upload_date +              MEDIA_ID_REGEXES = [                  r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed                  r'class="coveplayerid">([^<]+)<',                       # coveplayer @@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):      def _real_extract(self, url):          video_id, display_id, upload_date = self._extract_webpage(url) +        if isinstance(video_id, list): +            entries = [self.url_result( +                'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id) +                for vid_id in video_id] +            return self.playlist_result(entries, display_id) +          info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id          info = self._download_json(info_url, display_id) diff --git a/youtube_dl/extractor/telecinco.py 
b/youtube_dl/extractor/telecinco.py new file mode 100644 index 000000000..db9788c18 --- /dev/null +++ b/youtube_dl/extractor/telecinco.py @@ -0,0 +1,19 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .mitele import MiTeleIE + + +class TelecincoIE(MiTeleIE): +    IE_NAME = 'telecinco.es' +    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html' + +    _TEST = { +        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', +        'info_dict': { +            'id': 'MDSVID20141015_0058', +            'ext': 'mp4', +            'title': 'Con Martín Berasategui, hacer un bacalao al ...', +            'duration': 662, +        }, +    } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cfae2de89..4ab56e0ac 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -191,8 +191,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      def _real_initialize(self):          if self._downloader is None:              return -        if not self._set_language(): -            return +        if self._get_login_info()[0] is not None: +            if not self._set_language(): +                return          if not self._login():              return          self._confirm_age() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 649361bde..2b1cd7438 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -159,6 +159,11 @@ def parseOpts(overrideArguments=None):          '--ignore-config',          action='store_true',          help='Do not read configuration files. 
When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') +    general.add_option( +        '--flat-playlist', +        action='store_const', dest='extract_flat', const='in_playlist', +        default=False, +        help='Do not extract the videos of a playlist, only list them.')      selection = optparse.OptionGroup(parser, 'Video Selection')      selection.add_option( diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e7f6adef1..59cb3b1a1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.10.18' +__version__ = '2014.10.24'  | 
