diff options
27 files changed, 577 insertions, 234 deletions
@@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.                                       configuration in ~/.config/youtube-dl.conf                                       (%APPDATA%/youtube-dl/config.txt on                                       Windows) +    --flat-playlist                  Do not extract the videos of a playlist, +                                     only list them.  ## Video Selection:      --playlist-start NUMBER          playlist video to start at (default is 1) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 84b05da39..965e5d8a5 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -14,7 +14,7 @@ from test.helper import gettestcases  from youtube_dl.extractor import (      FacebookIE,      gen_extractors, -    JustinTVIE, +    TwitchIE,      YoutubeIE,  ) @@ -72,21 +72,17 @@ class TestAllURLsMatching(unittest.TestCase):          self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])          self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) -    def test_justin_tv_channelid_matching(self): -        self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/')) -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/')) - -    def test_justintv_videoid_matching(self): -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483')) - -    def test_justin_tv_chapterid_matching(self): -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361')) +    def test_twitch_channelid_matching(self): +        self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv')) +        self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv')) +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv')) +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/')) + +    def test_twitch_videoid_matching(self): +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483')) + +    def test_twitch_chapterid_matching(self): +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))      def test_youtube_extract(self):          assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dec0e20e7..623f9d6fe 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -165,6 +165,8 @@ class YoutubeDL(object):                         'auto' for elaborate guessing      encoding:          Use this encoding instead of the system-specified.      extract_flat:      Do not resolve URLs, return the immediate result. +                       Pass in 'in_playlist' to only show this behavior for +                       playlist items.      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -568,8 +570,13 @@ class YoutubeDL(object):          result_type = ie_result.get('_type', 'video') -        if self.params.get('extract_flat', False): -            if result_type in ('url', 'url_transparent'): +        if result_type in ('url', 'url_transparent'): +            extract_flat = self.params.get('extract_flat', False) +            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or +                    extract_flat is True): +                self.add_extra_info(ie_result, extra_info) +                if self.params.get('forcejson', False): +                    self.to_stdout(json.dumps(ie_result))                  return ie_result          if result_type == 'video': diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7f2b4dfcc..a1079e596 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -79,6 +79,7 @@ __authors__  = (      'Carlos Ramos',      '5moufl',      'lenaten', +    'Dennis Scheiba',  )  __license__ = 'Public Domain' @@ -255,8 +256,6 @@ def _real_main(argv=None):          date = DateRange.day(opts.date)      else:          date = DateRange(opts.dateafter, opts.datebefore) -    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search: -        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')      # Do not download videos when there are audio-only formats      if opts.extractaudio and not opts.keepvideo and opts.format is None: @@ -369,6 +368,7 @@ def _real_main(argv=None):          'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,          'encoding': opts.encoding,          'exec_cmd': opts.exec_cmd, +        'extract_flat': opts.extract_flat,      }      with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c0b26c5b3..ab4af2079 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -26,6 +26,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE  from .bbccouk import BBCCoUkIE  from .beeg import BeegIE  from .behindkink import BehindKinkIE +from .bild import BildIE  from .bilibili import BiliBiliIE  from .blinkx import BlinkxIE  from .bliptv import BlipTVIE, BlipTVUserIE @@ -174,7 +175,6 @@ from .jadorecettepub import JadoreCettePubIE  from .jeuxvideo import JeuxVideoIE  from .jove import JoveIE  from .jukebox import JukeboxIE -from .justintv import JustinTVIE  from .jpopsukitv import JpopsukiIE  from .kankan import KankanIE  from .keezmovies import KeezMoviesIE @@ -317,6 +317,7 @@ from .sbs import SBSIE  from .scivee import SciVeeIE  from .screencast import ScreencastIE  from .servingsys import ServingSysIE +from .sexykarma import SexyKarmaIE  from .shared import SharedIE  from .sharesix import ShareSixIE  from .sina import SinaIE @@ -368,6 +369,7 @@ from .teachingchannel import TeachingChannelIE  from .teamcoco import TeamcocoIE  from .techtalks import TechTalksIE  from .ted import TEDIE +from .telecinco import TelecincoIE  from .telemb import TeleMBIE  from .tenplay import TenPlayIE  from .testurl import TestURLIE @@ -396,6 +398,7 @@ from .tutv import TutvIE  from .tvigle import TvigleIE  from .tvp import TvpIE  from .tvplay import TVPlayIE +from .twitch import TwitchIE  from .ubu import UbuIE  from .udemy import (      UdemyIE, @@ -421,6 +424,7 @@ from .videopremium import VideoPremiumIE  from .videott import VideoTtIE  from .videoweed import VideoWeedIE  from .vidme import VidmeIE +from .vidzi import VidziIE  from .vimeo import (      VimeoIE,      VimeoAlbumIE, @@ -489,10 +493,8 @@ from .youtube import (      YoutubeUserIE,      YoutubeWatchLaterIE,  ) -  from .zdf import ZDFIE -  _ALL_CLASSES = [      klass      for name, klass in globals().items() diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index c3d02f85e..b9a9440c0 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -10,8 +10,8 @@ from ..utils import (      unified_strdate,      determine_ext,      get_element_by_id, -    compat_str,      get_element_by_attribute, +    int_or_none,  )  # There are different sources of video in arte.tv, the extraction process  @@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):          if not upload_date_str:              upload_date_str = player_info.get('VDA', '').split(' ')[0] +        title = player_info['VTI'].strip() +        subtitle = player_info.get('VSU', '').strip() +        if subtitle: +            title += ' - %s' % subtitle +          info_dict = {              'id': player_info['VID'], -            'title': player_info['VTI'], +            'title': title,              'description': player_info.get('VDE'),              'upload_date': unified_strdate(upload_date_str),              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),          } -        all_formats = player_info['VSR'].values() +        all_formats = [] +        for format_id, format_dict in player_info['VSR'].items(): +            fmt = dict(format_dict) +            fmt['format_id'] = format_id +            all_formats.append(fmt)          # Some formats use the m3u8 protocol          all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))          def _match_lang(f): @@ -149,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):                  )          formats = sorted(formats, key=sort_key)          def _format(format_info): -            quality = '' -            height = format_info.get('height') -            if height is not None: -                quality = compat_str(height) -            bitrate = format_info.get('bitrate') -            if bitrate is not None: -                quality += '-%d' % bitrate -            if format_info.get('versionCode') is not None: -                format_id = '%s-%s' % (quality, format_info['versionCode']) -            else: -                format_id = quality              info = { -                'format_id': format_id, -                'format_note': format_info.get('versionLibelle'), -                'width': format_info.get('width'), -                'height': height, +                'format_id': format_info['format_id'], +                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), +                'width': int_or_none(format_info.get('width')), +                'height': int_or_none(format_info.get('height')), +                'tbr': int_or_none(format_info.get('bitrate')),              }              if format_info['mediaType'] == 'rtmp':                  info['url'] = format_info['streamer'] diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py new file mode 100644 index 000000000..0269d1174 --- /dev/null +++ b/youtube_dl/extractor/bild.py @@ -0,0 +1,39 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class BildIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html' +    IE_DESC = 'Bild.de' +    _TEST = { +        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html', +        'md5': 'dd495cbd99f2413502a1713a1156ac8a', +        'info_dict': { +            'id': '38184146', +            'ext': 'mp4', +            'title': 'BILD hat sie getestet', +            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg', +            'duration': 196, +            'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml" +        doc = self._download_xml(xml_url, video_id) + +        duration = int_or_none(doc.attrib.get('duration'), scale=1000) + +        return { +            'id': video_id, +            'title': doc.attrib['ueberschrift'], +            'description': doc.attrib.get('text'), +            'url': doc.attrib['src'], +            'thumbnail': doc.attrib.get('img'), +            'duration': duration, +        } diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 496271be4..d064a28f9 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') -        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) +        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)          if not mobj:              raise ExtractorError('Can\'t extract embed url and video id')          playerdata_url = mobj.group('embed_url') @@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):          video_description = self._html_search_regex(              r'<div class="entry-content">(?P<description>.+?)</div>',              webpage, 'description', flags=re.DOTALL, fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage)          playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') -        video_thumbnail = self._search_regex( -            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) -        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') -        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url') +        vidurl = self._search_regex( +            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') +        vidid = self._search_regex( +            r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid') +        videoserver = self._html_search_regex( +            r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver') + +        videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)          videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')          formats = [] -        baseurl = sd_url[:sd_url.rfind('/')+1] +        baseurl = vidurl[:vidurl.rfind('/')+1]          for video in videolist.findall('.//video'):              src = video.get('src')              if not src: diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index dae40c136..78877b1cf 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import (  class CNNIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/ -        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' +        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''      _TESTS = [{          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 450c7dfd6..e8366f7f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -281,6 +281,12 @@ class InfoExtractor(object):              raw_filename = basen + '.dump'              filename = sanitize_filename(raw_filename, restricted=True)              self.to_screen('Saving request to ' + filename) +            # Working around MAX_PATH limitation on Windows (see +            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) +            if os.name == 'nt': +                absfilepath = os.path.abspath(filename) +                if len(absfilepath) > 259: +                    filename = '\\\\?\\' + absfilepath              with open(filename, 'wb') as outf:                  outf.write(webpage_bytes) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f99888ecc..e3057d900 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -39,6 +39,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):              'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',              'uploader': 'Yomiuri Telecasting Corporation (YTV)',              'upload_date': '20131013', +            'url': 're:(?!.*&)',          },          'params': {              # rtmp @@ -237,12 +238,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text              streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format              streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')              streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) -            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format) -            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url') -            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path') +            streamdata = self._download_xml( +                streamdata_req, video_id, +                note='Downloading media info for %s' % video_format) +            video_url = streamdata.find('.//host').text +            video_play_path = streamdata.find('.//file').text              formats.append({                  'url': video_url, -                'play_path':   video_play_path, +                'play_path': video_play_path,                  'ext': 'flv',                  'format': video_format,                  'format_id': video_format, diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 0b3374d97..566e20d76 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                          f4m_format['preference'] = 1                      formats.extend(f4m_formats)              elif video_url.endswith('.m3u8'): -                formats.extend(self._extract_m3u8_formats(video_url, video_id)) +                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))              elif video_url.startswith('rtmp'):                  formats.append({                      'url': video_url, @@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                  formats.append({                      'url': video_url,                      'format_id': format_id, -                    'preference': 2, +                    'preference': -1,                  })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index d966e8403..ec6d96ada 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):          video_id = mobj.group('id')          webpage = self._download_webpage(url, video_id) -        links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage) +        links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)          if not links:              raise ExtractorError('No media links available for %s' % video_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 122763a23..9b6498894 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -380,6 +380,17 @@ class GenericIE(InfoExtractor):                  'uploader': 'education-portal.com',              },          }, +        { +            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz', +            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4', +            'info_dict': { +                'id': 'uxjb0lwrcz', +                'ext': 'mp4', +                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', +                'duration': 1715.0, +                'uploader': 'thoughtworks.wistia.com', +            },    +        },      ]      def report_following_redirect(self, new_url): @@ -476,7 +487,8 @@ class GenericIE(InfoExtractor):                       'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'                      ) % (url, url), expected=True)              else: -                assert ':' in default_search +                if ':' not in default_search: +                    default_search += ':'                  return self.url_result(default_search + url)          url, smuggled_data = unsmuggle_url(url) @@ -609,13 +621,13 @@ class GenericIE(InfoExtractor):          if mobj:              player_url = unescapeHTML(mobj.group('url'))              surl = smuggle_url(player_url, {'Referer': url}) -            return self.url_result(surl, 'Vimeo') +            return self.url_result(surl)          # Look for embedded (swf embed) Vimeo player          mobj = re.search( -            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) +            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)          if mobj: -            return self.url_result(mobj.group(1), 'Vimeo') +            return self.url_result(mobj.group(1))          # Look for embedded YouTube player          matches = re.findall(r'''(?x) @@ -652,17 +664,20 @@ class GenericIE(InfoExtractor):          # Look for embedded Wistia player          match = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) +            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)          if match: +            embed_url = self._proto_relative_url( +                unescapeHTML(match.group('url')))              return {                  '_type': 'url_transparent', -                'url': unescapeHTML(match.group('url')), +                'url': embed_url,                  'ie_key': 'Wistia',                  'uploader': video_uploader,                  'title': video_title,                  'id': video_id,              } -        match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) +             +        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)          if match:              return {                  '_type': 'url_transparent', diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py deleted file mode 100644 index 27017e89f..000000000 --- a/youtube_dl/extractor/justintv.py +++ /dev/null @@ -1,155 +0,0 @@ -from __future__ import unicode_literals - -import itertools -import json -import os -import re - -from .common import InfoExtractor -from ..utils import ( -    compat_str, -    ExtractorError, -    formatSeconds, -) - - -class JustinTVIE(InfoExtractor): -    """Information extractor for justin.tv and twitch.tv""" -    # TODO: One broadcast may be split into multiple videos. The key -    # 'broadcast_id' is the same for all parts, and 'broadcast_part' -    # starts at 1 and increases. Can we treat all parts as one video? - -    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/ -        (?: -            (?P<channelid>[^/]+)| -            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| -            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) -        ) -        /?(?:\#.*)?$ -        """ -    _JUSTIN_PAGE_LIMIT = 100 -    IE_NAME = 'justin.tv' -    IE_DESC = 'justin.tv and twitch.tv' -    _TEST = { -        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360', -        'md5': 'ecaa8a790c22a40770901460af191c9a', -        'info_dict': { -            'id': '296128360', -            'ext': 'flv', -            'upload_date': '20110927', -            'uploader_id': 25114803, -            'uploader': 'thegamedevhub', -            'title': 'Beginner Series - Scripting With Python Pt.1' -        } -    } - -    # Return count of items, list of *valid* items -    def _parse_page(self, url, video_id, counter): -        info_json = self._download_webpage( -            url, video_id, -            'Downloading video info JSON on page %d' % counter, -            'Unable to download video info JSON %d' % counter) - -        response = json.loads(info_json) -        if type(response) != list: -            error_text = response.get('error', 'unknown error') -            raise ExtractorError('Justin.tv API: %s' % error_text) -        info = [] -        for clip in response: -            video_url = clip['video_file_url'] -            if video_url: -                video_extension = os.path.splitext(video_url)[1][1:] -                video_date = re.sub('-', '', clip['start_time'][:10]) -                video_uploader_id = clip.get('user_id', clip.get('channel_id')) -                video_id = clip['id'] -                video_title = clip.get('title', video_id) -                info.append({ -                    'id': compat_str(video_id), -                    'url': video_url, -                    'title': video_title, -                    'uploader': clip.get('channel_name', video_uploader_id), -                    'uploader_id': video_uploader_id, -                    'upload_date': video_date, -                    'ext': video_extension, -                }) -        return (len(response), info) - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) - -        api_base = 'http://api.justin.tv' -        paged = False -        if mobj.group('channelid'): -            paged = True -            video_id = mobj.group('channelid') -            api = api_base + '/channel/archives/%s.json' % video_id -        elif mobj.group('chapterid'): -            chapter_id = mobj.group('chapterid') - -            webpage = self._download_webpage(url, chapter_id) -            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) -            if not m: -                raise ExtractorError('Cannot find archive of a chapter') -            archive_id = m.group(1) - -            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id -            doc = self._download_xml( -                api, chapter_id, -                note='Downloading chapter information', -                errnote='Chapter information download failed') -            for a in doc.findall('.//archive'): -                if archive_id == a.find('./id').text: -                    break -            else: -                raise ExtractorError('Could not find chapter in chapter information') - -            video_url = a.find('./video_file_url').text -            video_ext = video_url.rpartition('.')[2] or 'flv' - -            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id -            chapter_info = self._download_json( -                chapter_api_url, 'c' + chapter_id, -                note='Downloading chapter metadata', -                errnote='Download of chapter metadata failed') - -            bracket_start = int(doc.find('.//bracket_start').text) -            bracket_end = int(doc.find('.//bracket_end').text) - -            # TODO determine start (and probably fix up file) -            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 -            #video_url += '?start=' + TODO:start_timestamp -            # bracket_start is 13290, but we want 51670615 -            self._downloader.report_warning('Chapter detected, but we can just download the whole file. ' -                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) - -            info = { -                'id': 'c' + chapter_id, -                'url': video_url, -                'ext': video_ext, -                'title': chapter_info['title'], -                'thumbnail': chapter_info['preview'], -                'description': chapter_info['description'], -                'uploader': chapter_info['channel']['display_name'], -                'uploader_id': chapter_info['channel']['name'], -            } -            return info -        else: -            video_id = mobj.group('videoid') -            api = api_base + '/broadcast/by_archive/%s.json' % video_id - -        entries = [] -        offset = 0 -        limit = self._JUSTIN_PAGE_LIMIT -        for counter in itertools.count(1): -            page_url = api + ('?offset=%d&limit=%d' % (offset, limit)) -            page_count, page_info = self._parse_page( -                page_url, video_id, counter) -            entries.extend(page_info) -            if not paged or page_count != limit: -                break -            offset += limit -        return { -            '_type': 'playlist', -            'id': video_id, -            'entries': entries, -        } diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 979f3d692..6691521e5 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -6,6 +6,7 @@ import json  from .common import InfoExtractor  from ..utils import (      compat_urllib_parse, +    compat_urlparse,      get_element_by_attribute,      parse_duration,      strip_jsonp, @@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):          ).replace('\'', '"')          embed_data = json.loads(embed_data_json) -        info_url = embed_data['flashvars']['host'] +        domain = embed_data['mediaUrl'] +        if not domain.startswith('http'): +            # only happens in telecinco.es videos +            domain = 'http://' + domain +        info_url = compat_urlparse.urljoin( +            domain, +            compat_urllib_parse.unquote(embed_data['flashvars']['host']) +        )          info_el = self._download_xml(info_url, episode).find('./video/info')          video_link = info_el.find('videoUrl/link').text          token_query = compat_urllib_parse.urlencode({'id': video_link})          token_info = self._download_json( -            'http://token.mitele.es/?' + token_query, episode, +            embed_data['flashvars']['ov_tk'] + '?' + token_query, +            episode,              transform_source=strip_jsonp          ) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index a4564d3de..bb8937c4d 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):          },      } -    def check_urls(self, url_list): -        """Returns 1st active url from list""" -        for url in url_list: +    def _get_url(self, track_id, template_url): +        server_count = 30 +        for i in range(server_count): +            url = template_url % i              try:                  # We only want to know if the request succeed                  # don't download the whole file -                self._request_webpage(HEADRequest(url), None, False) +                self._request_webpage( +                    HEADRequest(url), track_id, +                    'Checking URL %d/%d ...' % (i + 1, server_count + 1))                  return url              except ExtractorError: -                url = None +                pass          return None -    def _get_url(self, template_url): -        return self.check_urls(template_url % i for i in range(30)) -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          uploader = mobj.group(1) @@ -61,11 +61,11 @@ class MixcloudIE(InfoExtractor):              r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')          song_url = preview_url.replace('/previews/', '/c/originals/')          template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) -        final_song_url = self._get_url(template_url) +        final_song_url = self._get_url(track_id, template_url)          if final_song_url is None:              self.to_screen('Trying with m4a extension')              template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') -            final_song_url = self._get_url(template_url) +            final_song_url = self._get_url(track_id, template_url)          if final_song_url is None:              raise ExtractorError('Unable to extract track url') diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8f140d626..6118ed5c2 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):                  'thumbnail': 're:^https?://.*\.jpg$',                  'upload_date': '20140122',              } +        }, +        { +            'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/', +            'info_dict': { +                'id': 'united-states-of-secrets', +            }, +            'playlist_count': 2,          } -      ]      def _extract_webpage(self, url): @@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):                  r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',                  webpage, 'upload date', default=None)) +            # tabbed frontline videos +            tabbed_videos = re.findall( +                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage) +            if tabbed_videos: +                return tabbed_videos, presumptive_id, upload_date +              MEDIA_ID_REGEXES = [                  r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed                  r'class="coveplayerid">([^<]+)<',                       # coveplayer @@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):      def _real_extract(self, url):          video_id, display_id, upload_date = self._extract_webpage(url) +        if isinstance(video_id, list): +            entries = [self.url_result( +                'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id) +                for vid_id in video_id] +            return self.playlist_result(entries, display_id) +          info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id          info = self._download_json(info_url, display_id) diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py new file mode 100644 index 000000000..c833fc8ee --- /dev/null +++ b/youtube_dl/extractor/sexykarma.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    unified_strdate, +    parse_duration, +    int_or_none, +) + + +class SexyKarmaIE(InfoExtractor): +    IE_DESC = 'Sexy Karma and Watch Indian Porn' +    _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' +    _TESTS = [{ +        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html', +        'md5': 'b9798e7d1ef1765116a8f516c8091dbd', +        'info_dict': { +            'id': 'yHI70cOyIHt', +            'display_id': 'taking-a-quick-pee', +            'ext': 'mp4', +            'title': 'Taking a quick pee.', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'wildginger7', +            'upload_date': '20141007', +            'duration': 22, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        } +    }, { +        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', +        'md5': 'dd216c68d29b49b12842b9babe762a5d', +        'info_dict': { +            'id': '8Id6EZPbuHf', +            'display_id': 'pot-pixie-tribute', +            'ext': 'mp4', +            'title': 'pot_pixie tribute', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'banffite', +            'upload_date': '20141013', +            'duration': 16, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        } +    }, { +        'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', +        'md5': '9afb80675550406ed9a63ac2819ef69d', +        'info_dict': { +            'id': 'dW2mtctxJfs', +            'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number', +            'ext': 'mp4', +            'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'Don', +            'upload_date': '20140213', +            'duration': 83, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        } +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') + +        webpage = self._download_webpage(url, display_id) + +        video_url = self._html_search_regex( +            r"url: escape\('([^']+)'\)", webpage, 'url') + +        title = self._html_search_regex( +            r'<h2 class="he2"><span>(.*?)</span>', +            webpage, 'title') +        thumbnail = self._html_search_regex( +            r'<span id="container"><img\s+src="([^"]+)"', +            webpage, 'thumbnail', fatal=False) + +        uploader = self._html_search_regex( +            r'class="aupa">\s*(.*?)</a>', +            webpage, 'uploader') +        upload_date = unified_strdate(self._html_search_regex( +            r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False)) + +        duration = parse_duration(self._search_regex( +            r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>', +            webpage, 'duration', fatal=False)) + +        view_count = int_or_none(self._search_regex( +            r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', +            webpage, 'view count', fatal=False)) +        comment_count = int_or_none(self._search_regex( +            r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', +            webpage, 'comment count', fatal=False)) + +        categories = re.findall( +            r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', +            webpage) + +        return { +            'id': video_id, +            'display_id': display_id, +            'url': video_url, +            'title': title, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'upload_date': upload_date, +            'duration': duration, +            'view_count': view_count, +            'comment_count': comment_count, +            'categories': categories, +        } diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d5e28efad..cd4af96fd 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -65,6 +65,22 @@ class TEDIE(SubtitlesInfoExtractor):              'title': 'Who are the hackers?',          },          'playlist_mincount': 6, +    }, { +        # contains a youtube video +        'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything', +        'add_ie': ['Youtube'], +        'info_dict': { +            'id': '_ZG8HBuDjgc', +            'ext': 'mp4', +            'title': 'Douglas Adams: Parrots the Universe and Everything', +            'description': 'md5:01ad1e199c49ac640cb1196c0e9016af', +            'uploader': 'University of California Television (UCTV)', +            'uploader_id': 'UCtelevision', +            'upload_date': '20080522', +        }, +        'params': { +            'skip_download': True, +        },      }]      _NATIVE_FORMATS = { @@ -114,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):          talk_info = self._extract_info(webpage)['talks'][0] +        if talk_info.get('external') is not None: +            self.to_screen('Found video from %s' % talk_info['external']['service']) +            return { +                '_type': 'url', +                'url': talk_info['external']['uri'], +            } +          formats = [{              'url': format_url,              'format_id': format_id, diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py new file mode 100644 index 000000000..db9788c18 --- /dev/null +++ b/youtube_dl/extractor/telecinco.py @@ -0,0 +1,19 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .mitele import MiTeleIE + + +class TelecincoIE(MiTeleIE): +    IE_NAME = 'telecinco.es' +    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html' + +    _TEST = { +        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', +        'info_dict': { +            'id': 'MDSVID20141015_0058', +            'ext': 'mp4', +            'title': 'Con Martín Berasategui, hacer un bacalao al ...', +            'duration': 662, +        }, +    } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py new file mode 100644 index 000000000..36aa1ad6e --- /dev/null +++ b/youtube_dl/extractor/twitch.py @@ -0,0 +1,187 @@ +from __future__ import unicode_literals + +import itertools +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    parse_iso8601, +) + + +class TwitchIE(InfoExtractor): +    # TODO: One broadcast may be split into multiple videos. The key +    # 'broadcast_id' is the same for all parts, and 'broadcast_part' +    # starts at 1 and increases. Can we treat all parts as one video? +    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/ +        (?: +            (?P<channelid>[^/]+)| +            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| +            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) +        ) +        /?(?:\#.*)?$ +        """ +    _PAGE_LIMIT = 100 +    _API_BASE = 'https://api.twitch.tv' +    _TESTS = [{ +        'url': 'http://www.twitch.tv/riotgames/b/577357806', +        'info_dict': { +            'id': 'a577357806', +            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG', +        }, +        'playlist_mincount': 12, +    }, { +        'url': 'http://www.twitch.tv/acracingleague/c/5285812', +        'info_dict': { +            'id': 'c5285812', +            'title': 'ACRL Off Season - Sports Cars @ Nordschleife', +        }, +        'playlist_mincount': 3, +    }, { +        'url': 'http://www.twitch.tv/vanillatv', +        'info_dict': { +            'id': 'vanillatv', +            'title': 'VanillaTV', +        }, +        'playlist_mincount': 412, +    }] + +    def _handle_error(self, response): +        if not isinstance(response, dict): +            return +        error = response.get('error') +        if error: +            raise ExtractorError( +                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')), +                expected=True) + +    def _download_json(self, url, video_id, note='Downloading JSON metadata'): +        response = super(TwitchIE, self)._download_json(url, video_id, note) +        self._handle_error(response) +        return response + +    def _extract_media(self, item, item_id): +        ITEMS = { +            'a': 'video', +            'c': 'chapter', +        } +        info = self._extract_info(self._download_json( +            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id, +            'Downloading %s info JSON' % ITEMS[item])) +        response = self._download_json( +            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id, +            'Downloading %s playlist JSON' % ITEMS[item]) +        entries = [] +        chunks = response['chunks'] +        qualities = list(chunks.keys()) +        for num, fragment in enumerate(zip(*chunks.values()), start=1): +            formats = [] +            for fmt_num, fragment_fmt in enumerate(fragment): +                format_id = qualities[fmt_num] +                fmt = { +                    'url': fragment_fmt['url'], +                    'format_id': format_id, +                    'quality': 1 if format_id == 'live' else 0, +                } +                m = re.search(r'^(?P<height>\d+)[Pp]', format_id) +                if m: +                    fmt['height'] = int(m.group('height')) +                formats.append(fmt) +            self._sort_formats(formats) +            entry = dict(info) +            entry['id'] = '%s_%d' % (entry['id'], num) +            entry['title'] = '%s part %d' % (entry['title'], num) +            entry['formats'] = formats +            entries.append(entry) +        return self.playlist_result(entries, info['id'], info['title']) + +    def _extract_info(self, info): +        return { +            'id': info['_id'], +            'title': info['title'], +            'description': info['description'], +            'duration': info['length'], +            'thumbnail': info['preview'], +            'uploader': info['channel']['display_name'], +            'uploader_id': info['channel']['name'], +            'timestamp': parse_iso8601(info['recorded_at']), +            'view_count': info['views'], +        } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        if mobj.group('chapterid'): +            return self._extract_media('c', mobj.group('chapterid')) + +            """ +            webpage = self._download_webpage(url, chapter_id) +            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) +            if not m: +                raise ExtractorError('Cannot find archive of a chapter') +            archive_id = m.group(1) + +            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id +            doc = self._download_xml( +                api, chapter_id, +                note='Downloading chapter information', +                errnote='Chapter information download failed') +            for a in doc.findall('.//archive'): +                if archive_id == a.find('./id').text: +                    break +            else: +                raise ExtractorError('Could not find chapter in chapter information') + +            video_url = a.find('./video_file_url').text +            video_ext = video_url.rpartition('.')[2] or 'flv' + +            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id +            chapter_info = self._download_json( +                chapter_api_url, 'c' + chapter_id, +                note='Downloading chapter metadata', +                errnote='Download of chapter metadata failed') + +            bracket_start = int(doc.find('.//bracket_start').text) +            bracket_end = int(doc.find('.//bracket_end').text) + +            # TODO determine start (and probably fix up file) +            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 +            #video_url += '?start=' + TODO:start_timestamp +            # bracket_start is 13290, but we want 51670615 +            self._downloader.report_warning('Chapter detected, but we can just download the whole file. ' +                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) + +            info = { +                'id': 'c' + chapter_id, +                'url': video_url, +                'ext': video_ext, +                'title': chapter_info['title'], +                'thumbnail': chapter_info['preview'], +                'description': chapter_info['description'], +                'uploader': chapter_info['channel']['display_name'], +                'uploader_id': chapter_info['channel']['name'], +            } +            return info +            """ +        elif mobj.group('videoid'): +            return self._extract_media('a', mobj.group('videoid')) +        elif mobj.group('channelid'): +            channel_id = mobj.group('channelid') +            info = self._download_json( +                '%s/kraken/channels/%s' % (self._API_BASE, channel_id), +                channel_id, 'Downloading channel info JSON') +            channel_name = info.get('display_name') or info.get('name') +            entries = [] +            offset = 0 +            limit = self._PAGE_LIMIT +            for counter in itertools.count(1): +                response = self._download_json( +                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d' +                    % (self._API_BASE, channel_id, offset, limit), +                    channel_id, 'Downloading channel videos JSON page %d' % counter) +                videos = response['videos'] +                if not videos: +                    break +                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos]) +                offset += limit +            return self.playlist_result(entries, channel_id, channel_name) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py new file mode 100644 index 000000000..669979e13 --- /dev/null +++ b/youtube_dl/extractor/vidzi.py @@ -0,0 +1,33 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VidziIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)' +    _TEST = { +        'url': 'http://vidzi.tv/cghql9yq6emu.html', +        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', +        'info_dict': { +            'id': 'cghql9yq6emu', +            'ext': 'mp4', +            'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +         +        webpage = self._download_webpage(url, video_id) +        video_url = self._html_search_regex( +            r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url') +        title = self._html_search_regex( +            r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') +         +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +        } +        
\ No newline at end of file diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index e6a86f18e..07959d3fe 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -157,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):                  'duration': 62,              }          }, +        { +            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ +            'url': 'https://player.vimeo.com/video/98044508', +            'note': 'The js code contains assignments to the same variable as the config', +            'info_dict': { +                'id': '98044508', +                'ext': 'mp4', +                'title': 'Pier Solar OUYA Official Trailer', +                'uploader': 'Tulio Gonçalves', +                'uploader_id': 'user28849593', +            }, +        },      ]      def _verify_video_password(self, url, video_id, webpage): @@ -244,7 +256,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):                  # We try to find out to which variable is assigned the config dic                  m_variable_name = re.search('(\w)\.video\.id', webpage)                  if m_variable_name is not None: -                    config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1)) +                    config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))                  else:                      config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']                  config = self._search_regex(config_re, webpage, 'info section', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cfae2de89..4ab56e0ac 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -191,8 +191,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      def _real_initialize(self):          if self._downloader is None:              return -        if not self._set_language(): -            return +        if self._get_login_info()[0] is not None: +            if not self._set_language(): +                return          if not self._login():              return          self._confirm_age() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 649361bde..2b1cd7438 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -159,6 +159,11 @@ def parseOpts(overrideArguments=None):          '--ignore-config',          action='store_true',          help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') +    general.add_option( +        '--flat-playlist', +        action='store_const', dest='extract_flat', const='in_playlist', +        default=False, +        help='Do not extract the videos of a playlist, only list them.')      selection = optparse.OptionGroup(parser, 'Video Selection')      selection.add_option( diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 416e2fd85..59cb3b1a1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.10.13' +__version__ = '2014.10.24'  | 
