diff options
26 files changed, 448 insertions, 86 deletions
| @@ -571,7 +571,7 @@ Support requests for services that **do** purchase the rights to distribute thei  ### How can I detect whether a given URL is supported by youtube-dl? -For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. +For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.  It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 216282712..6a5bd9eda 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -45,12 +45,12 @@ for test in get_testcases():          RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) -    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] -                   or test['info_dict']['age_limit'] != 18): +    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or +                   test['info_dict']['age_limit'] != 18):          print('\nPotential missing age_limit check: {0}'.format(test['name'])) -    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] -                         and test['info_dict']['age_limit'] == 18): +    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and +                         test['info_dict']['age_limit'] == 18):          print('\nPotential false negative: {0}'.format(test['name']))      else: diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f6ba28e7a..5fe3e47cd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -451,6 +451,7 @@   - **Turbo**   - **Tutv**   - **tv.dfb.de** + - **TV4**: tv4.se and tv4play.se   - **tvigle**: Интернет-телевидение Tvigle.ru   - **tvp.pl**   - **tvp.pl:Series** @@ -558,6 +559,7 @@   - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)   - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)   - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) + - **Zapiks**   - **ZDF**   - **ZDFChannel**   - **zingmp3:album**: mp3.zing.vn albums diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py index 9f18055e6..f1e899819 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -34,8 +34,8 @@ def _make_testfunc(testfile):      def test_func(self):          as_file = os.path.join(TEST_DIR, testfile)          swf_file = os.path.join(TEST_DIR, test_id + '.swf') -        if ((not os.path.exists(swf_file)) -                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): +        if ((not os.path.exists(swf_file)) or +                os.path.getmtime(swf_file) < os.path.getmtime(as_file)):              # Recompile              try:                  subprocess.check_call([ diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 88809783b..ca7c3f5c6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -308,8 +308,8 @@ class YoutubeDL(object):                      raise          if (sys.version_info >= (3,) and sys.platform != 'win32' and -                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] -                and not params.get('restrictfilenames', False)): +                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and +                not params.get('restrictfilenames', False)):              # On Python 3, the Unicode filesystem API will throw errors (#1474)              self.report_warning(                  'Assuming --restrict-filenames since file system encoding ' @@ -1366,8 +1366,8 @@ class YoutubeDL(object):          """Download a given list of URLs."""          outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)          if (len(url_list) > 1 and -                '%' not in outtmpl -                and self.params.get('max_downloads') != 1): +                '%' not in outtmpl and +                self.params.get('max_downloads') != 1):              raise SameFileError(outtmpl)          for url in url_list: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index eac2a26ec..25ab3fdfe 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,14 +189,14 @@ def _real_main(argv=None):          # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)          if opts.outtmpl is not None:              opts.outtmpl = opts.outtmpl.decode(preferredencoding()) -    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) -               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') -               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') -               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') -               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') -               or (opts.useid and '%(id)s.%(ext)s') -               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') -               or DEFAULT_OUTTMPL) +    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or +               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or +               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or +               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or +               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or +               (opts.useid and '%(id)s.%(ext)s') or +               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or +               DEFAULT_OUTTMPL)      if not os.path.splitext(outtmpl)[1] and opts.extractaudio:          parser.error('Cannot download a video and extract audio into the same'                       ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 45e55b99c..3ae90021a 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -311,14 +311,14 @@ class FileDownloader(object):          """          nooverwrites_and_exists = ( -            self.params.get('nooverwrites', False) -            and os.path.exists(encodeFilename(filename)) +            self.params.get('nooverwrites', False) and +            os.path.exists(encodeFilename(filename))          )          continuedl_and_exists = ( -            self.params.get('continuedl', False) -            and os.path.isfile(encodeFilename(filename)) -            and not self.params.get('nopart', False) +            self.params.get('continuedl', False) and +            os.path.isfile(encodeFilename(filename)) and +            not self.params.get('nopart', False)          )          # Check file already present diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b40ebfa50..7b8fe8cf5 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -325,8 +325,8 @@ class F4mFD(FileDownloader):                  state['frag_index'] += 1              estimated_size = ( -                (state['downloaded_bytes'] + frag_total_bytes) -                / (state['frag_index'] + 1) * total_frags) +                (state['downloaded_bytes'] + frag_total_bytes) / +                (state['frag_index'] + 1) * total_frags)              time_now = time.time()              state['total_bytes_estimate'] = estimated_size              state['elapsed'] = time_now - start diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cf58f0800..c3088fba2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -365,6 +365,7 @@ from .promptfile import PromptFileIE  from .prosiebensat1 import ProSiebenSat1IE  from .pyvideo import PyvideoIE  from .quickvid import QuickVidIE +from .r7 import R7IE  from .radiode import RadioDeIE  from .radiobremen import RadioBremenIE  from .radiofrance import RadioFranceIE @@ -494,6 +495,7 @@ from .tumblr import TumblrIE  from .tunein import TuneInIE  from .turbo import TurboIE  from .tutv import TutvIE +from .tv4 import TV4IE  from .tvigle import TvigleIE  from .tvp import TvpIE, TvpSeriesIE  from .tvplay import TVPlayIE @@ -615,6 +617,7 @@ from .youtube import (      YoutubeUserIE,      YoutubeWatchLaterIE,  ) +from .zapiks import ZapiksIE  from .zdf import ZDFIE, ZDFChannelIE  from .zingmp3 import (      ZingMp3SongIE, diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 28e07f8b0..97d128560 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) -          webpage = self._download_webpage(url, video_id)          player = self._parse_json( @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):              self._html_search_meta('datepublished', webpage, 'upload date'))          duration = parse_duration( -            self._html_search_meta('duration', webpage, 'duration') -            or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) +            self._html_search_meta('duration', webpage, 'duration') or +            self._search_regex( +                r'Runtime:\s*(\d{2}:\d{2}:\d{2})', +                webpage, 'duration', fatal=False))          view_count = str_to_int(self._search_regex(              r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 43e82847f..576f03b5b 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -11,8 +11,8 @@ from ..utils import (  class AppleTrailersIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' +    _TESTS = [{          "url": "http://trailers.apple.com/trailers/wb/manofsteel/",          'info_dict': {              'id': 'manofsteel', @@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):                  },              },          ] -    } +    }, { +        'url': 'http://trailers.apple.com/ca/metropole/autrui/', +        'only_matching': True, +    }]      _JSON_RE = r'iTunes.playURL\((.*?)\);' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 08b8ad37c..79f6d199b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -391,6 +391,16 @@ class InfoExtractor(object):              if blocked_iframe:                  msg += ' Visit %s for more details' % blocked_iframe              raise ExtractorError(msg, expected=True) +        if '<title>The URL you requested has been blocked</title>' in content[:512]: +            msg = ( +                'Access to this webpage has been blocked by Indian censorship. ' +                'Use a VPN or proxy server (with --proxy) to route around it.') +            block_msg = self._html_search_regex( +                r'</h1><p>(.*?)</p>', +                content, 'block message', default=None) +            if block_msg: +                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') +            raise ExtractorError(msg, expected=True)          return content @@ -798,8 +808,8 @@ class InfoExtractor(object):              media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')          for i, media_el in enumerate(media_nodes):              if manifest_version == '2.0': -                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' -                                + (media_el.attrib.get('href') or media_el.attrib.get('url'))) +                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + +                                (media_el.attrib.get('href') or media_el.attrib.get('url')))              tbr = int_or_none(media_el.attrib.get('bitrate'))              formats.append({                  'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), @@ -823,7 +833,7 @@ class InfoExtractor(object):              'url': m3u8_url,              'ext': ext,              'protocol': 'm3u8', -            'preference': -1, +            'preference': preference - 1 if preference else -1,              'resolution': 'multiple',              'format_note': 'Quality selection URL',          }] diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index 2b90bf4fc..98e3aedfd 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):              r"flashvars.pvg_id=\"(\d+)\";",              webpage, 'ID') -        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' -                    + video_id) +        json_url = ( +            'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % +            video_id)          info = self._download_json(json_url, title, 'Downloading JSON config')          video_url = info['renditions'][0]['url'] diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 4303feccd..b49b9869f 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor):              'uploader_id': 'the-escapist-presents',              'uploader': 'The Escapist Presents',              'title': "Breaking Down Baldur's Gate", +            'thumbnail': 're:^https?://.*\.jpg$',          }      } @@ -30,19 +31,18 @@ class EscapistIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          uploader_id = self._html_search_regex( -            r"<h1 class='headline'><a href='/videos/view/(.*?)'", +            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",              webpage, 'uploader ID', fatal=False)          uploader = self._html_search_regex( -            r"<h1 class='headline'>(.*?)</a>", +            r"<h1\s+class='headline'>(.*?)</a>",              webpage, 'uploader', fatal=False)          description = self._html_search_meta('description', webpage)          raw_title = self._html_search_meta('title', webpage, fatal=True)          title = raw_title.partition(' : ')[2] -        player_url = self._og_search_video_url(webpage, name='player URL') -        config_url = compat_urllib_parse.unquote(self._search_regex( -            r'config=(.*)$', player_url, 'config URL')) +        config_url = compat_urllib_parse.unquote(self._html_search_regex( +            r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))          formats = [] @@ -81,5 +81,4 @@ class EscapistIE(InfoExtractor):              'title': title,              'thumbnail': self._og_search_thumbnail(webpage),              'description': description, -            'player_url': player_url,          } diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index fed968f51..05f58f1af 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -7,6 +7,7 @@ from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) +from ..utils import remove_end  class GDCVaultIE(InfoExtractor): @@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor):          akami_url = xml_description.find('./metadata/akamaiHost').text          slide_video_path = xml_description.find('./metadata/slideVideo').text          video_formats.append({ -            'url': 'rtmp://' + akami_url + '/' + slide_video_path, +            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', +            'play_path': remove_end(slide_video_path, '.flv'), +            'ext': 'flv',              'format_note': 'slide deck video',              'quality': -2,              'preference': -2, @@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):          })          speaker_video_path = xml_description.find('./metadata/speakerVideo').text          video_formats.append({ -            'url': 'rtmp://' + akami_url + '/' + speaker_video_path, +            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', +            'play_path': remove_end(speaker_video_path, '.flv'), +            'ext': 'flv',              'format_note': 'speaker video',              'quality': -1,              'preference': -1, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8dce96a64..875e1bf05 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -547,7 +547,16 @@ class GenericIE(InfoExtractor):                  'id': 'aanslagen-kopenhagen',                  'title': 'Aanslagen Kopenhagen | RTL Nieuws',              } -        } +        }, +        # Zapiks embed +        { +            'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', +            'info_dict': { +                'id': '118046', +                'ext': 'mp4', +                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', +            } +        },      ]      def report_following_redirect(self, new_url): @@ -1098,6 +1107,12 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'Livestream') +        # Look for Zapiks embed +        mobj = re.search( +            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url'), 'Zapiks') +          def check_video(vurl):              if YoutubeIE.suitable(vurl):                  return True diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index b16c7aed0..fe5d95e2c 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -19,16 +19,16 @@ class ImgurIE(InfoExtractor):          'info_dict': {              'id': 'A61SaA1',              'ext': 'mp4', -            'title': 'MRW gifv is up and running without any bugs', -            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', +            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', +            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',          },      }, {          'url': 'https://imgur.com/A61SaA1',          'info_dict': {              'id': 'A61SaA1',              'ext': 'mp4', -            'title': 'MRW gifv is up and running without any bugs', -            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', +            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', +            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',          },      }] diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py new file mode 100644 index 000000000..976c8feec --- /dev/null +++ b/youtube_dl/extractor/r7.py @@ -0,0 +1,88 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    js_to_json, +    unescapeHTML, +    int_or_none, +) + + +class R7IE(InfoExtractor): +    _VALID_URL = r'''(?x)https?:// +                        (?: +                            (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| +                            noticias\.r7\.com(?:/[^/]+)+/[^/]+-| +                            player\.r7\.com/video/i/ +                        ) +                        (?P<id>[\da-f]{24}) +                        ''' +    _TESTS = [{ +        'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', +        'md5': '403c4e393617e8e8ddc748978ee8efde', +        'info_dict': { +            'id': '54e7050b0cf2ff57e0279389', +            'ext': 'mp4', +            'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 98, +            'like_count': int, +            'view_count': int, +        }, +    }, { +        'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', +        'only_matching': True, +    }, { +        'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', +        'only_matching': True, +    }, { +        'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage( +            'http://player.r7.com/video/i/%s' % video_id, video_id) + +        item = self._parse_json(js_to_json(self._search_regex( +            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) + +        title = unescapeHTML(item['title']) +        thumbnail = item.get('init', {}).get('thumbUri') +        duration = None + +        statistics = item.get('statistics', {}) +        like_count = int_or_none(statistics.get('likes')) +        view_count = int_or_none(statistics.get('views')) + +        formats = [] +        for format_key, format_dict in item['playlist'][0].items(): +            src = format_dict.get('src') +            if not src: +                continue +            format_id = format_dict.get('format') or format_key +            if duration is None: +                duration = format_dict.get('duration') +            if '.f4m' in src: +                formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) +            elif src.endswith('.m3u8'): +                formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) +            else: +                formats.append({ +                    'url': src, +                    'format_id': format_id, +                }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'duration': duration, +            'like_count': like_count, +            'view_count': view_count, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 3469d9578..e60f85b5b 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,6 +6,7 @@ import re  import time  from .common import InfoExtractor +from ..compat import compat_urlparse  from ..utils import (      struct_unpack,      remove_end, @@ -96,12 +97,10 @@ class RTVEALaCartaIE(InfoExtractor):              ).replace('.net.rtve', '.multimedia.cdn.rtve')              video_path = self._download_webpage(                  auth_url, video_id, 'Getting video url') -            # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get +            # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get              # the right Content-Length header and the mp4 format -            video_url = ( -                'http://mvod.akcdn.rtve.es/{0}&v=2.6.8' -                '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path) -            ) +            video_url = compat_urlparse.urljoin( +                'http://mvod1.akcdn.rtve.es/', video_path)          return {              'id': video_id, diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index a73da1c9c..5793dbc10 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,8 +1,10 @@  from __future__ import unicode_literals +import base64  import re  from .common import InfoExtractor +from ..utils import qualities  class TeamcocoIE(InfoExtractor): @@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor):              'info_dict': {                  'id': '19705',                  'ext': 'mp4', -                "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", -                "title": "Louis C.K. Interview Pt. 1 11/3/11", +                'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', +                'title': 'Louis C.K. Interview Pt. 1 11/3/11',                  'age_limit': 0,              }          } @@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor):          display_id = mobj.group('display_id')          webpage = self._download_webpage(url, display_id) -        video_id = mobj.group("video_id") +        video_id = mobj.group('video_id')          if not video_id:              video_id = self._html_search_regex(                  self._VIDEO_ID_REGEXES, webpage, 'video id') -        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id -        data = self._download_xml( -            data_url, display_id, 'Downloading data webpage') +        embed_url = 'http://teamcoco.com/embed/v/%s' % video_id +        embed = self._download_webpage( +            embed_url, video_id, 'Downloading embed page') + +        encoded_data = self._search_regex( +            r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') +        data = self._parse_json( +            base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) -        qualities = ['500k', '480p', '1000k', '720p', '1080p']          formats = [] -        for filed in data.findall('files/file'): -            if filed.attrib.get('playmode') == 'all': -                # it just duplicates one of the entries -                break -            file_url = filed.text -            m_format = re.search(r'(\d+(k|p))\.mp4', file_url) +        get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) +        for filed in data['files']: +            m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])              if m_format is not None:                  format_id = m_format.group(1)              else: -                format_id = filed.attrib['bitrate'] +                format_id = filed['bitrate']              tbr = ( -                int(filed.attrib['bitrate']) -                if filed.attrib['bitrate'].isdigit() +                int(filed['bitrate']) +                if filed['bitrate'].isdigit()                  else None) -            try: -                quality = qualities.index(format_id) -            except ValueError: -                quality = -1              formats.append({ -                'url': file_url, +                'url': filed['url'],                  'ext': 'mp4',                  'tbr': tbr,                  'format_id': format_id, -                'quality': quality, +                'quality': get_quality(format_id),              })          self._sort_formats(formats) @@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor):              'id': video_id,              'display_id': display_id,              'formats': formats, -            'title': self._og_search_title(webpage), -            'thumbnail': self._og_search_thumbnail(webpage), -            'description': self._og_search_description(webpage), +            'title': data['title'], +            'thumbnail': data.get('thumb', {}).get('href'), +            'description': data.get('teaser'),              'age_limit': self._family_friendly_search(webpage),          } diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 10b3b706a..59678399d 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):          'params': {              'skip_download': True,          }, +    }, { +        # YouTube video +        'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', +        'add_ie': ['Youtube'], +        'info_dict': { +            'id': 'aFBIPO-P7LM', +            'ext': 'mp4', +            'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', +            'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', +            'uploader': 'TEDx Talks', +            'uploader_id': 'TEDxTalks', +            'upload_date': '20111216', +        }, +        'params': { +            'skip_download': True, +        },      }]      _NATIVE_FORMATS = { @@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor):          talk_info = self._extract_info(webpage)['talks'][0] -        if talk_info.get('external') is not None: -            self.to_screen('Found video from %s' % talk_info['external']['service']) +        external = talk_info.get('external') +        if external: +            service = external['service'] +            self.to_screen('Found video from %s' % service) +            ext_url = None +            if service.lower() == 'youtube': +                ext_url = external.get('code')              return {                  '_type': 'url', -                'url': talk_info['external']['uri'], +                'url': ext_url or external['uri'],              }          formats = [{ diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py new file mode 100644 index 000000000..1c4b6d635 --- /dev/null +++ b/youtube_dl/extractor/tv4.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    parse_iso8601, +) + + +class TV4IE(InfoExtractor): +    IE_DESC = 'tv4.se and tv4play.se' +    _VALID_URL = r'''(?x)https?://(?:www\.)? +        (?: +            tv4\.se/(?:[^/]+)/klipp/(?:.*)-| +            tv4play\.se/ +            (?: +                (?:program|barn)/(?:[^\?]+)\?video_id=| +                iframe/video/| +                film/| +                sport/| +            ) +        )(?P<id>[0-9]+)''' +    _TESTS = [ +        { +            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', +            'md5': '909d6454b87b10a25aa04c4bdd416a9b', +            'info_dict': { +                'id': '2491650', +                'ext': 'mp4', +                'title': 'Kalla Fakta 5 (english subtitles)', +                'thumbnail': 're:^https?://.*\.jpg$', +                'timestamp': int, +                'upload_date': '20131125', +            }, +        }, +        { +            'url': 'http://www.tv4play.se/iframe/video/3054113', +            'md5': '77f851c55139ffe0ebd41b6a5552489b', +            'info_dict': { +                'id': '3054113', +                'ext': 'mp4', +                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder', +                'thumbnail': 're:^https?://.*\.jpg$', +                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.', +                'timestamp': int, +                'upload_date': '20150130', +            }, +        }, +        { +            'url': 'http://www.tv4play.se/sport/3060959', +            'only_matching': True, +        }, +        { +            'url': 'http://www.tv4play.se/film/2378136', +            'only_matching': True, +        }, +        { +            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412', +            'only_matching': True, +        }, +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        info = self._download_json( +            'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') + +        # If is_geo_restricted is true, it doesn't neceserally mean we can't download it +        if info['is_geo_restricted']: +            self.report_warning('This content might not be available in your country due to licensing restrictions.') +        if info['requires_subscription']: +            raise ExtractorError('This content requires subscription.', expected=True) + +        sources_data = self._download_json( +            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') +        sources = sources_data['playback'] + +        formats = [] +        for item in sources.get('items', {}).get('item', []): +            ext, bitrate = item['mediaFormat'], item['bitrate'] +            formats.append({ +                'format_id': '%s_%s' % (ext, bitrate), +                'tbr': bitrate, +                'ext': ext, +                'url': item['url'], +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': info['title'], +            'formats': formats, +            'description': info.get('description'), +            'timestamp': parse_iso8601(info.get('broadcast_date_time')), +            'duration': info.get('duration'), +            'thumbnail': info.get('image'), +            'is_live': sources.get('live'), +        } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 78d287e0e..4cd2f73d9 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  import json  import re  import itertools +import hashlib  from .common import InfoExtractor  from .subtitles import SubtitlesInfoExtractor @@ -225,6 +226,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):          if mobj.group('pro') or mobj.group('player'):              url = 'http://player.vimeo.com/video/' + video_id +        password = self._downloader.params.get('videopassword', None) +        if password: +            headers['Cookie'] = '%s_password=%s' % ( +                video_id, hashlib.md5(password.encode('utf-8')).hexdigest()) +          # Retrieve video webpage to extract further information          request = compat_urllib_request.Request(url, None, headers)          try: diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py new file mode 100644 index 000000000..22a9a57e8 --- /dev/null +++ b/youtube_dl/extractor/zapiks.py @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    parse_duration, +    parse_iso8601, +    xpath_with_ns, +    xpath_text, +    int_or_none, +) + + +class ZapiksIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' +    _TESTS = [ +        { +            'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', +            'md5': 'aeb3c473b2d564b2d46d664d28d5f050', +            'info_dict': { +                'id': '80798', +                'ext': 'mp4', +                'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', +                'description': 'md5:7054d6f6f620c6519be1fe710d4da847', +                'thumbnail': 're:^https?://.*\.jpg$', +                'duration': 528, +                'timestamp': 1359044972, +                'upload_date': '20130124', +                'view_count': int, +                'comment_count': int, +            }, +        }, +        { +            'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', +            'only_matching': True, +        }, +        { +            'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', +            'only_matching': True, +        }, +        { +            'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', +            'only_matching': True, +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') or video_id + +        webpage = self._download_webpage(url, display_id) + +        if not video_id: +            video_id = self._search_regex( +                r'data-media-id="(\d+)"', webpage, 'video id') + +        playlist = self._download_xml( +            'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id, +            display_id) + +        NS_MAP = { +            'jwplayer': 'http://rss.jwpcdn.com/' +        } + +        def ns(path): +            return xpath_with_ns(path, NS_MAP) + +        item = playlist.find('./channel/item') + +        title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage) +        description = self._og_search_description(webpage, default=None) +        thumbnail = xpath_text( +            item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None) +        duration = parse_duration(self._html_search_meta( +            'duration', webpage, 'duration', default=None)) +        timestamp = parse_iso8601(self._html_search_meta( +            'uploadDate', webpage, 'upload date', default=None), ' ') + +        view_count = int_or_none(self._search_regex( +            r'UserPlays:(\d+)', webpage, 'view count', default=None)) +        comment_count = int_or_none(self._search_regex( +            r'UserComments:(\d+)', webpage, 'comment count', default=None)) + +        formats = [] +        for source in item.findall(ns('./jwplayer:source')): +            format_id = source.attrib['label'] +            f = { +                'url': source.attrib['file'], +                'format_id': format_id, +            } +            m = re.search(r'^(?P<height>\d+)[pP]', format_id) +            if m: +                f['height'] = int(m.group('height')) +            formats.append(f) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'timestamp': timestamp, +            'view_count': view_count, +            'comment_count': comment_count, +            'formats': formats, +        } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 238b6556b..475fad3c9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -900,8 +900,8 @@ def _windows_write_string(s, out):      def not_a_console(handle):          if handle == INVALID_HANDLE_VALUE or handle is None:              return True -        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR -                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) +        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or +                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)      if not_a_console(h):          return False diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 537e8cf60..7c8b29c3b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.02.19.3' +__version__ = '2015.02.21' | 
