diff options
38 files changed, 595 insertions, 178 deletions
| diff --git a/.gitignore b/.gitignore index 86312d4e4..0422adf44 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ updates_key.pem  test/testdata  .tox  youtube-dl.zsh +.idea +.idea/*
\ No newline at end of file diff --git a/.travis.yml b/.travis.yml index c6cc7a994..f14014414 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications:    email:      - filippo.valsorda@gmail.com      - phihag@phihag.de -    - jaime.marquinez.ferrandiz+travis@gmail.com      - yasoob.khld@gmail.com  #  irc:  #    channels: @@ -98,3 +98,5 @@ Will Glynn  Max Reimann  Cédric Luthi  Thijs Vermeir +Joel Leclerc +Christopher Krooss @@ -46,7 +46,7 @@ test:  ot: offlinetest  offlinetest: codetest -	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations +	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists  tar: youtube-dl.tar.gz diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8e4f930e..730f7ec26 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -218,7 +218,7 @@ class TestFormatSelection(unittest.TestCase):              # 3D              '85', '84', '102', '83', '101', '82', '100',              # Dash video -            '138', '137', '248', '136', '247', '135', '246', +            '137', '248', '136', '247', '135', '246',              '245', '244', '134', '243', '133', '242', '160',              # Dash audio              '141', '172', '140', '171', '139', diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e2b823f66..806e7b239 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1333,7 +1333,9 @@ class YoutubeDL(object):          formats = info_dict.get('formats', [info_dict])          idlen = max(len('format code'),                      max(len(f['format_id']) for f in formats)) -        formats_s = [line(f, idlen) for f in formats] +        formats_s = [ +            line(f, idlen) for f in formats +            if f.get('preference') is None or f['preference'] >= -1000]          if len(formats) > 1:              formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'              formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import (      compat_urllib_request,  )  from ..utils import ( -    check_executable,      encodeFilename,  ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader):              '-bsf:a', 'aac_adtstoasc',              encodeFilename(tmpfilename, for_subprocess=True)] -        for program in ['avconv', 'ffmpeg']: -            if check_executable(program, ['-version']): -                break -        else: +        ffpp = FFmpegPostProcessor(downloader=self) +        program = ffpp._executable +        if program is None:              self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')              return False -        cmd = [program] + args - -        ffpp = FFmpegPostProcessor(downloader=self)          ffpp.check_version() +        cmd = [program] + args          retval = subprocess.call(cmd)          if retval == 0: diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index c53195da0..72cef30ea 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -4,8 +4,8 @@ import os  import subprocess  from .common import FileDownloader -from ..compat import compat_subprocess_get_DEVNULL  from ..utils import ( +    check_executable,      encodeFilename,  ) @@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):              'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',              '-dumpstream', '-dumpfile', tmpfilename, url]          # Check for mplayer first -        try: -            subprocess.call( -                ['mplayer', '-h'], -                stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT) -        except (OSError, IOError): +        if not check_executable('mplayer', ['-h']):              self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])              return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9ccd1b32e..b523e9644 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -159,6 +159,7 @@ from .gametrailers import GametrailersIE  from .gdcvault import GDCVaultIE  from .generic import GenericIE  from .giantbomb import GiantBombIE +from .giga import GigaIE  from .glide import GlideIE  from .globo import GloboIE  from .godtube import GodTubeIE @@ -325,6 +326,7 @@ from .prosiebensat1 import ProSiebenSat1IE  from .pyvideo import PyvideoIE  from .quickvid import QuickVidIE  from .radiode import RadioDeIE +from .radiobremen import RadioBremenIE  from .radiofrance import RadioFranceIE  from .rai import RaiIE  from .rbmaradio import RBMARadioIE @@ -345,6 +347,7 @@ from .ruhd import RUHDIE  from .rutube import (      RutubeIE,      RutubeChannelIE, +    RutubeEmbedIE,      RutubeMovieIE,      RutubePersonIE,  ) @@ -510,6 +513,7 @@ from .wdr import (      WDRMobileIE,      WDRMausIE,  ) +from .webofstories import WebOfStoriesIE  from .weibo import WeiboIE  from .wimp import WimpIE  from .wistia import WistiaIE @@ -545,7 +549,6 @@ from .youtube import (      YoutubeSearchURLIE,      YoutubeShowIE,      YoutubeSubscriptionsIE, -    YoutubeTopListIE,      YoutubeTruncatedIDIE,      YoutubeTruncatedURLIE,      YoutubeUserIE, diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 014a21952..a1b666be0 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse  from ..utils import (      determine_ext,      ExtractorError, +    remove_end,  ) @@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title') -        title = title.strip() -        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) -        links = map(compat_urllib_parse.unquote, links) - -        thumbnail = None -        video_url = None -        for link in links: -            if link.endswith('.png'): -                thumbnail = link -            elif '/videos/' in link: -                video_url = link +        title = self._html_search_regex( +            r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title') +        video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) +        video_url = compat_urllib_parse.unquote(video_urls[0]) +        thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) +        thumbnail = compat_urllib_parse.unquote(thumbnails[0]) +          if not video_url:              raise ExtractorError('Could not find video URL') +          ext = '.' + determine_ext(video_url) -        if ext == title[-len(ext):]: -            title = title[:-len(ext)] +        title = remove_end(title, ext)          return {              'id': video_id, diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 73fe66b01..1cf48fe0d 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -10,7 +10,7 @@ from ..compat import compat_HTTPError  class BBCCoUkIE(SubtitlesInfoExtractor):      IE_NAME = 'bbc.co.uk'      IE_DESC = 'BBC iPlayer' -    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/(?:episode|playlist))/(?P<id>[\da-z]{8})' +    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'      _TESTS = [          { @@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):              'info_dict': {                  'id': 'b039d07m',                  'ext': 'flv', -                'title': 'Kaleidoscope: Leonard Cohen', -                'description': 'md5:db4755d7a665ae72343779f7dacb402c', +                'title': 'Kaleidoscope, Leonard Cohen', +                'description': 'The Canadian poet and songwriter reflects on his musical career.',                  'duration': 1740,              },              'params': { @@ -85,8 +85,39 @@ class BBCCoUkIE(SubtitlesInfoExtractor):                  'skip_download': True,              }          }, { +            'url': 'http://www.bbc.co.uk/music/clips/p02frcc3', +            'note': 'Audio', +            'info_dict': { +                'id': 'p02frcch', +                'ext': 'flv', +                'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix', +                'description': 'French house superstar Madeon takes us out of the club and onto the after party.', +                'duration': 3507, +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            } +        }, { +            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', +            'note': 'Video', +            'info_dict': { +                'id': 'p025c103', +                'ext': 'flv', +                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', +                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', +                'duration': 226, +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            } +        }, {              'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',              'only_matching': True, +        }, { +            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3', +            'only_matching': True,          }      ] diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 003e50002..4fbdd6f1c 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -16,7 +16,7 @@ class BetIE(InfoExtractor):          {              'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',              'info_dict': { -                'id': '417cd61c-c793-4e8e-b006-e445ecc45add', +                'id': '406429c6-1b8a-463e-83fc-814adb81a9db',                  'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',                  'ext': 'flv',                  'title': 'BET News Presents: A Conversation With President Obama', diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 9873728df..11d18d74a 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -5,6 +5,8 @@ import re  from .common import InfoExtractor  from ..utils import ( +    ExtractorError, +    HEADRequest,      unified_strdate,      url_basename,      qualities, @@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):          preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']) +        fmt_url = next(iter(media.find('VIDEOS'))).text +        if '/geo' in fmt_url.lower(): +            response = self._request_webpage( +                HEADRequest(fmt_url), video_id, +                'Checking if the video is georestricted') +            if '/blocage' in response.geturl(): +                raise ExtractorError( +                    'The video is not available in your country', +                    expected=True) +          formats = []          for fmt in media.find('VIDEOS'):              format_url = fmt.text diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6e264f687..562e656e0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -92,6 +92,8 @@ class InfoExtractor(object):                                   by this field, regardless of all other values.                                   -1 for default (order by other properties),                                   -2 or smaller for less than default. +                                 < -1000 to hide the format (if there is +                                    another one which is strictly better)                      * language_preference  Is this in the correct requested                                   language?                                   10 if it's what the URL is about, diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 354046a9e..1680f532f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -228,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text          video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)          formats = [] -        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): +        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):              stream_quality, stream_format = self._FORMAT_IDS[fmt]              video_format = fmt + 'p'              streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 3e7923648..fc92ff825 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -1,7 +1,6 @@  # coding: utf-8  from __future__ import unicode_literals -import re  import json  from .common import InfoExtractor @@ -12,32 +11,49 @@ from ..utils import (  class EllenTVIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' +    _TESTS = [{          'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',          'md5': 'e4af06f3bf0d5f471921a18db5764642',          'info_dict': {              'id': '0-7jqrsr18',              'ext': 'mp4',              'title': 'What\'s Wrong with These Photos? A Whole Lot', +            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',              'timestamp': 1406876400,              'upload_date': '20140801',          } -    } +    }, { +        'url': 'http://ellentube.com/videos/0-dvzmabd5/', +        'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb', +        'info_dict': { +            'id': '0-dvzmabd5', +            'ext': 'mp4', +            'title': '1 year old twin sister makes her brother laugh', +            'description': '1 year old twin sister makes her brother laugh', +            'timestamp': 1419542075, +            'upload_date': '20141225', +        } +    }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +        video_url = self._html_search_meta('VideoURL', webpage, 'url') +        title = self._og_search_title(webpage, default=None) or self._search_regex( +            r'pageName\s*=\s*"([^"]+)"', webpage, 'title') +        description = self._html_search_meta( +            'description', webpage, 'description') or self._og_search_description(webpage)          timestamp = parse_iso8601(self._search_regex(              r'<span class="publish-date"><time datetime="([^"]+)">',              webpage, 'timestamp'))          return {              'id': video_id, -            'title': self._og_search_title(webpage), -            'url': self._html_search_meta('VideoURL', webpage, 'url'), +            'url': video_url, +            'title': title, +            'description': description,              'timestamp': timestamp,          } @@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        playlist_id = mobj.group('id') +        playlist_id = self._match_id(url)          webpage = self._download_webpage(url, playlist_id)          playlist = self._extract_playlist(webpage) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 493afb57d..7a5bf9392 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -131,12 +131,13 @@ class GenericIE(InfoExtractor):          # ooyala video          {              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', -            'md5': '5644c6ca5d5782c1d0d350dad9bd840c', +            'md5': '166dd577b433b4d4ebfee10b0824d8ff',              'info_dict': {                  'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',                  'ext': 'mp4',                  'title': '2cc213299525360.mov',  # that's what we get              }, +            'add_ie': ['Ooyala'],          },          # google redirect          { @@ -146,7 +147,7 @@ class GenericIE(InfoExtractor):                  'ext': 'mp4',                  'upload_date': '20130224',                  'uploader_id': 'TheVerge', -                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', +                'description': 're:^Chris Ziegler takes a look at the\.*',                  'uploader': 'The Verge',                  'title': 'First Firefox OS phones side-by-side',              }, @@ -925,7 +926,7 @@ class GenericIE(InfoExtractor):          # Look for embedded TED player          mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) +            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)          if mobj is not None:              return self.url_result(mobj.group('url'), 'TED') diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py new file mode 100644 index 000000000..775890112 --- /dev/null +++ b/youtube_dl/extractor/giga.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..utils import ( +    qualities, +    compat_str, +    parse_duration, +    parse_iso8601, +    str_to_int, +) + + +class GigaIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)' +    _TESTS = [{ +        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/', +        'md5': '6bc5535e945e724640664632055a584f', +        'info_dict': { +            'id': '2622086', +            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss', +            'ext': 'mp4', +            'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss', +            'description': 'md5:afdf5862241aded4718a30dff6a57baf', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 578, +            'timestamp': 1414749706, +            'upload_date': '20141031', +            'uploader': 'Robin Schweiger', +            'view_count': int, +        }, +    }, { +        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/', +        'only_matching': True, +    }, { +        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/', +        'only_matching': True, +    }, { +        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        video_id = self._search_regex( +            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'], +            webpage, 'video id') + +        playlist = self._download_json( +            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/' +            % video_id, video_id)[0] + +        quality = qualities(['normal', 'hd720']) + +        formats = [] +        for format_id in itertools.count(0): +            fmt = playlist.get(compat_str(format_id)) +            if not fmt: +                break +            formats.append({ +                'url': fmt['src'], +                'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]), +                'quality': quality(fmt['quality']), +            }) +        self._sort_formats(formats) + +        title = self._html_search_meta( +            'title', webpage, 'title', fatal=True) +        description = self._html_search_meta( +            'description', webpage, 'description') +        thumbnail = self._og_search_thumbnail(webpage) + +        duration = parse_duration(self._search_regex( +            r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id), +            webpage, 'duration', fatal=False)) + +        timestamp = parse_iso8601(self._search_regex( +            r'datetime="([^"]+)"', webpage, 'upload date', fatal=False)) +        uploader = self._search_regex( +            r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False) + +        view_count = str_to_int(self._search_regex( +            r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False)) + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'timestamp': timestamp, +            'uploader': uploader, +            'view_count': view_count, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 408d00944..08a671fa8 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):              'description': 'The perfect cipher',              'duration': 176,              'uploader': 'Brit Cruise', +            'uploader_id': 'khanacademy',              'upload_date': '20120411', -        } +        }, +        'add_ie': ['Youtube'],      }, {          'url': 'https://www.khanacademy.org/math/applied-math/cryptography',          'info_dict': { diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 41fd62009..720bc939b 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -10,13 +10,14 @@ from ..utils import int_or_none  class KontrTubeIE(InfoExtractor):      IE_NAME = 'kontrtube'      IE_DESC = 'KontrTube.ru - Труба зовёт' -    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+' +    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'      _TEST = {          'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',          'md5': '975a991a4926c9a85f383a736a2e6b80',          'info_dict': {              'id': '2678', +            'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',              'ext': 'mp4',              'title': 'Над олимпийской деревней в Сочи поднят российский флаг',              'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', @@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') +        display_id = mobj.group('display_id') -        webpage = self._download_webpage(url, video_id, 'Downloading page') +        webpage = self._download_webpage( +            url, display_id, 'Downloading page') -        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') -        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) +        video_url = self._html_search_regex( +            r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL') +        thumbnail = self._html_search_regex( +            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)          title = self._html_search_regex(              r'<title>(.+?)</title>', webpage, 'video title') -        description = self._html_search_meta('description', webpage, 'video description') +        description = self._html_search_meta( +            'description', webpage, 'video description')          mobj = re.search( -            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage) +            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', +            webpage)          duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None          view_count = self._html_search_regex( -            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False) +            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', +            webpage, 'view count', fatal=False)          comment_count = None          comment_str = self._html_search_regex( @@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):          return {              'id': video_id, +            'display_id': display_id,              'url': video_url,              'thumbnail': thumbnail,              'title': title, diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index d72d470aa..9c2fbdd96 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -2,7 +2,6 @@  from __future__ import unicode_literals  import re -import json  from .common import InfoExtractor  from ..utils import ( @@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):          'params': {              'skip_download': True,  # HLS download          }, -      }      def _real_extract(self, url): @@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):          formats = []          for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): -            data = json.loads(js_to_json(js)) +            data = self._parse_json(js, video_id, transform_source=js_to_json) +            if 'provider' not in data: +                continue              if data['provider'] == 'rtmp':                  formats.append({                      'format_id': 'rtmp', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index f5ca74e97..c1a482dba 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -1,63 +1,49 @@  # coding: utf-8  from __future__ import unicode_literals -import hashlib -import json -import time -  from .common import InfoExtractor  from ..compat import ( -    compat_parse_qs, -    compat_str, -) -from ..utils import ( -    int_or_none, +    compat_urlparse,  )  class MotorsportIE(InfoExtractor):      IE_DESC = 'motorsport.com' -    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])' +    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'      _TEST = {          'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', -        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',          'info_dict': { -            'id': '7063', +            'id': '2-T3WuR-KMM',              'ext': 'mp4',              'title': 'Red Bull Racing: 2014 Rules Explained', -            'duration': 207, +            'duration': 208,              'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.', -            'uploader': 'rainiere', -            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$' -        } +            'uploader': 'mcomstaff', +            'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ', +            'upload_date': '20140903', +            'thumbnail': r're:^https?://.+\.jpg$' +        }, +        'add_ie': ['Youtube'], +        'params': { +            'skip_download': True, +        },      }      def _real_extract(self, url):          display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) -        flashvars_code = self._html_search_regex( -            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars') -        flashvars = compat_parse_qs(flashvars_code) -        params = json.loads(flashvars['parameters'][0]) - -        e = compat_str(int(time.time()) + 24 * 60 * 60) -        base_video_url = params['location'] + '?e=' + e -        s = 'h3hg713fh32' -        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest() -        video_url = base_video_url + '&h=' + h - -        uploader = self._html_search_regex( -            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage, -            'uploader', fatal=False) +        iframe_path = self._html_search_regex( +            r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage, +            'iframe path') +        iframe = self._download_webpage( +            compat_urlparse.urljoin(url, iframe_path), display_id, +            'Downloading iframe') +        youtube_id = self._search_regex( +            r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')          return { -            'id': params['video_id'], +            '_type': 'url_transparent',              'display_id': display_id, -            'title': params['title'], -            'url': video_url, -            'description': params.get('description'), -            'thumbnail': params.get('main_thumb'), -            'duration': int_or_none(params.get('duration')), -            'uploader': uploader, +            'url': 'https://youtube.com/watch?v=%s' % youtube_id,          } diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 3d35b11ac..c13ff0d65 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):              'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',              'uploader': 'JonTron',              'upload_date': '20140125', -        } +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        },      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 449d4836c..45716c75d 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):              'ext': 'flv',              'title': 'youtube-dl_test_video.mp4',          }, +        'skip': 'Removed for copyright infringement.',  # oh wow      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py new file mode 100644 index 000000000..0d706312e --- /dev/null +++ b/youtube_dl/extractor/radiobremen.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class RadioBremenIE(InfoExtractor): +    _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)' +    IE_NAME = 'radiobremen' + +    _TEST = { +        'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', +        'info_dict': { +            'id': '114720', +            'ext': 'mp4', +            'duration': 1685, +            'width': 512, +            'title': 'buten un binnen vom 22. Dezember', +            'thumbnail': 're:https?://.*\.jpg$', +            'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id +        meta_doc = self._download_webpage( +            meta_url, video_id, 'Downloading metadata') +        title = self._html_search_regex( +            r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title") +        description = self._html_search_regex( +            r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False) +        duration = parse_duration(self._html_search_regex( +            r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", +            meta_doc, "duration", fatal=False)) + +        page_doc = self._download_webpage( +            url, video_id, 'Downloading video information') +        mobj = re.search( +            r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", +            page_doc) +        video_url = ( +            "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % +            (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + +        formats = [{ +            'url': video_url, +            'ext': 'mp4', +            'width': int(mobj.group("width")), +        }] +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'duration': duration, +            'formats': formats, +            'thumbnail': mobj.group('thumbnail'), +        } diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration  class RtlXlIE(InfoExtractor):      IE_NAME = 'rtlxl.nl' -    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' +    _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'      _TEST = {          'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index b72b5a586..5b1c3577a 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):          } +class RutubeEmbedIE(InfoExtractor): +    IE_NAME = 'rutube:embed' +    IE_DESC = 'Rutube embedded videos' +    _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)' + +    _TEST = { +        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', +        'info_dict': { +            'id': 'a10e53b86e8f349080f718582ce4c661', +            'ext': 'mp4', +            'upload_date': '20131223', +            'uploader_id': '297833', +            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', +            'uploader': 'subziro89 ILya', +            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', +        }, +        'params': { +            'skip_download': 'Requires ffmpeg', +        }, +    } + +    def _real_extract(self, url): +        embed_id = self._match_id(url) +        webpage = self._download_webpage(url, embed_id) + +        canonical_url = self._html_search_regex( +            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, +            'Canonical URL') +        return self.url_result(canonical_url, 'Rutube') + +  class RutubeChannelIE(InfoExtractor):      IE_NAME = 'rutube:channel'      IE_DESC = 'Rutube channels' diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..feef33e27 --- /dev/null +++ b/youtube_dl/extractor/soulanime.py @@ -0,0 +1,80 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    HEADRequest, +    urlhandle_detect_ext, +) + + +class SoulAnimeWatchingIE(InfoExtractor): +    IE_NAME = "soulanime:watching" +    IE_DESC = "SoulAnime video" +    _TEST = { +        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', +        'md5': '05fae04abf72298098b528e98abf4298', +        'info_dict': { +            'id': 'seirei-tsukai-no-blade-dance-episode-9', +            'ext': 'mp4', +            'title': 'seirei-tsukai-no-blade-dance-episode-9', +            'description': 'seirei-tsukai-no-blade-dance-episode-9' +        } +    } +    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        domain = mobj.group('domain') + +        page = self._download_webpage(url, video_id) + +        video_url_encoded = self._html_search_regex( +            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') +        video_url = "http://www.soul-anime." + domain + video_url_encoded + +        ext_req = HEADRequest(video_url) +        ext_handle = self._request_webpage( +            ext_req, video_id, note='Determining extension') +        ext = urlhandle_detect_ext(ext_handle) + +        return { +            'id': video_id, +            'url': video_url, +            'ext': ext, +            'title': video_id, +            'description': video_id +        } + + +class SoulAnimeSeriesIE(InfoExtractor): +    IE_NAME = "soulanime:series" +    IE_DESC = "SoulAnime Series" + +    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' + +    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' + +    _TEST = { +        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', +        'info_dict': { +            'id': 'black-rock-shooter-tv' +        }, +        'playlist_count': 8 +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        series_id = mobj.group('id') +        domain = mobj.group('domain') + +        pattern = re.compile(self._EPISODE_REGEX) + +        page = self._download_webpage(url, series_id, "Downloading series page") +        mobj = pattern.findall(page) + +        entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj] + +        return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import (  class TEDIE(SubtitlesInfoExtractor):      _VALID_URL = r'''(?x)          (?P<proto>https?://) -        (?P<type>www|embed)(?P<urlmain>\.ted\.com/ +        (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/          (              (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist              | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):      def _real_extract(self, url):          m = re.match(self._VALID_URL, url, re.VERBOSE) -        if m.group('type') == 'embed': +        if m.group('type').startswith('embed'):              desktop_url = m.group('proto') + 'www' + m.group('urlmain')              return self.url_result(desktop_url, 'TED')          name = m.group('name') diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 6e61cc9e2..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,15 +1,13 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  class TF1IE(InfoExtractor):      """TF1 uses the wat.tv player.""" -    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html' -    _TEST = { +    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' +    _TESTS = {          'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',          'info_dict': {              'id': '10635995', @@ -21,14 +19,26 @@ class TF1IE(InfoExtractor):              # Sometimes wat serves the whole file with the --test option              'skip_download': True,          }, +    }, { +        'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html', +        'info_dict': { +            'id': '12043945', +            'ext': 'mp4', +            'title': 'Le grand Mystérioso - Chuggington', +            'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.', +            'upload_date': '20150103', +        }, +        'params': { +            # Sometimes wat serves the whole file with the --test option +            'skip_download': True, +        },      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          embed_url = self._html_search_regex( -            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url') +            r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')          embed_page = self._download_webpage(embed_url, video_id,                                              'Downloading embed player page')          wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id') diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 0d9fb09a7..619039e51 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -63,7 +63,7 @@ class VierIE(InfoExtractor):  class VierVideosIE(InfoExtractor):      IE_NAME = 'vier:videos' -    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+))?' +    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'      _TESTS = [{          'url': 'http://www.vier.be/demoestuin/videos',          'info_dict': { diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor):      IE_DESC = 'Vimple.ru'      _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'      _TESTS = [ -        # Quality: Large, from iframe          { -            'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', +            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', +            'md5': '2e750a330ed211d3fd41821c6ad9a279',              'info_dict': { -                'id': 'b132bdfd71b546d3972f9ab9a25f201c', -                'title': 'great-escape-minecraft.flv', +                'id': 'c0f6b1687dcd4000a97ebe70068039cf',                  'ext': 'mp4', -                'duration': 352, -                'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', +                'title': 'Sunset', +                'duration': 20, +                'thumbnail': 're:https?://.*?\.jpg',              },          }, -        # Quality: Medium, from mainpage -        { -            'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', -            'info_dict': { -                'id': 'a15950562888453b8e6f9572dc8600cd', -                'title': 'DB 01', -                'ext': 'flv', -                'duration': 1484, -                'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', -            } -        },      ]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 542e9198a..129de6cf3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -164,6 +164,15 @@ class VKIE(InfoExtractor):              self.to_screen('Youtube video detected')              return self.url_result(m_yt.group(1), 'Youtube') +        m_rutube = re.search( +            r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) +        assert m_rutube +        if m_rutube is not None: +            self.to_screen('rutube video detected') +            rutube_url = self._proto_relative_url( +                m_rutube.group(1).replace('\\', '')) +            return self.url_result(rutube_url) +          m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)          if m_opts:              m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' +    _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' +    _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' +    _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' +    _TESTS = [ +        { +            'url': 'http://www.webofstories.com/play/hans.bethe/71', +            'md5': '373e4dd915f60cfe3116322642ddf364', +            'info_dict': { +                'id': '4536', +                'ext': 'mp4', +                'title': 'The temperature of the sun', +                'thumbnail': 're:^https?://.*\.jpg$', +                'description': 'Hans Bethe talks about calculating the temperature of the sun', +                'duration': 238, +            } +        }, +        { +            'url': 'http://www.webofstories.com/play/55908', +            'md5': '2985a698e1fe3211022422c4b5ed962c', +            'info_dict': { +                'id': '55908', +                'ext': 'mp4', +                'title': 'The story of Gemmata obscuriglobus', +                'thumbnail': 're:^https?://.*\.jpg$', +                'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', +                'duration': 169, +            } +        }, +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) +        title = self._og_search_title(webpage) +        description = self._html_search_meta('description', webpage) +        thumbnail = self._og_search_thumbnail(webpage) + +        story_filename = self._search_regex( +            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') +        speaker_id = self._search_regex( +            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') +        story_id = self._search_regex( +            r'\.storyId\((\d+)\)', webpage, 'story ID') +        speaker_type = self._search_regex( +            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') +        great_life = self._search_regex( +            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') +        is_great_life_series = great_life == 'true' +        duration = int_or_none(self._search_regex( +            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + +        # URL building, see: http://www.webofstories.com/scripts/player.js +        ms_prefix = '' +        if speaker_type.lower() == 'ms': +            ms_prefix = 'mini_sites/' + +        if is_great_life_series: +            mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( +                self._VIDEO_DOMAIN, speaker_id, story_filename) +            rtmp_ext = 'flv' +            streamer = self._GREAT_LIFE_STREAMER +            play_path = 'stories/{0:}/{1:}'.format( +                speaker_id, story_filename) +        else: +            mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( +                self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) +            rtmp_ext = 'mp4' +            streamer = self._USER_STREAMER +            play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( +                ms_prefix, speaker_id, story_filename) + +        formats = [{ +            'format_id': 'mp4_sd', +            'url': mp4_url, +        }, { +            'format_id': 'rtmp_sd', +            'page_url': url, +            'url': streamer, +            'ext': rtmp_ext, +            'play_path': play_path, +        }] + +        self._sort_formats(formats) + +        return { +            'id': story_id, +            'title': title, +            'formats': formats, +            'thumbnail': thumbnail, +            'description': description, +            'duration': duration, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8c7842ee8..f0efaf0d9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, -        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, +        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)          '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, @@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, +        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'}, +        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          # Dash webm audio          '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, @@ -736,6 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'format_id': format_id,                  'url': video_url,                  'width': int_or_none(r.attrib.get('width')), +                'height': int_or_none(r.attrib.get('height')),                  'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),                  'asr': int_or_none(r.attrib.get('audioSamplingRate')),                  'filesize': filesize, @@ -746,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                      fo for fo in formats                      if fo['format_id'] == format_id)              except StopIteration: -                f.update(self._formats.get(format_id, {})) +                f.update(self._formats.get(format_id, {}).items())                  formats.append(f)              else:                  existing_format.update(f) @@ -1040,6 +1043,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                      self.report_warning(                          'Skipping DASH manifest: %r' % e, video_id)                  else: +                    # Hide the formats we found through non-DASH +                    dash_keys = set(df['format_id'] for df in dash_formats) +                    for f in formats: +                        if f['format_id'] in dash_keys: +                            f['format_id'] = 'nondash-%s' % f['format_id'] +                            f['preference'] = f.get('preference', 0) - 10000                      formats.extend(dash_formats)          self._sort_formats(formats) @@ -1199,9 +1208,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          if playlist_id.startswith('RD'):              # Mixes require a custom extraction process              return self._extract_mix(playlist_id) -        if playlist_id.startswith('TL'): -            raise ExtractorError('For downloading YouTube.com top lists, use ' -                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)          url = self._TEMPLATE_URL % playlist_id          page = self._download_webpage(url, playlist_id) @@ -1247,49 +1253,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): -    IE_NAME = 'youtube:toplist' -    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' -               ' (Example: "yttoplist:music:Top Tracks")') -    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' -    _TESTS = [{ -        'url': 'yttoplist:music:Trending', -        'playlist_mincount': 5, -        'skip': 'Only works for logged-in users', -    }] - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        channel = mobj.group('chann') -        title = mobj.group('title') -        query = compat_urllib_parse.urlencode({'title': title}) -        channel_page = self._download_webpage( -            'https://www.youtube.com/%s' % channel, title) -        link = self._html_search_regex( -            r'''(?x) -                <a\s+href="([^"]+)".*?>\s* -                <span\s+class="branded-page-module-title-text">\s* -                <span[^>]*>.*?%s.*?</span>''' % re.escape(query), -            channel_page, 'list') -        url = compat_urlparse.urljoin('https://www.youtube.com/', link) - -        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' -        ids = [] -        # sometimes the webpage doesn't contain the videos -        # retry until we get them -        for i in itertools.count(0): -            msg = 'Downloading Youtube mix' -            if i > 0: -                msg += ', retry #%d' % i - -            webpage = self._download_webpage(url, title, msg) -            ids = orderedSet(re.findall(video_re, webpage)) -            if ids: -                break -        url_results = self._ids_to_results(ids) -        return self.playlist_result(url_results, playlist_title=title) - -  class YoutubeChannelIE(InfoExtractor):      IE_DESC = 'YouTube.com channels'      _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):  class FFmpegMergerPP(FFmpegPostProcessor):      def run(self, info):          filename = info['filepath'] -        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] +        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']          self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)          self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)          return True, info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index efbe64fb3..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1550,3 +1550,13 @@ def ytdl_is_updateable():  def args_to_str(args):      # Get a short string representation for a subprocess command      return ' '.join(shlex_quote(a) for a in args) + + +def urlhandle_detect_ext(url_handle): +    try: +        url_handle.headers +        getheader = lambda h: url_handle.headers[h] +    except AttributeError:  # Python < 3 +        getheader = url_handle.info().getheader + +    return getheader('Content-Type').split("/")[1] diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 58b5021dc..2124e954f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.01.02' +__version__ = '2015.01.05.1' | 
