Diffstat:
 -rw-r--r--  AUTHORS                                     |   2
 -rw-r--r--  README.md                                   |   2
 -rw-r--r--  docs/supportedsites.md                      |   6
 -rw-r--r--  test/test_utils.py                          |   1
 -rwxr-xr-x  youtube_dl/YoutubeDL.py                     |   3
 -rw-r--r--  youtube_dl/downloader/dash.py               |   2
 -rw-r--r--  youtube_dl/extractor/__init__.py            |   1
 -rw-r--r--  youtube_dl/extractor/dailymotion.py         | 158
 -rw-r--r--  youtube_dl/extractor/generic.py             |   2
 -rw-r--r--  youtube_dl/extractor/lecture2go.py          |  62
 -rw-r--r--  youtube_dl/extractor/nationalgeographic.py  |  37
 -rw-r--r--  youtube_dl/extractor/pbs.py                 |  33
 -rw-r--r--  youtube_dl/extractor/prosiebensat1.py       |  11
 -rw-r--r--  youtube_dl/extractor/rtlnl.py               |  17
 -rw-r--r--  youtube_dl/extractor/rts.py                 |  31
 -rw-r--r--  youtube_dl/extractor/snagfilms.py           |  16
 -rw-r--r--  youtube_dl/extractor/tagesschau.py          |  65
 -rw-r--r--  youtube_dl/extractor/udemy.py               |   9
 -rw-r--r--  youtube_dl/extractor/viewster.py            | 218
 -rw-r--r--  youtube_dl/extractor/viki.py                |  46
 -rw-r--r--  youtube_dl/extractor/youtube.py             |   3
 -rw-r--r--  youtube_dl/options.py                       |   2
 -rw-r--r--  youtube_dl/utils.py                         |   4
 -rw-r--r--  youtube_dl/version.py                       |   2
 24 files changed, 509 insertions, 224 deletions
diff --git a/AUTHORS b/AUTHORS
@@ -133,3 +133,5 @@ Remita Amine
 Aurélio A. Heckert
 Bernhard Minks
 sceext
+Zach Bruggeman
+Tjark Saul
diff --git a/README.md b/README.md
@@ -75,7 +75,7 @@ which means you can modify it, redistribute it or use it however you like.
 ## Video Selection:
     --playlist-start NUMBER          Playlist video to start at (default is 1)
     --playlist-end NUMBER            Playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC       Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+    --playlist-items ITEM_SPEC       Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
                                      if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
                                      download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
     --match-title REGEX              Download only matching titles (regex or caseless sub-string)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index a84878026..73445137f 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -28,6 +28,7 @@
  - **anitube.se**
  - **AnySex**
  - **Aparat**
+ - **AppleConnect**
  - **AppleDaily**: 臺灣蘋果日報
  - **AppleTrailers**
  - **archive.org**: archive.org videos
@@ -486,6 +487,7 @@
  - **SportBox**
  - **SportBoxEmbed**
  - **SportDeutschland**
+ - **Sportschau**
  - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
  - **SSA**
@@ -611,8 +613,8 @@
  - **Vimple**: Vimple - one-click video hosting
  - **Vine**
  - **vine:user**
- - **vk.com**
- - **vk.com:user-videos**: vk.com:All of a user's videos
+ - **vk**: VK
+ - **vk:uservideos**: VK - User's Videos
  - **Vodlocker**
  - **VoiceRepublic**
  - **Vporn**
diff --git a/test/test_utils.py b/test/test_utils.py
index e13e11b59..65692a9fb 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -324,6 +324,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('02:03:04'), 7384)
         self.assertEqual(parse_duration('01:02:03:04'), 93784)
         self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
+        self.assertEqual(parse_duration('87 Min.'), 5220)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 00af78e06..702a6ad50 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1104,7 +1104,8 @@ class YoutubeDL(object):
         if req_format is None:
             req_format_list = []
             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    info_dict['extractor'] in ['youtube', 'ted']):
+                    info_dict['extractor'] in ['youtube', 'ted'] and
+                    not info_dict.get('is_live')):
                 merger = FFmpegMergerPP(self)
                 if merger.available and merger.can_merge():
                     req_format_list.append('bestvideo+bestaudio')
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index a4685d307..8b6fa2753 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -37,7 +37,7 @@ class DashSegmentsFD(FileDownloader):
         def combine_url(base_url, target_url):
             if re.match(r'^https?://', target_url):
                 return target_url
-            return '%s/%s' % (base_url, target_url)
+            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
 
         with open(tmpfilename, 'wb') as outf:
             append_url_to_file(
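Note: the combine_url() change above avoids a doubled slash when a segment's base URL already ends with '/'. A minimal standalone sketch of the joining behaviour (illustrative only, not part of the patch; example.com is a placeholder):

    import re

    def combine_url(base_url, target_url):
        # Absolute segment URLs are passed through unchanged.
        if re.match(r'^https?://', target_url):
            return target_url
        # Add a separator only when the base URL does not already end with one.
        return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)

    # combine_url('http://example.com/dash/', 'seg-1.m4s') -> 'http://example.com/dash/seg-1.m4s'
    # combine_url('http://example.com/dash', 'seg-1.m4s')  -> 'http://example.com/dash/seg-1.m4s'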
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 50da08830..3cfa804ec 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -275,6 +275,7 @@ from .kuwo import (
 )
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
+from .lecture2go import Lecture2GoIE
 from .letv import (
     LetvIE,
     LetvTvIE,
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 1a41c0db1..8fcae7402 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -13,8 +13,10 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    determine_ext,
     int_or_none,
     orderedSet,
+    parse_iso8601,
     str_to_int,
     unescapeHTML,
 )
@@ -28,10 +30,12 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
         request.add_header('Cookie', 'family_filter=off; ff=off')
         return request
 
+    def _download_webpage_no_ff(self, url, *args, **kwargs):
+        request = self._build_request(url)
+        return self._download_webpage(request, *args, **kwargs)
 
-class DailymotionIE(DailymotionBaseInfoExtractor):
-    """Information Extractor for Dailymotion"""
 
+class DailymotionIE(DailymotionBaseInfoExtractor):
     _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
     IE_NAME = 'dailymotion'
@@ -50,10 +54,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             'info_dict': {
                 'id': 'x2iuewm',
                 'ext': 'mp4',
-                'uploader': 'IGN',
                 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
-                'upload_date': '20150306',
+                'description': 'Several come bundled with the Steam Controller.',
+                'thumbnail': 're:^https?:.*\.(?:jpg|png)$',
                 'duration': 74,
+                'timestamp': 1425657362,
+                'upload_date': '20150306',
+                'uploader': 'IGN',
+                'uploader_id': 'xijv66',
+                'age_limit': 0,
+                'view_count': int,
+                'comment_count': int,
             }
         },
         # Vevo video
@@ -87,38 +98,106 @@
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        url = 'https://www.dailymotion.com/video/%s' % video_id
-        # Retrieve video webpage to extract further information
-        request = self._build_request(url)
-        webpage = self._download_webpage(request, video_id)
+        webpage = self._download_webpage_no_ff(
+            'https://www.dailymotion.com/video/%s' % video_id, video_id)
+
+        age_limit = self._rta_search(webpage)
+
+        description = self._og_search_description(webpage) or self._html_search_meta(
+            'description', webpage, 'description')
 
-        # Extract URL, uploader and title from webpage
-        self.report_extraction(video_id)
+        view_count = str_to_int(self._search_regex(
+            [r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
+             r'video_views_count[^>]+>\s+([\d\.,]+)'],
+            webpage, 'view count', fatal=False))
+        comment_count = int_or_none(self._search_regex(
+            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
+            webpage, 'comment count', fatal=False))
+
+        player_v5 = self._search_regex(
+            r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
+            webpage, 'player v5', default=None)
+        if player_v5:
+            player = self._parse_json(player_v5, video_id)
+            metadata = player['metadata']
+            formats = []
+            for quality, media_list in metadata['qualities'].items():
+                for media in media_list:
+                    media_url = media.get('url')
+                    if not media_url:
+                        continue
+                    type_ = media.get('type')
+                    if type_ == 'application/vnd.lumberjack.manifest':
+                        continue
+                    if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
+                        formats.extend(self._extract_m3u8_formats(
+                            media_url, video_id, 'mp4', m3u8_id='hls'))
+                    else:
+                        f = {
+                            'url': media_url,
+                            'format_id': quality,
+                        }
+                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
+                        if m:
+                            f.update({
+                                'width': int(m.group('width')),
+                                'height': int(m.group('height')),
+                            })
+                        formats.append(f)
+            self._sort_formats(formats)
+
+            title = metadata['title']
+            duration = int_or_none(metadata.get('duration'))
+            timestamp = int_or_none(metadata.get('created_time'))
+            thumbnail = metadata.get('poster_url')
+            uploader = metadata.get('owner', {}).get('screenname')
+            uploader_id = metadata.get('owner', {}).get('id')
+
+            subtitles = {}
+            for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
+                subtitles[subtitle_lang] = [{
+                    'ext': determine_ext(subtitle_url),
+                    'url': subtitle_url,
+                } for subtitle_url in subtitle.get('urls', [])]
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'timestamp': timestamp,
+                'uploader': uploader,
+                'uploader_id': uploader_id,
+                'age_limit': age_limit,
+                'view_count': view_count,
+                'comment_count': comment_count,
+                'formats': formats,
+                'subtitles': subtitles,
+            }
 
-        # It may just embed a vevo video:
-        m_vevo = re.search(
+        # vevo embed
+        vevo_id = self._search_regex(
             r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
-            webpage)
-        if m_vevo is not None:
-            vevo_id = m_vevo.group('id')
-            self.to_screen('Vevo video detected: %s' % vevo_id)
-            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+            webpage, 'vevo embed', default=None)
+        if vevo_id:
+            return self.url_result('vevo:%s' % vevo_id, 'Vevo')
 
-        age_limit = self._rta_search(webpage)
+        # fallback old player
+        embed_page = self._download_webpage_no_ff(
+            'https://www.dailymotion.com/embed/video/%s' % video_id,
+            video_id, 'Downloading embed page')
+
+        timestamp = parse_iso8601(self._html_search_meta(
+            'video:release_date', webpage, 'upload date'))
+
+        info = self._parse_json(
+            self._search_regex(
+                r'var info = ({.*?}),$', embed_page,
+                'video info', flags=re.MULTILINE),
+            video_id)
 
-        video_upload_date = None
-        mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
-        if mobj is not None:
-            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
-
-        embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
-        embed_request = self._build_request(embed_url)
-        embed_page = self._download_webpage(
-            embed_request, video_id, 'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
-                                  'video info', flags=re.MULTILINE)
-        info = json.loads(info)
         if info.get('error') is not None:
             msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
             raise ExtractorError(msg, expected=True)
@@ -139,16 +218,11 @@
                     'width': width,
                     'height': height,
                 })
-        if not formats:
-            raise ExtractorError('Unable to extract video URL')
+        self._sort_formats(formats)
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, webpage)
 
-        view_count = str_to_int(self._search_regex(
-            r'video_views_count[^>]+>\s+([\d\.,]+)',
-            webpage, 'view count', fatal=False))
-
         title = self._og_search_title(webpage, default=None)
         if title is None:
             title = self._html_search_regex(
@@ -159,8 +233,9 @@
             'id': video_id,
             'formats': formats,
             'uploader': info['owner.screenname'],
-            'upload_date': video_upload_date,
+            'timestamp': timestamp,
             'title': title,
+            'description': description,
             'subtitles': video_subtitles,
             'thumbnail': info['thumbnail_url'],
             'age_limit': age_limit,
@@ -201,9 +276,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
     def _extract_entries(self, id):
         video_ids = []
         for pagenum in itertools.count(1):
-            request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
-            webpage = self._download_webpage(request,
-                                             id, 'Downloading page %s' % pagenum)
+            webpage = self._download_webpage_no_ff(
+                self._PAGE_TEMPLATE % (id, pagenum),
+                id, 'Downloading page %s' % pagenum)
 
             video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
@@ -286,8 +361,7 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        request = self._build_request(url)
-        webpage = self._download_webpage(request, video_id)
+        webpage = self._download_webpage_no_ff(url, video_id)
 
         title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
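Note: the rewritten Dailymotion extractor above falls back to deriving width/height from the media URL when the player metadata does not spell them out, using the H264-<width>x<height> token. A small sketch of that parsing step (the sample URL is made up for illustration):

    import re

    def dimensions_from_url(media_url):
        # Mirrors the H264-(?P<width>\d+)x(?P<height>\d+) pattern used by the extractor.
        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
        return (int(m.group('width')), int(m.group('height'))) if m else None

    # dimensions_from_url('http://www.example.com/x2iuewm/H264-1280x720/video.mp4') -> (1280, 720)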
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index cd133a10c..6d2efb22e 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1702,7 +1702,7 @@ class GenericIE(InfoExtractor):
                 if refresh_header:
                     found = re.search(REDIRECT_REGEX, refresh_header)
             if found:
-                new_url = compat_urlparse.urljoin(url, found.group(1))
+                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
                 self.report_following_redirect(new_url)
                 return {
                     '_type': 'url',
diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dl/extractor/lecture2go.py
new file mode 100644
index 000000000..40a3d2346
--- /dev/null
+++ b/youtube_dl/extractor/lecture2go.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_duration,
+    int_or_none,
+)
+
+
+class Lecture2GoIE(InfoExtractor):
+    _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
+        'md5': 'ac02b570883020d208d405d5a3fd2f7f',
+        'info_dict': {
+            'id': '17473',
+            'ext': 'flv',
+            'title': '2 - Endliche Automaten und reguläre Sprachen',
+            'creator': 'Frank Heitmann',
+            'duration': 5220,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
+
+        formats = []
+        for url in set(re.findall(r'"src","([^"]+)"', webpage)):
+            ext = determine_ext(url)
+            if ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(url, video_id))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(url, video_id))
+            else:
+                formats.append({
+                    'url': url,
+                })
+
+        self._sort_formats(formats)
+
+        creator = self._html_search_regex(
+            r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'creator': creator,
+            'duration': duration,
+            'view_count': view_count,
+        }
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py
index f793b72f5..6fc9e7b05 100644
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@@ -8,18 +8,30 @@ from ..utils import (
 
 
 class NationalGeographicIE(InfoExtractor):
-    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
-
-    _TEST = {
-        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
-        'info_dict': {
-            'id': '4DmDACA6Qtk_',
-            'ext': 'flv',
-            'title': 'Mating Crabs Busted by Sharks',
-            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+    _VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
+
+    _TESTS = [
+        {
+            'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+            'info_dict': {
+                'id': '4DmDACA6Qtk_',
+                'ext': 'flv',
+                'title': 'Mating Crabs Busted by Sharks',
+                'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+            },
+            'add_ie': ['ThePlatform'],
         },
-        'add_ie': ['ThePlatform'],
-    }
+        {
+            'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
+            'info_dict': {
+                'id': '_JeBD_D7PlS5',
+                'ext': 'flv',
+                'title': 'The Real Jaws',
+                'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+    ]
 
     def _real_extract(self, url):
         name = url_basename(url)
@@ -37,5 +49,6 @@ class NationalGeographicIE(InfoExtractor):
 
         return self.url_result(smuggle_url(
             'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
-            # For some reason, the normal links don't work and we must force the use of f4m
+            # For some reason, the normal links don't work and we must force
+            # the use of f4m
             {'force_smil_url': True}))
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index fec5d65ad..a53479aad 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -32,7 +32,7 @@
             'info_dict': {
                 'id': '2365006249',
                 'ext': 'mp4',
-                'title': 'A More Perfect Union',
+                'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
                 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
                 'duration': 3190,
             },
@@ -46,7 +46,7 @@
             'info_dict': {
                 'id': '2365297690',
                 'ext': 'mp4',
-                'title': 'Losing Iraq',
+                'title': 'FRONTLINE - Losing Iraq',
                 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
                 'duration': 5050,
             },
@@ -60,7 +60,7 @@
             'info_dict': {
                 'id': '2201174722',
                 'ext': 'mp4',
-                'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
+                'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
                 'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
                 'duration': 801,
             },
@@ -72,7 +72,7 @@
                 'id': '2365297708',
                 'ext': 'mp4',
                 'description': 'md5:68d87ef760660eb564455eb30ca464fe',
-                'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+                'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
                 'duration': 6559,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
@@ -88,7 +88,7 @@
                 'display_id': 'killer-typhoon',
                 'ext': 'mp4',
                 'description': 'md5:c741d14e979fc53228c575894094f157',
-                'title': 'Killer Typhoon',
+                'title': 'NOVA - Killer Typhoon',
                 'duration': 3172,
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
@@ -110,7 +110,7 @@
                 'id': '2280706814',
                 'display_id': 'player',
                 'ext': 'mp4',
-                'title': 'Death and the Civil War',
+                'title': 'American Experience - Death and the Civil War',
                 'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
                 'duration': 6705,
                 'thumbnail': 're:^https?://.*\.jpg$',
@@ -118,6 +118,21 @@
             'params': {
                 'skip_download': True,  # requires ffmpeg
             },
+        },
+        {
+            'url': 'http://video.pbs.org/video/2365367186/',
+            'info_dict': {
+                'id': '2365367186',
+                'display_id': '2365367186',
+                'ext': 'mp4',
+                'title': 'To Catch A Comet - Full Episode',
+                'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
+                'duration': 3342,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            },
         }
     ]
@@ -232,6 +247,12 @@
                 'url': closed_captions_url,
             }]
 
+        # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
+        # Try turning it to 'program - title' naming scheme if possible
+        alt_title = info.get('program', {}).get('title')
+        if alt_title:
+            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
+
         return {
             'id': video_id,
             'display_id': display_id,
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index fec008ce7..effcf1db3 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -9,7 +9,9 @@ from ..compat import (
     compat_urllib_parse,
 )
 from ..utils import (
+    ExtractorError,
     determine_ext,
+    float_or_none,
     int_or_none,
     unified_strdate,
 )
@@ -224,10 +226,13 @@ class ProSiebenSat1IE(InfoExtractor):
             'ids': clip_id,
         })
 
-        videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')
+        video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
 
-        duration = float(videos[0]['duration'])
-        source_ids = [source['id'] for source in videos[0]['sources']]
+        if video.get('is_protected') is True:
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
+        duration = float_or_none(video.get('duration'))
+        source_ids = [source['id'] for source in video['sources']]
 
         source_ids_str = ','.join(map(str, source_ids))
 
         g = '01!8d8F_)r9]4s[qeuXfP%'
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index e0c530d64..543d94417 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -82,16 +82,21 @@ class RtlNlIE(InfoExtractor):
 
         meta = info.get('meta', {})
 
-        # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
-        # NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
-        # this adaptive -> flash workaround is not required in general, but it also
-        # allows bypassing georestriction therefore is retained for now.
-        videopath = material['videopath'].replace('/adaptive/', '/flash/')
+        # m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
+        # To work around this, the adaptive -> flash trick was previously used to obtain
+        # unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
+        # and to bypass georestrictions as well.
+        # Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
+        # unusable, albeit this can be fixed by a simple string replacement (see
+        # https://github.com/rg3/youtube-dl/pull/6337).
+        # Since recent ffmpeg and avconv handle encrypted streams just fine, encrypted
+        # streams are used now.
+        videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
 
         formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
 
-        video_urlpart = videopath.split('/flash/')[1][:-5]
+        video_urlpart = videopath.split('/adaptive/')[1][:-5]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
         formats.extend([
diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py
index 9fbe239d8..12639f08b 100644
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -19,7 +19,16 @@ from ..utils import (
 
 class RTSIE(InfoExtractor):
     IE_DESC = 'RTS.ch'
-    _VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        rts:(?P<rts_id>\d+)|
+                        https?://
+                            (?:www\.)?rts\.ch/
+                            (?:
+                                (?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|
+                                play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+)
+                            )
+                    )'''
 
     _TESTS = [
         {
@@ -123,6 +132,15 @@ class RTSIE(InfoExtractor):
             },
         },
         {
+            # article with videos on rhs
+            'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
+            'info_dict': {
+                'id': '6693917',
+                'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
+            },
+            'playlist_mincount': 5,
+        },
+        {
             'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
             'only_matching': True,
         }
@@ -130,7 +148,7 @@ class RTSIE(InfoExtractor):
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
-        video_id = m.group('id') or m.group('id_new')
+        video_id = m.group('rts_id') or m.group('id') or m.group('id_new')
         display_id = m.group('display_id') or m.group('display_id_new')
 
         def download_json(internal_id):
@@ -143,6 +161,15 @@ class RTSIE(InfoExtractor):
         # video_id extracted out of URL is not always a real id
         if 'video' not in all_info and 'audio' not in all_info:
             page = self._download_webpage(url, display_id)
+
+            # article with videos on rhs
+            videos = re.findall(
+                r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:rts:video:(\d+)"',
+                page)
+            if videos:
+                entries = [self.url_result('rts:%s' % video_urn, 'RTS') for video_urn in videos]
+                return self.playlist_result(entries, video_id, self._og_search_title(page))
+
             internal_id = self._html_search_regex(
                 r'<(?:video|audio) data-id="([0-9]+)"', page,
                 'internal video id')
diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py
index cf495f310..6977afb27 100644
--- a/youtube_dl/extractor/snagfilms.py
+++ b/youtube_dl/extractor/snagfilms.py
@@ -24,6 +24,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
             'title': '#whilewewatch',
         }
     }, {
+        # invalid labels, 360p is better than 480p
+        'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
+        'md5': '882fca19b9eb27ef865efeeaed376a48',
+        'info_dict': {
+            'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
+            'ext': 'mp4',
+            'title': 'Life in Limbo',
+        }
+    }, {
         'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
         'only_matching': True,
     }]
@@ -52,14 +61,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
             if not file_:
                 continue
             type_ = source.get('type')
-            format_id = source.get('label')
             ext = determine_ext(file_)
-            if any(_ == 'm3u8' for _ in (type_, ext)):
+            format_id = source.get('label') or ext
+            if all(v == 'm3u8' for v in (type_, ext)):
                 formats.extend(self._extract_m3u8_formats(
                     file_, video_id, 'mp4', m3u8_id='hls'))
             else:
                 bitrate = int_or_none(self._search_regex(
-                    r'(\d+)kbps', file_, 'bitrate', default=None))
+                    [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
+                    file_, 'bitrate', default=None))
                 height = int_or_none(self._search_regex(
                     r'^(\d+)[pP]$', format_id, 'height', default=None))
                 formats.append({
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index cf1b37a75..73e7657d4 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -8,7 +8,7 @@ from ..utils import parse_filesize
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
@@ -18,7 +18,7 @@ class TagesschauIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
             'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
-            'thumbnail': 're:^http:.*\.jpg$',
+            'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
         'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
@@ -28,8 +28,39 @@ class TagesschauIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'md5:695c01bfd98b7e313c501386327aea59',
             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
-            'thumbnail': 're:^http:.*\.jpg$',
-        }
+            'thumbnail': 're:^https?:.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
+        'md5': 'aef45de271c4bf0a5db834aa40bf774c',
+        'info_dict': {
+            'id': '18407',
+            'ext': 'mp3',
+            'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+            'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+            'thumbnail': 're:^https?:.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
+        'only_matching': True,
     }]
 
     _FORMATS = {
@@ -49,19 +80,26 @@ class TagesschauIE(InfoExtractor):
             playerpage = self._download_webpage(
                 player_url, display_id, 'Downloading player page')
 
-            medias = re.findall(
-                r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
-                playerpage)
             formats = []
-            for url, ext, res in medias:
+            for media in re.finditer(
+                    r'''(?x)
+                        (?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
+                        ,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
+                        (?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
+                    ''', playerpage):
+                url = media.group('url')
+                type_ = media.group('type')
+                ext = media.group('ext')
+                res = media.group('quality')
                 f = {
-                    'format_id': res + '_' + ext,
+                    'format_id': '%s_%s' % (res, ext) if res else ext,
                     'url': url,
                     'ext': ext,
+                    'vcodec': 'none' if type_ == 'audio' else None,
                 }
                 f.update(self._FORMATS.get(res, {}))
                 formats.append(f)
-            thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+            thumbnail = self._og_search_thumbnail(playerpage)
             title = self._og_search_title(webpage).strip()
             description = self._og_search_description(webpage).strip()
         else:
@@ -99,17 +137,14 @@
                         'filesize_approx': parse_filesize(m.group('filesize_approx')),
                     })
                 formats.append(format)
-            thumbnail_fn = self._search_regex(
-                r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
-                webpage, 'thumbnail', fatal=False)
+            thumbnail = self._og_search_thumbnail(webpage)
             description = self._html_search_regex(
                 r'(?s)<p class="teasertext">(.*?)</p>',
-                webpage, 'description', fatal=False)
+                webpage, 'description', default=None)
             title = self._html_search_regex(
                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
 
         self._sort_formats(formats)
-        thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
 
         return {
             'id': display_id,
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
index e2bab52fe..4a0eaf65f 100644
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -77,7 +77,11 @@ class UdemyIE(InfoExtractor):
         login_popup = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login popup')
 
-        if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
+        def is_logged(webpage):
+            return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
+
+        # already logged in
+        if is_logged(login_popup):
             return
 
         login_form = self._form_hidden_inputs('login-form', login_popup)
@@ -95,8 +99,7 @@ class UdemyIE(InfoExtractor):
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
 
-        if all(logout_pattern not in response
-               for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
+        if not is_logged(response):
             error = self._html_search_regex(
                 r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
                 response, 'error message', default=None)
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index 1742e66f4..6ef36290b 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -1,129 +1,137 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
+from ..compat import (
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_iso8601,
+)
 
 
 class ViewsterIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+    _VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
     _TESTS = [{
-        # movielink, paymethod=fre
-        'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
-        'playlist': [{
-            'md5': '8f9d94b282d80c42b378dffdbb11caf3',
-            'info_dict': {
-                'id': '1293-19341-000-movie',
-                'ext': 'flv',
-                'title': "'Hout' (Wood) - Movie",
-            },
-        }],
-        'info_dict': {
-            'id': '1293-19341-000',
-            'title': "'Hout' (Wood)",
-            'description': 'md5:925733185a9242ef96f436937683f33b',
-        }
-    }, {
-        # movielink, paymethod=adv
+        # movie, Type=Movie
         'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
-        'playlist': [{
-            'md5': '77a005453ca7396cbe3d35c9bea30aef',
-            'info_dict': {
-                'id': '1140-11855-000-movie',
-                'ext': 'flv',
-                'title': "THE LISTENING PROJECT - Movie",
-            },
-        }],
+        'md5': '14d3cfffe66d57b41ae2d9c873416f01',
         'info_dict': {
             'id': '1140-11855-000',
-            'title': "THE LISTENING PROJECT",
-            'description': 'md5:714421ae9957e112e672551094bf3b08',
-        }
+            'ext': 'flv',
+            'title': 'The listening Project',
+            'description': 'md5:bac720244afd1a8ea279864e67baa071',
+            'timestamp': 1214870400,
+            'upload_date': '20080701',
+            'duration': 4680,
+        },
     }, {
-        # direct links, no movielink
-        'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
-        'playlist': [{
-            'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
-            'info_dict': {
-                'id': '1198-56411-000-trailer',
-                'ext': 'mp4',
-                'title': "Sinister - Trailer",
-            },
-        }, {
-            'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
-            'info_dict': {
-                'id': '1198-56411-000-behind-scenes',
-                'ext': 'mp4',
-                'title': "Sinister - Behind Scenes",
-            },
-        }, {
-            'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
-            'info_dict': {
-                'id': '1198-56411-000-scene-from-movie',
-                'ext': 'mp4',
-                'title': "Sinister - Scene from movie",
-            },
-        }],
+        # series episode, Type=Episode
+        'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
+        'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
         'info_dict': {
-            'id': '1198-56411-000',
-            'title': "Sinister",
-            'description': 'md5:014c40b0488848de9683566a42e33372',
-        }
+            'id': '1284-19427-001',
+            'ext': 'flv',
+            'title': 'The World and a Wall',
+            'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
+            'timestamp': 1428192000,
+            'upload_date': '20150405',
+            'duration': 1500,
+        },
+    }, {
+        # serie, Type=Serie
+        'url': 'http://www.viewster.com/serie/1303-19426-000/',
+        'info_dict': {
+            'id': '1303-19426-000',
+            'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
+            'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
+        },
+        'playlist_count': 13,
+    }, {
+        # unfinished serie, no Type
+        'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
+        'info_dict': {
+            'id': '1284-19427-000',
+            'title': 'Baby Steps—Season 2',
+            'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
+        },
+        'playlist_mincount': 16,
     }]
 
     _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+    _AUTH_TOKEN = '/YqhSYsx8EaU9Bsta3ojlA=='
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        request = compat_urllib_request.Request(
-            'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+    def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
+        request = compat_urllib_request.Request(url)
         request.add_header('Accept', self._ACCEPT_HEADER)
+        request.add_header('Auth-token', self._AUTH_TOKEN)
+        return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
 
-        movie = self._download_json(
-            request, video_id, 'Downloading movie metadata JSON')
-
-        title = movie.get('title') or movie['original_title']
-        description = movie.get('synopsis')
-        thumbnail = movie.get('large_artwork') or movie.get('artwork')
-
-        entries = []
-        for clip in movie['play_list']:
-            entry = None
-
-            # movielink api
-            link_request = clip.get('link_request')
-            if link_request:
-                request = compat_urllib_request.Request(
-                    'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
-                    % link_request)
-                request.add_header('Accept', self._ACCEPT_HEADER)
+    def _real_extract(self, url):
         video_id = self._match_id(url)
 
-                movie_link = self._download_json(
-                    request, video_id, 'Downloading movie link JSON', fatal=False)
+        info = self._download_json(
+            'https://public-api.viewster.com/search/%s' % video_id,
+            video_id, 'Downloading entry JSON')
 
-                if movie_link:
-                    formats = self._extract_f4m_formats(
-                        movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
-                    self._sort_formats(formats)
-                    entry = {
-                        'formats': formats,
-                    }
+        entry_id = info.get('Id') or info['id']
 
-            # direct link
-            clip_url = clip.get('clip_data', {}).get('url')
-            if clip_url:
-                entry = {
-                    'url': clip_url,
-                    'ext': 'mp4',
-                }
+        # unfinished serie has no Type
+        if info.get('Type') in ['Serie', None]:
+            episodes = self._download_json(
+                'https://public-api.viewster.com/series/%s/episodes' % entry_id,
+                video_id, 'Downloading series JSON')
+            entries = [
+                self.url_result(
+                    'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
+                for episode in episodes]
+            title = (info.get('Title') or info['Synopsis']['Title']).strip()
+            description = info.get('Synopsis', {}).get('Detailed')
+            return self.playlist_result(entries, video_id, title, description)
 
-            if entry:
-                entry.update({
-                    'id': '%s-%s' % (video_id, clip['canonical_title']),
-                    'title': '%s - %s' % (title, clip['title']),
+        formats = []
+        for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
+            media = self._download_json(
+                'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
+                % (entry_id, compat_urllib_parse.quote(media_type)),
+                video_id, 'Downloading %s JSON' % media_type, fatal=False)
+            if not media:
+                continue
+            video_url = media.get('Uri')
+            if not video_url:
+                continue
+            ext = determine_ext(video_url)
+            if ext == 'f4m':
+                video_url += '&' if '?' in video_url else '?'
+                video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
+                formats.extend(self._extract_f4m_formats(
+                    video_url, video_id, f4m_id='hds'))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', m3u8_id='hls',
+                    fatal=False  # m3u8 sometimes fails
+                ))
+            else:
+                formats.append({
+                    'url': video_url,
                 })
-                entries.append(entry)
+        self._sort_formats(formats)
 
-        playlist = self.playlist_result(entries, video_id, title, description)
-        playlist['thumbnail'] = thumbnail
-        return playlist
+        synopsis = info.get('Synopsis', {})
+        # Prefer title outside synopsis since it's less messy
+        title = (info.get('Title') or synopsis['Title']).strip()
+        description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
+        duration = int_or_none(info.get('Duration'))
+        timestamp = parse_iso8601(info.get('ReleaseDate'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 51cdc6b65..ddbd395c8 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -88,6 +88,14 @@ class VikiBaseIE(InfoExtractor):
         if not self._token:
             self.report_warning('Unable to get session token, login has probably failed')
 
+    @staticmethod
+    def dict_selection(dict_obj, preferred_key):
+        if preferred_key in dict_obj:
+            return dict_obj.get(preferred_key)
+
+        filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
+        return filtered_dict[0] if filtered_dict else None
+
 
 class VikiIE(VikiBaseIE):
     IE_NAME = 'viki'
@@ -173,6 +181,19 @@ class VikiIE(VikiBaseIE):
     }, {
         'url': 'http://www.viki.com/player/44699v',
         'only_matching': True,
+    }, {
+        # non-English description
+        'url': 'http://www.viki.com/videos/158036v-love-in-magic',
+        'md5': '1713ae35df5a521b31f6dc40730e7c9c',
+        'info_dict': {
+            'id': '158036v',
+            'ext': 'mp4',
+            'uploader': 'I Planet Entertainment',
+            'upload_date': '20111122',
+            'timestamp': 1321985454,
+            'description': 'md5:44b1e46619df3a072294645c770cef36',
+            'title': 'Love In Magic',
+        },
     }]
 
     def _real_extract(self, url):
@@ -181,19 +202,14 @@ class VikiIE(VikiBaseIE):
         video = self._call_api(
             'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
 
-        title = None
-        titles = video.get('titles')
-        if titles:
-            title = titles.get('en') or titles[titles.keys()[0]]
+        title = self.dict_selection(video.get('titles', {}), 'en')
         if not title:
             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
-            container_titles = video.get('container', {}).get('titles')
-            if container_titles:
-                container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
-                title = '%s - %s' % (container_title, title)
+            container_titles = video.get('container', {}).get('titles', {})
+            container_title = self.dict_selection(container_titles, 'en')
+            title = '%s - %s' % (container_title, title)
 
-        descriptions = video.get('descriptions')
-        description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None
+        description = self.dict_selection(video.get('descriptions', {}), 'en')
 
         duration = int_or_none(video.get('duration'))
         timestamp = parse_iso8601(video.get('created_at'))
@@ -242,8 +258,8 @@ class VikiIE(VikiBaseIE):
         formats = []
         for format_id, stream_dict in streams.items():
-            height = self._search_regex(
-                r'^(\d+)[pP]$', format_id, 'height', default=None)
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]$', format_id, 'height', default=None))
             for protocol, format_dict in stream_dict.items():
                 if format_id == 'm3u8':
                     formats = self._extract_m3u8_formats(
@@ -299,11 +315,9 @@ class VikiChannelIE(VikiBaseIE):
             'containers/%s.json' % channel_id, channel_id,
             'Downloading channel JSON')
 
-        titles = channel['titles']
-        title = titles.get('en') or titles[titles.keys()[0]]
+        title = self.dict_selection(channel['titles'], 'en')
 
-        descriptions = channel['descriptions']
-        description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
+        description = self.dict_selection(channel['descriptions'], 'en')
 
         entries = []
         for video_type in ('episodes', 'clips', 'movies'):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 462d244d8..229fc3a0f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -983,7 +983,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         video_id, note=False,
                         errnote='unable to download video info webpage')
                     get_video_info = compat_parse_qs(video_info_webpage)
-                    add_dash_mpd(get_video_info)
+                    if get_video_info.get('use_cipher_signature') != ['True']:
+                        add_dash_mpd(get_video_info)
                     if not video_info:
                         video_info = get_video_info
                     if 'token' in get_video_info:
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 85365d769..9016e3498 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -219,7 +219,7 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--playlist-items',
         dest='playlist_items', metavar='ITEM_SPEC', default=None,
-        help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+        help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 942f76d24..ae813099d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1309,10 +1309,10 @@ def parse_duration(s):
     m = re.match(
         r'''(?ix)(?:P?T)?
         (?:
-            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
             (?P<only_hours>[0-9.]+)\s*(?:hours?)|
-            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
+            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
             (?:
                 (?:
                     (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 3ad7a2bc0..280afdd7f 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.07.18'
+__version__ = '2015.07.21'
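Note: the parse_duration() tweak in youtube_dl/utils.py ('mins?\.?') is what makes the new '87 Min.' assertion in test/test_utils.py pass. A quick usage sketch, assuming a youtube-dl checkout on the import path:

    from youtube_dl.utils import parse_duration

    # The optional trailing dot after 'min' now matches German-style durations.
    assert parse_duration('87 Min.') == 5220
    # Existing notations keep working.
    assert parse_duration('02:03:04') == 7384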
