diff options
| -rw-r--r-- | docs/supportedsites.md | 4 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/animeondemand.py | 160 | ||||
| -rw-r--r-- | youtube_dl/extractor/cbc.py | 113 | ||||
| -rw-r--r-- | youtube_dl/extractor/comcarcoff.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/comedycentral.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/crackle.py | 95 | ||||
| -rw-r--r-- | youtube_dl/extractor/fox.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/hotstar.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/nbc.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 61 | ||||
| -rw-r--r-- | youtube_dl/extractor/plays.py | 51 | ||||
| -rw-r--r-- | youtube_dl/extractor/theplatform.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/viddler.py | 50 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youku.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 20 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
22 files changed, 591 insertions, 71 deletions
| diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c48561eff..b384a3165 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,6 +89,8 @@   - **canalc2.tv**   - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv   - **Canvas** + - **CBC** + - **CBCPlayer**   - **CBS**   - **CBSNews**: CBS News   - **CBSNewsLiveVideo**: CBS News Live Videos @@ -120,6 +122,7 @@   - **ComedyCentralShows**: The Daily Show / The Colbert Report   - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED   - **Cracked** + - **Crackle**   - **Criterion**   - **CrooksAndLiars**   - **Crunchyroll** @@ -445,6 +448,7 @@   - **PlanetaPlay**   - **play.fm**   - **played.to** + - **PlaysTV**   - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz   - **Playvid**   - **Playwire** diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 50228bb32..0072c7d35 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1288,6 +1288,9 @@ class YoutubeDL(object):              if format.get('format_id') is None:                  format['format_id'] = compat_str(i) +            else: +                # Sanitize format_id from characters used in format selector expression +                format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])              format_id = format['format_id']              if format_id not in formats_dict:                  formats_dict[format_id] = [] @@ -1338,7 +1341,6 @@ class YoutubeDL(object):          if req_format is None:              req_format_list = []              if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and -                    info_dict['extractor'] in ['youtube', 'ted'] and                      not info_dict.get('is_live')):                  merger = FFmpegMergerPP(self)                  if merger.available and merger.can_merge(): @@ -1795,7 +1797,7 @@ class YoutubeDL(object):              else:                  res = '%sp' % format['height']          elif format.get('width') is not None: -            res = '?x%d' % format['width'] +            res = '%dx?' % format['width']          else:              res = default          return res diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 18951c287..6937f28d3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -20,6 +20,7 @@ from .aftonbladet import AftonbladetIE  from .airmozilla import AirMozillaIE  from .aljazeera import AlJazeeraIE  from .alphaporno import AlphaPornoIE +from .animeondemand import AnimeOnDemandIE  from .anitube import AnitubeIE  from .anysex import AnySexIE  from .aol import AolIE @@ -89,6 +90,10 @@ from .camdemy import (  from .canalplus import CanalplusIE  from .canalc2 import Canalc2IE  from .canvas import CanvasIE +from .cbc import ( +    CBCIE, +    CBCPlayerIE, +)  from .cbs import CBSIE  from .cbsnews import (      CBSNewsIE, @@ -126,6 +131,7 @@ from .comcarcoff import ComCarCoffIE  from .commonmistakes import CommonMistakesIE, UnicodeBOMIE  from .condenast import CondeNastIE  from .cracked import CrackedIE +from .crackle import CrackleIE  from .criterion import CriterionIE  from .crooksandliars import CrooksAndLiarsIE  from .crunchyroll import ( @@ -533,6 +539,7 @@ from .planetaplay import PlanetaPlayIE  from .pladform import PladformIE  from .played import PlayedIE  from .playfm import PlayFMIE +from .plays import PlaysTVIE  from .playtvak import PlaytvakIE  from .playvid import PlayvidIE  from .playwire import PlaywireIE diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py new file mode 100644 index 000000000..a7d8daf7b --- /dev/null +++ b/youtube_dl/extractor/animeondemand.py @@ -0,0 +1,160 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( +    determine_ext, +    encode_dict, +    ExtractorError, +    sanitized_Request, +    urlencode_postdata, +) + + +class AnimeOnDemandIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)' +    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in' +    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' +    _NETRC_MACHINE = 'animeondemand' +    _TEST = { +        'url': 'https://www.anime-on-demand.de/anime/161', +        'info_dict': { +            'id': '161', +            'title': 'Grimgar, Ashes and Illusions (OmU)', +            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31', +        }, +        'playlist_mincount': 4, +    } + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return + +        login_page = self._download_webpage( +            self._LOGIN_URL, None, 'Downloading login page') + +        login_form = self._form_hidden_inputs('new_user', login_page) + +        login_form.update({ +            'user[login]': username, +            'user[password]': password, +        }) + +        post_url = self._search_regex( +            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, +            'post url', default=self._LOGIN_URL, group='url') + +        if not post_url.startswith('http'): +            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + +        request = sanitized_Request( +            post_url, urlencode_postdata(encode_dict(login_form))) +        request.add_header('Referer', self._LOGIN_URL) + +        response = self._download_webpage( +            request, None, 'Logging in as %s' % username) + +        if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')): +            error = self._search_regex( +                r'<p class="alert alert-danger">(.+?)</p>', +                response, 'error', default=None) +            if error: +                raise ExtractorError('Unable to login: %s' % error, expected=True) +            raise ExtractorError('Unable to log in') + +    def _real_initialize(self): +        self._login() + +    def _real_extract(self, url): +        anime_id = self._match_id(url) + +        webpage = self._download_webpage(url, anime_id) + +        if 'data-playlist=' not in webpage: +            self._download_webpage( +                self._APPLY_HTML5_URL, anime_id, +                'Activating HTML5 beta', 'Unable to apply HTML5 beta') +            webpage = self._download_webpage(url, anime_id) + +        csrf_token = self._html_search_meta( +            'csrf-token', webpage, 'csrf token', fatal=True) + +        anime_title = self._html_search_regex( +            r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>', +            webpage, 'anime name') +        anime_description = self._html_search_regex( +            r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>', +            webpage, 'anime description', default=None) + +        entries = [] + +        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage): +            m = re.search( +                r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html) +            if not m: +                continue + +            episode_number = int(m.group('number')) +            episode_title = m.group('title') +            video_id = 'episode-%d' % episode_number + +            common_info = { +                'id': video_id, +                'series': anime_title, +                'episode': episode_title, +                'episode_number': episode_number, +            } + +            formats = [] + +            playlist_url = self._search_regex( +                r'data-playlist=(["\'])(?P<url>.+?)\1', +                episode_html, 'data playlist', default=None, group='url') +            if playlist_url: +                request = sanitized_Request( +                    compat_urlparse.urljoin(url, playlist_url), +                    headers={ +                        'X-Requested-With': 'XMLHttpRequest', +                        'X-CSRF-Token': csrf_token, +                        'Referer': url, +                        'Accept': 'application/json, text/javascript, */*; q=0.01', +                    }) + +                playlist = self._download_json( +                    request, video_id, 'Downloading playlist JSON', fatal=False) +                if playlist: +                    playlist = playlist['playlist'][0] +                    title = playlist['title'] +                    description = playlist.get('description') +                    for source in playlist.get('sources', []): +                        file_ = source.get('file') +                        if file_ and determine_ext(file_) == 'm3u8': +                            formats = self._extract_m3u8_formats( +                                file_, video_id, 'mp4', +                                entry_protocol='m3u8_native', m3u8_id='hls') + +            if formats: +                f = common_info.copy() +                f.update({ +                    'title': title, +                    'description': description, +                    'formats': formats, +                }) +                entries.append(f) + +            m = re.search( +                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', +                episode_html) +            if m: +                f = common_info.copy() +                f.update({ +                    'id': '%s-teaser' % f['id'], +                    'title': m.group('title'), +                    'url': compat_urlparse.urljoin(url, m.group('href')), +                }) +                entries.append(f) + +        return self.playlist_result(entries, anime_id, anime_title, anime_description) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py new file mode 100644 index 000000000..d8aa31038 --- /dev/null +++ b/youtube_dl/extractor/cbc.py @@ -0,0 +1,113 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import js_to_json + + +class CBCIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)' +    _TESTS = [{ +        # with mediaId +        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', +        'info_dict': { +            'id': '2682904050', +            'ext': 'flv', +            'title': 'Don Cherry – All-Stars', +            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', +            'timestamp': 1454475540, +            'upload_date': '20160203', +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    }, { +        # with clipId +        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', +        'info_dict': { +            'id': '2487345465', +            'ext': 'flv', +            'title': 'Robin Williams freestyles on 90 Minutes Live', +            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', +            'upload_date': '19700101', +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    }, { +        # multiple iframes +        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', +        'playlist': [{ +            'info_dict': { +                'id': '2680832926', +                'ext': 'flv', +                'title': 'An Eagle\'s-Eye View Off Burrard Bridge', +                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', +                'upload_date': '19700101', +            }, +        }, { +            'info_dict': { +                'id': '2658915080', +                'ext': 'flv', +                'title': 'Fly like an eagle!', +                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', +                'upload_date': '19700101', +            }, +        }], +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    }] + +    @classmethod +    def suitable(cls, url): +        return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) +        player_init = self._search_regex( +            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init', +            default=None) +        if player_init: +            player_info = self._parse_json(player_init, display_id, js_to_json) +            media_id = player_info.get('mediaId') +            if not media_id: +                clip_id = player_info['clipId'] +                media_id = self._download_json( +                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, +                    clip_id)['entries'][0]['id'].split('/')[-1] +            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) +        else: +            entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)] +            return self.playlist_result(entries) + + +class CBCPlayerIE(InfoExtractor): +    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' +    _TEST = { +        'url': 'http://www.cbc.ca/player/play/2683190193', +        'info_dict': { +            'id': '2683190193', +            'ext': 'flv', +            'title': 'Gerry Runs a Sweat Shop', +            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0', +            'timestamp': 1455067800, +            'upload_date': '20160210', +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        return self.url_result( +            'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id, +            'ThePlatformFeed', video_id) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 2efa200b5..7dff68492 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -2,6 +2,7 @@  from __future__ import unicode_literals  from .common import InfoExtractor +from ..compat import compat_str  from ..utils import (      int_or_none,      parse_duration, @@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):      _TESTS = [{          'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',          'info_dict': { -            'id': 'miranda-sings-happy-thanksgiving-miranda', +            'id': '2494164',              'ext': 'mp4',              'upload_date': '20141127',              'timestamp': 1417107600,              'duration': 1232,              'title': 'Happy Thanksgiving Miranda',              'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', -            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',          },          'params': {              'skip_download': 'requires ffmpeg', @@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):                  r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),              display_id)['videoData'] -        video_id = full_data['activeVideo']['video'] -        video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id] +        display_id = full_data['activeVideo']['video'] +        video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] +        video_id = compat_str(video_data['mediaId'])          thumbnails = [{              'url': video_data['images']['thumb'],          }, {              'url': video_data['images']['poster'],          }] -        formats = self._extract_m3u8_formats( -            video_data['mediaUrl'], video_id, ext='mp4')          timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(              video_data.get('pubDate')) @@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):              video_data.get('duration'))          return { +            '_type': 'url_transparent', +            'url': 'crackle:%s' % video_id,              'id': video_id,              'display_id': display_id,              'title': video_data['title'], @@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):              'timestamp': timestamp,              'duration': duration,              'thumbnails': thumbnails, -            'formats': formats, +            'season_number': int_or_none(video_data.get('season')), +            'episode_number': int_or_none(video_data.get('episode')),              'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),          } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 3e4bd10b6..055c9eec5 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -16,11 +16,11 @@ from ..utils import (  class ComedyCentralIE(MTVServicesInfoExtractor):      _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ -        (video-clips|episodes|cc-studios|video-collections|full-episodes) +        (video-clips|episodes|cc-studios|video-collections|full-episodes|shows)          /(?P<title>.*)'''      _FEED_URL = 'http://comedycentral.com/feeds/mrss/' -    _TEST = { +    _TESTS = [{          'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',          'md5': 'c4f48e9eda1b16dd10add0744344b6d8',          'info_dict': { @@ -29,7 +29,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):              'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',              'description': 'After a certain point, breastfeeding becomes c**kblocking.',          }, -    } +    }, { +        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview', +        'only_matching': True, +    }]  class ComedyCentralShowsIE(MTVServicesInfoExtractor): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1143f6dbb..444d412d9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1186,12 +1186,13 @@ class InfoExtractor(object):          http_count = 0          m3u8_count = 0 -        src_urls = [] +        srcs = []          videos = smil.findall(self._xpath_ns('.//video', namespace))          for video in videos:              src = video.get('src') -            if not src: +            if not src or src in srcs:                  continue +            srcs.append(src)              bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)              filesize = int_or_none(video.get('size') or video.get('fileSize')) @@ -1223,9 +1224,7 @@ class InfoExtractor(object):                  continue              src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) -            if src_url in src_urls: -                continue -            src_urls.append(src_url) +            src_url = src_url.strip()              if proto == 'm3u8' or src_ext == 'm3u8':                  m3u8_formats = self._extract_m3u8_formats( @@ -1436,12 +1435,16 @@ class InfoExtractor(object):                                  base_url = base_url_e.text + base_url                                  if re.match(r'^https?://', base_url):                                      break -                        if not re.match(r'^https?://', base_url): +                        if mpd_base_url and not re.match(r'^https?://', base_url): +                            if not mpd_base_url.endswith('/') and not base_url.startswith('/'): +                                mpd_base_url += '/'                              base_url = mpd_base_url + base_url                          representation_id = representation_attrib.get('id')                          lang = representation_attrib.get('lang') +                        url_el = representation.find(_add_ns('BaseURL')) +                        filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)                          f = { -                            'format_id': mpd_id or representation_id, +                            'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,                              'url': base_url,                              'width': int_or_none(representation_attrib.get('width')),                              'height': int_or_none(representation_attrib.get('height')), @@ -1452,6 +1455,7 @@ class InfoExtractor(object):                              'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),                              'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,                              'format_note': 'DASH %s' % content_type, +                            'filesize': filesize,                          }                          representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)                          if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py new file mode 100644 index 000000000..79238cce7 --- /dev/null +++ b/youtube_dl/extractor/crackle.py @@ -0,0 +1,95 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class CrackleIE(InfoExtractor): +    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' +    _TEST = { +        'url': 'http://www.crackle.com/the-art-of-more/2496419', +        'info_dict': { +            'id': '2496419', +            'ext': 'mp4', +            'title': 'Heavy Lies the Head', +            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca', +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        } +    } + +    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx +    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com' +    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b' +    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' + +    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx +    _MEDIA_FILE_SLOTS = { +        'c544.flv': { +            'width': 544, +            'height': 306, +        }, +        '360p.mp4': { +            'width': 640, +            'height': 360, +        }, +        '480p.mp4': { +            'width': 852, +            'height': 478, +        }, +        '480p_1mbps.mp4': { +            'width': 852, +            'height': 478, +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        item = self._download_xml( +            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, +            video_id).find('i') +        title = item.attrib['t'] + +        thumbnail = None +        subtitles = {} +        formats = self._extract_m3u8_formats( +            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), +            video_id, 'mp4', m3u8_id='hls', fatal=None) +        path = item.attrib.get('p') +        if path: +            thumbnail = self._THUMBNAIL_TEMPLATE % path +            http_base_url = 'http://ahttp.crackle.com/' + path +            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): +                formats.append({ +                    'url': http_base_url + mfs_path, +                    'format_id': 'http-' + mfs_path.split('.')[0], +                    'width': mfs_info['width'], +                    'height': mfs_info['height'], +                }) +            for cc in item.findall('cc'): +                locale = cc.attrib.get('l') +                v = cc.attrib.get('v') +                if locale and v: +                    if locale not in subtitles: +                        subtitles[locale] = [] +                    subtitles[locale] = [{ +                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v), +                        'ext': 'ttml', +                    }] +        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + +        return { +            'id': video_id, +            'title': title, +            'description': item.attrib.get('d'), +            'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None, +            'series': item.attrib.get('sn'), +            'season_number': int_or_none(item.attrib.get('se')), +            'episode_number': int_or_none(item.attrib.get('ep')), +            'thumbnail': thumbnail, +            'subtitles': subtitles, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 4a8acd53d..fa05af50d 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'      _TEST = {          'url': 'http://www.fox.com/watch/255180355939/7684182528', +        'md5': 'ebd296fcc41dd4b19f8115d8461a3165',          'info_dict': {              'id': '255180355939',              'ext': 'mp4', @@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):              'duration': 129,          },          'add_ie': ['ThePlatform'], -        'params': { -            # m3u8 download -            'skip_download': True, -        },      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bf61ab2e7..45adbb7a3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, +        # MPD from http://dash-mse-test.appspot.com/media.html +        { +            'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd', +            'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53', +            'info_dict': { +                'id': 'car-20120827-manifest', +                'ext': 'mp4', +                'title': 'car-20120827-manifest', +                'formats': 'mincount:9', +            }, +            'params': { +                'format': 'bestvideo', +            }, +        },          # google redirect          {              'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -1302,7 +1316,8 @@ class GenericIE(InfoExtractor):                  return {                      'id': video_id,                      'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), -                    'formats': self._parse_mpd_formats(doc, video_id), +                    'formats': self._parse_mpd_formats( +                        doc, video_id, mpd_base_url=url.rpartition('/')[0]),                  }          except compat_xml_parse_error:              pass @@ -1413,7 +1428,7 @@ class GenericIE(InfoExtractor):          # Look for embedded Dailymotion player          matches = re.findall( -            r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) +            r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)          if matches:              return _playlist_from_matches(                  matches, lambda m: unescapeHTML(m[1])) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index a7c3ce4ab..f05d765d6 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -10,8 +10,8 @@ from ..utils import (  class HotStarIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' +    _TESTS = [{          'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',          'info_dict': {              'id': '1000076273', @@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):              # m3u8 download              'skip_download': True,          } -    } +    }, { +        'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583', +        'only_matching': True, +    }, { +        'url': 'http://www.hotstar.com/1000000515', +        'only_matching': True, +    }]      _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'      _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 18d01f423..2202cfa33 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -57,7 +57,7 @@ class NBCIE(InfoExtractor):          {              # This video has expired but with an escaped embedURL              'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', -            'skip': 'Expired' +            'only_matching': True,          }      ] diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 97e8ffc97..cca012953 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_HTTPError  from ..utils import (      ExtractorError,      determine_ext,      int_or_none, +    js_to_json,      strip_jsonp,      unified_strdate,      US_RATINGS, @@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):                  'id': '2365006249',                  'ext': 'mp4',                  'title': 'Constitution USA with Peter Sagal - A More Perfect Union', -                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', +                'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',                  'duration': 3190,              },              'params': { @@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):                  'id': '2365297690',                  'ext': 'mp4',                  'title': 'FRONTLINE - Losing Iraq', -                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', +                'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',                  'duration': 5050,              },              'params': { @@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):                  'id': '2201174722',                  'ext': 'mp4',                  'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', -                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28', +                'description': 'md5:95a19f568689d09a166dff9edada3301',                  'duration': 801,              },          }, @@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):              'info_dict': {                  'id': '2365297708',                  'ext': 'mp4', -                'description': 'md5:68d87ef760660eb564455eb30ca464fe',                  'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full', +                'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',                  'duration': 6559,                  'thumbnail': 're:^https?://.*\.jpg$',              }, @@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):                  'display_id': 'player',                  'ext': 'mp4',                  'title': 'American Experience - Death and the Civil War, Chapter 1', -                'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.', +                'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',                  'duration': 682,                  'thumbnail': 're:^https?://.*\.jpg$',              }, @@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):              },          },          { -            'url': 'http://video.pbs.org/video/2365367186/', +            'url': 'http://www.pbs.org/video/2365245528/',              'info_dict': { -                'id': '2365367186', -                'display_id': '2365367186', +                'id': '2365245528', +                'display_id': '2365245528',                  'ext': 'mp4', -                'title': 'To Catch A Comet - Full Episode', -                'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.', -                'duration': 3342, +                'title': 'FRONTLINE - United States of Secrets (Part One)', +                'description': 'md5:55756bd5c551519cc4b7703e373e217e', +                'duration': 6851,                  'thumbnail': 're:^https?://.*\.jpg$',              },              'params': {                  'skip_download': True,  # requires ffmpeg              }, -            'skip': 'Expired',          },          {              # Video embedded in iframe containing angle brackets as attribute's value (e.g. @@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):                  'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',                  'ext': 'mp4',                  'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", -                'description': 'md5:61db2ddf27c9912f09c241014b118ed1', +                'description': 'md5:54033c6baa1f9623607c6e2ed245888b',                  'duration': 1480,                  'thumbnail': 're:^https?://.*\.jpg$',              }, @@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):                  'display_id': 'the-atomic-artists',                  'ext': 'mp4',                  'title': 'FRONTLINE - The Atomic Artists', -                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', +                'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',                  'duration': 723,                  'thumbnail': 're:^https?://.*\.jpg$',              }, @@ -365,10 +366,14 @@ class PBSIE(InfoExtractor):                  webpage, 'upload date', default=None))              # tabbed frontline videos -            tabbed_videos = re.findall( -                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage) -            if tabbed_videos: -                return tabbed_videos, presumptive_id, upload_date +            MULTI_PART_REGEXES = ( +                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', +                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', +            ) +            for p in MULTI_PART_REGEXES: +                tabbed_videos = re.findall(p, webpage) +                if tabbed_videos: +                    return tabbed_videos, presumptive_id, upload_date              MEDIA_ID_REGEXES = [                  r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed @@ -432,9 +437,21 @@ class PBSIE(InfoExtractor):                  for vid_id in video_id]              return self.playlist_result(entries, display_id) -        info = self._download_json( -            'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, -            display_id) +        try: +            info = self._download_json( +                'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, +                display_id, 'Downloading video info JSON') +        except ExtractorError as e: +            if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404: +                raise +            # videoInfo API may not work for some videos, fallback to portalplayer API +            player = self._download_webpage( +                'http://player.pbs.org/portalplayer/%s' % video_id, display_id) +            info = self._parse_json( +                self._search_regex( +                    r'(?s)PBS\.videoData\s*=\s*({.+?});\n', +                    player, 'video data', default='{}'), +                display_id, transform_source=js_to_json, fatal=False)          formats = []          for encoding_name in ('recommended_encoding', 'alternate_encoding'): @@ -493,7 +510,7 @@ class PBSIE(InfoExtractor):              'id': video_id,              'display_id': display_id,              'title': info['title'], -            'description': info['program'].get('description'), +            'description': info.get('description') or info.get('program', {}).get('description'),              'thumbnail': info.get('image_url'),              'duration': int_or_none(info.get('duration')),              'age_limit': age_limit, diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py new file mode 100644 index 000000000..c3c38cf4a --- /dev/null +++ b/youtube_dl/extractor/plays.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PlaysTVIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})' +    _TEST = { +        'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', +        'md5': 'dfeac1198506652b5257a62762cec7bc', +        'info_dict': { +            'id': '56af17f56c95335490', +            'ext': 'mp4', +            'title': 'When you outplay the Azir wall', +            'description': 'Posted by Bjergsen', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        title = self._og_search_title(webpage) +        content = self._parse_json( +            self._search_regex( +                r'R\.bindContent\(({.+?})\);', webpage, +                'content'), video_id)['content'] +        mpd_url, sources = re.search( +            r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', +            content).groups() +        formats = self._extract_mpd_formats( +            self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') +        for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): +            formats.append({ +                'url': self._proto_relative_url(format_url), +                'format_id': 'http-' + format_id, +                'height': int_or_none(height), +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': self._og_search_description(webpage), +            'thumbnail': self._og_search_thumbnail(webpage), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 10f2cad55..755f816ff 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -20,7 +20,6 @@ from ..utils import (      int_or_none,      sanitized_Request,      unsmuggle_url, -    url_basename,      xpath_with_ns,  ) @@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):          first_video_id = None          duration = None          for item in entry['media$content']: -            smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M' -            cur_video_id = url_basename(smil_url) +            smil_url = item['plfile$url'] + '&format=SMIL&mbr=true' +            cur_video_id = ThePlatformIE._match_id(smil_url)              if first_video_id is None:                  first_video_id = cur_video_id                  duration = float_or_none(item.get('plfile$duration')) diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 40ffbad2a..6bfbd4d85 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -1,6 +1,10 @@  from __future__ import unicode_literals  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_parse, +    compat_urlparse, +)  from ..utils import (      float_or_none,      int_or_none, @@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'      _TESTS = [{          'url': 'http://www.viddler.com/v/43903784', -        'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4', +        'md5': '9eee21161d2c7f5b39690c3e325fab2f',          'info_dict': {              'id': '43903784', -            'ext': 'mp4', +            'ext': 'mov',              'title': 'Video Made Easy',              'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',              'uploader': 'viddler', @@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):          }      }, {          'url': 'http://www.viddler.com/v/4d03aad9/', -        'md5': 'faa71fbf70c0bee7ab93076fd007f4b0', +        'md5': 'f12c5a7fa839c47a79363bfdf69404fb',          'info_dict': {              'id': '4d03aad9', -            'ext': 'mp4', +            'ext': 'ts',              'title': 'WALL-TO-GORTAT',              'upload_date': '20150126',              'uploader': 'deadspin', @@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):          }      }, {          'url': 'http://www.viddler.com/player/221ebbbd/0/', -        'md5': '0defa2bd0ea613d14a6e9bd1db6be326', +        'md5': '740511f61d3d1bb71dc14a0fe01a1c10',          'info_dict': {              'id': '221ebbbd', -            'ext': 'mp4', +            'ext': 'mov',              'title': 'LETeens-Grammar-snack-third-conditional',              'description': ' ',              'upload_date': '20140929', @@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):              'view_count': int,              'comment_count': int,          } +    }, { +        # secret protected +        'url': 'http://www.viddler.com/v/890c0985?secret=34051570', +        'info_dict': { +            'id': '890c0985', +            'ext': 'mp4', +            'title': 'Complete Property Training - Traineeships', +            'description': ' ', +            'upload_date': '20130606', +            'uploader': 'TiffanyBowtell', +            'timestamp': 1370496993, +            'view_count': int, +            'comment_count': int, +        }, +        'params': { +            'skip_download': True, +        },      }]      def _real_extract(self, url):          video_id = self._match_id(url) -        json_url = ( -            'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' % -            video_id) +        query = { +            'video_id': video_id, +            'key': 'v0vhrt7bg2xq1vyxhkct', +        } + +        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +        secret = qs.get('secret', [None])[0] +        if secret: +            query['secret'] = secret +          headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} -        request = sanitized_Request(json_url, None, headers) +        request = sanitized_Request( +            'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s' +            % compat_urllib_parse.urlencode(query), None, headers)          data = self._download_json(request, video_id)['video']          formats = [] diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2389e7f0f..6a8f9b49d 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):      def _extract_xsrft_and_vuid(self, webpage):          xsrft = self._search_regex( -            r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', +            r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',              webpage, 'login token', group='xsrft')          vuid = self._search_regex(              r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1', diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index f767fa15f..49687371a 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):              if error_note is not None and '因版权原因无法观看此视频' in error_note:                  raise ExtractorError(                      'Youku said: Sorry, this video is available in China only', expected=True) +            elif error_note and '该视频被设为私密' in error_note: +                raise ExtractorError( +                    'Youku said: Sorry, this video is private', expected=True)              else:                  msg = 'Youku server reported error %i' % error.get('code')                  if error_note is not None: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b9a91dea2..18f7d37f4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -375,7 +375,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):      IE_NAME = 'youtube'      _TESTS = [          { -            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9', +            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',              'info_dict': {                  'id': 'BaW_jenozKc',                  'ext': 'mp4', @@ -441,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              }          },          { -            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY', +            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',              'note': 'Use the first video ID in the URL',              'info_dict': {                  'id': 'BaW_jenozKc', @@ -705,6 +705,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              },          },          { +            # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536) +            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', +            'info_dict': { +                'id': 'gVfLd0zydlo', +                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', +            }, +            'playlist_count': 2, +        }, +        {              'url': 'http://vid.plus/FlRa-iH7PGw',              'only_matching': True,          }, @@ -1196,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              if not self._downloader.params.get('noplaylist'):                  entries = []                  feed_ids = [] -                multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0]) +                multifeed_metadata_list = video_info['multifeed_metadata_list'][0]                  for feed in multifeed_metadata_list.split(','): -                    feed_data = compat_parse_qs(feed) +                    # Unquote should take place before split on comma (,) since textual +                    # fields may contain comma as well (see +                    # https://github.com/rg3/youtube-dl/issues/8536) +                    feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))                      entries.append({                          '_type': 'url_transparent',                          'ie_key': 'Youtube', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dce67de8a..a82a262a0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -56,7 +56,7 @@ from .compat import (  compiled_regex_type = type(re.compile(''))  std_headers = { -    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', +    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',      'Accept-Encoding': 'gzip, deflate', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 53b210290..9aca8001a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2016.02.09' +__version__ = '2016.02.13' | 
