diff options
Diffstat (limited to 'youtube_dl')
33 files changed, 145 insertions, 91 deletions
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index e13cf547d..e78169a0d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -29,7 +29,17 @@ class ExternalFD(FileDownloader): self.report_destination(filename) tmpfilename = self.temp_name(filename) - retval = self._call_downloader(tmpfilename, info_dict) + try: + retval = self._call_downloader(tmpfilename, info_dict) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + # Live stream downloading cancellation should be considered as + # correct and expected termination thus all postprocessing + # should take place + retval = 0 + self.to_screen('[%s] Interrupted by user' % self.get_basename()) + if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index bb9e82578..d529ae09a 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -155,8 +155,6 @@ class FragmentFD(FileDownloader): self._write_ytdl_file(ctx) if ctx['fragment_index'] > 0: assert resume_len > 0 - else: - assert resume_len == 0 dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 100cf997f..7da96c65c 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1308,6 +1308,12 @@ class AdobePassIE(InfoExtractor): _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' _MVPD_CACHE = 'ap-mvpd' + def _download_webpage_handle(self, *args, **kwargs): + headers = kwargs.get('headers', {}) + headers.update(self.geo_verification_headers()) + kwargs['headers'] = headers + return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs) + @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): channel = etree.Element('channel') diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index c01c67303..2dcdba9d2 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -101,10 +101,14 @@ class AENetworksIE(AENetworksBaseIE): for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): entries.append(self.url_result( compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) - return self.playlist_result( - entries, self._html_search_meta('aetn:SeriesId', webpage), - self._html_search_meta('aetn:SeriesTitle', webpage)) - elif url_parts_len == 2: + if entries: + return self.playlist_result( + entries, self._html_search_meta('aetn:SeriesId', webpage), + self._html_search_meta('aetn:SeriesTitle', webpage)) + else: + # single season + url_parts_len = 2 + if url_parts_len == 2: entries = [] for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage): episode_attributes = extract_attributes(episode_item) @@ -112,7 +116,7 @@ class AENetworksIE(AENetworksBaseIE): url, episode_attributes['data-canonical']) entries.append(self.url_result( episode_url, 'AENetworks', - episode_attributes['data-videoid'])) + episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id'))) return self.playlist_result( entries, self._html_search_meta('aetn:SeasonId', webpage)) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 78d29c861..c8cb91dcb 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -207,11 +207,10 @@ class AfreecaTVIE(InfoExtractor): file_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', note='Downloading part %d m3u8 information' % file_num) - title = title if one else '%s (part %d)' % (title, file_num) file_info = common_entry.copy() file_info.update({ 'id': format_id, - 'title': title, + 'title': title if one else '%s (part %d)' % (title, file_num), 'upload_date': upload_date, 'duration': file_duration, 'formats': formats, diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 623f44dce..9fd91c2f6 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -178,12 +178,7 @@ class AnvatoIE(InfoExtractor): } if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'): - # Not using _extract_m3u8_formats here as individual media - # playlists are also included in published_urls. - if tbr is None: - formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls')) - continue - else: + if tbr is not None: a_format.update({ 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), 'ext': 'mp4', diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py index ea7a70393..a84b8b1eb 100644 --- a/youtube_dl/extractor/appleconnect.py +++ b/youtube_dl/extractor/appleconnect.py @@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor): _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)' _TEST = { 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', - 'md5': '10d0f2799111df4cb1c924520ca78f98', + 'md5': 'e7c38568a01ea45402570e6029206723', 'info_dict': { 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'ext': 'm4v', 'title': 'Energy', 'uploader': 'Drake', - 'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20150710', 'timestamp': 1436545535, }, diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index e21045bed..3c7d7250b 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor): } }, { 'url': 'https://archive.org/details/Cops1922', - 'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba', + 'md5': '0869000b4ce265e8ca62738b336b268a', 'info_dict': { 'id': 'Cops1922', 'ext': 'mp4', 'title': 'Buster Keaton\'s "Cops" (1922)', - 'description': 'md5:b4544662605877edd99df22f9620d858', + 'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6', } }, { 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 99af6dc5a..01fa308ff 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor): }, { 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', - 'md5': '0d0e918533bbd4b263f2de4d197d4aac', + 'md5': '6e52cbb513c405e403dbacb7aacf8747', 'info_dict': { 'id': 'capitulo-112-david-bustamante', 'ext': 'flv', diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index 8fc5f65c6..e48bb8972 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor): 'title': '3/09/2016 Czaban Hour 3', 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans', 'duration': 2245.72, - 'uploader': 'Steve Czaban', + 'uploader': 'SB Nation A.M.', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', } }, { diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 056e06376..df2972f26 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -34,12 +34,12 @@ class BandcampIE(InfoExtractor): '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', - 'md5': '73d0b3171568232574e45652f8720b5c', + 'md5': '0369ace6b939f0927e62c67a1a8d9fa7', 'info_dict': { 'id': '2650410135', - 'ext': 'mp3', - 'title': 'Lanius (Battle)', - 'uploader': 'Ben Prunty Music', + 'ext': 'aiff', + 'title': 'Ben Prunty - Lanius (Battle)', + 'uploader': 'Ben Prunty', }, }] diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index b0b7914d8..d5c5822f2 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -16,7 +16,7 @@ class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' _TEST = { 'url': 'http://beeg.com/5416503', - 'md5': '46c384def73b33dbc581262e5ee67cef', + 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { 'id': '5416503', 'ext': 'mp4', diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index 7a8e1f60b..e829974ff 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor): 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', 'timestamp': 1446839961, 'uploader': 'Sean Fay', - 'description': 'md5:825e94e0f3521df52fa83b2ed198fa20', + 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757', 'uploader_id': 6466954, 'upload_date': '20151011', }, @@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE): _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' _TESTS = [{ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', - 'md5': '8c2c12e3af7805152675446c905d159b', + 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', 'info_dict': { 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division', 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index ff0aa11b1..2c32b6ae2 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -77,7 +77,7 @@ class BRIE(InfoExtractor): 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3', 'duration': 893, 'uploader': 'Eva Maria Steimle', - 'upload_date': '20140117', + 'upload_date': '20170208', } }, ] diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index f1f128c45..acd87e371 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor): 'md5': '060158428b650f896c542dfbb3d6487f', 'info_dict': { 'id': '12163', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Terrasses du Numérique', 'duration': 122, }, - 'params': { - 'skip_download': True, # Requires rtmpdump - } }, { 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui', 'only_matching': True, diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 8d5f11dd1..7d78e3aae 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -60,8 +60,8 @@ class CBSLocalIE(AnvatoIE): 'title': 'A Very Blue Anniversary', 'description': 'CBS2’s Cindy Hsu has more.', 'thumbnail': 're:^https?://.*', - 'timestamp': 1479962220, - 'upload_date': '20161124', + 'timestamp': int, + 'upload_date': r're:^\d{8}$', 'uploader': 'CBS', 'subtitles': { 'en': 'mincount:5', diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index bb52e0c6f..0920f6219 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -12,7 +12,7 @@ class ClipfishIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/', - 'md5': '720563e467b86374c194bdead08d207d', + 'md5': 'b9a5dc46294154c1193e2d10e0c95693', 'info_dict': { 'id': '4343170', 'ext': 'mp4', diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py index 18c734766..6a41db87c 100644 --- a/youtube_dl/extractor/collegerama.py +++ b/youtube_dl/extractor/collegerama.py @@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.', 'description': '', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', 'duration': 7713.088, 'timestamp': 1413309600, 'upload_date': '20141014', @@ -35,6 +35,7 @@ class CollegeRamaIE(InfoExtractor): 'ext': 'wmv', 'title': '64ste Vakantiecursus: Afvalwater', 'description': 'md5:7fd774865cc69d972f542b157c328305', + 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', 'duration': 10853, 'timestamp': 1326446400, 'upload_date': '20120113', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8b3f04c61..2cb55d6af 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1334,7 +1334,7 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return [] - formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] + formats = [] format_url = lambda u: ( u @@ -1386,6 +1386,7 @@ class InfoExtractor(object): f = { 'format_id': '-'.join(format_id), 'url': format_url(media_url), + 'manifest_url': m3u8_url, 'language': media.get('LANGUAGE'), 'ext': ext, 'protocol': entry_protocol, @@ -1438,7 +1439,7 @@ class InfoExtractor(object): f = { 'format_id': '-'.join(format_id), 'url': manifest_url, - 'manifest_url': manifest_url, + 'manifest_url': m3u8_url, 'tbr': tbr, 'ext': ext, 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')), diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 81c0ce9a3..49409369c 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -58,8 +58,7 @@ class FunnyOrDieIE(InfoExtractor): m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) source_formats = list(filter( - lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', - m3u8_formats)) + lambda f: f.get('vcodec') != 'none', m3u8_formats)) bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] bitrates.sort() diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 682c49e79..00d311158 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -78,8 +78,7 @@ class GameSpotIE(OnceIE): if m3u8_formats: self._sort_formats(m3u8_formats) m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', - m3u8_formats)) + lambda f: f.get('vcodec') != 'none', m3u8_formats)) if len(qualities) == len(m3u8_formats): for q, m3u8_format in zip(qualities, m3u8_formats): f = m3u8_format.copy() diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 4c9be47b4..9c7b1bd37 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,22 +36,26 @@ class GoIE(AdobePassIE): 'requestor_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) + _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) _TESTS = [{ - 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', + 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'info_dict': { - 'id': '0_g86w5onx', + 'id': 'VDKA3807643', 'ext': 'mp4', - 'title': 'Sneak Peek: Language Arts', - 'description': 'md5:7dcdab3b2d17e5217c953256af964e9c', + 'title': 'The Traitor in the White House', + 'description': 'md5:05b009d2d145a1e85d25111bd37222e8', }, 'params': { # m3u8 download 'skip_download': True, }, }, { - 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', - 'only_matching': True, + 'url': 'http://watchdisneyxd.go.com/doraemon', + 'info_dict': { + 'title': 'Doraemon', + 'id': 'SH55574025', + }, + 'playlist_mincount': 51, }, { 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'only_matching': True, @@ -60,19 +64,36 @@ class GoIE(AdobePassIE): 'only_matching': True, }] + def _extract_videos(self, brand, video_id='-1', show_id='-1'): + display_id = video_id if video_id != '-1' else show_id + return self._download_json( + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id), + display_id)['video'] + def _real_extract(self, url): sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + site_info = self._SITE_INFO[sub_domain] + brand = site_info['brand'] if not video_id: webpage = self._download_webpage(url, display_id) video_id = self._search_regex( # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood - r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id') - site_info = self._SITE_INFO[sub_domain] - brand = site_info['brand'] - video_data = self._download_json( - 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), - video_id)['video'][0] + r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None) + if not video_id: + # show extraction works for Disney, DisneyJunior and DisneyXD + # ABC and Freeform has different layout + show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id') + videos = self._extract_videos(brand, show_id=show_id) + show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False) + entries = [] + for video in videos: + entries.append(self.url_result( + video['url'], 'Go', video.get('id'), video.get('title'))) + entries.reverse() + return self.playlist_result(entries, show_id, show_title) + video_data = self._extract_videos(brand, video_id)[0] + video_id = video_data['id'] title = video_data['title'] formats = [] @@ -105,7 +126,7 @@ class GoIE(AdobePassIE): self._initialize_geo_bypass(['US']) entitlement = self._download_json( 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', - video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) + video_id, data=urlencode_postdata(data)) errors = entitlement.get('errors', {}).get('errors', []) if errors: for error in errors: diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py index d3bca6435..b312e77f1 100644 --- a/youtube_dl/extractor/lego.py +++ b/youtube_dl/extractor/lego.py @@ -86,7 +86,7 @@ class LEGOIE(InfoExtractor): formats = self._extract_akamai_formats( '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id) m3u8_formats = list(filter( - lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none', formats)) if len(m3u8_formats) == len(self._BITRATES): self._sort_formats(m3u8_formats) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 3e51b4dd7..6166dc2ad 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -515,7 +515,7 @@ class PBSIE(InfoExtractor): http_url = format_url self._remove_duplicate_formats(formats) m3u8_formats = list(filter( - lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', formats)) if http_url: for m3u8_format in m3u8_formats: diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py index ed38c77eb..e2202d603 100644 --- a/youtube_dl/extractor/r7.py +++ b/youtube_dl/extractor/r7.py @@ -62,8 +62,7 @@ class R7IE(InfoExtractor): # m3u8 format always matches the http format, let's copy metadata from # one to another m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', - formats)) + lambda f: f.get('vcodec') != 'none', formats)) if len(m3u8_formats) == 1: f_copy = m3u8_formats[0].copy() f_copy.update(f) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 1b1afab32..3f3c681ae 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -210,7 +210,7 @@ class TEDIE(InfoExtractor): resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False)) m3u8_formats = list(filter( - lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', formats)) if http_url: for m3u8_format in m3u8_formats: diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 06ea2b40a..c5b3288ad 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -150,8 +150,7 @@ class TVPEmbedIE(InfoExtractor): 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) self._sort_formats(m3u8_formats) m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', - m3u8_formats)) + lambda f: f.get('vcodec') != 'none', m3u8_formats)) formats.extend(m3u8_formats) for i, m3u8_format in enumerate(m3u8_formats, 2): http_url = '%s-%d.mp4' % (video_url_base, i) diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py index b6537141a..ebde6053f 100644 --- a/youtube_dl/extractor/tvplayer.py +++ b/youtube_dl/extractor/tvplayer.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( extract_attributes, + try_get, urlencode_postdata, ExtractorError, ) @@ -34,25 +38,32 @@ class TVPlayerIE(InfoExtractor): webpage, 'channel element')) title = current_channel['data-name'] - resource_id = self._search_regex( - r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id') - platform = self._search_regex( - r'platform\s*=\s*"([^"]+)"', webpage, 'platform') + resource_id = current_channel['data-id'] + token = self._search_regex( - r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null') - validate = self._search_regex( - r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null') + r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage, + 'token', group='token') + + context = self._download_json( + 'https://tvplayer.com/watch/context', display_id, + 'Downloading JSON context', query={ + 'resource': resource_id, + 'nonce': token, + }) + + validate = context['validate'] + platform = try_get( + context, lambda x: x['platform']['key'], compat_str) or 'firefox' try: response = self._download_json( 'http://api.tvplayer.com/api/v2/stream/live', - resource_id, headers={ + display_id, 'Downloading JSON stream', headers={ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', }, data=urlencode_postdata({ + 'id': resource_id, 'service': 1, 'platform': platform, - 'id': resource_id, - 'token': token, 'validate': validate, }))['tvplayer']['response'] except ExtractorError as e: @@ -63,7 +74,7 @@ class TVPlayerIE(InfoExtractor): '%s said: %s' % (self.IE_NAME, response['error']), expected=True) raise - formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4') + formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4') self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index fcf0cb100..d5d5b4c69 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -176,8 +176,7 @@ class ViewsterIE(InfoExtractor): if m3u8_formats: self._sort_formats(m3u8_formats) m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', - m3u8_formats)) + lambda f: f.get('vcodec') != 'none', m3u8_formats)) if len(qualities) == len(m3u8_formats): for q, m3u8_format in zip(qualities, m3u8_formats): f = m3u8_format.copy() diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 5584674a0..bea9b87ad 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, + js_to_json, orderedSet, parse_duration, sanitized_Request, @@ -38,6 +39,22 @@ class XTubeIE(InfoExtractor): 'age_limit': 18, } }, { + # FLV videos with duplicated formats + 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752', + 'md5': 'a406963eb349dd43692ec54631efd88b', + 'info_dict': { + 'id': '9299752', + 'display_id': 'A-Super-Run-Part-1-YT', + 'ext': 'flv', + 'title': 'A Super Run - Part 1 (YT)', + 'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93', + 'uploader': 'tshirtguy59', + 'duration': 579, + 'view_count': int, + 'comment_count': int, + 'age_limit': 18, + }, + }, { # new URL schema 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', 'only_matching': True, @@ -68,8 +85,9 @@ class XTubeIE(InfoExtractor): }) sources = self._parse_json(self._search_regex( - r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),', - webpage, 'sources', group='sources'), video_id) + r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', + webpage, 'sources', group='sources'), video_id, + transform_source=js_to_json) formats = [] for format_id, format_url in sources.items(): @@ -78,6 +96,7 @@ class XTubeIE(InfoExtractor): 'format_id': format_id, 'height': int_or_none(format_id), }) + self._remove_duplicate_formats(formats) self._sort_formats(formats) title = self._search_regex( diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9e2b9115c..480f403da 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -963,7 +963,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( - r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$', + r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', player_url) if not id_m: raise ExtractorError('Cannot identify player %r' % player_url) @@ -1629,7 +1629,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( - [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'], + [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', + r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'], player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 91e235ff2..41bc20544 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -421,8 +421,8 @@ def clean_html(html): # Newline vs <br /> html = html.replace('\n', ' ') - html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) - html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) + html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) + html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) # Strip html tags html = re.sub('<.*?>', '', html) # Replace html entities diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e206501e1..8c77f1905 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.04.26' +__version__ = '2017.04.28' |