Diffstat (limited to 'youtube_dl')
-rw-r--r--  youtube_dl/downloader/external.py       12
-rw-r--r--  youtube_dl/downloader/fragment.py        2
-rw-r--r--  youtube_dl/extractor/adobepass.py        6
-rw-r--r--  youtube_dl/extractor/aenetworks.py      14
-rw-r--r--  youtube_dl/extractor/afreecatv.py        3
-rw-r--r--  youtube_dl/extractor/anvato.py           7
-rw-r--r--  youtube_dl/extractor/appleconnect.py     4
-rw-r--r--  youtube_dl/extractor/archiveorg.py       4
-rw-r--r--  youtube_dl/extractor/atresplayer.py      2
-rw-r--r--  youtube_dl/extractor/audioboom.py        2
-rw-r--r--  youtube_dl/extractor/bandcamp.py         8
-rw-r--r--  youtube_dl/extractor/beeg.py             2
-rw-r--r--  youtube_dl/extractor/bleacherreport.py  10
-rw-r--r--  youtube_dl/extractor/br.py               2
-rw-r--r--  youtube_dl/extractor/canalc2.py          5
-rw-r--r--  youtube_dl/extractor/cbslocal.py         4
-rw-r--r--  youtube_dl/extractor/clipfish.py         2
-rw-r--r--  youtube_dl/extractor/collegerama.py      3
-rw-r--r--  youtube_dl/extractor/common.py           5
-rw-r--r--  youtube_dl/extractor/funnyordie.py       3
-rw-r--r--  youtube_dl/extractor/gamespot.py         3
-rw-r--r--  youtube_dl/extractor/go.py              49
-rw-r--r--  youtube_dl/extractor/lego.py             2
-rw-r--r--  youtube_dl/extractor/pbs.py              2
-rw-r--r--  youtube_dl/extractor/r7.py               3
-rw-r--r--  youtube_dl/extractor/ted.py              2
-rw-r--r--  youtube_dl/extractor/tvp.py              3
-rw-r--r--  youtube_dl/extractor/tvplayer.py        35
-rw-r--r--  youtube_dl/extractor/viewster.py         3
-rw-r--r--  youtube_dl/extractor/xtube.py           23
-rw-r--r--  youtube_dl/extractor/youtube.py          5
-rw-r--r--  youtube_dl/utils.py                      4
-rw-r--r--  youtube_dl/version.py                    2
33 files changed, 145 insertions, 91 deletions
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index e13cf547d..e78169a0d 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -29,7 +29,17 @@ class ExternalFD(FileDownloader):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- retval = self._call_downloader(tmpfilename, info_dict)
+ try:
+ retval = self._call_downloader(tmpfilename, info_dict)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ # Live stream downloading cancellation should be considered as
+ # correct and expected termination thus all postprocessing
+ # should take place
+ retval = 0
+ self.to_screen('[%s] Interrupted by user' % self.get_basename())
+
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index bb9e82578..d529ae09a 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -155,8 +155,6 @@ class FragmentFD(FileDownloader):
self._write_ytdl_file(ctx)
if ctx['fragment_index'] > 0:
assert resume_len > 0
- else:
- assert resume_len == 0
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
index 100cf997f..7da96c65c 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -1308,6 +1308,12 @@ class AdobePassIE(InfoExtractor):
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MVPD_CACHE = 'ap-mvpd'
+ def _download_webpage_handle(self, *args, **kwargs):
+ headers = kwargs.get('headers', {})
+ headers.update(self.geo_verification_headers())
+ kwargs['headers'] = headers
+ return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
+
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):
channel = etree.Element('channel')
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index c01c67303..2dcdba9d2 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -101,10 +101,14 @@ class AENetworksIE(AENetworksBaseIE):
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
entries.append(self.url_result(
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
- return self.playlist_result(
- entries, self._html_search_meta('aetn:SeriesId', webpage),
- self._html_search_meta('aetn:SeriesTitle', webpage))
- elif url_parts_len == 2:
+ if entries:
+ return self.playlist_result(
+ entries, self._html_search_meta('aetn:SeriesId', webpage),
+ self._html_search_meta('aetn:SeriesTitle', webpage))
+ else:
+ # single season
+ url_parts_len = 2
+ if url_parts_len == 2:
entries = []
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
episode_attributes = extract_attributes(episode_item)
@@ -112,7 +116,7 @@ class AENetworksIE(AENetworksBaseIE):
url, episode_attributes['data-canonical'])
entries.append(self.url_result(
episode_url, 'AENetworks',
- episode_attributes['data-videoid']))
+ episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage))
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
index 78d29c861..c8cb91dcb 100644
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -207,11 +207,10 @@ class AfreecaTVIE(InfoExtractor):
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
- title = title if one else '%s (part %d)' % (title, file_num)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
- 'title': title,
+ 'title': title if one else '%s (part %d)' % (title, file_num),
'upload_date': upload_date,
'duration': file_duration,
'formats': formats,
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 623f44dce..9fd91c2f6 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -178,12 +178,7 @@ class AnvatoIE(InfoExtractor):
}
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
- # Not using _extract_m3u8_formats here as individual media
- # playlists are also included in published_urls.
- if tbr is None:
- formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
- continue
- else:
+ if tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py
index ea7a70393..a84b8b1eb 100644
--- a/youtube_dl/extractor/appleconnect.py
+++ b/youtube_dl/extractor/appleconnect.py
@@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
_TEST = {
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
- 'md5': '10d0f2799111df4cb1c924520ca78f98',
+ 'md5': 'e7c38568a01ea45402570e6029206723',
'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v',
'title': 'Energy',
'uploader': 'Drake',
- 'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20150710',
'timestamp': 1436545535,
},
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index e21045bed..3c7d7250b 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor):
}
}, {
'url': 'https://archive.org/details/Cops1922',
- 'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
+ 'md5': '0869000b4ce265e8ca62738b336b268a',
'info_dict': {
'id': 'Cops1922',
'ext': 'mp4',
'title': 'Buster Keaton\'s "Cops" (1922)',
- 'description': 'md5:b4544662605877edd99df22f9620d858',
+ 'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
}
}, {
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index 99af6dc5a..01fa308ff 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):
},
{
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
- 'md5': '0d0e918533bbd4b263f2de4d197d4aac',
+ 'md5': '6e52cbb513c405e403dbacb7aacf8747',
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py
index 8fc5f65c6..e48bb8972 100644
--- a/youtube_dl/extractor/audioboom.py
+++ b/youtube_dl/extractor/audioboom.py
@@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):
'title': '3/09/2016 Czaban Hour 3',
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
'duration': 2245.72,
- 'uploader': 'Steve Czaban',
+ 'uploader': 'SB Nation A.M.',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
}
}, {
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 056e06376..df2972f26 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -34,12 +34,12 @@ class BandcampIE(InfoExtractor):
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
- 'md5': '73d0b3171568232574e45652f8720b5c',
+ 'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
'info_dict': {
'id': '2650410135',
- 'ext': 'mp3',
- 'title': 'Lanius (Battle)',
- 'uploader': 'Ben Prunty Music',
+ 'ext': 'aiff',
+ 'title': 'Ben Prunty - Lanius (Battle)',
+ 'uploader': 'Ben Prunty',
},
}]
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index b0b7914d8..d5c5822f2 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -16,7 +16,7 @@ class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://beeg.com/5416503',
- 'md5': '46c384def73b33dbc581262e5ee67cef',
+ 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
'info_dict': {
'id': '5416503',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py
index 7a8e1f60b..e829974ff 100644
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dl/extractor/bleacherreport.py
@@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
- 'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
+ 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
'uploader_id': 6466954,
'upload_date': '20151011',
},
@@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
- 'md5': '8c2c12e3af7805152675446c905d159b',
+ 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index ff0aa11b1..2c32b6ae2 100644
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -77,7 +77,7 @@ class BRIE(InfoExtractor):
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893,
'uploader': 'Eva Maria Steimle',
- 'upload_date': '20140117',
+ 'upload_date': '20170208',
}
},
]
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index f1f128c45..acd87e371 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):
'md5': '060158428b650f896c542dfbb3d6487f',
'info_dict': {
'id': '12163',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Terrasses du Numérique',
'duration': 122,
},
- 'params': {
- 'skip_download': True, # Requires rtmpdump
- }
}, {
'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
'only_matching': True,
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py
index 8d5f11dd1..7d78e3aae 100644
--- a/youtube_dl/extractor/cbslocal.py
+++ b/youtube_dl/extractor/cbslocal.py
@@ -60,8 +60,8 @@ class CBSLocalIE(AnvatoIE):
'title': 'A Very Blue Anniversary',
'description': 'CBS2’s Cindy Hsu has more.',
'thumbnail': 're:^https?://.*',
- 'timestamp': 1479962220,
- 'upload_date': '20161124',
+ 'timestamp': int,
+ 'upload_date': r're:^\d{8}$',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
index bb52e0c6f..0920f6219 100644
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -12,7 +12,7 @@ class ClipfishIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
- 'md5': '720563e467b86374c194bdead08d207d',
+ 'md5': 'b9a5dc46294154c1193e2d10e0c95693',
'info_dict': {
'id': '4343170',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py
index 18c734766..6a41db87c 100644
--- a/youtube_dl/extractor/collegerama.py
+++ b/youtube_dl/extractor/collegerama.py
@@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
'description': '',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
'duration': 7713.088,
'timestamp': 1413309600,
'upload_date': '20141014',
@@ -35,6 +35,7 @@ class CollegeRamaIE(InfoExtractor):
'ext': 'wmv',
'title': '64ste Vakantiecursus: Afvalwater',
'description': 'md5:7fd774865cc69d972f542b157c328305',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
'duration': 10853,
'timestamp': 1326446400,
'upload_date': '20120113',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8b3f04c61..2cb55d6af 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1334,7 +1334,7 @@ class InfoExtractor(object):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return []
- formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+ formats = []
format_url = lambda u: (
u
@@ -1386,6 +1386,7 @@ class InfoExtractor(object):
f = {
'format_id': '-'.join(format_id),
'url': format_url(media_url),
+ 'manifest_url': m3u8_url,
'language': media.get('LANGUAGE'),
'ext': ext,
'protocol': entry_protocol,
@@ -1438,7 +1439,7 @@ class InfoExtractor(object):
f = {
'format_id': '-'.join(format_id),
'url': manifest_url,
- 'manifest_url': manifest_url,
+ 'manifest_url': m3u8_url,
'tbr': tbr,
'ext': ext,
'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 81c0ce9a3..49409369c 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -58,8 +58,7 @@ class FunnyOrDieIE(InfoExtractor):
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
source_formats = list(filter(
- lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
- m3u8_formats))
+ lambda f: f.get('vcodec') != 'none', m3u8_formats))
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
bitrates.sort()
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 682c49e79..00d311158 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -78,8 +78,7 @@ class GameSpotIE(OnceIE):
if m3u8_formats:
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
- m3u8_formats))
+ lambda f: f.get('vcodec') != 'none', m3u8_formats))
if len(qualities) == len(m3u8_formats):
for q, m3u8_format in zip(qualities, m3u8_formats):
f = m3u8_format.copy()
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index 4c9be47b4..9c7b1bd37 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -36,22 +36,26 @@ class GoIE(AdobePassIE):
'requestor_id': 'DisneyXD',
}
}
- _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+ _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_TESTS = [{
- 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
+ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': {
- 'id': '0_g86w5onx',
+ 'id': 'VDKA3807643',
'ext': 'mp4',
- 'title': 'Sneak Peek: Language Arts',
- 'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
+ 'title': 'The Traitor in the White House',
+ 'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
- 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
- 'only_matching': True,
+ 'url': 'http://watchdisneyxd.go.com/doraemon',
+ 'info_dict': {
+ 'title': 'Doraemon',
+ 'id': 'SH55574025',
+ },
+ 'playlist_mincount': 51,
}, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'only_matching': True,
@@ -60,19 +64,36 @@ class GoIE(AdobePassIE):
'only_matching': True,
}]
+ def _extract_videos(self, brand, video_id='-1', show_id='-1'):
+ display_id = video_id if video_id != '-1' else show_id
+ return self._download_json(
+ 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
+ display_id)['video']
+
def _real_extract(self, url):
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+ site_info = self._SITE_INFO[sub_domain]
+ brand = site_info['brand']
if not video_id:
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
- r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
- site_info = self._SITE_INFO[sub_domain]
- brand = site_info['brand']
- video_data = self._download_json(
- 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
- video_id)['video'][0]
+ r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
+ if not video_id:
+ # show extraction works for Disney, DisneyJunior and DisneyXD
+ # ABC and Freeform has different layout
+ show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
+ videos = self._extract_videos(brand, show_id=show_id)
+ show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
+ entries = []
+ for video in videos:
+ entries.append(self.url_result(
+ video['url'], 'Go', video.get('id'), video.get('title')))
+ entries.reverse()
+ return self.playlist_result(entries, show_id, show_title)
+ video_data = self._extract_videos(brand, video_id)[0]
+ video_id = video_data['id']
title = video_data['title']
formats = []
@@ -105,7 +126,7 @@ class GoIE(AdobePassIE):
self._initialize_geo_bypass(['US'])
entitlement = self._download_json(
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
- video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
+ video_id, data=urlencode_postdata(data))
errors = entitlement.get('errors', {}).get('errors', [])
if errors:
for error in errors:
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
index d3bca6435..b312e77f1 100644
--- a/youtube_dl/extractor/lego.py
+++ b/youtube_dl/extractor/lego.py
@@ -86,7 +86,7 @@ class LEGOIE(InfoExtractor):
formats = self._extract_akamai_formats(
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
m3u8_formats = list(filter(
- lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+ lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
formats))
if len(m3u8_formats) == len(self._BITRATES):
self._sort_formats(m3u8_formats)
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 3e51b4dd7..6166dc2ad 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -515,7 +515,7 @@ class PBSIE(InfoExtractor):
http_url = format_url
self._remove_duplicate_formats(formats)
m3u8_formats = list(filter(
- lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+ lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
formats))
if http_url:
for m3u8_format in m3u8_formats:
diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py
index ed38c77eb..e2202d603 100644
--- a/youtube_dl/extractor/r7.py
+++ b/youtube_dl/extractor/r7.py
@@ -62,8 +62,7 @@ class R7IE(InfoExtractor):
# m3u8 format always matches the http format, let's copy metadata from
# one to another
m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
- formats))
+ lambda f: f.get('vcodec') != 'none', formats))
if len(m3u8_formats) == 1:
f_copy = m3u8_formats[0].copy()
f_copy.update(f)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 1b1afab32..3f3c681ae 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -210,7 +210,7 @@ class TEDIE(InfoExtractor):
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
m3u8_formats = list(filter(
- lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+ lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
formats))
if http_url:
for m3u8_format in m3u8_formats:
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 06ea2b40a..c5b3288ad 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -150,8 +150,7 @@ class TVPEmbedIE(InfoExtractor):
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
- m3u8_formats))
+ lambda f: f.get('vcodec') != 'none', m3u8_formats))
formats.extend(m3u8_formats)
for i, m3u8_format in enumerate(m3u8_formats, 2):
http_url = '%s-%d.mp4' % (video_url_base, i)
diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py
index b6537141a..ebde6053f 100644
--- a/youtube_dl/extractor/tvplayer.py
+++ b/youtube_dl/extractor/tvplayer.py
@@ -2,9 +2,13 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
extract_attributes,
+ try_get,
urlencode_postdata,
ExtractorError,
)
@@ -34,25 +38,32 @@ class TVPlayerIE(InfoExtractor):
webpage, 'channel element'))
title = current_channel['data-name']
- resource_id = self._search_regex(
- r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
- platform = self._search_regex(
- r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
+ resource_id = current_channel['data-id']
+
token = self._search_regex(
- r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
- validate = self._search_regex(
- r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
+ r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
+ 'token', group='token')
+
+ context = self._download_json(
+ 'https://tvplayer.com/watch/context', display_id,
+ 'Downloading JSON context', query={
+ 'resource': resource_id,
+ 'nonce': token,
+ })
+
+ validate = context['validate']
+ platform = try_get(
+ context, lambda x: x['platform']['key'], compat_str) or 'firefox'
try:
response = self._download_json(
'http://api.tvplayer.com/api/v2/stream/live',
- resource_id, headers={
+ display_id, 'Downloading JSON stream', headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
}, data=urlencode_postdata({
+ 'id': resource_id,
'service': 1,
'platform': platform,
- 'id': resource_id,
- 'token': token,
'validate': validate,
}))['tvplayer']['response']
except ExtractorError as e:
@@ -63,7 +74,7 @@ class TVPlayerIE(InfoExtractor):
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
raise
- formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
+ formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index fcf0cb100..d5d5b4c69 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -176,8 +176,7 @@ class ViewsterIE(InfoExtractor):
if m3u8_formats:
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
- m3u8_formats))
+ lambda f: f.get('vcodec') != 'none', m3u8_formats))
if len(qualities) == len(m3u8_formats):
for q, m3u8_format in zip(qualities, m3u8_formats):
f = m3u8_format.copy()
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index 5584674a0..bea9b87ad 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ js_to_json,
orderedSet,
parse_duration,
sanitized_Request,
@@ -38,6 +39,22 @@ class XTubeIE(InfoExtractor):
'age_limit': 18,
}
}, {
+ # FLV videos with duplicated formats
+ 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
+ 'md5': 'a406963eb349dd43692ec54631efd88b',
+ 'info_dict': {
+ 'id': '9299752',
+ 'display_id': 'A-Super-Run-Part-1-YT',
+ 'ext': 'flv',
+ 'title': 'A Super Run - Part 1 (YT)',
+ 'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
+ 'uploader': 'tshirtguy59',
+ 'duration': 579,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ },
+ }, {
# new URL schema
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
'only_matching': True,
@@ -68,8 +85,9 @@ class XTubeIE(InfoExtractor):
})
sources = self._parse_json(self._search_regex(
- r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
- webpage, 'sources', group='sources'), video_id)
+ r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+ webpage, 'sources', group='sources'), video_id,
+ transform_source=js_to_json)
formats = []
for format_id, format_url in sources.items():
@@ -78,6 +96,7 @@ class XTubeIE(InfoExtractor):
'format_id': format_id,
'height': int_or_none(format_id),
})
+ self._remove_duplicate_formats(formats)
self._sort_formats(formats)
title = self._search_regex(
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9e2b9115c..480f403da 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -963,7 +963,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
+ r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1629,7 +1629,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
+ [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+ r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 91e235ff2..41bc20544 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -421,8 +421,8 @@ def clean_html(html):
# Newline vs <br />
html = html.replace('\n', ' ')
- html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
- html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
+ html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
# Strip html tags
html = re.sub('<.*?>', '', html)
# Replace html entities
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index e206501e1..8c77f1905 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2017.04.26'
+__version__ = '2017.04.28'