aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/ISSUE_TEMPLATE.md6
-rw-r--r--AUTHORS1
-rw-r--r--ChangeLog79
-rw-r--r--README.md7
-rw-r--r--docs/supportedsites.md19
-rw-r--r--test/test_utils.py9
-rw-r--r--youtube_dl/__init__.py1
-rw-r--r--youtube_dl/downloader/dash.py35
-rw-r--r--youtube_dl/downloader/external.py3
-rw-r--r--youtube_dl/downloader/fragment.py15
-rw-r--r--youtube_dl/downloader/hls.py41
-rw-r--r--youtube_dl/extractor/adobepass.py13
-rw-r--r--youtube_dl/extractor/adultswim.py18
-rw-r--r--youtube_dl/extractor/bandcamp.py16
-rw-r--r--youtube_dl/extractor/cbs.py2
-rw-r--r--youtube_dl/extractor/cnn.py16
-rw-r--r--youtube_dl/extractor/common.py33
-rw-r--r--youtube_dl/extractor/ctv.py22
-rw-r--r--youtube_dl/extractor/curiositystream.py120
-rw-r--r--youtube_dl/extractor/drtv.py95
-rw-r--r--youtube_dl/extractor/espn.py5
-rw-r--r--youtube_dl/extractor/exfm.py58
-rw-r--r--youtube_dl/extractor/extractors.py33
-rw-r--r--youtube_dl/extractor/facebook.py42
-rw-r--r--youtube_dl/extractor/fc2.py61
-rw-r--r--youtube_dl/extractor/foxnews.py48
-rw-r--r--youtube_dl/extractor/generic.py4
-rw-r--r--youtube_dl/extractor/glide.py16
-rw-r--r--youtube_dl/extractor/go.py101
-rw-r--r--youtube_dl/extractor/internetvideoarchive.py15
-rw-r--r--youtube_dl/extractor/kusi.py27
-rw-r--r--youtube_dl/extractor/limelight.py12
-rw-r--r--youtube_dl/extractor/movingimage.py (renamed from youtube_dl/extractor/ssa.py)26
-rw-r--r--youtube_dl/extractor/myvidster.py2
-rw-r--r--youtube_dl/extractor/nba.py11
-rw-r--r--youtube_dl/extractor/ninecninemedia.py126
-rw-r--r--youtube_dl/extractor/nytimes.py95
-rw-r--r--youtube_dl/extractor/porncom.py13
-rw-r--r--youtube_dl/extractor/pornovoisines.py80
-rw-r--r--youtube_dl/extractor/pyvideo.py95
-rw-r--r--youtube_dl/extractor/rottentomatoes.py11
-rw-r--r--youtube_dl/extractor/soundcloud.py5
-rw-r--r--youtube_dl/extractor/southpark.py1
-rw-r--r--youtube_dl/extractor/theplatform.py3
-rw-r--r--youtube_dl/extractor/thestar.py11
-rw-r--r--youtube_dl/extractor/thvideo.py84
-rw-r--r--youtube_dl/extractor/turner.py66
-rw-r--r--youtube_dl/extractor/tvnoe.py49
-rw-r--r--youtube_dl/extractor/vimple.py35
-rw-r--r--youtube_dl/extractor/vodplatform.py2
-rw-r--r--youtube_dl/extractor/yahoo.py81
-rw-r--r--youtube_dl/extractor/youjizz.py43
-rw-r--r--youtube_dl/extractor/youporn.py18
-rw-r--r--youtube_dl/extractor/youtube.py98
-rw-r--r--youtube_dl/options.py10
-rw-r--r--youtube_dl/utils.py3
-rw-r--r--youtube_dl/version.py2
57 files changed, 1327 insertions, 616 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index a2fe59f80..c03092442 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.08.28
+[debug] youtube-dl version 2016.09.04.1
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/AUTHORS b/AUTHORS
index b9a602c12..c4bef040a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -182,3 +182,4 @@ Rob van Bekkum
Petr Zvoníček
Pratyush Singh
Aleksander Nitecki
+Sebastian Blunt
diff --git a/ChangeLog b/ChangeLog
index e055976c5..0be9b0fbb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,85 @@
version <unreleased>
Extractors
++ [tvnoe] New extractor (#10524)
+
+
+version 2016.09.04.1
+
+Core
+* In DASH downloader if the first segment fails, abort the whole download
+ process to prevent throttling (#10497)
++ Add support for --skip-unavailable-fragments and --fragment-retries in
+ hlsnative downloader (#10165, #10448).
++ Add support for --skip-unavailable-fragments in DASH downloader
++ Introduce --skip-unavailable-fragments option for fragment based downloaders
+ that allows skipping fragments unavailable due to an HTTP error
+* Fix extraction of video/audio entries with src attribute in
+ _parse_html5_media_entries (#10540)
+
+Extractors
+* [theplatform] Relax URL regular expression (#10546)
+* [youtube:playlist] Extend URL regular expression
+* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
+* [internetvideoarchive] Extract all formats
+* [pornovoisines] Fix extraction (#10469)
+* [rottentomatoes] Fix extraction (#10467)
+* [espn] Extend URL regular expression (#10549)
+* [vimple] Extend URL regular expression (#10547)
+* [youtube:watchlater] Fix extraction (#10544)
+* [youjizz] Fix extraction (#10437)
++ [foxnews] Add support for FoxNews Insider (#10445)
++ [fc2] Recognize Flash player URLs (#10512)
+
+
+version 2016.09.03
+
+Core
+* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in
+ _extract_m3u8_formats (#10522)
+* Handle semicolon in mimetype2ext
+
+Extractors
++ [youtube] Add support for rental videos' previews (#10532)
+* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
+ no playlist is actually served (#10537)
++ [drtv] Add support for dr.dk/nyheder (#10536)
++ [facebook:plugins:video] Add extractor (#10530)
++ [go] Add extractor for *.go.com sites
+* [adobepass] Check for authz_token expiration (#10527)
+* [nytimes] Improve extraction
+* [thestar] Fix extraction (#10465)
+* [glide] Fix extraction (#10478)
+- [exfm] Remove extractor (#10482)
+* [youporn] Fix categories and tags extraction (#10521)
++ [curiositystream] Add extractor for app.curiositystream.com
+- [thvideo] Remove extractor (#10464)
+* [movingimage] Fix for the new site name (#10466)
++ [cbs] Add support for once formats (#10515)
+* [limelight] Skip ism and duplicate manifests
++ [porncom] Extract categories and tags (#10510)
++ [facebook] Extract timestamp (#10508)
++ [yahoo] Extract more formats
+
+
+version 2016.08.31
+
+Extractors
+* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
+* [bandcamp:album] Fix title extraction (#10455)
+* [pyvideo] Fix extraction (#10468)
++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
+* [9c9media] Extract more metadata
+* [9c9media] Fix multiple stacks extraction (#10016)
+* [adultswim] Improve video info extraction (#10492)
+* [vodplatform] Improve embed regular expression
+- [played] Remove extractor (#10470)
++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
++ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
+* [adultswim] Rework in terms of turner extractor
+* [cnn] Rework in terms of turner extractor
+* [nba] Rework in terms of turner extractor
++ [turner] Add base extractor for Turner Broadcasting System based sites
* [bilibili] Fix extraction (#10375)
* [openload] Fix extraction (#10408)
diff --git a/README.md b/README.md
index 87465aa5e..207b633db 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.
--mark-watched Mark videos watched (YouTube only)
--no-mark-watched Do not mark videos watched (YouTube only)
--no-color Do not emit color codes in output
+ --abort-on-unavailable-fragment Abort downloading when some fragment is not
+ available
## Network Options:
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
@@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.
-R, --retries RETRIES Number of retries (default is 10), or
"infinite".
--fragment-retries RETRIES Number of retries for a fragment (default
- is 10), or "infinite" (DASH only)
+ is 10), or "infinite" (DASH and hlsnative
+ only)
+ --skip-unavailable-fragments Skip unavailable fragments (DASH and
+ hlsnative only)
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
(default is 1024)
--no-resize-buffer Do not automatically adjust the buffer
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index bf08697be..9e21016f7 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -13,6 +13,8 @@
- **5min**
- **8tracks**
- **91porn**
+ - **9c9media**
+ - **9c9media:stack**
- **9gag**
- **9now.com.au**
- **abc.net.au**
@@ -89,7 +91,7 @@
- **Bet**
- **Bigflix**
- **Bild**: Bild.de
- - **BiliBili** (Currently broken)
+ - **BiliBili**
- **BioBioChileTV**
- **BIQLE**
- **BleacherReport**
@@ -115,6 +117,7 @@
- **Canvas**
- **CarambaTV**
- **CarambaTVPage**
+ - **CartoonNetwork**
- **cbc.ca**
- **cbc.ca:player**
- **cbc.ca:watch**
@@ -168,6 +171,8 @@
- **CTVNews**
- **culturebox.francetvinfo.fr**
- **CultureUnplugged**
+ - **curiositystream**
+ - **curiositystream:collection**
- **CWTV**
- **DailyMail**
- **dailymotion**
@@ -220,13 +225,14 @@
- **EsriVideo**
- **Europa**
- **EveryonesMixtape**
- - **exfm**: ex.fm
- **ExpoTV**
- **ExtremeTube**
- **EyedoTV**
- **facebook**
+ - **FacebookPluginsVideo**
- **faz.net**
- **fc2**
+ - **fc2:embed**
- **Fczenit**
- **features.aol.com**
- **fernsehkritik.tv**
@@ -240,6 +246,7 @@
- **FOX**
- **Foxgay**
- **FoxNews**: Fox News and Fox Business Video
+ - **foxnews:insider**
- **FoxSports**
- **france2.fr:generation-quoi**
- **FranceCulture**
@@ -268,6 +275,7 @@
- **Glide**: Glide mobile video messages (glide.me)
- **Globo**
- **GloboArticle**
+ - **Go**
- **GodTube**
- **GodTV**
- **Golem**
@@ -403,6 +411,7 @@
- **MovieClips**
- **MovieFap**
- **Moviezine**
+ - **MovingImage**
- **MPORA**
- **MSN**
- **mtg**: MTG services
@@ -459,7 +468,6 @@
- **nick.de**
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- - **NineCNineMedia**
- **Nintendo**
- **njoy**: N-JOY
- **njoy:embed**
@@ -517,7 +525,6 @@
- **Pinkbike**
- **Pladform**
- **play.fm**
- - **played.to**
- **PlaysTV**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
@@ -658,7 +665,6 @@
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- - **SSA**
- **stanfordoc**: Stanford Open ClassRoom
- **Steam**
- **Stitcher**
@@ -675,6 +681,7 @@
- **Tagesschau**
- **tagesschau:player**
- **Tass**
+ - **TBS**
- **TDSLifeway**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
@@ -700,8 +707,6 @@
- **TheStar**
- **ThisAmericanLife**
- **ThisAV**
- - **THVideo**
- - **THVideoPlaylist**
- **tinypic**: tinypic.com videos
- **tlc.de**
- **TMZ**
diff --git a/test/test_utils.py b/test/test_utils.py
index d16ea7f77..405c5d351 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -39,6 +39,7 @@ from youtube_dl.utils import (
is_html,
js_to_json,
limit_length,
+ mimetype2ext,
ohdave_rsa_encrypt,
OnDemandPagedList,
orderedSet,
@@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase):
limit_length('foo bar baz asd', 12).startswith('foo bar'))
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
+ def test_mimetype2ext(self):
+ self.assertEqual(mimetype2ext(None), None)
+ self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
+ self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
+ self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
+ self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
+ self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+
def test_parse_codecs(self):
self.assertEqual(parse_codecs(''), {})
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index a9730292c..42128272a 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -318,6 +318,7 @@ def _real_main(argv=None):
'nooverwrites': opts.nooverwrites,
'retries': opts.retries,
'fragment_retries': opts.fragment_retries,
+ 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl,
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 8bbab9dbc..41fc9cfc2 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -38,8 +38,10 @@ class DashSegmentsFD(FragmentFD):
segments_filenames = []
fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- def append_url_to_file(target_url, tmp_filename, segment_name):
+ def process_segment(segment, tmp_filename, fatal):
+ target_url, segment_name = segment
target_filename = '%s-%s' % (tmp_filename, segment_name)
count = 0
while count <= fragment_retries:
@@ -52,26 +54,35 @@ class DashSegmentsFD(FragmentFD):
down.close()
segments_filenames.append(target_sanitized)
break
- except (compat_urllib_error.HTTPError, ) as err:
+ except compat_urllib_error.HTTPError as err:
# YouTube may often return 404 HTTP error for a fragment causing the
# whole download to fail. However if the same fragment is immediately
# retried with the same request data this usually succeeds (1-2 attemps
# is usually enough) thus allowing to download the whole file successfully.
- # So, we will retry all fragments that fail with 404 HTTP error for now.
- if err.code != 404:
- raise
- # Retry fragment
+ # To be future-proof we will retry all fragments that fail with any
+ # HTTP error.
count += 1
if count <= fragment_retries:
- self.report_retry_fragment(segment_name, count, fragment_retries)
+ self.report_retry_fragment(err, segment_name, count, fragment_retries)
if count > fragment_retries:
+ if not fatal:
+ self.report_skip_fragment(segment_name)
+ return True
self.report_error('giving up after %s fragment retries' % fragment_retries)
return False
-
- if initialization_url:
- append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
- for i, segment_url in enumerate(segment_urls):
- append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
+ return True
+
+ segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
+ segments_to_download.extend([
+ (segment_url, 'Seg%d' % i)
+ for i, segment_url in enumerate(segment_urls)])
+
+ for i, segment in enumerate(segments_to_download):
+ # In DASH, the first segment contains necessary headers to
+ # generate a valid MP4 file, so always abort for the first segment
+ fatal = i == 0 or not skip_unavailable_fragments
+ if not process_segment(segment, ctx['tmpfilename'], fatal):
+ return False
self._finish_frag_download(ctx)
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 17f12e970..0aeae3b8f 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -223,7 +223,8 @@ class FFmpegFD(ExternalFD):
if proxy.startswith('socks'):
self.report_warning(
- '%s does not support SOCKS proxies. Downloading may fail.' % self.get_basename())
+ '%s does not support SOCKS proxies. Downloading is likely to fail. '
+ 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
# Since December 2015 ffmpeg supports -http_proxy option (see
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index ba903ae10..84aacf7db 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -6,6 +6,7 @@ import time
from .common import FileDownloader
from .http import HttpFD
from ..utils import (
+ error_to_compat_str,
encodeFilename,
sanitize_open,
)
@@ -22,13 +23,19 @@ class FragmentFD(FileDownloader):
Available options:
- fragment_retries: Number of times to retry a fragment for HTTP error (DASH only)
+ fragment_retries: Number of times to retry a fragment for HTTP error (DASH
+ and hlsnative only)
+ skip_unavailable_fragments:
+ Skip unavailable fragments (DASH and hlsnative only)
"""
- def report_retry_fragment(self, fragment_name, count, retries):
+ def report_retry_fragment(self, err, fragment_name, count, retries):
self.to_screen(
- '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
- % (fragment_name, count, self.format_retries(retries)))
+ '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
+ % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
+
+ def report_skip_fragment(self, fragment_name):
+ self.to_screen('[download] Skipping fragment %s...' % fragment_name)
def _prepare_and_start_frag_download(self, ctx):
self._prepare_frag_download(ctx)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index baaff44d5..5d70abf62 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -13,6 +13,7 @@ from .fragment import FragmentFD
from .external import FFmpegFD
from ..compat import (
+ compat_urllib_error,
compat_urlparse,
compat_struct_pack,
)
@@ -83,6 +84,10 @@ class HlsFD(FragmentFD):
self._prepare_and_start_frag_download(ctx)
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+ test = self.params.get('test', False)
+
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
@@ -99,15 +104,37 @@ class HlsFD(FragmentFD):
line
if re.match(r'^https?://', line)
else compat_urlparse.urljoin(man_url, line))
- frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+ frag_name = 'Frag%d' % i
+ frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
- success = ctx['dl'].download(frag_filename, {'url': frag_url})
- if not success:
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success = ctx['dl'].download(frag_filename, {'url': frag_url})
+ if not success:
+ return False
+ down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+ frag_content = down.read()
+ down.close()
+ break
+ except compat_urllib_error.HTTPError as err:
+ # Unavailable (possibly temporary) fragments may be served.
+ # First we try to retry then either skip or abort.
+ # See https://github.com/rg3/youtube-dl/issues/10165,
+ # https://github.com/rg3/youtube-dl/issues/10448).
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_name, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ i += 1
+ media_sequence += 1
+ self.report_skip_fragment(frag_name)
+ continue
+ self.report_error(
+ 'giving up after %s fragment retries' % fragment_retries)
return False
- down, frag_sanitized = sanitize_open(frag_filename, 'rb')
- frag_content = down.read()
- down.close()
if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
frag_content = AES.new(
@@ -115,7 +142,7 @@ class HlsFD(FragmentFD):
ctx['dest_stream'].write(frag_content)
frags_filenames.append(frag_sanitized)
# We only download the first fragment during the test
- if self.params.get('test', False):
+ if test:
break
i += 1
media_sequence += 1
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
index 9e3a3e362..68ec37e00 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor):
return self._search_regex(
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
+ def is_expired(token, date_ele):
+ token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
+ return token_expires and token_expires <= int(time.time())
+
mvpd_headers = {
'ap_42': 'anonymous',
'ap_11': 'Linux i686',
@@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor):
guid = xml_text(resource, 'guid')
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
authn_token = requestor_info.get('authn_token')
- if authn_token:
- token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
- if token_expires and token_expires <= int(time.time()):
- authn_token = None
- requestor_info = {}
+ if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
+ authn_token = None
if not authn_token:
# TODO add support for other TV Providers
mso_id = 'DTV'
@@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor):
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
+ if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
+ authz_token = None
if not authz_token:
authorize = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index ef3cc2a61..5d0bf5a68 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .turner import TurnerBaseIE
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
class AdultSwimIE(TurnerBaseIE):
@@ -144,7 +147,10 @@ class AdultSwimIE(TurnerBaseIE):
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
video_info = bootstrapped_data['slugged_video']
if not video_info:
- video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video')
+ video_info = bootstrapped_data.get(
+ 'heroMetadata', {}).get('trailer', {}).get('video')
+ if not video_info:
+ video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
if not video_info:
raise ExtractorError('Unable to find video info')
@@ -167,8 +173,9 @@ class AdultSwimIE(TurnerBaseIE):
episode_id = video_info['id']
episode_title = video_info['title']
- episode_description = video_info['description']
- episode_duration = video_info.get('duration')
+ episode_description = video_info.get('description')
+ episode_duration = int_or_none(video_info.get('duration'))
+ view_count = int_or_none(video_info.get('views'))
entries = []
for part_num, segment_id in enumerate(segment_ids):
@@ -197,5 +204,6 @@ class AdultSwimIE(TurnerBaseIE):
'entries': entries,
'title': '%s - %s' % (show_title, episode_title),
'description': episode_description,
- 'duration': episode_duration
+ 'duration': episode_duration,
+ 'view_count': view_count,
}
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 991ab0676..249c3d956 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
'uploader_id': 'dotscale',
},
'playlist_mincount': 7,
+ }, {
+ # with escaped quote in title
+ 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+ 'info_dict': {
+ 'title': '"Entropy" EP',
+ 'uploader_id': 'jstrecords',
+ 'id': 'entropy-ep',
+ },
+ 'playlist_mincount': 3,
}]
def _real_extract(self, url):
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
for t_path in tracks_paths]
- title = self._search_regex(
- r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
+ title = self._html_search_regex(
+ r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+ webpage, 'title', fatal=False)
+ if title:
+ title = title.replace(r'\"', '"')
return {
'_type': 'playlist',
'uploader_id': uploader_id,
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index c72ed2dbb..3f4dea40c 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE):
path = 'dJ5BDC/media/guid/2198311517/' + guid
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid)
- for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
+ for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
try:
tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0])
formats.extend(tp_formats)
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 1bf87f6ea..5fc311f53 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -15,7 +15,7 @@ class CNNIE(TurnerBaseIE):
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': {
- 'id': 'nadal-1-on-1',
+ 'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
'ext': 'mp4',
'title': 'Nadal wins 8th French Open title',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
@@ -27,7 +27,7 @@ class CNNIE(TurnerBaseIE):
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'info_dict': {
- 'id': 'sot-student-gives-epic-speech',
+ 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen",
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
@@ -38,7 +38,7 @@ class CNNIE(TurnerBaseIE):
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': {
- 'id': 'growing-america-nashville-salemtown-board-episode-1',
+ 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards',
'description': 'md5:e7223a503315c9f150acac52e76de086',
@@ -49,7 +49,7 @@ class CNNIE(TurnerBaseIE):
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'info_dict': {
- 'id': 'netflix-stunning-stats',
+ 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
'ext': 'mp4',
'title': '5 stunning stats about Netflix',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
@@ -83,6 +83,10 @@ class CNNIE(TurnerBaseIE):
},
}
+ def _extract_timestamp(self, video_data):
+ # TODO: fix timestamp extraction
+ return None
+
def _real_extract(self, url):
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
if sub_domain not in ('money', 'edition'):
@@ -108,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
'upload_date': '20140209',
},
+ 'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
@@ -130,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
- 'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
+ 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
+ 'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index da0af29ec..6edd5a769 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1163,13 +1163,6 @@ class InfoExtractor(object):
m3u8_id=None, note=None, errnote=None,
fatal=True, live=False):
- formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
-
- format_url = lambda u: (
- u
- if re.match(r'^https?://', u)
- else compat_urlparse.urljoin(m3u8_url, u))
-
res = self._download_webpage_handle(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
@@ -1180,6 +1173,13 @@ class InfoExtractor(object):
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
+ formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+
+ format_url = lambda u: (
+ u
+ if re.match(r'^https?://', u)
+ else compat_urlparse.urljoin(m3u8_url, u))
+
# We should try extracting formats only from master playlists [1], i.e.
# playlists that describe available qualities. On the other hand media
# playlists [2] should be returned as is since they contain just the media
@@ -1201,7 +1201,8 @@ class InfoExtractor(object):
'protocol': entry_protocol,
'preference': preference,
}]
- last_info = None
+ last_info = {}
+ last_media = {}
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-STREAM-INF:'):
last_info = parse_m3u8_attributes(line)
@@ -1224,23 +1225,24 @@ class InfoExtractor(object):
'protocol': entry_protocol,
'preference': preference,
})
+ else:
+ # When there is no URI in EXT-X-MEDIA let this tag's
+ # data be used by regular URI lines below
+ last_media = media
elif line.startswith('#') or not line.strip():
continue
else:
- if last_info is None:
- formats.append({'url': format_url(line)})
- continue
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
format_id = []
if m3u8_id:
format_id.append(m3u8_id)
+ # Despite specification does not mention NAME attribute for
+ # EXT-X-STREAM-INF it still sometimes may be present
+ stream_name = last_info.get('NAME') or last_media.get('NAME')
# Bandwidth of live streams may differ over time thus making
# format_id unpredictable. So it's better to keep provided
# format_id intact.
if not live:
- # Despite specification does not mention NAME attribute for
- # EXT-X-STREAM-INF it still sometimes may be present
- stream_name = last_info.get('NAME')
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
f = {
'format_id': '-'.join(format_id),
@@ -1269,6 +1271,7 @@ class InfoExtractor(object):
f.update(parse_codecs(last_info.get('CODECS')))
formats.append(f)
last_info = {}
+ last_media = {}
return formats
@staticmethod
@@ -1746,7 +1749,7 @@ class InfoExtractor(object):
media_attributes = extract_attributes(media_tag)
src = media_attributes.get('src')
if src:
- _, formats = _media_formats(src)
+ _, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats)
media_info['thumbnail'] = media_attributes.get('poster')
if media_content:
diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py
index 5807fbac9..a1fe86316 100644
--- a/youtube_dl/extractor/ctv.py
+++ b/youtube_dl/extractor/ctv.py
@@ -1,11 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
class CTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@@ -18,13 +20,27 @@ class CTVIE(InfoExtractor):
'timestamp': 1442624700,
},
'expected_warnings': ['HTTP Error 404'],
+ }, {
+ 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ if domain == 'thecomedynetwork':
+ domain = 'comedy'
return {
'_type': 'url_transparent',
'id': video_id,
- 'url': '9c9media:ctv_web:%s' % video_id,
+ 'url': '9c9media:%s_web:%s' % (domain, video_id),
'ie_key': 'NineCNineMedia',
}
diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py
new file mode 100644
index 000000000..e3c99468c
--- /dev/null
+++ b/youtube_dl/extractor/curiositystream.py
@@ -0,0 +1,120 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urlencode_postdata,
+ compat_str,
+ ExtractorError,
+)
+
+
+class CuriosityStreamBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'curiositystream'
+ _auth_token = None
+ _API_BASE_URL = 'https://api.curiositystream.com/v1/'
+
+ def _handle_errors(self, result):
+ error = result.get('error', {}).get('message')
+ if error:
+ if isinstance(error, dict):
+ error = ', '.join(error.values())
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ def _call_api(self, path, video_id):
+ headers = {}
+ if self._auth_token:
+ headers['X-Auth-Token'] = self._auth_token
+ result = self._download_json(
+ self._API_BASE_URL + path, video_id, headers=headers)
+ self._handle_errors(result)
+ return result['data']
+
+ def _real_initialize(self):
+ (email, password) = self._get_login_info()
+ if email is None:
+ return
+ result = self._download_json(
+ self._API_BASE_URL + 'login', None, data=urlencode_postdata({
+ 'email': email,
+ 'password': password,
+ }))
+ self._handle_errors(result)
+ self._auth_token = result['message']['auth_token']
+
+ def _extract_media_info(self, media):
+ video_id = compat_str(media['id'])
+ limelight_media_id = media['limelight_media_id']
+ title = media['title']
+
+ subtitles = {}
+ for closed_caption in media.get('closed_captions', []):
+ sub_url = closed_caption.get('file')
+ if not sub_url:
+ continue
+ lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:' + limelight_media_id,
+ 'title': title,
+ 'description': media.get('description'),
+ 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
+ 'duration': int_or_none(media.get('duration')),
+ 'tags': media.get('tags'),
+ 'subtitles': subtitles,
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class CuriosityStreamIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream'
+ _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://app.curiositystream.com/video/2',
+ 'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
+ 'info_dict': {
+ 'id': '2',
+ 'ext': 'mp4',
+ 'title': 'How Did You Develop The Internet?',
+ 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
+ 'timestamp': 1448388615,
+ 'upload_date': '20151124',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ media = self._call_api('media/' + video_id, video_id)
+ return self._extract_media_info(media)
+
+
+class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream:collection'
+ _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://app.curiositystream.com/collection/2',
+ 'info_dict': {
+ 'id': '2',
+ 'title': 'Curious Minds: The Internet',
+ 'description': 'How is the internet shaping our lives in the 21st Century?',
+ },
+ 'playlist_mincount': 17,
+ }
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ collection = self._call_api(
+ 'collections/' + collection_id, collection_id)
+ entries = []
+ for media in collection.get('media', []):
+ entries.append(self._extract_media_info(media))
+ return self.playlist_result(
+ entries, collection_id,
+ collection.get('title'), collection.get('description'))
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index 2d74ff855..88d096b30 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -4,26 +4,45 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ int_or_none,
+ float_or_none,
+ mimetype2ext,
parse_iso8601,
+ remove_end,
)
class DRTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
- _TEST = {
- 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
- 'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
+ _TESTS = [{
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
+ 'md5': '25e659cccc9a2ed956110a299fdf5983',
'info_dict': {
- 'id': 'panisk-paske-5',
+ 'id': 'klassen-darlig-taber-10',
'ext': 'mp4',
- 'title': 'Panisk Påske (5)',
- 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
- 'timestamp': 1426984612,
- 'upload_date': '20150322',
- 'duration': 1455,
+ 'title': 'Klassen - Dårlig taber (10)',
+ 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
+ 'timestamp': 1471991907,
+ 'upload_date': '20160823',
+ 'duration': 606.84,
},
- }
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
+ 'md5': '2c37175c718155930f939ef59952474a',
+ 'info_dict': {
+ 'id': 'christiania-pusher-street-ryddes-drdkrjpo',
+ 'ext': 'mp4',
+ 'title': 'LIVE Christianias rydning af Pusher Street er i gang',
+ 'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
+ 'timestamp': 1472800279,
+ 'upload_date': '20160902',
+ 'duration': 131.4,
+ },
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):
'Video %s is not available' % video_id, expected=True)
video_id = self._search_regex(
- r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+ (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+ r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
webpage, 'video id')
programcard = self._download_json(
@@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):
video_id, 'Downloading video JSON')
data = programcard['Data'][0]
- title = data['Title']
- description = data['Description']
- timestamp = parse_iso8601(data['CreatedTime'])
+ title = remove_end(self._og_search_title(
+ webpage, default=None), ' | TV | DR') or data['Title']
+ description = self._og_search_description(
+ webpage, default=None) or data.get('Description')
+
+ timestamp = parse_iso8601(data.get('CreatedTime'))
thumbnail = None
duration = None
@@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):
subtitles = {}
for asset in data['Assets']:
- if asset['Kind'] == 'Image':
- thumbnail = asset['Uri']
- elif asset['Kind'] == 'VideoResource':
- duration = asset['DurationInMilliseconds'] / 1000.0
- restricted_to_denmark = asset['RestrictedToDenmark']
- spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
- for link in asset['Links']:
- uri = link['Uri']
- target = link['Target']
- format_id = target
+ if asset.get('Kind') == 'Image':
+ thumbnail = asset.get('Uri')
+ elif asset.get('Kind') == 'VideoResource':
+ duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
+ restricted_to_denmark = asset.get('RestrictedToDenmark')
+ spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
+ for link in asset.get('Links', []):
+ uri = link.get('Uri')
+ if not uri:
+ continue
+ target = link.get('Target')
+ format_id = target or ''
preference = None
if spoken_subtitles:
preference = -1
@@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):
video_id, preference, f4m_id=format_id))
elif target == 'HLS':
formats.extend(self._extract_m3u8_formats(
- uri, video_id, 'mp4', preference=preference,
- m3u8_id=format_id))
+ uri, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=preference, m3u8_id=format_id))
else:
bitrate = link.get('Bitrate')
if bitrate:
@@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):
formats.append({
'url': uri,
'format_id': format_id,
- 'tbr': bitrate,
+ 'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
})
subtitles_list = asset.get('SubtitlesList')
@@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):
'Danish': 'da',
}
for subs in subtitles_list:
- lang = subs['Language']
- subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
+ if not subs.get('Uri'):
+ continue
+ lang = subs.get('Language') or 'da'
+ subtitles.setdefault(LANGS.get(lang, lang), []).append({
+ 'url': subs['Uri'],
+ 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
+ })
if not formats and restricted_to_denmark:
- raise ExtractorError(
- 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
+ self.raise_geo_restricted(
+ 'Unfortunately, DR is not allowed to show this program outside Denmark.',
+ expected=True)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py
index 66c08bec4..6d10f8e68 100644
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -5,7 +5,7 @@ from ..utils import remove_end
class ESPNIE(InfoExtractor):
- _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
@@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor):
}, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip?id=10365079',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py
deleted file mode 100644
index 09ed4f2b5..000000000
--- a/youtube_dl/extractor/exfm.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class ExfmIE(InfoExtractor):
- IE_NAME = 'exfm'
- IE_DESC = 'ex.fm'
- _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
- _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
- _TESTS = [
- {
- 'url': 'http://ex.fm/song/eh359',
- 'md5': 'e45513df5631e6d760970b14cc0c11e7',
- 'info_dict': {
- 'id': '44216187',
- 'ext': 'mp3',
- 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
- 'uploader': 'deadjournalist',
- 'upload_date': '20120424',
- 'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
- },
- 'note': 'Soundcloud song',
- 'skip': 'The site is down too often',
- },
- {
- 'url': 'http://ex.fm/song/wddt8',
- 'md5': '966bd70741ac5b8570d8e45bfaed3643',
- 'info_dict': {
- 'id': 'wddt8',
- 'ext': 'mp3',
- 'title': 'Safe and Sound',
- 'uploader': 'Capital Cities',
- },
- 'skip': 'The site is down too often',
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- song_id = mobj.group('id')
- info_url = 'http://ex.fm/api/v3/song/%s' % song_id
- info = self._download_json(info_url, song_id)['song']
- song_url = info['url']
- if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
- self.to_screen('Soundcloud song detected')
- return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
- return {
- 'id': song_id,
- 'url': song_url,
- 'ext': 'mp3',
- 'title': info['title'],
- 'thumbnail': info['image']['large'],
- 'uploader': info['artist'],
- 'view_count': info['loved_count'],
- }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 20e85703f..e47adc26c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -194,6 +194,10 @@ from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionIE,
+)
from .cwtv import CWTVIE
from .dailymail import DailyMailIE
from .dailymotion import (
@@ -257,13 +261,18 @@ from .espn import ESPNIE
from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
-from .exfm import ExfmIE
from .expotv import ExpoTVIE
from .extremetube import ExtremeTubeIE
from .eyedotv import EyedoTVIE
-from .facebook import FacebookIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+)
from .faz import FazIE
-from .fc2 import FC2IE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+)
from .fczenit import FczenitIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
@@ -278,7 +287,10 @@ from .formula1 import Formula1IE
from .fourtube import FourTubeIE
from .fox import FOXIE
from .foxgay import FoxgayIE
-from .foxnews import FoxNewsIE
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsInsiderIE,
+)
from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
@@ -315,6 +327,7 @@ from .globo import (
GloboIE,
GloboArticleIE,
)
+from .go import GoIE
from .godtube import GodTubeIE
from .godtv import GodTVIE
from .golem import GolemIE
@@ -486,6 +499,7 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
from .msn import MSNIE
from .mtv import (
MTVIE,
@@ -554,7 +568,10 @@ from .nick import (
NickDeIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninecninemedia import NineCNineMediaIE
+from .ninecninemedia import (
+ NineCNineMediaStackIE,
+ NineCNineMediaIE,
+)
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
@@ -803,7 +820,6 @@ from .srgssr import (
SRGSSRPlayIE,
)
from .srmediathek import SRMediathekIE
-from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamable import StreamableIE
@@ -866,10 +882,6 @@ from .tnaflix import (
MovieFapIE,
)
from .toggle import ToggleIE
-from .thvideo import (
- THVideoIE,
- THVideoPlaylistIE
-)
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
@@ -904,6 +916,7 @@ from .tvc import (
)
from .tvigle import TvigleIE
from .tvland import TVLandIE
+from .tvnoe import TVNoeIE
from .tvp import (
TVPEmbedIE,
TVPIE,
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 0fb781a73..3a220e995 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -15,6 +15,7 @@ from ..compat import (
from ..utils import (
error_to_compat_str,
ExtractorError,
+ int_or_none,
limit_length,
sanitized_Request,
urlencode_postdata,
@@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4',
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
'uploader': 'Tennis on Facebook',
+ 'upload_date': '20140908',
+ 'timestamp': 1410199200,
}
}, {
'note': 'Video without discernible title',
@@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4',
'title': 'Facebook video #274175099429670',
'uploader': 'Asif Nawab Butt',
+ 'upload_date': '20140506',
+ 'timestamp': 1399398998,
},
'expected_warnings': [
'title'
@@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor):
}, {
'note': 'Video with DASH manifest',
'url': 'https://www.facebook.com/video.php?v=957955867617029',
- 'md5': '54706e4db4f5ad58fbad82dde1f1213f',
+ 'md5': 'b2c28d528273b323abe5c6ab59f0f030',
'info_dict': {
'id': '957955867617029',
'ext': 'mp4',
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
'uploader': 'Demy de Zeeuw',
+ 'upload_date': '20160110',
+ 'timestamp': 1452431627,
},
}, {
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
@@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor):
if not video_title:
video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
+ timestamp = int_or_none(self._search_regex(
+ r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
+ 'timestamp', default=None))
info_dict = {
'id': video_id,
'title': video_title,
'formats': formats,
'uploader': uploader,
+ 'timestamp': timestamp,
}
return webpage, info_dict
@@ -340,3 +351,32 @@ class FacebookIE(InfoExtractor):
self._VIDEO_PAGE_TEMPLATE % video_id,
video_id, fatal_if_no_video=True)
return info_dict
+
+
+class FacebookPluginsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
+ 'md5': '5954e92cdfe51fe5782ae9bda7058a07',
+ 'info_dict': {
+ 'id': '10154383743583686',
+ 'ext': 'mp4',
+ 'title': 'What to do during the haze?',
+ 'uploader': 'Gov.sg',
+ 'upload_date': '20160826',
+ 'timestamp': 1472184808,
+ },
+ 'add_ie': [FacebookIE.ie_key()],
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ compat_urllib_parse_unquote(self._match_id(url)),
+ FacebookIE.ie_key())
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index c7d69ff1f..c032d4d02 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -1,10 +1,12 @@
-#! -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import hashlib
+import re
from .common import InfoExtractor
from ..compat import (
+ compat_parse_qs,
compat_urllib_request,
compat_urlparse,
)
@@ -16,7 +18,7 @@ from ..utils import (
class FC2IE(InfoExtractor):
- _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
+ _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2'
_TESTS = [{
@@ -75,12 +77,17 @@ class FC2IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
self._login()
- webpage = self._download_webpage(url, video_id)
- self._downloader.cookiejar.clear_session_cookies() # must clear
- self._login()
-
- title = self._og_search_title(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
+ webpage = None
+ if not url.startswith('fc2:'):
+ webpage = self._download_webpage(url, video_id)
+ self._downloader.cookiejar.clear_session_cookies() # must clear
+ self._login()
+
+ title = 'FC2 video %s' % video_id
+ thumbnail = None
+ if webpage is not None:
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
@@ -113,3 +120,41 @@ class FC2IE(InfoExtractor):
'ext': 'flv',
'thumbnail': thumbnail,
}
+
+
+class FC2EmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
+ IE_NAME = 'fc2:embed'
+
+ _TEST = {
+ 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
+ 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
+ 'info_dict': {
+ 'id': '201403223kCqB3Ez',
+ 'ext': 'flv',
+ 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_parse_qs(mobj.group('query'))
+
+ video_id = query['i'][-1]
+ title = query.get('tl', ['FC2 video %s' % video_id])[0]
+
+ sj = query.get('sj', [None])[0]
+ thumbnail = None
+ if sj:
+ # See thumbnailImagePath() in ServerConst.as of flv2.swf
+ thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
+ sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': FC2IE.ie_key(),
+ 'url': 'fc2:%s' % video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index b04da2415..5c7acd795 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
import re
from .amp import AMPIE
+from .common import InfoExtractor
class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
- _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
@@ -49,6 +50,11 @@ class FoxNewsIE(AMPIE):
'url': 'http://video.foxbusiness.com/v/4442309889001',
'only_matching': True,
},
+ {
+ # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
+ 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
@@ -58,3 +64,43 @@ class FoxNewsIE(AMPIE):
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
info['id'] = video_id
return info
+
+
+class FoxNewsInsiderIE(InfoExtractor):
+ _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
+ IE_NAME = 'foxnews:insider'
+
+ _TEST = {
+ 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
+ 'md5': 'a10c755e582d28120c62749b4feb4c0c',
+ 'info_dict': {
+ 'id': '5099377331001',
+ 'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
+ 'ext': 'mp4',
+ 'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
+ 'description': 'Is campus censorship getting out of control?',
+ 'timestamp': 1472168725,
+ 'upload_date': '20160825',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'add_ie': [FoxNewsIE.ie_key()],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': FoxNewsIE.ie_key(),
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c6e655c84..24b217715 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2243,11 +2243,11 @@ class GenericIE(InfoExtractor):
# Look for VODPlatform embeds
mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
webpage)
if mobj is not None:
return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
+ self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py
index 62ff84835..f0d951396 100644
--- a/youtube_dl/extractor/glide.py
+++ b/youtube_dl/extractor/glide.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import unified_strdate
class GlideIE(InfoExtractor):
@@ -14,10 +13,8 @@ class GlideIE(InfoExtractor):
'info_dict': {
'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
'ext': 'mp4',
- 'title': 'Damon Timm\'s Glide message',
+ 'title': "Damon's Glide message",
'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
- 'uploader': 'Damon Timm',
- 'upload_date': '20140919',
}
}
@@ -27,7 +24,8 @@ class GlideIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title')
+ r'<title>(.+?)</title>', webpage,
+ 'title', default=None) or self._og_search_title(webpage)
video_url = self._proto_relative_url(self._search_regex(
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'video URL', default=None,
@@ -36,18 +34,10 @@ class GlideIE(InfoExtractor):
r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'thumbnail url', default=None,
group='url')) or self._og_search_thumbnail(webpage)
- uploader = self._search_regex(
- r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
- webpage, 'uploader', fatal=False)
- upload_date = unified_strdate(self._search_regex(
- r'<div[^>]+class="info-date"[^>]*>([^<]+)',
- webpage, 'upload date', fatal=False))
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': thumbnail,
- 'uploader': uploader,
- 'upload_date': upload_date,
}
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
new file mode 100644
index 000000000..6a437c54d
--- /dev/null
+++ b/youtube_dl/extractor/go.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ determine_ext,
+ parse_age_limit,
+)
+
+
+class GoIE(InfoExtractor):
+ _BRANDS = {
+ 'abc': '001',
+ 'freeform': '002',
+ 'watchdisneychannel': '004',
+ 'watchdisneyjunior': '008',
+ 'watchdisneyxd': '009',
+ }
+ _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys())
+ _TESTS = [{
+ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
+ 'info_dict': {
+ 'id': '0_g86w5onx',
+ 'ext': 'mp4',
+ 'title': 'Sneak Peek: Language Arts',
+ 'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ sub_domain, video_id = re.match(self._VALID_URL, url).groups()
+ video_data = self._download_json(
+ 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id),
+ video_id)['video'][0]
+ title = video_data['title']
+
+ formats = []
+ for asset in video_data.get('assets', {}).get('asset', []):
+ asset_url = asset.get('value')
+ if not asset_url:
+ continue
+ format_id = asset.get('format')
+ ext = determine_ext(asset_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': asset_url,
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for cc in video_data.get('closedcaption', {}).get('src', []):
+ cc_url = cc.get('value')
+ if not cc_url:
+ continue
+ ext = determine_ext(cc_url)
+ if ext == 'xml':
+ ext = 'ttml'
+ subtitles.setdefault(cc.get('lang'), []).append({
+ 'url': cc_url,
+ 'ext': ext,
+ })
+
+ thumbnails = []
+ for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
+ thumbnail_url = thumbnail.get('value')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('longdescription') or video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
+ 'episode_number': int_or_none(video_data.get('episodenumber')),
+ 'series': video_data.get('show', {}).get('title'),
+ 'season_number': int_or_none(video_data.get('season', {}).get('num')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py
index 45add007f..76cc5ec3e 100644
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor):
# There are multiple videos in the playlist whlie only the first one
# matches the video played in browsers
video_info = configuration['playlist'][0]
+ title = video_info['title']
formats = []
for source in video_info['sources']:
file_url = source['file']
if determine_ext(file_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- file_url, video_id, ext='mp4', m3u8_id='hls'))
+ m3u8_formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ file_url = m3u8_formats[0]['url']
+ formats.extend(self._extract_f4m_formats(
+ file_url.replace('.m3u8', '.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ file_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
else:
a_format = {
'url': file_url,
@@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor):
self._sort_formats(formats)
- title = video_info['title']
description = video_info.get('description')
thumbnail = video_info.get('image')
else:
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py
index 12cc56e44..2e66e8cf9 100644
--- a/youtube_dl/extractor/kusi.py
+++ b/youtube_dl/extractor/kusi.py
@@ -18,31 +18,20 @@ from ..utils import (
class KUSIIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
_TESTS = [{
- 'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
- 'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
+ 'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
+ 'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
'info_dict': {
- 'id': '12203019',
+ 'id': '12689020',
'ext': 'mp4',
- 'title': 'Turko Files: Case Closed! & Put On Hold!',
- 'duration': 231.0,
- 'upload_date': '20160210',
- 'timestamp': 1455087571,
+ 'title': "Turko Files: Refused to Help, It Ain't Right!",
+ 'duration': 223.586,
+ 'upload_date': '20160826',
+ 'timestamp': 1472233118,
'thumbnail': 're:^https?://.*\.jpg$'
},
}, {
'url': 'http://kusi.com/video?clipId=12203019',
- 'info_dict': {
- 'id': '12203019',
- 'ext': 'mp4',
- 'title': 'Turko Files: Case Closed! & Put On Hold!',
- 'duration': 231.0,
- 'upload_date': '20160210',
- 'timestamp': 1455087571,
- 'thumbnail': 're:^https?://.*\.jpg$'
- },
- 'params': {
- 'skip_download': True, # Same as previous one
- },
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index a425bafe3..6752ffee2 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor):
def _extract_info(self, streams, mobile_urls, properties):
video_id = properties['media_id']
formats = []
-
+ urls = []
for stream in streams:
stream_url = stream.get('url')
- if not stream_url or stream.get('drmProtected'):
+ if not stream_url or stream.get('drmProtected') or stream_url in urls:
continue
+ urls.append(stream_url)
ext = determine_ext(stream_url)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
@@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor):
format_id = 'rtmp'
if stream.get('videoBitRate'):
format_id += '-%d' % int_or_none(stream['videoBitRate'])
+ http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
+ urls.append(http_url)
http_fmt = fmt.copy()
http_fmt.update({
- 'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]),
+ 'url': http_url,
'format_id': format_id.replace('rtmp', 'http'),
})
formats.append(http_fmt)
@@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor):
for mobile_url in mobile_urls:
media_url = mobile_url.get('mobileUrl')
format_id = mobile_url.get('targetMediaPlatform')
- if not media_url or format_id == 'Widevine':
+ if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
continue
+ urls.append(media_url)
ext = determine_ext(media_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/movingimage.py
index 54d1843f2..bb789c32e 100644
--- a/youtube_dl/extractor/ssa.py
+++ b/youtube_dl/extractor/movingimage.py
@@ -7,22 +7,19 @@ from ..utils import (
)
-class SSAIE(InfoExtractor):
- _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
+class MovingImageIE(InfoExtractor):
+ _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
_TEST = {
- 'url': 'http://ssa.nls.uk/film/3561',
+ 'url': 'http://movingimage.nls.uk/film/3561',
+ 'md5': '4caa05c2b38453e6f862197571a7be2f',
'info_dict': {
'id': '3561',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'SHETLAND WOOL',
'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
'duration': 900,
'thumbnail': 're:^https?://.*\.jpg$',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
}
def _real_extract(self, url):
@@ -30,10 +27,9 @@ class SSAIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- streamer = self._search_regex(
- r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
- play_path = self._search_regex(
- r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+ formats = self._extract_m3u8_formats(
+ self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
+ video_id, ext='mp4', entry_protocol='m3u8_native')
def search_field(field_name, fatal=False):
return self._search_regex(
@@ -44,13 +40,11 @@ class SSAIE(InfoExtractor):
description = unescapeHTML(search_field('Description'))
duration = parse_duration(search_field('Running time'))
thumbnail = self._search_regex(
- r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+ r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
- 'url': streamer,
- 'play_path': play_path,
- 'ext': 'flv',
+ 'formats': formats,
'title': title,
'description': description,
'duration': duration,
diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py
index 731c24542..2117d302d 100644
--- a/youtube_dl/extractor/myvidster.py
+++ b/youtube_dl/extractor/myvidster.py
@@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor):
'id': '3685814',
'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
'upload_date': '20141027',
- 'uploader_id': 'utkualp',
+ 'uploader': 'utkualp',
'ext': 'mp4',
'age_limit': 18,
},
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index aabd5b670..53561961c 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -39,18 +39,19 @@ class NBAIE(TurnerBaseIE):
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
'info_dict': {
- 'id': '0041400301-cle-atl-recap',
+ 'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
'ext': 'mp4',
'title': 'Hawks vs. Cavaliers Game 1',
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
'duration': 228,
'timestamp': 1432134543,
'upload_date': '20150520',
- }
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
'info_dict': {
- 'id': '1455672027478-Doc_Feb16_720',
+ 'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
'ext': 'mp4',
'title': 'Practice: Doc Rivers - 2/16/16',
'description': 'Head Coach Doc Rivers addresses the media following practice.',
@@ -61,6 +62,7 @@ class NBAIE(TurnerBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
'info_dict': {
@@ -75,7 +77,7 @@ class NBAIE(TurnerBaseIE):
}, {
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
'info_dict': {
- 'id': 'Wigginsmp4-3462601',
+ 'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
'ext': 'mp4',
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
@@ -87,6 +89,7 @@ class NBAIE(TurnerBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}]
_PAGE_SIZE = 30
diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py
index d889245ad..ec4d675e2 100644
--- a/youtube_dl/extractor/ninecninemedia.py
+++ b/youtube_dl/extractor/ninecninemedia.py
@@ -4,40 +4,36 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
parse_iso8601,
- parse_duration,
- ExtractorError
+ float_or_none,
+ ExtractorError,
+ int_or_none,
)
-class NineCNineMediaIE(InfoExtractor):
- _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+class NineCNineMediaBaseIE(InfoExtractor):
+ _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
+
+
+class NineCNineMediaStackIE(NineCNineMediaBaseIE):
+ IE_NAME = '9c9media:stack'
+ _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
def _real_extract(self, url):
- destination_code, video_id = re.match(self._VALID_URL, url).groups()
- api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
- content = self._download_json(api_base_url, video_id, query={
- '$include': '[contentpackages]',
- })
- title = content['Name']
- if len(content['ContentPackages']) > 1:
- raise ExtractorError('multiple content packages')
- content_package = content['ContentPackages'][0]
- stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
- stacks = self._download_json(stacks_base_url, video_id)['Items']
- if len(stacks) > 1:
- raise ExtractorError('multiple stacks')
- stack = stacks[0]
- stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
+ destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
+ stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
+ stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
+
formats = []
formats.extend(self._extract_m3u8_formats(
- stack_base_url + 'm3u8', video_id, 'mp4',
+ stack_base_url + 'm3u8', stack_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
- stack_base_url + 'f4m', video_id,
+ stack_base_url + 'f4m', stack_id,
f4m_id='hds', fatal=False))
- mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
+ mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
if mp4_url:
formats.append({
'url': mp4_url,
@@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor):
self._sort_formats(formats)
return {
- 'id': video_id,
- 'title': title,
- 'description': content.get('Desc') or content.get('ShortDesc'),
- 'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
- 'duration': parse_duration(content.get('BroadcastTime')),
+ 'id': stack_id,
'formats': formats,
}
+
+
+class NineCNineMediaIE(NineCNineMediaBaseIE):
+ IE_NAME = '9c9media'
+ _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ destination_code, content_id = re.match(self._VALID_URL, url).groups()
+ api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
+ content = self._download_json(api_base_url, content_id, query={
+ '$include': '[Media,Season,ContentPackages]',
+ })
+ title = content['Name']
+ if len(content['ContentPackages']) > 1:
+ raise ExtractorError('multiple content packages')
+ content_package = content['ContentPackages'][0]
+ package_id = content_package['Id']
+ content_package_url = api_base_url + 'contentpackages/%s/' % package_id
+ content_package = self._download_json(content_package_url, content_id)
+
+ if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
+ multistacks = len(stacks) > 1
+
+ thumbnails = []
+ for image in content.get('Images', []):
+ image_url = image.get('Url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('Width')),
+ 'height': int_or_none(image.get('Height')),
+ })
+
+ tags, categories = [], []
+ for source_name, container in (('Tags', tags), ('Genres', categories)):
+ for e in content.get(source_name, []):
+ e_name = e.get('Name')
+ if not e_name:
+ continue
+ container.append(e_name)
+
+ description = content.get('Desc') or content.get('ShortDesc')
+ season = content.get('Season', {})
+ base_info = {
+ 'description': description,
+ 'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
+ 'episode_number': int_or_none(content.get('Episode')),
+ 'season': season.get('Name'),
+ 'season_number': season.get('Number'),
+ 'season_id': season.get('Id'),
+ 'series': content.get('Media', {}).get('Name'),
+ 'tags': tags,
+ 'categories': categories,
+ }
+
+ entries = []
+ for stack in stacks:
+ stack_id = compat_str(stack['Id'])
+ entry = {
+ '_type': 'url_transparent',
+ 'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
+ 'id': stack_id,
+ 'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
+ 'duration': float_or_none(stack.get('Duration')),
+ 'ie_key': 'NineCNineMediaStack',
+ }
+ entry.update(base_info)
+ entries.append(entry)
+
+ return {
+ '_type': 'multi_video',
+ 'id': content_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 681683e86..142c34256 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -1,26 +1,37 @@
from __future__ import unicode_literals
+import hmac
+import hashlib
+import base64
+
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
+ mimetype2ext,
+ determine_ext,
)
class NYTimesBaseIE(InfoExtractor):
+ _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
+
def _extract_video_from_id(self, video_id):
- video_data = self._download_json(
- 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
- video_id, 'Downloading video JSON')
+ # Authorization generation algorithm is reverse engineered from `signer` in
+ # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
+ path = '/svc/video/api/v3/video/' + video_id
+ hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
+ video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
+ 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
+ 'X-NYTV': 'vhs',
+ }, fatal=False)
+ if not video_data:
+ video_data = self._download_json(
+ 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
+ video_id, 'Downloading video JSON')
title = video_data['headline']
- description = video_data.get('summary')
- duration = float_or_none(video_data.get('duration'), 1000)
-
- uploader = video_data.get('byline')
- publication_date = video_data.get('publication_date')
- timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
def get_file_size(file_size):
if isinstance(file_size, int):
@@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor):
elif isinstance(file_size, dict):
return int(file_size.get('value', 0))
else:
- return 0
-
- formats = [
- {
- 'url': video['url'],
- 'format_id': video.get('type'),
- 'vcodec': video.get('video_codec'),
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- 'filesize': get_file_size(video.get('fileSize')),
- } for video in video_data['renditions'] if video.get('url')
- ]
+ return None
+
+ urls = []
+ formats = []
+ for video in video_data.get('renditions', []):
+ video_url = video.get('url')
+ format_id = video.get('type')
+ if not video_url or format_id == 'thumbs' or video_url in urls:
+ continue
+ urls.append(video_url)
+ ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id or 'hls', fatal=False))
+ elif ext == 'mpd':
+ continue
+ # formats.extend(self._extract_mpd_formats(
+ # video_url, video_id, format_id or 'dash', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'vcodec': video.get('videoencoding') or video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
+ 'tbr': int_or_none(video.get('bitrate'), 1000),
+ 'ext': ext,
+ })
self._sort_formats(formats)
- thumbnails = [
- {
- 'url': 'http://www.nytimes.com/%s' % image['url'],
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': 'http://www.nytimes.com/' + image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
- } for image in video_data.get('images', []) if image.get('url')
- ]
+ })
+
+ publication_date = video_data.get('publication_date')
+ timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
return {
'id': video_id,
'title': title,
- 'description': description,
+ 'description': video_data.get('summary'),
'timestamp': timestamp,
- 'uploader': uploader,
- 'duration': duration,
+ 'uploader': video_data.get('byline'),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
'formats': formats,
'thumbnails': thumbnails,
}
@@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE):
_TESTS = [{
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
- 'md5': '18a525a510f942ada2720db5f31644c0',
+ 'md5': 'd665342765db043f7e225cff19df0f2d',
'info_dict': {
'id': '100000002847155',
'ext': 'mov',
diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py
index 4baf79688..d85e0294d 100644
--- a/youtube_dl/extractor/porncom.py
+++ b/youtube_dl/extractor/porncom.py
@@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
'duration': 551,
'view_count': int,
'age_limit': 18,
+ 'categories': list,
+ 'tags': list,
},
}, {
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
@@ -75,7 +77,14 @@ class PornComIE(InfoExtractor):
self._sort_formats(formats)
view_count = str_to_int(self._search_regex(
- r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
+ r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
+ 'view count', fatal=False))
+
+ def extract_list(kind):
+ s = self._search_regex(
+ r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
+ webpage, kind, fatal=False)
+ return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
return {
'id': video_id,
@@ -86,4 +95,6 @@ class PornComIE(InfoExtractor):
'view_count': view_count,
'formats': formats,
'age_limit': 18,
+ 'categories': extract_list('categories'),
+ 'tags': extract_list('tags'),
}
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
index 6b51e5c54..58f557e39 100644
--- a/youtube_dl/extractor/pornovoisines.py
+++ b/youtube_dl/extractor/pornovoisines.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import re
-import random
from .common import InfoExtractor
from ..utils import (
@@ -13,61 +12,69 @@ from ..utils import (
class PornoVoisinesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
-
- _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
- '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
-
- _SERVER_NUMBERS = (1, 2)
+ _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'
_TEST = {
- 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
- 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+ 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
+ 'md5': '6f8aca6a058592ab49fe701c8ba8317b',
'info_dict': {
- 'id': '1285',
+ 'id': '919',
'display_id': 'recherche-appartement',
'ext': 'mp4',
'title': 'Recherche appartement',
- 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+ 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140925',
'duration': 120,
'view_count': int,
'average_rating': float,
- 'categories': ['Débutantes', 'Scénario', 'Sodomie'],
+ 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
'age_limit': 18,
+ 'subtitles': {
+ 'fr': [{
+ 'ext': 'vtt',
+ }]
+ },
}
}
- @classmethod
- def build_video_url(cls, num):
- return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, video_id)
+ settings_url = self._download_json(
+ 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
+ video_id, note='Getting settings URL')['video_settings_url']
+ settings = self._download_json(settings_url, video_id)['data']
+
+ formats = []
+ for kind, data in settings['variants'].items():
+ if kind == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
+ elif kind == 'MP4':
+ for item in data:
+ formats.append({
+ 'url': item['url'],
+ 'height': item.get('height'),
+ 'bitrate': item.get('bitrate'),
+ })
+ self._sort_formats(formats)
- video_url = self.build_video_url(video_id)
+ webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(
- r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
- description = self._html_search_regex(
- r'<article id="descriptif">(.+?)</article>',
- webpage, 'description', fatal=False, flags=re.DOTALL)
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
- thumbnail = self._search_regex(
- r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
- webpage, 'thumbnail', fatal=False)
- if thumbnail:
- thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+ # The webpage has a bug - there's no space between "thumb" and src=
+ thumbnail = self._html_search_regex(
+ r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2',
+ webpage, 'thumbnail', fatal=False, group='url')
upload_date = unified_strdate(self._search_regex(
- r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
- duration = int_or_none(self._search_regex(
- 'Durée (\d+)', webpage, 'duration', fatal=False))
+ r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
+ duration = settings.get('main', {}).get('duration')
view_count = int_or_none(self._search_regex(
r'(\d+) vues', webpage, 'view count', fatal=False))
average_rating = self._search_regex(
@@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor):
if average_rating:
average_rating = float_or_none(average_rating.replace(',', '.'))
- categories = self._html_search_meta(
- 'keywords', webpage, 'categories', fatal=False)
+ categories = self._html_search_regex(
+ r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
if categories:
categories = [category.strip() for category in categories.split(',')]
+ subtitles = {'fr': [{
+ 'url': subtitle,
+ } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]}
+
return {
'id': video_id,
'display_id': display_id,
- 'url': video_url,
+ 'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
@@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor):
'average_rating': average_rating,
'categories': categories,
'age_limit': 18,
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py
index cc0416cb8..b8ac93a62 100644
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@@ -1,59 +1,72 @@
from __future__ import unicode_literals
import re
-import os
from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
class PyvideoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-
- _TESTS = [
- {
- 'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
- 'md5': '520915673e53a5c5d487c36e0c4d85b5',
- 'info_dict': {
- 'id': '24_4WWkSmNo',
- 'ext': 'webm',
- 'title': 'Become a logging expert in 30 minutes',
- 'description': 'md5:9665350d466c67fb5b1598de379021f7',
- 'upload_date': '20130320',
- 'uploader': 'Next Day Video',
- 'uploader_id': 'NextDayVideo',
- },
- 'add_ie': ['Youtube'],
+ _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
+
+ _TESTS = [{
+ 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
+ 'info_dict': {
+ 'id': 'become-a-logging-expert-in-30-minutes',
},
- {
- 'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
- 'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
- 'info_dict': {
- 'id': '2542',
- 'ext': 'm4v',
- 'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
- },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
+ 'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+ 'info_dict': {
+ 'id': '2542',
+ 'ext': 'm4v',
+ 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
},
- ]
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
+ category = mobj.group('category')
video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
+ entries = []
- m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
- if m_youtube is not None:
- return self.url_result(m_youtube.group(1), 'Youtube')
+ data = self._download_json(
+ 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
+ % (category, video_id), video_id, fatal=False)
- title = self._html_search_regex(
- r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
- webpage, 'title', flags=re.DOTALL)
- video_url = self._search_regex(
- [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
- webpage, 'video url', flags=re.DOTALL)
+ if data:
+ for video in data['videos']:
+ video_url = video.get('url')
+ if video_url:
+ if video.get('type') == 'youtube':
+ entries.append(self.url_result(video_url, 'Youtube'))
+ else:
+ entries.append({
+ 'id': compat_str(data.get('id') or video_id),
+ 'url': video_url,
+ 'title': data['title'],
+ 'description': data.get('description') or data.get('summary'),
+ 'thumbnail': data.get('thumbnail_url'),
+ 'duration': int_or_none(data.get('duration')),
+ })
+ else:
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ media_urls = self._search_regex(
+ r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
+ for m in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
+ media_url = m.group('url')
+ if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
+ entries.append(self.url_result(media_url, 'Youtube'))
+ else:
+ entries.append({
+ 'id': video_id,
+ 'url': media_url,
+ 'title': title,
+ })
- return {
- 'id': video_id,
- 'title': os.path.splitext(title)[0],
- 'url': video_url,
- }
+ return self.playlist_result(entries, video_id)
diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py
index f9cd48790..23abf7a27 100644
--- a/youtube_dl/extractor/rottentomatoes.py
+++ b/youtube_dl/extractor/rottentomatoes.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urlparse
from .internetvideoarchive import InternetVideoArchiveIE
@@ -11,21 +10,23 @@ class RottenTomatoesIE(InfoExtractor):
_TEST = {
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
'info_dict': {
- 'id': '613340',
+ 'id': '11028566',
'ext': 'mp4',
'title': 'Toy Story 3',
+ 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- og_video = self._og_search_video_url(webpage)
- query = compat_urlparse.urlparse(og_video).query
+ iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')
return {
'_type': 'url_transparent',
- 'url': InternetVideoArchiveIE._build_xml_url(query),
+ 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,
'ie_key': InternetVideoArchiveIE.ie_key(),
+ 'id': video_id,
'title': self._og_search_title(webpage),
}
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index aeae931a2..9635c2b49 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
- (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE):
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py
index a147f7db1..e2a9e45ac 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE):
'description': 'Cartman Consigue Una Sonda Anal',
},
'playlist_count': 4,
+ 'skip': 'Geo-restricted',
}]
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 23067e8c6..6febf805b 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE):
class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
- (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
@@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
# rtmp download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}, {
# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py
index ba1380abc..c3f118894 100644
--- a/youtube_dl/extractor/thestar.py
+++ b/youtube_dl/extractor/thestar.py
@@ -2,8 +2,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_parse_qs
class TheStarIE(InfoExtractor):
@@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
- brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+ brightcove_id = self._search_regex(
+ r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)',
+ webpage, 'brightcove id')
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py
deleted file mode 100644
index 406f4a826..000000000
--- a/youtube_dl/extractor/thvideo.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- unified_strdate
-)
-
-
-class THVideoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://thvideo.tv/v/th1987/',
- 'md5': 'fa107b1f73817e325e9433505a70db50',
- 'info_dict': {
- 'id': '1987',
- 'ext': 'mp4',
- 'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
- 'display_id': 'th1987',
- 'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
- 'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
- 'upload_date': '20140722'
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- # extract download link from mobile player page
- webpage_player = self._download_webpage(
- 'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
- video_id, note='Downloading video source page')
- video_url = self._html_search_regex(
- r'<source src="(.*?)" type', webpage_player, 'video url')
-
- # extract video info from main page
- webpage = self._download_webpage(
- 'http://thvideo.tv/v/th%s' % (video_id), video_id)
- title = self._og_search_title(webpage)
- display_id = 'th%s' % video_id
- thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage)
- upload_date = unified_strdate(self._html_search_regex(
- r'span itemprop="datePublished" content="(.*?)">', webpage,
- 'upload date', fatal=False))
-
- return {
- 'id': video_id,
- 'ext': 'mp4',
- 'url': video_url,
- 'title': title,
- 'display_id': display_id,
- 'thumbnail': thumbnail,
- 'description': description,
- 'upload_date': upload_date
- }
-
-
-class THVideoPlaylistIE(InfoExtractor):
- _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://thvideo.tv/mylist2',
- 'info_dict': {
- 'id': '2',
- 'title': '幻想万華鏡',
- },
- 'playlist_mincount': 23,
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
- list_title = self._html_search_regex(
- r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title',
- fatal=False)
-
- entries = [
- self.url_result('http://thvideo.tv/v/th' + id, 'THVideo')
- for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)]
-
- return self.playlist_result(entries, playlist_id, list_title)
diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py
index 108caa9d8..b59dafda6 100644
--- a/youtube_dl/extractor/turner.py
+++ b/youtube_dl/extractor/turner.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
xpath_text,
int_or_none,
@@ -16,9 +17,12 @@ from ..utils import (
class TurnerBaseIE(InfoExtractor):
+ def _extract_timestamp(self, video_data):
+ return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
+
def _extract_cvp_info(self, data_src, video_id, path_data={}):
video_data = self._download_xml(data_src, video_id)
- video_id = video_data.attrib['id'].split('/')[-1].split('.')[0]
+ video_id = video_data.attrib['id']
title = xpath_text(video_data, 'headline', fatal=True)
# rtmp_src = xpath_text(video_data, 'akamai/src')
# if rtmp_src:
@@ -30,11 +34,11 @@ class TurnerBaseIE(InfoExtractor):
tokens = {}
urls = []
formats = []
- rex = re.compile(r'''(?x)
- (?P<width>[0-9]+)x(?P<height>[0-9]+)
- (?:_(?P<bitrate>[0-9]+))?
- ''')
- for video_file in video_data.findall('files/file'):
+ rex = re.compile(
+ r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
+ # Possible formats locations: files/file, files/groupFiles/files
+ # and maybe others
+ for video_file in video_data.findall('.//file'):
video_url = video_file.text.strip()
if not video_url:
continue
@@ -84,12 +88,14 @@ class TurnerBaseIE(InfoExtractor):
if video_url in urls:
continue
urls.append(video_url)
- format_id = video_file.attrib['bitrate']
+ format_id = video_file.get('bitrate')
if ext == 'smil':
- formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False))
+ formats.extend(self._extract_smil_formats(
+ video_url, video_id, fatal=False))
elif ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ video_url, video_id, 'mp4', m3u8_id=format_id or 'hls',
+ fatal=False)
if m3u8_formats:
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
# ourselves
@@ -103,7 +109,7 @@ class TurnerBaseIE(InfoExtractor):
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
update_url_query(video_url, {'hdcore': '3.7.0'}),
- video_id, f4m_id=format_id, fatal=False))
+ video_id, f4m_id=format_id or 'hds', fatal=False))
else:
f = {
'format_id': format_id,
@@ -117,29 +123,31 @@ class TurnerBaseIE(InfoExtractor):
'height': int(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
- elif format_id.isdigit():
- f['tbr'] = int(format_id)
- else:
- mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
- if mobj:
- if mobj.group(1) == 'audio':
- f.update({
- 'vcodec': 'none',
- 'ext': 'm4a',
- })
- else:
- f['tbr'] = int(mobj.group(1))
+ elif isinstance(format_id, compat_str):
+ if format_id.isdigit():
+ f['tbr'] = int(format_id)
+ else:
+ mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
+ if mobj:
+ if mobj.group(1) == 'audio':
+ f.update({
+ 'vcodec': 'none',
+ 'ext': 'm4a',
+ })
+ else:
+ f['tbr'] = int(mobj.group(1))
formats.append(f)
self._sort_formats(formats)
subtitles = {}
for source in video_data.findall('closedCaptions/source'):
for track in source.findall('track'):
- source_url = source.get('url')
- if not source_url:
+ track_url = track.get('url')
+ if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
continue
- subtitles.set_default(source.get('lang') or source.get('label') or 'en', []).append({
- 'url': source_url,
+ lang = track.get('lang') or track.get('label') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': track_url,
'ext': {
'scc': 'scc',
'webvtt': 'vtt',
@@ -154,10 +162,6 @@ class TurnerBaseIE(InfoExtractor):
'height': int_or_none(image.get('height')),
} for image in video_data.findall('images/image')]
- timestamp = None
- if 'cnn.com' not in data_src:
- timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
-
return {
'id': video_id,
'title': title,
@@ -166,7 +170,7 @@ class TurnerBaseIE(InfoExtractor):
'thumbnails': thumbnails,
'description': xpath_text(video_data, 'description'),
'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
- 'timestamp': timestamp,
+ 'timestamp': self._extract_timestamp(video_data),
'upload_date': xpath_attr(video_data, 'metas', 'version'),
'series': xpath_text(video_data, 'showTitle'),
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py
new file mode 100644
index 000000000..1cd3e6a58
--- /dev/null
+++ b/youtube_dl/extractor/tvnoe.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .jwplatform import JWPlatformBaseIE
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ js_to_json,
+)
+
+
+class TVNoeIE(JWPlatformBaseIE):
+ _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.tvnoe.cz/video/10362',
+ 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
+ 'info_dict': {
+ 'id': '10362',
+ 'ext': 'mp4',
+ 'series': 'Noční univerzita',
+ 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
+ 'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ iframe_url = self._search_regex(
+ r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
+
+ ifs_page = self._download_webpage(iframe_url, video_id)
+ jwplayer_data = self._parse_json(
+ self._find_jwplayer_data(ifs_page),
+ video_id, transform_source=js_to_json)
+ info_dict = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=iframe_url)
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': clean_html(get_element_by_class(
+ 'field-name-field-podnazev', webpage)),
+ 'description': clean_html(get_element_by_class(
+ 'field-name-body', webpage)),
+ 'series': clean_html(get_element_by_class('title', webpage))
+ })
+
+ return info_dict
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index 92321d66e..7fd9b777b 100644
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dl/extractor/vimple.py
@@ -28,23 +28,24 @@ class SprutoBaseIE(InfoExtractor):
class VimpleIE(SprutoBaseIE):
IE_DESC = 'Vimple - one-click video hosting'
- _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
- _TESTS = [
- {
- 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
- 'md5': '2e750a330ed211d3fd41821c6ad9a279',
- 'info_dict': {
- 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
- 'ext': 'mp4',
- 'title': 'Sunset',
- 'duration': 20,
- 'thumbnail': 're:https?://.*?\.jpg',
- },
- }, {
- 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
- 'only_matching': True,
- }
- ]
+ _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P<id>[\da-f-]{32,36})'
+ _TESTS = [{
+ 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
+ 'md5': '2e750a330ed211d3fd41821c6ad9a279',
+ 'info_dict': {
+ 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
+ 'ext': 'mp4',
+ 'title': 'Sunset',
+ 'duration': 20,
+ 'thumbnail': 're:https?://.*?\.jpg',
+ },
+ }, {
+ 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py
index b49542b16..7bdd8b1dc 100644
--- a/youtube_dl/extractor/vodplatform.py
+++ b/youtube_dl/extractor/vodplatform.py
@@ -6,7 +6,7 @@ from ..utils import unescapeHTML
class VODPlatformIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'
_TEST = {
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index b0679dfb7..d7a81ab8c 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -8,7 +8,6 @@ import re
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
compat_urllib_parse,
- compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
ExtractorError,
int_or_none,
mimetype2ext,
+ determine_ext,
)
from .brightcove import BrightcoveNewIE
@@ -39,7 +39,7 @@ class YahooIE(InfoExtractor):
},
{
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
- 'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23',
+ 'md5': '251af144a19ebc4a033e8ba91ac726bb',
'info_dict': {
'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
'ext': 'mp4',
@@ -50,7 +50,7 @@ class YahooIE(InfoExtractor):
},
{
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
- 'md5': '75ffabdb87c16d4ffe8c036dc4d1c136',
+ 'md5': '7993e572fac98e044588d0b5260f4352',
'info_dict': {
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
'ext': 'mp4',
@@ -61,7 +61,7 @@ class YahooIE(InfoExtractor):
},
{
'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
- 'md5': '9035d38f88b1782682a3e89f985be5bb',
+ 'md5': '45c024bad51e63e9b6f6fad7a43a8c23',
'info_dict': {
'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
'ext': 'mp4',
@@ -72,10 +72,10 @@ class YahooIE(InfoExtractor):
},
{
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
- 'md5': '0b51660361f0e27c9789e7037ef76f4b',
+ 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
'info_dict': {
'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
'description': 'md5:f66c890e1490f4910a9953c941dee944',
'duration': 97,
@@ -98,7 +98,7 @@ class YahooIE(InfoExtractor):
'id': '154609075',
},
'playlist': [{
- 'md5': 'f8e336c6b66f503282e5f719641d6565',
+ 'md5': '000887d0dc609bc3a47c974151a40fb8',
'info_dict': {
'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
'ext': 'mp4',
@@ -107,7 +107,7 @@ class YahooIE(InfoExtractor):
'duration': 30,
},
}, {
- 'md5': '958bcb90b4d6df71c56312137ee1cd5a',
+ 'md5': '81bc74faf10750fe36e4542f9a184c66',
'info_dict': {
'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
'ext': 'mp4',
@@ -139,7 +139,7 @@ class YahooIE(InfoExtractor):
'skip': 'Domain name in.lifestyle.yahoo.com gone',
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
- 'md5': 'b17ac378b1134fa44370fb27db09a744',
+ 'md5': '2a9752f74cb898af5d1083ea9f661b58',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
@@ -168,7 +168,7 @@ class YahooIE(InfoExtractor):
}, {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
- 'md5': '1ddbf7c850777548438e5c4f147c7b8c',
+ 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
'info_dict': {
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
'ext': 'mp4',
@@ -196,6 +196,7 @@ class YahooIE(InfoExtractor):
'description': 'Galactic',
'title': 'Dolla Diva (feat. Maggie Koerner)',
},
+ 'skip': 'redirect to https://www.yahoo.com/music',
},
]
@@ -213,15 +214,7 @@ class YahooIE(InfoExtractor):
entries = []
iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
for idx, iframe_url in enumerate(iframe_urls):
- iframepage = self._download_webpage(
- host + iframe_url, display_id,
- note='Downloading iframe webpage for video #%d' % idx)
- items_json = self._search_regex(
- r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
- if items_json:
- items = json.loads(items_json)
- video_id = items[0]['id']
- entries.append(self._get_info(video_id, display_id, webpage))
+ entries.append(self.url_result(host + iframe_url, 'Yahoo'))
if entries:
return self.playlist_result(entries, page_id)
@@ -246,7 +239,9 @@ class YahooIE(InfoExtractor):
if config:
sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
if sapi and 'query' in sapi:
- return self._extract_info(display_id, sapi, webpage)
+ info = self._extract_info(display_id, sapi, webpage)
+ self._sort_formats(info['formats'])
+ return info
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
@@ -292,15 +287,17 @@ class YahooIE(InfoExtractor):
formats = []
for s in info['streams']:
+ tbr = int_or_none(s.get('bitrate'))
format_info = {
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),
- 'tbr': int_or_none(s.get('bitrate')),
+ 'tbr': tbr,
}
host = s['host']
path = s['path']
if host.startswith('rtmp'):
+ fmt = 'rtmp'
format_info.update({
'url': host,
'play_path': path,
@@ -308,14 +305,18 @@ class YahooIE(InfoExtractor):
})
else:
if s.get('format') == 'm3u8_playlist':
- format_info['protocol'] = 'm3u8_native'
- format_info['ext'] = 'mp4'
+ fmt = 'hls'
+ format_info.update({
+ 'protocol': 'm3u8_native',
+ 'ext': 'mp4',
+ })
+ else:
+ fmt = format_info['ext'] = determine_ext(path)
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
+ format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
formats.append(format_info)
- self._sort_formats(formats)
-
closed_captions = self._html_search_regex(
r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
default='[]')
@@ -346,17 +347,25 @@ class YahooIE(InfoExtractor):
def _get_info(self, video_id, display_id, webpage):
region = self._search_regex(
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
- webpage, 'region', fatal=False, default='US')
- data = compat_urllib_parse_urlencode({
- 'protocol': 'http',
- 'region': region.upper(),
- })
- query_url = (
- 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
- '{id}?{data}'.format(id=video_id, data=data))
- query_result = self._download_json(
- query_url, display_id, 'Downloading video info')
- return self._extract_info(display_id, query_result, webpage)
+ webpage, 'region', fatal=False, default='US').upper()
+ formats = []
+ info = {}
+ for fmt in ('webm', 'mp4'):
+ query_result = self._download_json(
+ 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+ display_id, 'Downloading %s video info' % fmt, query={
+ 'protocol': 'http',
+ 'region': region,
+ 'format': fmt,
+ })
+ info = self._extract_info(display_id, query_result, webpage)
+ formats.extend(info['formats'])
+ formats.extend(self._extract_m3u8_formats(
+ 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
+ video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+ info['formats'] = formats
+ return info
class YahooSearchIE(SearchInfoExtractor):
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index 31e2f9263..b50f34e9b 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -1,21 +1,16 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
class YouJizzIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
_TESTS = [{
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- 'md5': '07e15fa469ba384c7693fd246905547c',
+ 'md5': '78fc1901148284c69af12640e01c6310',
'info_dict': {
'id': '2189178',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Zeichentrick 1',
'age_limit': 18,
}
@@ -27,38 +22,18 @@ class YouJizzIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ # YouJizz's HTML5 player has invalid HTML
+ webpage = webpage.replace('"controls', '" controls')
age_limit = self._rta_search(webpage)
video_title = self._html_search_regex(
r'<title>\s*(.*)\s*</title>', webpage, 'title')
- embed_page_url = self._search_regex(
- r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
- webpage, 'embed page')
- webpage = self._download_webpage(
- embed_page_url, video_id, note='downloading embed page')
-
- # Get the video URL
- m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
- if m_playlist is not None:
- playlist_url = m_playlist.group('playlist')
- playlist_page = self._download_webpage(playlist_url, video_id,
- 'Downloading playlist page')
- m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
- if len(m_levels) == 0:
- raise ExtractorError('Unable to extract video url')
- videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
- (_, video_url) = sorted(videos)[0]
- video_url = video_url.replace('%252F', '%2F')
- else:
- video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
- webpage, 'video URL')
+ info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
- return {
+ info_dict.update({
'id': video_id,
- 'url': video_url,
'title': video_title,
- 'ext': 'flv',
- 'format': 'flv',
- 'player_url': embed_page_url,
'age_limit': age_limit,
- }
+ })
+
+ return info_dict
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index 0df2d76ee..0265a64a7 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor):
'age_limit': 18,
},
}, {
- # Anonymous User uploader
+ # Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
'info_dict': {
'id': '561726',
@@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor):
'title': 'Big Tits Awesome Brunette On amazing webcam show',
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
'thumbnail': 're:^https?://.*\.jpg$',
- 'uploader': 'Anonymous User',
+ 'uploader': 'Unknown',
'upload_date': '20111125',
'average_rating': int,
'view_count': int,
@@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor):
r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', fatal=False))
- def extract_tag_box(title):
- tag_box = self._search_regex(
- (r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*'
- '<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title),
- webpage, '%s tag box' % title, default=None)
+ def extract_tag_box(regex, title):
+ tag_box = self._search_regex(regex, webpage, title, default=None)
if not tag_box:
return []
return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
- categories = extract_tag_box('Category')
- tags = extract_tag_box('Tags')
+ categories = extract_tag_box(
+ r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
+ tags = extract_tag_box(
+ r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
+ 'tags')
return {
'id': video_id,
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index d5d5b7334..8fc26bd02 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -264,7 +264,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
- (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
+ (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
(?(1).+)? # if we found the ID, everything can follow
$"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@@ -844,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
'only_matching': True,
+ },
+ {
+ # Rental video preview
+ 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+ 'info_dict': {
+ 'id': 'uGpuVWrhIzE',
+ 'ext': 'mp4',
+ 'title': 'Piku - Trailer',
+ 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+ 'upload_date': '20150811',
+ 'uploader': 'FlixMatrix',
+ 'uploader_id': 'FlixMatrixKaravan',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}
]
@@ -1254,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
+ # Rental video is not rented but preview is available (e.g.
+ # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
+ # https://github.com/rg3/youtube-dl/issues/10532)
+ if not video_info and args.get('ypc_vid'):
+ return self.url_result(
+ args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
@@ -1754,11 +1778,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VALID_URL = r"""(?x)(?:
(?:https?://)?
(?:\w+\.)?
- youtube\.com/
(?:
- (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
- \? (?:.*?[&;])*? (?:p|a|list)=
- | p/
+ youtube\.com/
+ (?:
+ (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
+ \? (?:.*?[&;])*? (?:p|a|list)=
+ | p/
+ )|
+ youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
)
(
(?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
@@ -1841,6 +1868,31 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
},
'playlist_mincout': 21,
+ }, {
+ # Playlist URL that does not actually serve a playlist
+ 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
+ 'info_dict': {
+ 'id': 'FqZTN594JQw',
+ 'ext': 'webm',
+ 'title': "Smiley's People 01 detective, Adventure Series, Action",
+ 'uploader': 'STREEM',
+ 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
+ 'upload_date': '20150526',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
+ 'categories': ['People & Blogs'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
+ 'only_matching': True,
}]
def _real_initialize(self):
@@ -1901,9 +1953,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
playlist_title = self._html_search_regex(
r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
- page, 'title')
+ page, 'title', default=None)
+
+ has_videos = True
- return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
+ if not playlist_title:
+ try:
+ # Some playlist URLs don't actually serve a playlist (e.g.
+ # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
+ next(self._entries(page, playlist_id))
+ except StopIteration:
+ has_videos = False
+
+ return has_videos, self.playlist_result(
+ self._entries(page, playlist_id), playlist_id, playlist_title)
def _check_download_just_video(self, url, playlist_id):
# Check if it's a video-specific URL
@@ -1912,9 +1975,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, 'Youtube', video_id=video_id)
+ return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+ return video_id, None
+ return None, None
def _real_extract(self, url):
# Extract playlist id
@@ -1923,7 +1988,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
raise ExtractorError('Invalid URL: %s' % url)
playlist_id = mobj.group(1) or mobj.group(2)
- video = self._check_download_just_video(url, playlist_id)
+ video_id, video = self._check_download_just_video(url, playlist_id)
if video:
return video
@@ -1931,7 +1996,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
- return self._extract_playlist(playlist_id)
+ has_videos, playlist = self._extract_playlist(playlist_id)
+ if has_videos or not video_id:
+ return playlist
+
+ # Some playlist URLs don't actually serve a playlist (see
+ # https://github.com/rg3/youtube-dl/issues/10537).
+ # Fallback to plain video extraction if there is a video id
+ # along with playlist id.
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
@@ -2309,10 +2382,11 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
}]
def _real_extract(self, url):
- video = self._check_download_just_video(url, 'WL')
+ _, video = self._check_download_just_video(url, 'WL')
if video:
return video
- return self._extract_playlist('WL')
+ _, playlist = self._extract_playlist('WL')
+ return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 5d62deef4..56f312f57 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -423,7 +423,15 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'--fragment-retries',
dest='fragment_retries', metavar='RETRIES', default=10,
- help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)')
+ help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)')
+ downloader.add_option(
+ '--skip-unavailable-fragments',
+ action='store_true', dest='skip_unavailable_fragments', default=True,
+ help='Skip unavailable fragments (DASH and hlsnative only)')
+ general.add_option(
+ '--abort-on-unavailable-fragment',
+ action='store_false', dest='skip_unavailable_fragments',
+ help='Abort downloading when some fragment is not available')
downloader.add_option(
'--buffer-size',
dest='buffersize', metavar='SIZE', default='1024',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1091f17f3..ed199c4ad 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2148,7 +2148,7 @@ def mimetype2ext(mt):
return ext
_, _, res = mt.rpartition('/')
- res = res.lower()
+ res = res.split(';')[0].strip().lower()
return {
'3gpp': '3gp',
@@ -2168,6 +2168,7 @@ def mimetype2ext(mt):
'f4m+xml': 'f4m',
'hds+xml': 'f4m',
'vnd.ms-sstr+xml': 'ism',
+ 'quicktime': 'mov',
}.get(res, res)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index ee30ca2ad..b2ea6dac6 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.08.28'
+__version__ = '2016.09.04.1'