aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-xyoutube_dl/YoutubeDL.py6
-rw-r--r--youtube_dl/extractor/__init__.py4
-rw-r--r--youtube_dl/extractor/arte.py1
-rw-r--r--youtube_dl/extractor/crunchyroll.py8
-rw-r--r--youtube_dl/extractor/douyutv.py77
-rw-r--r--youtube_dl/extractor/generic.py15
-rw-r--r--youtube_dl/extractor/grooveshark.py6
-rw-r--r--youtube_dl/extractor/krasview.py6
-rw-r--r--youtube_dl/extractor/letv.py9
-rw-r--r--youtube_dl/extractor/mixcloud.py69
-rw-r--r--youtube_dl/extractor/niconico.py23
-rw-r--r--youtube_dl/extractor/nrk.py73
-rw-r--r--youtube_dl/extractor/nytimes.py40
-rw-r--r--youtube_dl/extractor/primesharetv.py69
-rw-r--r--youtube_dl/extractor/sohu.py93
-rw-r--r--youtube_dl/extractor/twitch.py9
-rw-r--r--youtube_dl/extractor/ultimedia.py104
-rw-r--r--youtube_dl/extractor/videomega.py45
-rw-r--r--youtube_dl/extractor/vine.py15
-rw-r--r--youtube_dl/utils.py7
-rw-r--r--youtube_dl/version.py2
21 files changed, 545 insertions, 136 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 5a83bc956..b5ef5e009 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -328,9 +328,6 @@ class YoutubeDL(object):
'Parameter outtmpl is bytes, but should be a unicode string. '
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
- if '%(stitle)s' in self.params.get('outtmpl', ''):
- self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
-
self._setup_opener()
if auto_init:
@@ -1218,9 +1215,6 @@ class YoutubeDL(object):
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
- # Keep for backwards compatibility
- info_dict['stitle'] = info_dict['title']
-
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e94779d40..a20492fc3 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -107,6 +107,7 @@ from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
@@ -346,6 +347,7 @@ from .npo import (
)
from .nrk import (
NRKIE,
+ NRKPlaylistIE,
NRKTVIE,
)
from .ntvde import NTVDeIE
@@ -381,6 +383,7 @@ from .pornhub import (
)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
@@ -537,6 +540,7 @@ from .udemy import (
UdemyIE,
UdemyCourseIE
)
+from .ultimedia import UltimediaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 929dd3cc5..8273bd6c9 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
formats.append(format)
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index e64b88fbc..6ded723c9 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -23,7 +23,6 @@ from ..utils import (
)
from ..aes import (
aes_cbc_decrypt,
- inc,
)
@@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
key = obfuscate_key(id)
- class Counter:
- __value = iv
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
new file mode 100644
index 000000000..d7956e6e4
--- /dev/null
+++ b/youtube_dl/extractor/douyutv.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class DouyuTVIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+ _TEST = {
+ 'url': 'http://www.douyutv.com/iseven',
+ 'info_dict': {
+ 'id': 'iseven',
+ 'ext': 'flv',
+ 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:9e525642c25a0a24302869937cf69d17',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': '7师傅',
+ 'uploader_id': '431925',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_json(
+ 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
+
+ data = config['data']
+
+ error_code = config.get('error', 0)
+ show_status = data.get('show_status')
+        if error_code != 0:
+ raise ExtractorError(
+ 'Server reported error %i' % error_code, expected=True)
+
+ # 1 = live, 2 = offline
+ if show_status == '2':
+ raise ExtractorError(
+ 'Live stream is offline', expected=True)
+
+ base_url = data['rtmp_url']
+ live_path = data['rtmp_live']
+
+ title = self._live_title(data['room_name'])
+ description = data.get('show_details')
+ thumbnail = data.get('room_src')
+
+ uploader = data.get('nickname')
+ uploader_id = data.get('owner_uid')
+
+ multi_formats = data.get('rtmp_multi_bitrate')
+ if not isinstance(multi_formats, dict):
+ multi_formats = {}
+ multi_formats['live'] = live_path
+
+ formats = [{
+ 'url': '%s/%s' % (base_url, format_path),
+ 'format_id': format_id,
+ 'preference': 1 if format_id == 'live' else 0,
+ } for format_id, format_path in multi_formats.items()]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4e6927b08..8716e4503 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1006,6 +1006,13 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for NYTimes player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
@@ -1268,10 +1275,16 @@ class GenericIE(InfoExtractor):
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = found.group(1)
self.report_following_redirect(new_url)
diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py
index 848d17beb..36ad4915c 100644
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
if webpage is not None:
o = GroovesharkHtmlParser.extract_object_tags(webpage)
- return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
+ return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
- return (webpage, None)
+ return webpage, None
def _real_initialize(self):
self.ts = int(time.time() * 1000) # timestamp in millis
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
swf_referer = None
if self.do_playerpage_request:
(_, player_objs) = self._get_playerpage(url)
- if player_objs is not None:
+ if player_objs:
swf_referer = self._build_swf_referer(url, player_objs[0])
self.to_screen('SWF Referer: %s' % swf_referer)
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index e46954b47..96f95979a 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
description = self._og_search_description(webpage, default=None)
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
duration = int_or_none(flashvars.get('duration'))
- width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
- height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, 'video width', default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, 'video height', default=None))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 85eee141b..1484ac0d2 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -88,12 +88,13 @@ class LetvIE(InfoExtractor):
play_json_req = compat_urllib_request.Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
)
- play_json_req.add_header(
- 'Ytdl-request-proxy',
- self._downloader.params.get('cn_verification_proxy'))
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
play_json = self._download_json(
play_json_req,
- media_id, 'playJson data')
+ media_id, 'Downloading playJson data')
# Check for errors
playstatus = play_json['playstatus']
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 1831c6749..21aea0c55 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,6 +1,7 @@
from __future__ import unicode_literals
import re
+import itertools
from .common import InfoExtractor
from ..compat import (
@@ -10,7 +11,6 @@ from ..utils import (
ExtractorError,
HEADRequest,
str_to_int,
- parse_iso8601,
)
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach',
- 'upload_date': '20111115',
- 'timestamp': 1321359578,
'thumbnail': 're:https?://.*\.jpg',
'view_count': int,
'like_count': int,
@@ -37,31 +35,30 @@ class MixcloudIE(InfoExtractor):
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': {
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
- 'ext': 'm4a',
- 'title': 'Electric Relaxation vol. 3',
+ 'ext': 'mp3',
+ 'title': 'Caribou 7 inch Vinyl Mix & Chat',
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
- 'uploader': 'Daniel Drumz',
+ 'uploader': 'Gilles Peterson Worldwide',
'uploader_id': 'gillespeterson',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': 're:https?://.*/images/',
'view_count': int,
'like_count': int,
},
}]
- def _get_url(self, track_id, template_url):
- server_count = 30
- for i in range(server_count):
- url = template_url % i
+ def _get_url(self, track_id, template_url, server_number):
+ boundaries = (1, 30)
+ for nr in server_numbers(server_number, boundaries):
+ url = template_url % nr
try:
# We only want to know if the request succeed
# don't download the whole file
self._request_webpage(
HEADRequest(url), track_id,
- 'Checking URL %d/%d ...' % (i + 1, server_count + 1))
+ 'Checking URL %d/%d ...' % (nr, boundaries[-1]))
return url
except ExtractorError:
pass
-
return None
def _real_extract(self, url):
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
+ server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
- final_song_url = self._get_url(track_id, template_url)
+ final_song_url = self._get_url(track_id, template_url, server_number)
if final_song_url is None:
self.to_screen('Trying with m4a extension')
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
- final_song_url = self._get_url(track_id, template_url)
+ final_song_url = self._get_url(track_id, template_url, server_number)
if final_song_url is None:
raise ExtractorError('Unable to extract track url')
PREFIX = (
- r'<span class="play-button[^"]*?"'
+ r'm-play-on-spacebar[^>]+'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
like_count = str_to_int(self._search_regex(
- [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
- r'/favorites/?">([0-9]+)<'],
+ r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
webpage, 'like count', fatal=False))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False))
- timestamp = parse_iso8601(self._search_regex(
- r'<time itemprop="dateCreated" datetime="([^"]+)">',
- webpage, 'upload date', default=None))
return {
'id': track_id,
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
- 'timestamp': timestamp,
'view_count': view_count,
'like_count': like_count,
}
+
+
+def server_numbers(first, boundaries):
+ """ Server numbers to try in descending order of probable availability.
+ Starting from first (i.e. the number of the server hosting the preview file)
+ and going further and further up to the higher boundary and down to the
+ lower one in an alternating fashion. Namely:
+
+ server_numbers(2, (1, 5))
+
+ # Where the preview server is 2, min number is 1 and max is 5.
+ # Yields: 2, 3, 1, 4, 5
+
+ Why not random numbers or increasing sequences? Since from what I've seen,
+ full length files seem to be hosted on servers whose number is closer to
+ that of the preview; to be confirmed.
+ """
+ zip_longest = getattr(itertools, 'zip_longest', None)
+ if zip_longest is None:
+ # python 2.x
+ zip_longest = itertools.izip_longest
+
+ if len(boundaries) != 2:
+ raise ValueError("boundaries should be a two-element tuple")
+    lower, upper = boundaries
+    highs = range(first + 1, upper + 1)
+    lows = range(first - 1, lower - 1, -1)
+ rest = filter(
+ None, itertools.chain.from_iterable(zip_longest(highs, lows)))
+ yield first
+ for n in rest:
+ yield n
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 7fb4e57df..ddec7b338 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': {
@@ -39,7 +39,24 @@ class NiconicoIE(InfoExtractor):
'username': 'ydl.niconico@gmail.com',
'password': 'youtube-dl',
},
- }
+ }, {
+ 'url': 'http://www.nicovideo.jp/watch/nm14296458',
+ 'md5': '8db08e0158457cf852a31519fceea5bc',
+ 'info_dict': {
+ 'id': 'nm14296458',
+ 'ext': 'swf',
+ 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
+ 'description': 'md5:',
+ 'uploader': 'りょうた',
+ 'uploader_id': '18822557',
+ 'upload_date': '20110429',
+ 'duration': 209,
+ },
+ 'params': {
+ 'username': 'ydl.niconico@gmail.com',
+ 'password': 'youtube-dl',
+ },
+ }]
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
@@ -89,7 +106,7 @@ class NiconicoIE(InfoExtractor):
if self._AUTHENTICATED:
# Get flv info
flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+ 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
video_id, 'Downloading flv info')
else:
# Get external player info
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index bff36f9d3..e91d3a248 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -14,46 +14,48 @@ from ..utils import (
class NRKIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+ _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_TESTS = [
{
- 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
- 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'bccd850baebefe23b56d708a113229c2',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 263,
}
},
{
- 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
- 'md5': '3471f2a51718195164e88f46bf427668',
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ 'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
}
},
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id)
-
- video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+ video_id = self._match_id(url)
data = self._download_json(
- 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+ 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+ video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
- raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+ raise ExtractorError(
+ 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
+ expected=True)
+
+ video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
- video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+ duration = parse_duration(data.get('duration'))
images = data.get('images')
if images:
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
'ext': 'flv',
'title': data['title'],
'description': data['description'],
+ 'duration': duration,
'thumbnail': thumbnail,
}
+class NRKPlaylistIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, 'NRK')
+ for video_id in re.findall(
+ r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+ webpage)
+ ]
+
+ playlist_title = self._og_search_title(webpage)
+ playlist_description = self._og_search_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 56e1cad3b..03f0a4de6 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -1,15 +1,17 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+)
class NYTimesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
'md5': '18a525a510f942ada2720db5f31644c0',
'info_dict': {
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
'uploader': 'Brett Weiner',
'duration': 419,
}
- }
+ }, {
+ 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
video_data = self._download_json(
- 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
+ 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
+ video_id, 'Downloading video JSON')
title = video_data['headline']
- description = video_data['summary']
- duration = video_data['duration'] / 1000.0
+ description = video_data.get('summary')
+ duration = float_or_none(video_data.get('duration'), 1000)
uploader = video_data['byline']
timestamp = parse_iso8601(video_data['publication_date'][:-8])
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
formats = [
{
'url': video['url'],
- 'format_id': video['type'],
- 'vcodec': video['video_codec'],
- 'width': video['width'],
- 'height': video['height'],
- 'filesize': get_file_size(video['fileSize']),
+ 'format_id': video.get('type'),
+ 'vcodec': video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': get_file_size(video.get('fileSize')),
} for video in video_data['renditions']
]
self._sort_formats(formats)
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
thumbnails = [
{
'url': 'http://www.nytimes.com/%s' % image['url'],
- 'resolution': '%dx%d' % (image['width'], image['height']),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
} for image in video_data['images']
]
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
new file mode 100644
index 000000000..01cc3d9ea
--- /dev/null
+++ b/youtube_dl/extractor/primesharetv.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import ExtractorError
+
+
+class PrimeShareTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+ _TEST = {
+ 'url': 'http://primeshare.tv/download/238790B611',
+ 'md5': 'b92d9bf5461137c36228009f31533fbc',
+ 'info_dict': {
+ 'id': '238790B611',
+ 'ext': 'mp4',
+ 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>File not exist<' in webpage:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', webpage))
+
+ headers = {
+ 'Referer': url,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ }
+
+ wait_time = int(self._search_regex(
+ r'var\s+cWaitTime\s*=\s*(\d+)',
+ webpage, 'wait time', default=7)) + 1
+ self._sleep(wait_time, video_id)
+
+        req = compat_urllib_request.Request(
+            url, compat_urllib_parse.urlencode(fields).encode('utf-8'), headers)
+ video_page = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ video_url = self._search_regex(
+ r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+ video_page, 'video url')
+
+ title = self._html_search_regex(
+ r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
+ video_page, 'title')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'ext': 'mp4',
+ }
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index c04791997..11edf616a 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -4,22 +4,87 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from .common import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_request
+)
+from ..utils import sanitize_url_path_consecutive_slashes
class SohuIE(InfoExtractor):
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
- _TEST = {
+ _TESTS = [{
+ 'note': 'This video is available only in Mainland China',
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
- 'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+ 'md5': '29175c8cadd8b5cc4055001e85d6b372',
'info_dict': {
'id': '382479172',
'ext': 'mp4',
'title': 'MV:Far East Movement《The Illest》',
},
- 'skip': 'Only available from China',
- }
+ 'params': {
+ 'cn_verification_proxy': 'proxy.uku.im:8888'
+ }
+ }, {
+ 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+ 'md5': '699060e75cf58858dd47fb9c03c42cfb',
+ 'info_dict': {
+ 'id': '409385080',
+ 'ext': 'mp4',
+ 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+ }
+ }, {
+ 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+ 'md5': '9bf34be48f2f4dadcb226c74127e203c',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+ 'info_dict': {
+ 'id': '78910339',
+ },
+ 'playlist': [{
+ 'md5': 'bdbfb8f39924725e6589c146bc1883ad',
+ 'info_dict': {
+ 'id': '78910339_part1',
+ 'ext': 'mp4',
+ 'duration': 294,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
+ 'info_dict': {
+ 'id': '78910339_part2',
+ 'ext': 'mp4',
+ 'duration': 300,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'md5': '8407e634175fdac706766481b9443450',
+ 'info_dict': {
+ 'id': '78910339_part3',
+ 'ext': 'mp4',
+ 'duration': 150,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }]
+ }, {
+ 'note': 'Video with title containing dash',
+ 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+ 'info_dict': {
+ 'id': '78932792',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl testing video',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
def _real_extract(self, url):
@@ -29,8 +94,14 @@ class SohuIE(InfoExtractor):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+ req = compat_urllib_request.Request(base_data_url + vid_id)
+
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
return self._download_json(
- base_data_url + vid_id, video_id,
+ req, video_id,
'Downloading JSON data for %s' % vid_id)
mobj = re.match(self._VALID_URL, url)
@@ -38,10 +109,8 @@ class SohuIE(InfoExtractor):
mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id)
- raw_title = self._html_search_regex(
- r'(?s)<title>(.+?)</title>',
- webpage, 'video title')
- title = raw_title.partition('-')[0].strip()
+
+ title = self._og_search_title(webpage)
vid = self._html_search_regex(
r'var vid ?= ?["\'](\d+)["\']',
@@ -77,7 +146,9 @@ class SohuIE(InfoExtractor):
% (format_id, i + 1, part_count))
part_info = part_str.split('|')
- video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+ video_url = sanitize_url_path_consecutive_slashes(
+ '%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
formats.append({
'url': video_url,
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index cbdaf9c7a..aad2bf222 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -23,6 +23,7 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
+ _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
_NETRC_MACHINE = 'twitch'
def _handle_error(self, response):
@@ -67,14 +68,14 @@ class TwitchBaseIE(InfoExtractor):
'authenticity_token': authenticity_token,
'redirect_on_login': '',
'embed_form': 'false',
- 'mp_source_action': '',
+ 'mp_source_action': 'login-button',
'follow': '',
- 'user[login]': username,
- 'user[password]': password,
+ 'login': username,
+ 'password': password,
}
request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
new file mode 100644
index 000000000..06554a1be
--- /dev/null
+++ b/youtube_dl/extractor/ultimedia.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ qualities,
+ unified_strdate,
+ clean_html,
+)
+
+
+class UltimediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
+ _TESTS = [{
+ # news
+ 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+ 'md5': '276a0e49de58c7e85d32b057837952a2',
+ 'info_dict': {
+ 'id': 's8uk0r',
+ 'ext': 'mp4',
+ 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+ 'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'upload_date': '20150317',
+ },
+ }, {
+ # music
+ 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+ 'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+ 'info_dict': {
+ 'id': 'xvpfp8',
+ 'ext': 'mp4',
+ 'title': "Two - C'est la vie (Clip)",
+ 'description': 'Two',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'upload_date': '20150224',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ deliver_url = self._search_regex(
+ r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+ webpage, 'deliver URL')
+
+ deliver_page = self._download_webpage(
+ deliver_url, video_id, 'Downloading iframe page')
+
+ if '>This video is currently not available' in deliver_page:
+ raise ExtractorError(
+ 'Video %s is currently not available' % video_id, expected=True)
+
+ player = self._parse_json(
+ self._search_regex(
+ r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+ video_id)
+
+ quality = qualities(['flash', 'html5'])
+ formats = []
+ for mode in player['modes']:
+ video_url = mode.get('config', {}).get('file')
+ if not video_url:
+ continue
+ if re.match(r'https?://www\.youtube\.com/.+?', video_url):
+ return self.url_result(video_url, 'Youtube')
+ formats.append({
+ 'url': video_url,
+ 'format_id': mode.get('type'),
+ 'quality': quality(mode.get('type')),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = player.get('image')
+
+ title = clean_html((
+ self._html_search_regex(
+ r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
+ webpage, 'title', default=None)
+ or self._search_regex(
+ r"var\s+nameVideo\s*=\s*'([^']+)'",
+ deliver_page, 'title')))
+
+ description = clean_html(self._html_search_regex(
+ r'(?s)<span>Description</span>(.+?)</p>', webpage,
+ 'description', fatal=False))
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Ajouté le\s*<span>([^<]+)', webpage,
+ 'upload date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index 273030316..eb309a7cd 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
- remove_start,
-)
+from ..compat import compat_urllib_request
class VideoMegaIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:www\.)?videomega\.tv/
- (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
+ (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
'''
_TEST = {
- 'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
+ 'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
'info_dict': {
- 'id': 'QR0HCUHI1661IHUCH0RQ',
+ 'id': '4GNA688SU99US886ANG4',
'ext': 'mp4',
- 'title': 'Big Buck Bunny',
+ 'title': 'BigBuckBunny_320x180',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
@@ -33,34 +26,24 @@ class VideoMegaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
+ iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
req = compat_urllib_request.Request(iframe_url)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id)
- try:
- escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
- except IndexError:
- raise ExtractorError('Unable to extract escaped data')
-
- playlist = compat_urllib_parse.unquote(escaped_data)
-
+ title = self._html_search_regex(
+ r'<title>(.*?)</title>', webpage, 'title')
+ title = re.sub(
+ r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
- video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
- title = remove_start(self._html_search_regex(
- r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- }]
- self._sort_formats(formats)
+ r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+ video_url = self._search_regex(
+ r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
return {
'id': video_id,
'title': title,
- 'formats': formats,
+ 'url': video_url,
'thumbnail': thumbnail,
'http_headers': {
'Referer': iframe_url,
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 0b58fe0fe..c3187cfeb 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -33,14 +33,13 @@ class VineIE(InfoExtractor):
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
formats = [{
- 'url': data['videoLowURL'],
- 'ext': 'mp4',
- 'format_id': 'low',
- }, {
- 'url': data['videoUrl'],
- 'ext': 'mp4',
- 'format_id': 'standard',
- }]
+ 'format_id': '%(format)s-%(rate)s' % f,
+ 'vcodec': f['format'],
+ 'quality': f['rate'],
+ 'url': f['videoUrl'],
+ } for f in data['videoUrls'] if f.get('rate')]
+
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c3135effc..472d4df41 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -326,6 +326,13 @@ def sanitize_path(s):
return os.path.join(*sanitized_path)
+def sanitize_url_path_consecutive_slashes(url):
+ """Collapses consecutive slashes in URLs' path"""
+ parsed_url = list(compat_urlparse.urlparse(url))
+ parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+ return compat_urlparse.urlunparse(parsed_url)
+
+
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 7ed07c375..51b4260aa 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.03.15'
+__version__ = '2015.03.18'