aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS2
-rw-r--r--README.md4
-rw-r--r--docs/supportedsites.md5
-rw-r--r--test/test_subtitles.py2
-rwxr-xr-xyoutube_dl/YoutubeDL.py1
-rw-r--r--youtube_dl/aes.py2
-rw-r--r--youtube_dl/extractor/__init__.py13
-rw-r--r--youtube_dl/extractor/arte.py5
-rw-r--r--youtube_dl/extractor/bilibili.py20
-rw-r--r--youtube_dl/extractor/chilloutzone.py2
-rw-r--r--youtube_dl/extractor/cinemassacre.py13
-rw-r--r--youtube_dl/extractor/cnn.py2
-rw-r--r--youtube_dl/extractor/dailymotion.py5
-rw-r--r--youtube_dl/extractor/drtv.py37
-rw-r--r--youtube_dl/extractor/empflix.py32
-rw-r--r--youtube_dl/extractor/facebook.py11
-rw-r--r--youtube_dl/extractor/firedrive.py80
-rw-r--r--youtube_dl/extractor/karrierevideos.py96
-rw-r--r--youtube_dl/extractor/mitele.py9
-rw-r--r--youtube_dl/extractor/naver.py24
-rw-r--r--youtube_dl/extractor/nba.py20
-rw-r--r--youtube_dl/extractor/nextmedia.py40
-rw-r--r--youtube_dl/extractor/nowtv.py192
-rw-r--r--youtube_dl/extractor/odnoklassniki.py36
-rw-r--r--youtube_dl/extractor/pornhub.py3
-rw-r--r--youtube_dl/extractor/prosiebensat1.py2
-rw-r--r--youtube_dl/extractor/rtbf.py30
-rw-r--r--youtube_dl/extractor/rtlnow.py174
-rw-r--r--youtube_dl/extractor/rtve.py2
-rw-r--r--youtube_dl/extractor/rutv.py2
-rw-r--r--youtube_dl/extractor/shared.py2
-rw-r--r--youtube_dl/extractor/sockshare.py83
-rw-r--r--youtube_dl/extractor/spankwire.py2
-rw-r--r--youtube_dl/extractor/sportbox.py26
-rw-r--r--youtube_dl/extractor/teamcoco.py3
-rw-r--r--youtube_dl/extractor/telecinco.py4
-rw-r--r--youtube_dl/extractor/tenplay.py27
-rw-r--r--youtube_dl/extractor/tf1.py9
-rw-r--r--youtube_dl/extractor/tnaflix.py34
-rw-r--r--youtube_dl/extractor/tutv.py2
-rw-r--r--youtube_dl/extractor/videott.py2
-rw-r--r--youtube_dl/extractor/viki.py333
-rw-r--r--youtube_dl/extractor/youtube.py11
-rw-r--r--youtube_dl/options.py4
-rw-r--r--youtube_dl/version.py2
45 files changed, 817 insertions, 593 deletions
diff --git a/AUTHORS b/AUTHORS
index 267b8da1e..3410e1fb9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -124,3 +124,5 @@ Mohammad Teimori Pabandi
Roman Le Négrate
Matthias Küch
Julian Richen
+Ping O.
+Mister Hat
diff --git a/README.md b/README.md
index 3d9436456..e51bb5343 100644
--- a/README.md
+++ b/README.md
@@ -17,12 +17,12 @@ youtube-dl - download videos from youtube.com or other video platforms
To install it right away for all UNIX users (Linux, OS X, etc.), type:
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
- sudo chmod a+x /usr/local/bin/youtube-dl
+ sudo chmod a+rx /usr/local/bin/youtube-dl
If you do not have curl, you can alternatively use a recent wget:
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
- sudo chmod a+x /usr/local/bin/youtube-dl
+ sudo chmod a+rx /usr/local/bin/youtube-dl
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 43fbe8b1d..a4879bd9a 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -142,6 +142,7 @@
- **Eporner**
- **EroProfile**
- **Escapist**
+ - **ESPN** (Currently broken)
- **EveryonesMixtape**
- **exfm**: ex.fm
- **ExpoTV**
@@ -338,6 +339,7 @@
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
+ - **OoyalaExternal**
- **OpenFilm**
- **orf:fm4**: radio FM4
- **orf:iptv**: iptv.ORF.at
@@ -451,6 +453,7 @@
- **Spike**
- **Sport5**
- **SportBox**
+ - **SportBoxEmbed**
- **SportDeutschland**
- **Srf**
- **SRMediathek**: Saarländischer Rundfunk
@@ -510,6 +513,8 @@
- **Turbo**
- **Tutv**
- **tv.dfb.de**
+ - **TV2**
+ - **TV2Article**
- **TV4**: tv4.se and tv4play.se
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvp.pl**
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 891ee620b..c4e3adb67 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -266,7 +266,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['no']))
- self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
+ self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
class TestRaiSubtitles(BaseTestSubtitles):
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 58b34e087..d1953c18f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1527,6 +1527,7 @@ class YoutubeDL(object):
pps_chain.extend(ie_info['__postprocessors'])
pps_chain.extend(self._pps)
for pp in pps_chain:
+ files_to_delete = []
try:
files_to_delete, info = pp.run(info)
except PostProcessingError as e:
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index 07224d508..7817adcfd 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -152,7 +152,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
- data = bytes_to_intlist(base64.b64decode(data))
+ data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
password = bytes_to_intlist(password.encode('utf-8'))
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2a5cf9547..f73bf646b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -149,7 +149,6 @@ from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
-from .firedrive import FiredriveIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
@@ -244,6 +243,7 @@ from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
@@ -338,8 +338,7 @@ from .newstube import NewstubeIE
from .nextmedia import (
NextMediaIE,
NextMediaActionNewsIE,
- AppleDailyRealtimeNewsIE,
- AppleDailyAnimationNewsIE
+ AppleDailyIE,
)
from .nfb import NFBIE
from .nfl import NFLIE
@@ -355,6 +354,7 @@ from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .novamov import NovaMovIE
from .nowness import NownessIE
+from .nowtv import NowTVIE
from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
@@ -438,7 +438,6 @@ from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rte import RteIE
from .rtlnl import RtlNlIE
-from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
from .rts import RTSIE
@@ -480,7 +479,6 @@ from .smotri import (
SmotriBroadcastIE,
)
from .snotr import SnotrIE
-from .sockshare import SockshareIE
from .sohu import SohuIE
from .soompi import (
SoompiIE,
@@ -651,7 +649,10 @@ from .vine import (
VineIE,
VineUserIE,
)
-from .viki import VikiIE
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
from .vk import (
VKIE,
VKUserVideosIE,
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 8273bd6c9..76de24477 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
unified_strdate,
- get_element_by_id,
get_element_by_attribute,
int_or_none,
qualities,
@@ -195,7 +194,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
def _real_extract(self, url):
anchor_id, lang = self._extract_url_info(url)
webpage = self._download_webpage(url, anchor_id)
- row = get_element_by_id(anchor_id, webpage)
+ row = self._search_regex(
+ r'(?s)id="%s"[^>]*>.+?(<div[^>]*arte_vp_url[^>]*>)' % anchor_id,
+ webpage, 'row')
return self._extract_from_webpage(row, anchor_id, lang)
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 7ca835e31..2103ed73a 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
import re
import itertools
+import json
+import xml.etree.ElementTree as ET
from .common import InfoExtractor
from ..utils import (
@@ -67,11 +69,19 @@ class BiliBiliIE(InfoExtractor):
entries = []
- lq_doc = self._download_xml(
+ lq_page = self._download_webpage(
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
video_id,
note='Downloading LQ video info'
)
+ try:
+ err_info = json.loads(lq_page)
+ raise ExtractorError(
+ 'BiliBili said: ' + err_info['error_text'], expected=True)
+ except ValueError:
+ pass
+
+ lq_doc = ET.fromstring(lq_page)
lq_durls = lq_doc.findall('./durl')
hq_doc = self._download_xml(
@@ -80,9 +90,11 @@ class BiliBiliIE(InfoExtractor):
note='Downloading HQ video info',
fatal=False,
)
- hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
-
- assert len(lq_durls) == len(hq_durls)
+ if hq_doc is not False:
+ hq_durls = hq_doc.findall('./durl')
+ assert len(lq_durls) == len(hq_durls)
+ else:
+ hq_durls = itertools.repeat(None)
i = 1
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py
index c922f6959..0206d96db 100644
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dl/extractor/chilloutzone.py
@@ -57,7 +57,7 @@ class ChilloutzoneIE(InfoExtractor):
base64_video_info = self._html_search_regex(
r'var cozVidData = "(.+?)";', webpage, 'video data')
- decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+ decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
video_info_dict = json.loads(decoded_video_info)
# get video information from dict
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index cf0a7551b..c949a4814 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -60,6 +60,17 @@ class CinemassacreIE(InfoExtractor):
'uploader_id': 'Cinemassacre',
'title': 'AVGN: McKids',
}
+ },
+ {
+ 'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
+ 'md5': '1376908e49572389e7b06251a53cdd08',
+ 'info_dict': {
+ 'id': 'Cinemassacre-555779690c440',
+ 'ext': 'mp4',
+ 'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
+ 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
+ 'upload_date': '20150525',
+ }
}
]
@@ -72,7 +83,7 @@ class CinemassacreIE(InfoExtractor):
playerdata_url = self._search_regex(
[
- r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+ r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
],
webpage, 'player data URL', default=None)
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 5efc5f4fe..3b1bd4033 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index db10b8d00..70aa4333c 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -225,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P<user>[^/]+)$'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
@@ -239,7 +239,8 @@ class DailymotionUserIE(DailymotionPlaylistIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
- webpage = self._download_webpage(url, user)
+ webpage = self._download_webpage(
+ 'https://www.dailymotion.com/user/%s' % user, user)
full_user = unescapeHTML(self._html_search_regex(
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
webpage, 'user'))
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index f25ab319e..baa24c6d1 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -1,8 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-from .common import InfoExtractor, ExtractorError
-from ..utils import parse_iso8601
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
class DRTVIE(InfoExtractor):
@@ -60,19 +63,31 @@ class DRTVIE(InfoExtractor):
restricted_to_denmark = asset['RestrictedToDenmark']
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
for link in asset['Links']:
- target = link['Target']
uri = link['Uri']
+ target = link['Target']
format_id = target
- preference = -1 if target == 'HDS' else -2
+ preference = None
if spoken_subtitles:
- preference -= 2
+ preference = -1
format_id += '-spoken-subtitles'
- formats.append({
- 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
- 'format_id': format_id,
- 'ext': link['FileFormat'],
- 'preference': preference,
- })
+ if target == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
+ video_id, preference, f4m_id=format_id))
+ elif target == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ uri, video_id, 'mp4', preference=preference,
+ m3u8_id=format_id))
+ else:
+ bitrate = link.get('Bitrate')
+ if bitrate:
+ format_id += '-%s' % bitrate
+ formats.append({
+ 'url': uri,
+ 'format_id': format_id,
+ 'tbr': bitrate,
+ 'ext': link.get('FileFormat'),
+ })
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):
LANGS = {
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py
index 70f8efe27..9a5a8f4bb 100644
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -4,22 +4,28 @@ from .tnaflix import TNAFlixIE
class EMPFlixIE(TNAFlixIE):
- _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
_TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
_DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
- _TEST = {
- 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
- 'md5': 'b1bc15b6412d33902d6e5952035fcabc',
- 'info_dict': {
- 'id': '33051',
- 'display_id': 'Amateur-Finger-Fuck',
- 'ext': 'mp4',
- 'title': 'Amateur Finger Fuck',
- 'description': 'Amateur solo finger fucking.',
- 'thumbnail': 're:https?://.*\.jpg$',
- 'age_limit': 18,
+ _TESTS = [
+ {
+ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+ 'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+ 'info_dict': {
+ 'id': '33051',
+ 'display_id': 'Amateur-Finger-Fuck',
+ 'ext': 'mp4',
+ 'title': 'Amateur Finger Fuck',
+ 'description': 'Amateur solo finger fucking.',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+ 'matching_only': True,
}
- }
+ ]
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 937b28fcc..82dc27bc6 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -50,7 +50,10 @@ class FacebookIE(InfoExtractor):
'id': '274175099429670',
'ext': 'mp4',
'title': 'Facebook video #274175099429670',
- }
+ },
+ 'expected_warnings': [
+ 'title'
+ ]
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
@@ -149,12 +152,12 @@ class FacebookIE(InfoExtractor):
raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex(
- r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
- fatal=False)
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
+ default=None)
if not video_title:
video_title = self._html_search_regex(
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
- webpage, 'alternative title', default=None)
+ webpage, 'alternative title', fatal=False)
video_title = limit_length(video_title, 80)
if not video_title:
video_title = 'Facebook video #%s' % video_id
diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py
deleted file mode 100644
index 3191116d9..000000000
--- a/youtube_dl/extractor/firedrive.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
-)
-
-
-class FiredriveIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
- '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
- _FILE_DELETED_REGEX = r'<div class="removed_file_image">'
-
- _TESTS = [{
- 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
- 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
- 'info_dict': {
- 'id': 'FEB892FA160EBD01',
- 'ext': 'flv',
- 'title': 'bbb_theora_486kbit.flv',
- 'thumbnail': 're:^http://.*\.jpg$',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = 'http://firedrive.com/file/%s' % video_id
- webpage = self._download_webpage(url, video_id)
-
- if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id,
- expected=True)
-
- fields = dict(re.findall(r'''(?x)<input\s+
- type="hidden"\s+
- name="([^"]+)"\s+
- value="([^"]*)"
- ''', webpage))
-
- post = compat_urllib_parse.urlencode(fields)
- req = compat_urllib_request.Request(url, post)
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- # Apparently, this header is required for confirmation to work.
- req.add_header('Host', 'www.firedrive.com')
-
- webpage = self._download_webpage(req, video_id,
- 'Downloading video page')
-
- title = self._search_regex(r'class="external_title_left">(.+)</div>',
- webpage, 'title')
- thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
- 'thumbnail', fatal=False)
- if thumbnail is not None:
- thumbnail = 'http:' + thumbnail
-
- ext = self._search_regex(r'type:\s?\'([^\']+)\',',
- webpage, 'extension', fatal=False)
- video_url = self._search_regex(
- r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- 'ext': ext,
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py
new file mode 100644
index 000000000..bed94bc93
--- /dev/null
+++ b/youtube_dl/extractor/karrierevideos.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ fix_xml_ampersands,
+ float_or_none,
+ xpath_with_ns,
+ xpath_text,
+)
+
+
+class KarriereVideosIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
+ 'info_dict': {
+ 'id': '32c91',
+ 'ext': 'flv',
+ 'title': 'AltenpflegerIn',
+ 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
+ 'thumbnail': 're:^http://.*\.png',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # broken ampersands
+ 'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
+ 'info_dict': {
+ 'id': '5sniu',
+ 'ext': 'flv',
+ 'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
+ 'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
+ 'thumbnail': 're:^http://.*\.png',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = (self._html_search_meta('title', webpage, default=None) or
+ self._search_regex(r'<h1 class="title">([^<]+)</h1>'))
+
+ video_id = self._search_regex(
+ r'/config/video/(.+?)\.xml', webpage, 'video id')
+ playlist = self._download_xml(
+ 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
+ video_id, transform_source=fix_xml_ampersands)
+
+ NS_MAP = {
+ 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./tracklist/item')
+ video_file = xpath_text(
+ item, ns('./jwplayer:file'), 'video url', fatal=True)
+ streamer = xpath_text(
+ item, ns('./jwplayer:streamer'), 'streamer', fatal=True)
+
+ uploader = xpath_text(
+ item, ns('./jwplayer:author'), 'uploader')
+ duration = float_or_none(
+ xpath_text(item, ns('./jwplayer:duration'), 'duration'))
+
+ description = self._html_search_regex(
+ r'(?s)<div class="leadtext">(.+?)</div>',
+ webpage, 'description')
+
+ thumbnail = self._html_search_meta(
+ 'thumbnail', webpage, 'thumbnail')
+ if thumbnail:
+ thumbnail = compat_urlparse.urljoin(url, thumbnail)
+
+ return {
+ 'id': video_id,
+ 'url': streamer.replace('rtmpt', 'rtmp'),
+ 'play_path': 'mp4:%s' % video_file,
+ 'ext': 'flv',
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index d8897eb90..7091f3335 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -20,7 +20,6 @@ class MiTeleIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
- 'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
'id': '0fce117d',
'ext': 'mp4',
@@ -29,6 +28,10 @@ class MiTeleIE(InfoExtractor):
'display_id': 'programa-144',
'duration': 2913,
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
@@ -56,12 +59,14 @@ class MiTeleIE(InfoExtractor):
episode,
transform_source=strip_jsonp
)
+ formats = self._extract_m3u8_formats(
+ token_info['tokenizedUrl'], episode, ext='mp4')
return {
'id': embed_data['videoId'],
'display_id': episode,
'title': info_el.find('title').text,
- 'url': token_info['tokenizedUrl'],
+ 'formats': formats,
'description': get_element_by_attribute('class', 'text', webpage),
'thumbnail': info_el.find('thumb').text,
'duration': parse_duration(info_el.find('duration').text),
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index c10405f04..925967753 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urlparse,
)
from ..utils import (
ExtractorError,
@@ -16,7 +17,7 @@ from ..utils import (
class NaverIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://tvcast.naver.com/v/81652',
'info_dict': {
'id': '81652',
@@ -25,7 +26,18 @@ class NaverIE(InfoExtractor):
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
'upload_date': '20130903',
},
- }
+ }, {
+ 'url': 'http://tvcast.naver.com/v/395837',
+ 'md5': '638ed4c12012c458fefcddfd01f173cd',
+ 'info_dict': {
+ 'id': '395837',
+ 'ext': 'mp4',
+ 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
+ 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
+ 'upload_date': '20150519',
+ },
+ 'skip': 'Georestricted',
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -35,7 +47,7 @@ class NaverIE(InfoExtractor):
webpage)
if m_id is None:
m_error = re.search(
- r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
+ r'(?s)<div class="(?:nation_error|nation_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
webpage)
if m_error:
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
@@ -58,14 +70,18 @@ class NaverIE(InfoExtractor):
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):
domain = format_el.find('Domain').text
+ uri = format_el.find('uri').text
f = {
- 'url': domain + format_el.find('uri').text,
+ 'url': compat_urlparse.urljoin(domain, uri),
'ext': 'mp4',
'width': int(format_el.find('width').text),
'height': int(format_el.find('height').text),
}
if domain.startswith('rtmp'):
+ # urlparse does not support custom schemes
+ # https://bugs.python.org/issue18828
f.update({
+ 'url': domain + uri,
'ext': 'flv',
'rtmp_protocol': '1', # rtmpt
})
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index 862b706bf..944096e1c 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -22,6 +22,18 @@ class NBAIE(InfoExtractor):
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
+ }, {
+ 'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'info_dict': {
+ 'id': '0041400301-cle-atl-recap.nba',
+ 'ext': 'mp4',
+ 'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
+ 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
+ 'duration': 228,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
}]
def _real_extract(self, url):
@@ -35,8 +47,12 @@ class NBAIE(InfoExtractor):
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
description = self._og_search_description(webpage)
- duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration'))
+ duration_str = self._html_search_meta(
+ 'duration', webpage, 'duration', default=None)
+ if not duration_str:
+ duration_str = self._html_search_regex(
+ r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False)
+ duration = parse_duration(duration_str)
return {
'id': shortened_video_id,
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index 02dba4ef6..d1b7cff4c 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -89,8 +89,8 @@ class NextMediaActionNewsIE(NextMediaIE):
return self._extract_from_nextmedia_page(news_id, url, article_page)
-class AppleDailyRealtimeNewsIE(NextMediaIE):
- _VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+class AppleDailyIE(NextMediaIE):
+ _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -99,7 +99,7 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
'ext': 'mp4',
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'md5:b23787119933404ce515c6356a8c355c',
+ 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
'upload_date': '20150128',
}
}, {
@@ -110,26 +110,10 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
'ext': 'mp4',
'title': '不滿被踩腳 山東兩大媽一路打下車',
'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
+ 'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
'upload_date': '20150128',
}
- }]
-
- _URL_PATTERN = r'\{url: \'(.+)\'\}'
-
- def _fetch_title(self, page):
- return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
-
- def _fetch_thumbnail(self, page):
- return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
-
- def _fetch_timestamp(self, page):
- return None
-
-
-class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
- _VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
- _TESTS = [{
+ }, {
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
'md5': '03df296d95dedc2d5886debbb80cb43f',
'info_dict': {
@@ -154,10 +138,22 @@ class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
'expected_warnings': [
'video thumbnail',
]
+ }, {
+ 'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
+ 'only_matching': True,
}]
+ _URL_PATTERN = r'\{url: \'(.+)\'\}'
+
def _fetch_title(self, page):
- return self._html_search_meta('description', page, 'news title')
+ return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
+ self._html_search_meta('description', page, 'news title'))
+
+ def _fetch_thumbnail(self, page):
+ return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
+
+ def _fetch_timestamp(self, page):
+ return None
def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description')
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
new file mode 100644
index 000000000..173e46cd8
--- /dev/null
+++ b/youtube_dl/extractor/nowtv.py
@@ -0,0 +1,192 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ parse_duration,
+ remove_start,
+)
+
+
+class NowTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player'
+
+ _TESTS = [{
+ # rtl
+ 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player',
+ 'info_dict': {
+ 'id': '203519',
+ 'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
+ 'ext': 'mp4',
+ 'title': 'Die neuen Bauern und eine Hochzeit',
+ 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432580700,
+ 'upload_date': '20150525',
+ 'duration': 2786,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # rtl2
+ 'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player',
+ 'info_dict': {
+ 'id': '203481',
+ 'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934',
+ 'ext': 'mp4',
+ 'title': 'Berlin - Tag & Nacht (Folge 934)',
+ 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432666800,
+ 'upload_date': '20150526',
+ 'duration': 2641,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # rtlnitro
+ 'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player',
+ 'info_dict': {
+ 'id': '165780',
+ 'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00',
+ 'ext': 'mp4',
+ 'title': 'Hals- und Beinbruch',
+ 'description': 'md5:b50d248efffe244e6f56737f0911ca57',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432415400,
+ 'upload_date': '20150523',
+ 'duration': 2742,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # superrtl
+ 'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player',
+ 'info_dict': {
+ 'id': '99205',
+ 'display_id': 'medicopter-117/angst',
+ 'ext': 'mp4',
+ 'title': 'Angst!',
+ 'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1222632900,
+ 'upload_date': '20080928',
+ 'duration': 3025,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # ntv
+ 'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player',
+ 'info_dict': {
+ 'id': '203521',
+ 'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch',
+ 'ext': 'mp4',
+ 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
+ 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432751700,
+ 'upload_date': '20150527',
+ 'duration': 1083,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # vox
+ 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player',
+ 'info_dict': {
+ 'id': '128953',
+ 'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel',
+ 'ext': 'mp4',
+ 'title': "Büro-Fall / Chihuahua 'Joel'",
+ 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432408200,
+ 'upload_date': '20150523',
+ 'duration': 3092,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+ station = mobj.group('station')
+
+ info = self._download_json(
+ 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+ display_id)
+
+ video_id = compat_str(info['id'])
+
+ files = info['files']
+ if not files:
+ if info.get('geoblocked', False):
+ raise ExtractorError(
+ 'Video %s is not available from your location due to geo restriction' % video_id,
+ expected=True)
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+
+ f = info.get('format', {})
+ station = f.get('station') or station
+
+ STATIONS = {
+ 'rtl': 'rtlnow',
+ 'rtl2': 'rtl2now',
+ 'vox': 'voxnow',
+ 'nitro': 'rtlnitronow',
+ 'ntv': 'n-tvnow',
+ 'superrtl': 'superrtlnow'
+ }
+
+ formats = []
+ for item in files['items']:
+ item_path = remove_start(item['path'], '/')
+ tbr = int_or_none(item['bitrate'])
+ m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path)
+ m3u8_url = m3u8_url.replace('now/', 'now/videos/')
+ formats.append({
+ 'url': m3u8_url,
+ 'format_id': '%s-%sk' % (item['id'], tbr),
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ title = info['title']
+ description = info.get('articleLong') or info.get('articleShort')
+ timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+ duration = parse_duration(info.get('duration'))
+ thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index fbc521d1a..6c7149fe3 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
from ..utils import (
unified_strdate,
int_or_none,
@@ -11,8 +12,9 @@ from ..utils import (
class OdnoklassnikiIE(InfoExtractor):
- _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
_TESTS = [{
+ # metadata in JSON
'url': 'http://ok.ru/video/20079905452',
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
'info_dict': {
@@ -20,11 +22,22 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
'duration': 100,
- 'upload_date': '20141207',
'uploader_id': '330537914540',
'uploader': 'Виталий Добровольский',
'like_count': int,
- 'age_limit': 0,
+ },
+ }, {
+ # metadataUrl
+ 'url': 'http://ok.ru/video/63567059965189-0',
+ 'md5': '9676cf86eff5391d35dea675d224e131',
+ 'info_dict': {
+ 'id': '63567059965189-0',
+ 'ext': 'mp4',
+ 'title': 'Девушка без комплексов ...',
+ 'duration': 191,
+ 'uploader_id': '534380003155',
+ 'uploader': 'Андрей Мещанинов',
+ 'like_count': int,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
@@ -34,14 +47,23 @@ class OdnoklassnikiIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ 'http://ok.ru/video/%s' % video_id, video_id)
player = self._parse_json(
unescapeHTML(self._search_regex(
r'data-attributes="([^"]+)"', webpage, 'player')),
video_id)
- metadata = self._parse_json(player['flashvars']['metadata'], video_id)
+ flashvars = player['flashvars']
+
+ metadata = flashvars.get('metadata')
+ if metadata:
+ metadata = self._parse_json(metadata, video_id)
+ else:
+ metadata = self._download_json(
+ compat_urllib_parse.unquote(flashvars['metadataUrl']),
+ video_id, 'Downloading metadata JSON')
movie = metadata['movie']
title = movie['title']
@@ -53,11 +75,11 @@ class OdnoklassnikiIE(InfoExtractor):
uploader = author.get('name')
upload_date = unified_strdate(self._html_search_meta(
- 'ya:ovs:upload_date', webpage, 'upload date'))
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
age_limit = None
adult = self._html_search_meta(
- 'ya:ovs:adult', webpage, 'age limit')
+ 'ya:ovs:adult', webpage, 'age limit', default=None)
if adult:
age_limit = 18 if adult == 'true' else 0
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 0c8b731cf..daa284ea2 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -71,7 +71,8 @@ class PornHubIE(InfoExtractor):
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
- password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
+ password = compat_urllib_parse.unquote_plus(
+ self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 7cc799664..255d4abc1 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -17,7 +17,7 @@ from ..utils import (
class ProSiebenSat1IE(InfoExtractor):
IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital'
- _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P<id>.+)'
_TESTS = [
{
diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py
index dce64e151..5a381d9ce 100644
--- a/youtube_dl/extractor/rtbf.py
+++ b/youtube_dl/extractor/rtbf.py
@@ -1,10 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-import json
-
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unescapeHTML,
+)
class RTBFIE(InfoExtractor):
@@ -16,25 +17,24 @@ class RTBFIE(InfoExtractor):
'id': '1921274',
'ext': 'mp4',
'title': 'Les Diables au coeur (épisode 2)',
- 'description': 'Football - Diables Rouges',
'duration': 3099,
- 'timestamp': 1398456336,
- 'upload_date': '20140425',
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
- page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
+ webpage = self._download_webpage(
+ 'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
- data = json.loads(self._html_search_regex(
- r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data']
+ data = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-video="([^"]+)"', webpage, 'data video')),
+ video_id)
video_url = data.get('downloadUrl') or data.get('url')
- if data['provider'].lower() == 'youtube':
+ if data.get('provider').lower() == 'youtube':
return self.url_result(video_url, 'Youtube')
return {
@@ -42,8 +42,8 @@ class RTBFIE(InfoExtractor):
'url': video_url,
'title': data['title'],
'description': data.get('description') or data.get('subtitle'),
- 'thumbnail': data['thumbnail']['large'],
+ 'thumbnail': data.get('thumbnail'),
'duration': data.get('duration') or data.get('realDuration'),
- 'timestamp': data['created'],
- 'view_count': data['viewCount'],
+ 'timestamp': int_or_none(data.get('created')),
+ 'view_count': int_or_none(data.get('viewCount')),
}
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
deleted file mode 100644
index 785a8045e..000000000
--- a/youtube_dl/extractor/rtlnow.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- clean_html,
- unified_strdate,
- int_or_none,
-)
-
-
-class RTLnowIE(InfoExtractor):
- """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
- _VALID_URL = r'''(?x)
- (?:https?://)?
- (?P<url>
- (?P<domain>
- rtl-now\.rtl\.de|
- rtl2now\.rtl2\.de|
- (?:www\.)?voxnow\.de|
- (?:www\.)?rtlnitronow\.de|
- (?:www\.)?superrtlnow\.de|
- (?:www\.)?n-tvnow\.de)
- /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
- (?:container_id|film_id)=(?P<video_id>[0-9]+)&
- player=1(?:&season=[0-9]+)?(?:&.*)?
- )'''
-
- _TESTS = [
- {
- 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
- 'info_dict': {
- 'id': '90419',
- 'ext': 'flv',
- 'title': 'Ahornallee - Folge 1 - Der Einzug',
- 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
- 'upload_date': '20070416',
- 'duration': 1685,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only works from Germany',
- },
- {
- 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
- 'info_dict': {
- 'id': '69756',
- 'ext': 'flv',
- 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
- 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
- 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
- 'upload_date': '20120519',
- 'duration': 1245,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only works from Germany',
- },
- {
- 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
- 'info_dict': {
- 'id': '13883',
- 'ext': 'flv',
- 'title': 'Voxtours - Südafrika-Reporter II',
- 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
- 'upload_date': '20090627',
- 'duration': 1800,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
- 'info_dict': {
- 'id': '99205',
- 'ext': 'flv',
- 'title': 'Medicopter 117 - Angst!',
- 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
- 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
- 'upload_date': '20080928',
- 'duration': 2691,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
- 'info_dict': {
- 'id': '188729',
- 'ext': 'flv',
- 'upload_date': '20150204',
- 'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
- 'title': 'Der Bachelor - Folge 4',
- }
- }, {
- 'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_page_url = 'http://%s/' % mobj.group('domain')
- video_id = mobj.group('video_id')
-
- webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
-
- mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
- if mobj:
- raise ExtractorError(clean_html(mobj.group(1)), expected=True)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
-
- mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
- duration = int(mobj.group('seconds')) if mobj else None
-
- playerdata_url = self._html_search_regex(
- r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
-
- playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
-
- videoinfo = playerdata.find('./playlist/videoinfo')
-
- formats = []
- for filename in videoinfo.findall('filename'):
- mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
- if mobj:
- fmt = {
- 'url': mobj.group('url'),
- 'play_path': 'mp4:' + mobj.group('play_path'),
- 'page_url': video_page_url,
- 'player_url': video_page_url + 'includes/vodplayer.swf',
- }
- else:
- mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
- if mobj:
- fmt = {
- 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
- 'play_path': 'mp4:' + mobj.group('play_path'),
- 'page_url': url,
- 'player_url': video_page_url + 'includes/vodplayer.swf',
- }
- else:
- fmt = {
- 'url': filename.text,
- }
- fmt.update({
- 'width': int_or_none(filename.get('width')),
- 'height': int_or_none(filename.get('height')),
- 'vbr': int_or_none(filename.get('bitrate')),
- 'ext': 'flv',
- })
- formats.append(fmt)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index 849300140..82cd98ac7 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -17,7 +17,7 @@ from ..utils import (
def _decrypt_url(png):
- encrypted_data = base64.b64decode(png)
+ encrypted_data = base64.b64decode(png.encode('utf-8'))
text_index = encrypted_data.find(b'tEXt')
text_chunk = encrypted_data[text_index - 4:]
length = struct_unpack('!I', text_chunk[:4])[0]
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py
index 55604637d..d9df06861 100644
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -104,7 +104,7 @@ class RUTVIE(InfoExtractor):
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
if mobj:
return mobj.group('url')
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index 26ced716e..9f3e944e7 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -47,7 +47,7 @@ class SharedIE(InfoExtractor):
video_url = self._html_search_regex(
r'data-url="([^"]+)"', video_page, 'video URL')
title = base64.b64decode(self._html_search_meta(
- 'full:title', webpage, 'title')).decode('utf-8')
+ 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False))
thumbnail = self._html_search_regex(
diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py
deleted file mode 100644
index b5fa6f1da..000000000
--- a/youtube_dl/extractor/sockshare.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
-)
-
-from .common import InfoExtractor
-
-
-class SockshareIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
- _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
- _TEST = {
- 'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
- 'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
- 'info_dict': {
- 'id': '437BE28B89D799D7',
- 'title': 'big_buck_bunny_720p_surround.avi',
- 'ext': 'avi',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = 'http://sockshare.com/file/%s' % video_id
- webpage = self._download_webpage(url, video_id)
-
- if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id,
- expected=True)
-
- confirm_hash = self._html_search_regex(r'''(?x)<input\s+
- type="hidden"\s+
- value="([^"]*)"\s+
- name="hash"
- ''', webpage, 'hash')
-
- fields = {
- "hash": confirm_hash.encode('utf-8'),
- "confirm": "Continue as Free User"
- }
-
- post = compat_urllib_parse.urlencode(fields)
- req = compat_urllib_request.Request(url, post)
- # Apparently, this header is required for confirmation to work.
- req.add_header('Host', 'www.sockshare.com')
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- webpage = self._download_webpage(
- req, video_id, 'Downloading video page')
-
- video_url = self._html_search_regex(
- r'<a href="([^"]*)".+class="download_file_link"',
- webpage, 'file url')
- video_url = "http://www.sockshare.com" + video_url
- title = self._html_search_regex((
- r'<h1>(.+)<strong>',
- r'var name = "([^"]+)";'),
- webpage, 'title', default=None)
- thumbnail = self._html_search_regex(
- r'<img\s+src="([^"]*)".+?name="bg"',
- webpage, 'thumbnail', default=None)
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- 'ext': determine_ext(title),
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index b936202f6..06d6e6640 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -71,7 +71,7 @@ class SpankwireIE(InfoExtractor):
compat_urllib_parse.unquote,
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
- password = self._html_search_regex(
+ password = self._search_regex(
r'flashvars\.video_title = "([^"]+)',
webpage, 'password').replace('+', ' ')
video_urls = list(map(
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index 8686f9d11..86d509ae5 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -6,8 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- parse_duration,
- parse_iso8601,
+ unified_strdate,
)
@@ -20,11 +19,9 @@ class SportBoxIE(InfoExtractor):
'id': '80822',
'ext': 'mp4',
'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
- 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed',
+ 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
'thumbnail': 're:^https?://.*\.jpg$',
- 'timestamp': 1411896237,
'upload_date': '20140928',
- 'duration': 4846,
},
'params': {
# m3u8 download
@@ -48,17 +45,13 @@ class SportBoxIE(InfoExtractor):
r'src="/?(vdl/player/[^"]+)"', webpage, 'player')
title = self._html_search_regex(
- r'<h1 itemprop="name">([^<]+)</h1>', webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div itemprop="description">(.+?)</div>',
- webpage, 'description', fatal=False)
+ [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
+ webpage, 'title')
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
- timestamp = parse_iso8601(self._search_regex(
- r'<span itemprop="uploadDate">([^<]+)</span>',
- webpage, 'timestamp', fatal=False))
- duration = parse_duration(self._html_search_regex(
- r'<meta itemprop="duration" content="PT([^"]+)">',
- webpage, 'duration', fatal=False))
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dateCreated', webpage, 'upload date'))
return {
'_type': 'url_transparent',
@@ -67,8 +60,7 @@ class SportBoxIE(InfoExtractor):
'title': title,
'description': description,
'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
+ 'upload_date': upload_date,
}
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 56be52638..b2a4b1fc0 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -10,6 +10,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
qualities,
+ determine_ext,
)
from ..compat import compat_ord
@@ -108,7 +109,7 @@ class TeamcocoIE(InfoExtractor):
formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
for filed in data['files']:
- if filed['type'] == 'hls':
+ if determine_ext(filed['url']) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
filed['url'], video_id, ext='mp4'))
else:
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
index 251a68680..a0c744fd1 100644
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -16,6 +16,10 @@ class TelecincoIE(MiTeleIE):
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662,
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py
index 466155ef8..f6694149b 100644
--- a/youtube_dl/extractor/tenplay.py
+++ b/youtube_dl/extractor/tenplay.py
@@ -2,6 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+)
class TenPlayIE(InfoExtractor):
@@ -49,18 +53,23 @@ class TenPlayIE(InfoExtractor):
if protocol == 'rtmp':
url = url.replace('&mp4:', '')
+ tbr = int_or_none(rendition.get('encodingRate'), 1000)
+
formats.append({
- 'format_id': '_'.join(['rtmp', rendition['videoContainer'].lower(), rendition['videoCodec'].lower()]),
- 'width': rendition['frameWidth'],
- 'height': rendition['frameHeight'],
- 'tbr': rendition['encodingRate'] / 1024,
- 'filesize': rendition['size'],
+ 'format_id': '_'.join(
+ ['rtmp', rendition['videoContainer'].lower(),
+ rendition['videoCodec'].lower(), '%sk' % tbr]),
+ 'width': int_or_none(rendition['frameWidth']),
+ 'height': int_or_none(rendition['frameHeight']),
+ 'tbr': tbr,
+ 'filesize': int_or_none(rendition['size']),
'protocol': protocol,
'ext': ext,
'vcodec': rendition['videoCodec'].lower(),
'container': rendition['videoContainer'].lower(),
'url': url,
})
+ self._sort_formats(formats)
return {
'id': video_id,
@@ -74,8 +83,8 @@ class TenPlayIE(InfoExtractor):
'url': json['thumbnailURL']
}],
'thumbnail': json['videoStillURL'],
- 'duration': json['length'] / 1000,
- 'timestamp': float(json['creationDate']) / 1000,
- 'uploader': json['customFields']['production_company_distributor'] if 'production_company_distributor' in json['customFields'] else 'TENplay',
- 'view_count': json['playsTotal']
+ 'duration': float_or_none(json.get('length'), 1000),
+ 'timestamp': float_or_none(json.get('creationDate'), 1000),
+ 'uploader': json.get('customFields', {}).get('production_company_distributor') or 'TENplay',
+ 'view_count': int_or_none(json.get('playsTotal')),
}
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index 025d0877c..656410528 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -6,8 +6,8 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
- _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
- _TESTS = {
+ _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+ _TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
'id': '10635995',
@@ -32,7 +32,10 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py
index d48cbbf14..59af9aba0 100644
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -10,26 +10,32 @@ from ..utils import (
class TNAFlixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
- _TEST = {
- 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
- 'md5': 'ecf3498417d09216374fc5907f9c6ec0',
- 'info_dict': {
- 'id': '553878',
- 'display_id': 'Carmella-Decesare-striptease',
- 'ext': 'mp4',
- 'title': 'Carmella Decesare - striptease',
- 'description': '',
- 'thumbnail': 're:https?://.*\.jpg$',
- 'duration': 91,
- 'age_limit': 18,
+ _TESTS = [
+ {
+ 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+ 'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+ 'info_dict': {
+ 'id': '553878',
+ 'display_id': 'Carmella-Decesare-striptease',
+ 'ext': 'mp4',
+ 'title': 'Carmella Decesare - striptease',
+ 'description': '',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 91,
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+ 'matching_only': True,
}
- }
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py
index 4de0aac52..fad720b68 100644
--- a/youtube_dl/extractor/tutv.py
+++ b/youtube_dl/extractor/tutv.py
@@ -26,7 +26,7 @@ class TutvIE(InfoExtractor):
data_content = self._download_webpage(
'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
- video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
+ video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8')
return {
'id': internal_id,
diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py
index ececc7ee0..591024ead 100644
--- a/youtube_dl/extractor/videott.py
+++ b/youtube_dl/extractor/videott.py
@@ -43,7 +43,7 @@ class VideoTtIE(InfoExtractor):
formats = [
{
- 'url': base64.b64decode(res['u']).decode('utf-8'),
+ 'url': base64.b64decode(res['u'].encode('utf-8')).decode('utf-8'),
'ext': 'flv',
'format_id': res['l'],
} for res in settings['res'] if res['u']
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index cf6af1e5c..7f2fb1ca8 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,29 +1,65 @@
from __future__ import unicode_literals
-import re
+import time
+import hmac
+import hashlib
+import itertools
-from ..compat import (
- compat_urlparse,
- compat_urllib_request,
-)
from ..utils import (
ExtractorError,
- unescapeHTML,
- unified_strdate,
- US_RATINGS,
- determine_ext,
- mimetype2ext,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
)
from .common import InfoExtractor
-class VikiIE(InfoExtractor):
- IE_NAME = 'viki'
+class VikiBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
+ _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
+ _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
+
+ _APP = '65535a'
+ _APP_VERSION = '2.2.5.1428709186'
+ _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
+
+ def _prepare_call(self, path, timestamp=None):
+ path += '?' if '?' not in path else '&'
+ if not timestamp:
+ timestamp = int(time.time())
+ query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+ sig = hmac.new(
+ self._APP_SECRET.encode('ascii'),
+ query.encode('ascii'),
+ hashlib.sha1
+ ).hexdigest()
+ return self._API_URL_TEMPLATE % (query, sig)
+
+ def _call_api(self, path, video_id, note, timestamp=None):
+ resp = self._download_json(
+ self._prepare_call(path, timestamp), video_id, note)
+
+ error = resp.get('error')
+ if error:
+ if error == 'invalid timestamp':
+ resp = self._download_json(
+ self._prepare_call(path, int(resp['current_timestamp'])),
+ video_id, '%s (retry)' % note)
+ error = resp.get('error')
+ if error:
+ self._raise_error(resp['error'])
+
+ return resp
- # iPad2
- _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
+ def _raise_error(self, error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error),
+ expected=True)
- _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
+
+class VikiIE(VikiBaseIE):
+ IE_NAME = 'viki'
+ _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'info_dict': {
@@ -37,111 +73,218 @@ class VikiIE(InfoExtractor):
},
'skip': 'Blocked in the US',
}, {
+ # clip
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
- 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
+ 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
'info_dict': {
'id': '1067139v',
'ext': 'mp4',
+ 'title': "'The Avengers: Age of Ultron' Press Conference",
'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+ 'duration': 352,
+ 'timestamp': 1430380829,
'upload_date': '20150430',
- 'title': '\'The Avengers: Age of Ultron\' Press Conference',
+ 'uploader': 'Arirang TV',
+ 'like_count': int,
+ 'age_limit': 0,
}
}, {
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
'info_dict': {
'id': '1048879v',
'ext': 'mp4',
- 'upload_date': '20140820',
- 'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
'title': 'Ankhon Dekhi',
+ 'duration': 6512,
+ 'timestamp': 1408532356,
+ 'upload_date': '20140820',
+ 'uploader': 'Spuul',
+ 'like_count': int,
+ 'age_limit': 13,
},
'params': {
- # requires ffmpeg
+ # m3u8 download
'skip_download': True,
}
+ }, {
+ # episode
+ 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
+ 'md5': '190f3ef426005ba3a080a63325955bc3',
+ 'info_dict': {
+ 'id': '44699v',
+ 'ext': 'mp4',
+ 'title': 'Boys Over Flowers - Episode 1',
+ 'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
+ 'duration': 4155,
+ 'timestamp': 1270496524,
+ 'upload_date': '20100405',
+ 'uploader': 'group8',
+ 'like_count': int,
+ 'age_limit': 13,
+ }
+ }, {
+ # youtube external
+ 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+ 'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+ 'info_dict': {
+ 'id': '50562v',
+ 'ext': 'mp4',
+ 'title': 'Poor Nastya [COMPLETE] - Episode 1',
+ 'description': '',
+ 'duration': 607,
+ 'timestamp': 1274949505,
+ 'upload_date': '20101213',
+ 'uploader': 'ad14065n',
+ 'uploader_id': 'ad14065n',
+ 'like_count': int,
+ 'age_limit': 13,
+ }
+ }, {
+ 'url': 'http://www.viki.com/player/44699v',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- uploader_m = re.search(
- r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
- if uploader_m is None:
- uploader = None
- else:
- uploader = uploader_m.group(1).strip()
-
- rating_str = self._html_search_regex(
- r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
- 'rating information', default='').strip()
- age_limit = US_RATINGS.get(rating_str)
-
- req = compat_urllib_request.Request(
- 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
- req.add_header('User-Agent', self._USER_AGENT)
- info_webpage = self._download_webpage(
- req, video_id, note='Downloading info page')
- err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None)
- if err_msg:
- if 'not available in your region' in err_msg:
- raise ExtractorError(
- 'Video %s is blocked from your location.' % video_id,
- expected=True)
- else:
- raise ExtractorError('Viki said: ' + err_msg)
- mobj = re.search(
- r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
- if not mobj:
- raise ExtractorError('Unable to find video URL')
- video_url = unescapeHTML(mobj.group('url'))
- video_ext = mimetype2ext(mobj.group('mime_type'))
-
- if determine_ext(video_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- video_url, video_id, ext=video_ext)
- else:
- formats = [{
- 'url': video_url,
- 'ext': video_ext,
- }]
-
- upload_date_str = self._html_search_regex(
- r'"created_at":"([^"]+)"', info_webpage, 'upload date')
- upload_date = (
- unified_strdate(upload_date_str)
- if upload_date_str is not None
- else None
- )
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, info_webpage)
-
- return {
+ video = self._call_api(
+ 'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
+
+ title = None
+ titles = video.get('titles')
+ if titles:
+ title = titles.get('en') or titles[titles.keys()[0]]
+ if not title:
+ title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
+ container_titles = video.get('container', {}).get('titles')
+ if container_titles:
+ container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
+ title = '%s - %s' % (container_title, title)
+
+ descriptions = video.get('descriptions')
+ description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None
+
+ duration = int_or_none(video.get('duration'))
+ timestamp = parse_iso8601(video.get('created_at'))
+ uploader = video.get('author')
+ like_count = int_or_none(video.get('likes', {}).get('count'))
+ age_limit = parse_age_limit(video.get('rating'))
+
+ thumbnails = []
+ for thumbnail_id, thumbnail in video.get('images', {}).items():
+ thumbnails.append({
+ 'id': thumbnail_id,
+ 'url': thumbnail.get('url'),
+ })
+
+ subtitles = {}
+ for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+ subtitles[subtitle_lang] = [{
+ 'ext': subtitles_format,
+ 'url': self._prepare_call(
+ 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
+ } for subtitles_format in ('srt', 'vtt')]
+
+ result = {
'id': video_id,
'title': title,
- 'formats': formats,
'description': description,
- 'thumbnail': thumbnail,
- 'age_limit': age_limit,
+ 'duration': duration,
+ 'timestamp': timestamp,
'uploader': uploader,
- 'subtitles': video_subtitles,
- 'upload_date': upload_date,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
}
- def _get_subtitles(self, video_id, info_webpage):
- res = {}
- for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
- sturl = unescapeHTML(sturl_html)
- m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
- if not m:
- continue
- res[m.group('lang')] = [{
- 'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
- 'ext': 'vtt',
- }]
- return res
+ streams = self._call_api(
+ 'videos/%s/streams.json' % video_id, video_id,
+ 'Downloading video streams JSON')
+
+ if 'external' in streams:
+ result.update({
+ '_type': 'url_transparent',
+ 'url': streams['external']['url'],
+ })
+ return result
+
+ formats = []
+ for format_id, stream_dict in streams.items():
+ height = self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)
+ for protocol, format_dict in stream_dict.items():
+ if format_id == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
+ else:
+ formats.append({
+ 'url': format_dict['url'],
+ 'format_id': '%s-%s' % (format_id, protocol),
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ result['formats'] = formats
+ return result
+
+
+class VikiChannelIE(VikiBaseIE):
+ IE_NAME = 'viki:channel'
+ _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
+ 'info_dict': {
+ 'id': '50c',
+ 'title': 'Boys Over Flowers',
+ 'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
+ },
+ 'playlist_count': 70,
+ }, {
+ 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
+ 'info_dict': {
+ 'id': '1354c',
+ 'title': 'Poor Nastya [COMPLETE]',
+ 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
+ },
+ 'playlist_count': 127,
+ }, {
+ 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/artists/2141c-shinee',
+ 'only_matching': True,
+ }]
+
+ _PER_PAGE = 25
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ channel = self._call_api(
+ 'containers/%s.json' % channel_id, channel_id,
+ 'Downloading channel JSON')
+
+ titles = channel['titles']
+ title = titles.get('en') or titles[titles.keys()[0]]
+
+ descriptions = channel['descriptions']
+ description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
+
+ entries = []
+ for video_type in ('episodes', 'clips', 'movies'):
+ for page_num in itertools.count(1):
+ page = self._call_api(
+ 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+ % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+ 'Downloading %s JSON page #%d' % (video_type, page_num))
+ for video in page['response']:
+ video_id = video['id']
+ entries.append(self.url_result(
+ 'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+ if not page['pagination']['next']:
+ break
+
+ return self.playlist_result(entries, channel_id, title, description)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1f9940cf5..0301682b8 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1126,12 +1126,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
else:
- # Hide the formats we found through non-DASH
+ # Remove the formats we found through non-DASH, they
+ # contain less info and it can be wrong, because we use
+ # fixed values (for example the resolution). See
+ # https://github.com/rg3/youtube-dl/issues/5774 for an
+ # example.
dash_keys = set(df['format_id'] for df in dash_formats)
- for f in formats:
- if f['format_id'] in dash_keys:
- f['format_id'] = 'nondash-%s' % f['format_id']
- f['preference'] = f.get('preference', 0) - 10000
+ formats = [f for f in formats if f['format_id'] not in dash_keys]
formats.extend(dash_formats)
# Check for malformed aspect ratio
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 22dbc3aec..5a2315bd9 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -537,7 +537,7 @@ def parseOpts(overrideArguments=None):
verbosity.add_option(
'--dump-pages', '--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
- help='Print downloaded pages to debug problems (very verbose)')
+ help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
verbosity.add_option(
'--write-pages',
action='store_true', dest='write_pages', default=False,
@@ -713,7 +713,7 @@ def parseOpts(overrideArguments=None):
help='Parse additional metadata like song title / artist from the video title. '
'The format syntax is the same as --output, '
'the parsed parameters replace existing values. '
- 'Additional templates: %(album), %(artist). '
+ 'Additional templates: %(album)s, %(artist)s. '
'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
'"Coldplay - Paradise"')
postproc.add_option(
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 38f00bc9b..b33385153 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.05.15'
+__version__ = '2015.05.20'