aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS1
-rw-r--r--README.md4
-rw-r--r--docs/supportedsites.md9
-rwxr-xr-xyoutube_dl/YoutubeDL.py23
-rw-r--r--youtube_dl/extractor/__init__.py9
-rw-r--r--youtube_dl/extractor/aftonbladet.py11
-rw-r--r--youtube_dl/extractor/crunchyroll.py30
-rw-r--r--youtube_dl/extractor/facebook.py2
-rw-r--r--youtube_dl/extractor/generic.py192
-rw-r--r--youtube_dl/extractor/imgur.py6
-rw-r--r--youtube_dl/extractor/iprima.py16
-rw-r--r--youtube_dl/extractor/nova.py138
-rw-r--r--youtube_dl/extractor/nowtv.py192
-rw-r--r--youtube_dl/extractor/patreon.py2
-rw-r--r--youtube_dl/extractor/porn91.py71
-rw-r--r--youtube_dl/extractor/rtlnow.py174
-rw-r--r--youtube_dl/extractor/senateisvp.py8
-rw-r--r--youtube_dl/extractor/soompi.py146
-rw-r--r--youtube_dl/extractor/spiegeltv.py9
-rw-r--r--youtube_dl/extractor/tf1.py5
-rw-r--r--youtube_dl/extractor/tube8.py14
-rw-r--r--youtube_dl/extractor/tubitv.py84
-rw-r--r--youtube_dl/extractor/tumblr.py17
-rw-r--r--youtube_dl/extractor/tvigle.py39
-rw-r--r--youtube_dl/extractor/twentyfourvideo.py4
-rw-r--r--youtube_dl/extractor/vgtv.py28
-rw-r--r--youtube_dl/extractor/vidme.py9
-rw-r--r--youtube_dl/extractor/youtube.py16
-rw-r--r--youtube_dl/postprocessor/embedthumbnail.py4
-rw-r--r--youtube_dl/version.py2
30 files changed, 925 insertions, 340 deletions
diff --git a/AUTHORS b/AUTHORS
index 3410e1fb9..bf2a25cb8 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -126,3 +126,4 @@ Matthias Küch
Julian Richen
Ping O.
Mister Hat
+Peter Ding
diff --git a/README.md b/README.md
index e51bb5343..f3d83c89f 100644
--- a/README.md
+++ b/README.md
@@ -168,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like.
--no-progress Do not print progress bar
--console-title Display progress in console titlebar
-v, --verbose Print various debugging information
- --dump-pages Print downloaded pages to debug problems (very verbose)
+ --dump-pages Print downloaded pages encoded using base64 to debug problems (very verbose)
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
--print-traffic Display sent and read HTTP traffic
-C, --call-home Contact the youtube-dl server for debugging
@@ -220,7 +220,7 @@ which means you can modify it, redistribute it or use it however you like.
--embed-thumbnail Embed thumbnail in the audio as cover art
--add-metadata Write metadata to the video file
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
- parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
+ parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
%(title)s" matches a title like "Coldplay - Paradise"
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index a4879bd9a..a421ae62b 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -26,8 +26,7 @@
- **anitube.se**
- **AnySex**
- **Aparat**
- - **AppleDailyAnimationNews**
- - **AppleDailyRealtimeNews**
+ - **AppleDaily**
- **AppleTrailers**
- **archive.org**: archive.org videos
- **ARD**
@@ -152,7 +151,6 @@
- **fc2**
- **fernsehkritik.tv**
- **fernsehkritik.tv:postecke**
- - **Firedrive**
- **Firstpost**
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
@@ -230,6 +228,7 @@
- **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
- **Karaoketv**
+ - **KarriereVideos**
- **keek**
- **KeezMovies**
- **KhanAcademy**
@@ -322,6 +321,7 @@
- **NosVideo**
- **novamov**: NovaMov
- **Nowness**
+ - **NowTV**
- **nowvideo**: NowVideo
- **npo.nl**
- **npo.nl:live**
@@ -393,7 +393,6 @@
- **Rte**
- **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- - **RTLnow**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
@@ -431,7 +430,6 @@
- **smotri:community**: Smotri.com community videos
- **smotri:user**: Smotri.com user videos
- **Snotr**
- - **Sockshare**
- **Sohu**
- **soundcloud**
- **soundcloud:playlist**
@@ -564,6 +562,7 @@
- **vier:videos**
- **Viewster**
- **viki**
+ - **viki:channel**
- **vimeo**
- **vimeo:album**
- **vimeo:channel**
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index d1953c18f..aa6ec9d9a 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -49,6 +49,7 @@ from .utils import (
ExtractorError,
format_bytes,
formatSeconds,
+ HEADRequest,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
@@ -923,8 +924,9 @@ class YoutubeDL(object):
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
if audiovideo_formats:
return audiovideo_formats[format_idx]
- # for audio only urls, select the best/worst audio format
- elif all(f.get('acodec') != 'none' for f in available_formats):
+ # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
+ elif (all(f.get('acodec') != 'none' for f in available_formats) or
+ all(f.get('vcodec') != 'none' for f in available_formats)):
return available_formats[format_idx]
elif format_spec == 'bestaudio':
audio_formats = [
@@ -1047,6 +1049,8 @@ class YoutubeDL(object):
if not formats:
raise ExtractorError('No video formats found!')
+ formats_dict = {}
+
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
if 'url' not in format:
@@ -1054,6 +1058,18 @@ class YoutubeDL(object):
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
+ format_id = format['format_id']
+ if format_id not in formats_dict:
+ formats_dict[format_id] = []
+ formats_dict[format_id].append(format)
+
+ # Make sure all formats have unique format_id
+ for format_id, ambiguous_formats in formats_dict.items():
+ if len(ambiguous_formats) > 1:
+ for i, format in enumerate(ambiguous_formats):
+ format['format_id'] = '%s-%d' % (format_id, i)
+
+ for i, format in enumerate(formats):
if format.get('format') is None:
format['format'] = '{id} - {res}{note}'.format(
id=format['format_id'],
@@ -1706,7 +1722,8 @@ class YoutubeDL(object):
if req_is_string:
req = url_escaped
else:
- req = compat_urllib_request.Request(
+ req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+ req = req_type(
url_escaped, data=req.data, headers=req.headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 85c1b1a3a..be464271a 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -353,8 +353,10 @@ from .ninegag import NineGagIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
+from .nova import NovaIE
from .novamov import NovaMovIE
from .nowness import NownessIE
+from .nowtv import NowTVIE
from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
@@ -401,6 +403,7 @@ from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
from .podomatic import PodomaticIE
+from .porn91 import Porn91IE
from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
@@ -438,7 +441,6 @@ from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rte import RteIE
from .rtlnl import RtlNlIE
-from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
from .rts import RTSIE
@@ -481,6 +483,10 @@ from .smotri import (
)
from .snotr import SnotrIE
from .sohu import SohuIE
+from .soompi import (
+ SoompiIE,
+ SoompiShowIE,
+)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@@ -566,6 +572,7 @@ from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trutube import TruTubeIE
from .tube8 import Tube8IE
+from .tubitv import TubiTvIE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tunein import TuneInIE
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
index a117502bc..e0518cf26 100644
--- a/youtube_dl/extractor/aftonbladet.py
+++ b/youtube_dl/extractor/aftonbladet.py
@@ -6,11 +6,11 @@ from ..utils import int_or_none
class AftonbladetIE(InfoExtractor):
- _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
+ _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
_TEST = {
- 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
+ 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
'info_dict': {
- 'id': 'article36015',
+ 'id': '36015',
'ext': 'mp4',
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
@@ -25,8 +25,9 @@ class AftonbladetIE(InfoExtractor):
# find internal video meta data
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
- internal_meta_id = self._html_search_regex(
- r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
+ player_config = self._parse_json(self._html_search_regex(
+ r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
+ internal_meta_id = player_config['videoId']
internal_meta_url = meta_url % internal_meta_id
internal_meta_json = self._download_json(
internal_meta_url, video_id, 'Downloading video meta data')
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 1c77df47e..41f0c736d 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -76,8 +76,8 @@ class CrunchyrollIE(InfoExtractor):
self._login()
def _decrypt_subtitles(self, data, iv, id):
- data = bytes_to_intlist(data)
- iv = bytes_to_intlist(iv)
+ data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
+ iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
id = int(id)
def obfuscate_key_aux(count, modulo, start):
@@ -179,6 +179,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return output
+ def _extract_subtitles(self, subtitle):
+ sub_root = xml.etree.ElementTree.fromstring(subtitle)
+ return [{
+ 'ext': 'srt',
+ 'data': self._convert_subtitles_to_srt(sub_root),
+ }, {
+ 'ext': 'ass',
+ 'data': self._convert_subtitles_to_ass(sub_root),
+ }]
+
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
@@ -190,25 +200,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
- id = int(id)
- iv = base64.b64decode(iv)
- data = base64.b64decode(data)
-
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
- sub_root = xml.etree.ElementTree.fromstring(subtitle)
- subtitles[lang_code] = [
- {
- 'ext': 'srt',
- 'data': self._convert_subtitles_to_srt(sub_root),
- },
- {
- 'ext': 'ass',
- 'data': self._convert_subtitles_to_ass(sub_root),
- },
- ]
+ subtitles[lang_code] = self._extract_subtitles(subtitle)
return subtitles
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index e8d682716..82dc27bc6 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -152,7 +152,7 @@ class FacebookIE(InfoExtractor):
raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex(
- r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
default=None)
if not video_title:
video_title = self._html_search_regex(
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9a7b0d25d..96ca398de 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -9,6 +9,8 @@ from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -46,6 +48,97 @@ class GenericIE(InfoExtractor):
_VALID_URL = r'.*'
IE_NAME = 'generic'
_TESTS = [
+ # Direct link to a video
+ {
+ 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
+ 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
+ 'info_dict': {
+ 'id': 'trailer',
+ 'ext': 'mp4',
+ 'title': 'trailer',
+ 'upload_date': '20100513',
+ }
+ },
+ # Direct link to media delivered compressed (until Accept-Encoding is *)
+ {
+ 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
+ 'md5': '128c42e68b13950268b648275386fc74',
+ 'info_dict': {
+ 'id': 'FictionJunction-Parallel_Hearts',
+ 'ext': 'flac',
+ 'title': 'FictionJunction-Parallel_Hearts',
+ 'upload_date': '20140522',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ },
+ # Direct download with broken HEAD
+ {
+ 'url': 'http://ai-radio.org:8000/radio.opus',
+ 'info_dict': {
+ 'id': 'radio',
+ 'ext': 'opus',
+ 'title': 'radio',
+ },
+ 'params': {
+ 'skip_download': True, # infinite live stream
+ },
+ 'expected_warnings': [
+ r'501.*Not Implemented'
+ ],
+ },
+ # Direct link with incorrect MIME type
+ {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'md5': '4ccbebe5f36706d85221f204d7eb5913',
+ 'info_dict': {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'id': '5_Lennart_Poettering_-_Systemd',
+ 'ext': 'webm',
+ 'title': '5_Lennart_Poettering_-_Systemd',
+ 'upload_date': '20141120',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ },
+ # RSS feed
+ {
+ 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'info_dict': {
+ 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'title': 'Zero Punctuation',
+ 'description': 're:.*groundbreaking video review series.*'
+ },
+ 'playlist_mincount': 11,
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ 'ext': 'm4v',
+ 'upload_date': '20150228',
+ 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ }
+ },
+ # google redirect
+ {
+ 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+ 'info_dict': {
+ 'id': 'cmQHVoWB5FY',
+ 'ext': 'mp4',
+ 'upload_date': '20130224',
+ 'uploader_id': 'TheVerge',
+ 'description': 're:^Chris Ziegler takes a look at the\.*',
+ 'uploader': 'The Verge',
+ 'title': 'First Firefox OS phones side-by-side',
+ },
+ 'params': {
+ 'skip_download': False,
+ }
+ },
{
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@ -125,17 +218,6 @@ class GenericIE(InfoExtractor):
'skip_download': True, # m3u8 download
},
},
- # Direct link to a video
- {
- 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
- 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
- 'info_dict': {
- 'id': 'trailer',
- 'ext': 'mp4',
- 'title': 'trailer',
- 'upload_date': '20100513',
- }
- },
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@@ -160,22 +242,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Ooyala'],
},
- # google redirect
- {
- 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
- 'info_dict': {
- 'id': 'cmQHVoWB5FY',
- 'ext': 'mp4',
- 'upload_date': '20130224',
- 'uploader_id': 'TheVerge',
- 'description': 're:^Chris Ziegler takes a look at the\.*',
- 'uploader': 'The Verge',
- 'title': 'First Firefox OS phones side-by-side',
- },
- 'params': {
- 'skip_download': False,
- }
- },
# embed.ly video
{
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -407,16 +473,6 @@ class GenericIE(InfoExtractor):
'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
}
},
- # RSS feed
- {
- 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
- 'info_dict': {
- 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
- 'title': 'Zero Punctuation',
- 'description': 're:.*groundbreaking video review series.*'
- },
- 'playlist_mincount': 11,
- },
# Multiple brightcove videos
# https://github.com/rg3/youtube-dl/issues/2283
{
@@ -470,21 +526,6 @@ class GenericIE(InfoExtractor):
'uploader': 'thoughtworks.wistia.com',
},
},
- # Direct download with broken HEAD
- {
- 'url': 'http://ai-radio.org:8000/radio.opus',
- 'info_dict': {
- 'id': 'radio',
- 'ext': 'opus',
- 'title': 'radio',
- },
- 'params': {
- 'skip_download': True, # infinite live stream
- },
- 'expected_warnings': [
- r'501.*Not Implemented'
- ],
- },
# Soundcloud embed
{
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
@@ -516,21 +557,6 @@ class GenericIE(InfoExtractor):
},
'playlist_mincount': 2,
},
- # Direct link with incorrect MIME type
- {
- 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
- 'md5': '4ccbebe5f36706d85221f204d7eb5913',
- 'info_dict': {
- 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
- 'id': '5_Lennart_Poettering_-_Systemd',
- 'ext': 'webm',
- 'title': '5_Lennart_Poettering_-_Systemd',
- 'upload_date': '20141120',
- },
- 'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
- ]
- },
# Cinchcast embed
{
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
@@ -689,16 +715,6 @@ class GenericIE(InfoExtractor):
'age_limit': 0,
},
},
- # RSS feed with enclosure
- {
- 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
- 'info_dict': {
- 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
- 'ext': 'm4v',
- 'upload_date': '20150228',
- 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
- }
- },
# Crooks and Liars embed
{
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -894,7 +910,7 @@ class GenericIE(InfoExtractor):
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
- video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
+ video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
self.to_screen('%s: Requesting header' % video_id)
@@ -916,7 +932,9 @@ class GenericIE(InfoExtractor):
full_response = None
if head_response is False:
- full_response = self._request_webpage(url, video_id)
+ request = compat_urllib_request.Request(url)
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
head_response = full_response
# Check for direct link to a video
@@ -927,7 +945,7 @@ class GenericIE(InfoExtractor):
head_response.headers.get('Last-Modified'))
return {
'id': video_id,
- 'title': os.path.splitext(url_basename(url))[0],
+ 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'direct': True,
'formats': [{
'format_id': m.group('format_id'),
@@ -941,7 +959,17 @@ class GenericIE(InfoExtractor):
self._downloader.report_warning('Falling back on generic information extractor.')
if not full_response:
- full_response = self._request_webpage(url, video_id)
+ request = compat_urllib_request.Request(url)
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
+ # making it impossible to download only chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
+ # that will always result in downloading the whole file that is not desirable.
+ # Therefore for extraction pass we have to override Accept-Encoding to any in order
+ # to accept raw bytes and being able to download only a chunk.
+ # It may probably better to solve this by checking Content-Type for application/octet-stream
+ # after HEAD request finishes, but not sure if we can rely on this.
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
# Maybe it's a direct link to a video?
# Be careful not to download the whole thing!
@@ -953,7 +981,7 @@ class GenericIE(InfoExtractor):
head_response.headers.get('Last-Modified'))
return {
'id': video_id,
- 'title': os.path.splitext(url_basename(url))[0],
+ 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'direct': True,
'url': url,
'upload_date': upload_date,
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index fe5d95e2c..d692ea79a 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
@@ -12,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -34,7 +35,8 @@ class ImgurIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, video_id), video_id)
width = int_or_none(self._search_regex(
r'<param name="width" value="([0-9]+)"',
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index 8529bedfc..821c8ec10 100644
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -11,11 +11,12 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
+ remove_end,
)
class IPrimaIE(InfoExtractor):
- _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'
+ _VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
_TESTS = [{
'url': 'http://play.iprima.cz/particka/particka-92',
@@ -23,7 +24,7 @@ class IPrimaIE(InfoExtractor):
'id': '39152',
'ext': 'flv',
'title': 'Partička (92)',
- 'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
+ 'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
},
'params': {
@@ -35,13 +36,14 @@ class IPrimaIE(InfoExtractor):
'id': '9718337',
'ext': 'flv',
'title': 'Tchibo Partička - Jarní móda',
- 'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
'thumbnail': 're:^http:.*\.jpg$',
},
'params': {
'skip_download': True, # requires rtmpdump
},
- 'skip': 'Do not have permission to access this page',
+ }, {
+ 'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -102,8 +104,10 @@ class IPrimaIE(InfoExtractor):
return {
'id': real_id,
- 'title': self._og_search_title(webpage),
+ 'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
- 'description': self._og_search_description(webpage),
+ 'description': self._search_regex(
+ r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
+ webpage, 'description', default=None),
}
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py
new file mode 100644
index 000000000..4e999b237
--- /dev/null
+++ b/youtube_dl/extractor/nova.py
@@ -0,0 +1,138 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class NovaIE(InfoExtractor):
+ IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
+ _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+)(?:\.html|/?)'
+ _TESTS = [{
+ 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html',
+ 'info_dict': {
+ 'id': '1608920',
+ 'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
+ 'ext': 'flv',
+ 'title': 'Duel: Michal Hrdlička a Petr Suchoň',
+ 'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
+ 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html',
+ 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
+ 'info_dict': {
+ 'id': '1757139',
+ 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
+ 'ext': 'mp4',
+ 'title': 'Podzemní nemocnice v pražské Krči',
+ 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
+ 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ }
+ }, {
+ 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove/',
+ 'info_dict': {
+ 'id': '1756825',
+ 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
+ 'ext': 'flv',
+ 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
+ 'description': 'md5:d804ba6b30bc7da2705b1fea961bddfe',
+ 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+ site = mobj.group('site')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ [r"(?:media|video_id)\s*:\s*'(\d+)'",
+ r'media=(\d+)',
+ r'id="article_video_(\d+)"',
+ r'id="player_(\d+)"'],
+ webpage, 'video id')
+
+ config_url = self._search_regex(
+ r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
+ webpage, 'config url', default=None)
+
+ if not config_url:
+ DEFAULT_SITE_ID = '23000'
+ SITES = {
+ 'tvnoviny': DEFAULT_SITE_ID,
+ 'novaplus': DEFAULT_SITE_ID,
+ 'vymena': DEFAULT_SITE_ID,
+ 'krasna': DEFAULT_SITE_ID,
+ 'fanda': '30',
+ 'tn': '30',
+ 'doma': '30',
+ }
+
+ site_id = self._search_regex(
+ r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
+
+ config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
+ % (site_id, video_id))
+
+ config = self._download_json(
+ config_url, display_id,
+ 'Downloading config JSON',
+ transform_source=lambda s: re.sub(r'var\s+[\da-zA-Z_]+\s*=\s*({.+?});', r'\1', s))
+
+ mediafile = config['mediafile']
+ video_url = mediafile['src']
+
+ m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
+ if m:
+ formats = [{
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
+ 'ext': 'flv',
+ }]
+ else:
+ formats = [{
+ 'url': video_url,
+ }]
+ self._sort_formats(formats)
+
+ title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = config.get('poster')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
new file mode 100644
index 000000000..173e46cd8
--- /dev/null
+++ b/youtube_dl/extractor/nowtv.py
@@ -0,0 +1,192 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ parse_duration,
+ remove_start,
+)
+
+
+class NowTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player'
+
+ _TESTS = [{
+ # rtl
+ 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player',
+ 'info_dict': {
+ 'id': '203519',
+ 'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
+ 'ext': 'mp4',
+ 'title': 'Die neuen Bauern und eine Hochzeit',
+ 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432580700,
+ 'upload_date': '20150525',
+ 'duration': 2786,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # rtl2
+ 'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player',
+ 'info_dict': {
+ 'id': '203481',
+ 'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934',
+ 'ext': 'mp4',
+ 'title': 'Berlin - Tag & Nacht (Folge 934)',
+ 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432666800,
+ 'upload_date': '20150526',
+ 'duration': 2641,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # rtlnitro
+ 'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player',
+ 'info_dict': {
+ 'id': '165780',
+ 'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00',
+ 'ext': 'mp4',
+ 'title': 'Hals- und Beinbruch',
+ 'description': 'md5:b50d248efffe244e6f56737f0911ca57',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432415400,
+ 'upload_date': '20150523',
+ 'duration': 2742,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # superrtl
+ 'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player',
+ 'info_dict': {
+ 'id': '99205',
+ 'display_id': 'medicopter-117/angst',
+ 'ext': 'mp4',
+ 'title': 'Angst!',
+ 'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1222632900,
+ 'upload_date': '20080928',
+ 'duration': 3025,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # ntv
+ 'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player',
+ 'info_dict': {
+ 'id': '203521',
+ 'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch',
+ 'ext': 'mp4',
+ 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
+ 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432751700,
+ 'upload_date': '20150527',
+ 'duration': 1083,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # vox
+ 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player',
+ 'info_dict': {
+ 'id': '128953',
+ 'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel',
+ 'ext': 'mp4',
+ 'title': "Büro-Fall / Chihuahua 'Joel'",
+ 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1432408200,
+ 'upload_date': '20150523',
+ 'duration': 3092,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+ station = mobj.group('station')
+
+ info = self._download_json(
+ 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+ display_id)
+
+ video_id = compat_str(info['id'])
+
+ files = info['files']
+ if not files:
+ if info.get('geoblocked', False):
+ raise ExtractorError(
+ 'Video %s is not available from your location due to geo restriction' % video_id,
+ expected=True)
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+
+ f = info.get('format', {})
+ station = f.get('station') or station
+
+ STATIONS = {
+ 'rtl': 'rtlnow',
+ 'rtl2': 'rtl2now',
+ 'vox': 'voxnow',
+ 'nitro': 'rtlnitronow',
+ 'ntv': 'n-tvnow',
+ 'superrtl': 'superrtlnow'
+ }
+
+ formats = []
+ for item in files['items']:
+ item_path = remove_start(item['path'], '/')
+ tbr = int_or_none(item['bitrate'])
+ m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path)
+ m3u8_url = m3u8_url.replace('now/', 'now/videos/')
+ formats.append({
+ 'url': m3u8_url,
+ 'format_id': '%s-%sk' % (item['id'], tbr),
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ title = info['title']
+ description = info.get('articleLong') or info.get('articleShort')
+ timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+ duration = parse_duration(info.get('duration'))
+ thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
index f179ea200..6cdc2638b 100644
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -87,7 +87,7 @@ class PatreonIE(InfoExtractor):
r'<div class="attach"><a target="_blank" href="([^"]+)">',
webpage, 'attachment URL', default=None)
embed = self._html_search_regex(
- r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+ r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
webpage, 'embedded URL', default=None)
if attach_fn is not None:
diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
new file mode 100644
index 000000000..72d1b2718
--- /dev/null
+++ b/youtube_dl/extractor/porn91.py
@@ -0,0 +1,71 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from ..compat import compat_urllib_parse
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class Porn91IE(InfoExtractor):
+ IE_NAME = '91porn'
+ _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
+
+ _TEST = {
+ 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+ 'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
+ 'info_dict': {
+ 'id': '7e42283b4f5ab36da134',
+ 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
+ 'ext': 'mp4',
+ 'duration': 431,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
+ self._set_cookie('91porn.com', 'language', 'cn_CN')
+ webpage = self._download_webpage(url, video_id, 'get HTML content')
+
+ if '作为游客,你每天只可观看10个视频' in webpage:
+ raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
+
+ title = self._search_regex(
+ r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
+ title = title.replace('\n', '')
+
+ # get real url
+ file_id = self._search_regex(
+ r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id')
+ sec_code = self._search_regex(
+ r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
+ max_vid = self._search_regex(
+ r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
+ url_params = compat_urllib_parse.urlencode({
+ 'VID': file_id,
+ 'mp4': '1',
+ 'seccode': sec_code,
+ 'max_vid': max_vid,
+ })
+ info_cn = self._download_webpage(
+ 'http://91porn.com/getfile.php?' + url_params, video_id,
+ 'get real video url')
+ video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
+
+ duration = parse_duration(self._search_regex(
+ r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
+
+ comment_count = int_or_none(self._search_regex(
+ r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'duration': duration,
+ 'comment_count': comment_count,
+ }
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
deleted file mode 100644
index 785a8045e..000000000
--- a/youtube_dl/extractor/rtlnow.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- clean_html,
- unified_strdate,
- int_or_none,
-)
-
-
-class RTLnowIE(InfoExtractor):
- """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
- _VALID_URL = r'''(?x)
- (?:https?://)?
- (?P<url>
- (?P<domain>
- rtl-now\.rtl\.de|
- rtl2now\.rtl2\.de|
- (?:www\.)?voxnow\.de|
- (?:www\.)?rtlnitronow\.de|
- (?:www\.)?superrtlnow\.de|
- (?:www\.)?n-tvnow\.de)
- /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
- (?:container_id|film_id)=(?P<video_id>[0-9]+)&
- player=1(?:&season=[0-9]+)?(?:&.*)?
- )'''
-
- _TESTS = [
- {
- 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
- 'info_dict': {
- 'id': '90419',
- 'ext': 'flv',
- 'title': 'Ahornallee - Folge 1 - Der Einzug',
- 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
- 'upload_date': '20070416',
- 'duration': 1685,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only works from Germany',
- },
- {
- 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
- 'info_dict': {
- 'id': '69756',
- 'ext': 'flv',
- 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
- 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
- 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
- 'upload_date': '20120519',
- 'duration': 1245,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only works from Germany',
- },
- {
- 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
- 'info_dict': {
- 'id': '13883',
- 'ext': 'flv',
- 'title': 'Voxtours - Südafrika-Reporter II',
- 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
- 'upload_date': '20090627',
- 'duration': 1800,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
- 'info_dict': {
- 'id': '99205',
- 'ext': 'flv',
- 'title': 'Medicopter 117 - Angst!',
- 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
- 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
- 'upload_date': '20080928',
- 'duration': 2691,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
- 'info_dict': {
- 'id': '188729',
- 'ext': 'flv',
- 'upload_date': '20150204',
- 'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
- 'title': 'Der Bachelor - Folge 4',
- }
- }, {
- 'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_page_url = 'http://%s/' % mobj.group('domain')
- video_id = mobj.group('video_id')
-
- webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
-
- mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
- if mobj:
- raise ExtractorError(clean_html(mobj.group(1)), expected=True)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
-
- mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
- duration = int(mobj.group('seconds')) if mobj else None
-
- playerdata_url = self._html_search_regex(
- r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
-
- playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
-
- videoinfo = playerdata.find('./playlist/videoinfo')
-
- formats = []
- for filename in videoinfo.findall('filename'):
- mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
- if mobj:
- fmt = {
- 'url': mobj.group('url'),
- 'play_path': 'mp4:' + mobj.group('play_path'),
- 'page_url': video_page_url,
- 'player_url': video_page_url + 'includes/vodplayer.swf',
- }
- else:
- mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
- if mobj:
- fmt = {
- 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
- 'play_path': 'mp4:' + mobj.group('play_path'),
- 'page_url': url,
- 'player_url': video_page_url + 'includes/vodplayer.swf',
- }
- else:
- fmt = {
- 'url': filename.text,
- }
- fmt.update({
- 'width': int_or_none(filename.get('width')),
- 'height': int_or_none(filename.get('height')),
- 'vbr': int_or_none(filename.get('bitrate')),
- 'ext': 'flv',
- })
- formats.append(fmt)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py
index d3b8a1be4..9c53704ea 100644
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dl/extractor/senateisvp.py
@@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor):
["arch", "", "http://ussenate-f.akamaihd.net/"]
]
_IE_NAME = 'senate.gov'
- _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)'
+ _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
'info_dict': {
@@ -72,12 +72,16 @@ class SenateISVPIE(InfoExtractor):
'ext': 'mp4',
'title': 'Integrated Senate Video Player'
}
+ }, {
+ # From http://www.c-span.org/video/?96791-1
+ 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
+ 'only_matching': True,
}]
@staticmethod
def _search_iframe_url(webpage):
mobj = re.search(
- r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]",
+ r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
webpage)
if mobj:
return mobj.group('url')
diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py
new file mode 100644
index 000000000..5da66ca9e
--- /dev/null
+++ b/youtube_dl/extractor/soompi.py
@@ -0,0 +1,146 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .crunchyroll import CrunchyrollIE
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ remove_start,
+ xpath_text,
+)
+
+
+class SoompiBaseIE(InfoExtractor):
+ def _get_episodes(self, webpage, episode_filter=None):
+ episodes = self._parse_json(
+ self._search_regex(
+ r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
+ None)
+ return list(filter(episode_filter, episodes))
+
+
+class SoompiIE(SoompiBaseIE, CrunchyrollIE):
+ IE_NAME = 'soompi'
+ _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://tv.soompi.com/en/watch/29235',
+ 'info_dict': {
+ 'id': '29235',
+ 'ext': 'mp4',
+ 'title': 'Episode 1096',
+ 'description': '2015-05-20'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _get_episode(self, webpage, video_id):
+ return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
+
+ def _get_subtitles(self, config, video_id):
+ sub_langs = {}
+ for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
+ sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
+
+ subtitles = {}
+ for s in config.findall('./{default}preload/subtitle'):
+ lang_code = sub_langs.get(s.attrib['id'])
+ if not lang_code:
+ continue
+ sub_id = s.get('id')
+ data = xpath_text(s, './data', 'data')
+ iv = xpath_text(s, './iv', 'iv')
+ if not id or not iv or not data:
+ continue
+ subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
+ subtitles[lang_code] = self._extract_subtitles(subtitle)
+ return subtitles
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading episode page')
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+ webpage = ee.cause.read()
+ block_message = self._html_search_regex(
+ r'(?s)<div class="block-message">(.+?)</div>', webpage,
+ 'block message', default=None)
+ if block_message:
+ raise ExtractorError(block_message, expected=True)
+ raise
+
+ formats = []
+ config = None
+ for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
+ config = self._download_xml(
+ 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
+ video_id, 'Downloading %s XML' % format_id)
+ m3u8_url = xpath_text(
+ config, './{default}preload/stream_info/file',
+ '%s m3u8 URL' % format_id)
+ if not m3u8_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', m3u8_id=format_id))
+ self._sort_formats(formats)
+
+ episode = self._get_episode(webpage, video_id)
+
+ title = episode['name']
+ description = episode.get('description')
+ duration = int_or_none(episode.get('duration'))
+
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
+
+ subtitles = self.extract_subtitles(config, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class SoompiShowIE(SoompiBaseIE):
+ IE_NAME = 'soompi:show'
+ _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
+ _TESTS = [{
+ 'url': 'http://tv.soompi.com/en/shows/liar-game',
+ 'info_dict': {
+ 'id': 'liar-game',
+ 'title': 'Liar Game',
+ 'description': 'md5:52c02bce0c1a622a95823591d0589b66',
+ },
+ 'playlist_count': 14,
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, show_id, 'Downloading show page')
+
+ title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
+ description = self._og_search_description(webpage)
+
+ entries = [
+ self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
+ for episode in self._get_episodes(webpage)]
+
+ return self.playlist_result(entries, show_id, title, description)
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py
index 98cf92d89..359722ad6 100644
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -51,9 +51,9 @@ class SpiegeltvIE(InfoExtractor):
is_wide = media_json['is_wide']
server_json = self._download_json(
- 'http://www.spiegel.tv/streaming_servers/', video_id,
- note='Downloading server information')
- server = server_json[0]['endpoint']
+ 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
+ video_id, note='Downloading server information')
+ server = server_json['streamingserver'][0]['endpoint']
thumbnails = []
for image in media_json['images']:
@@ -76,5 +76,6 @@ class SpiegeltvIE(InfoExtractor):
'ext': 'm4v',
'description': description,
'duration': duration,
- 'thumbnails': thumbnails
+ 'thumbnails': thumbnails,
+ 'rtmp_live': True,
}
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index 656410528..3a68eaa80 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
- _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+ _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
_TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
@@ -35,6 +35,9 @@ class TF1IE(InfoExtractor):
}, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True,
+ }, {
+ 'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index d73ad3762..6ca8840b0 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor):
webpage = self._download_webpage(req, display_id)
flashvars = json.loads(self._html_search_regex(
- r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
+ r'flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
video_url = flashvars['video_url']
if flashvars.get('encrypted') is True:
@@ -58,19 +58,19 @@ class Tube8IE(InfoExtractor):
thumbnail = flashvars.get('image_url')
title = self._html_search_regex(
- r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
+ r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
- r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
+ r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
- r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
+ r'<span class="username">\s*(.+?)\s*<',
webpage, 'uploader', fatal=False)
like_count = int_or_none(self._html_search_regex(
- r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
+ r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
dislike_count = int_or_none(self._html_search_regex(
- r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
+ r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
view_count = self._html_search_regex(
- r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
+ r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
comment_count = self._html_search_regex(
diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py
new file mode 100644
index 000000000..2c4b21807
--- /dev/null
+++ b/youtube_dl/extractor/tubitv.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import codecs
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class TubiTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)'
+ _LOGIN_URL = 'http://tubitv.com/login'
+ _NETRC_MACHINE = 'tubitv'
+ _TEST = {
+ 'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01',
+ 'info_dict': {
+ 'id': '54411',
+ 'ext': 'mp4',
+ 'title': 'The Kitchen Musical - EP01',
+ 'thumbnail': 're:^https?://.*\.png$',
+ 'description': 'md5:37532716166069b353e8866e71fefae7',
+ 'duration': 2407,
+ },
+ 'params': {
+ 'skip_download': 'HLS download',
+ },
+ }
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+ self.report_login()
+ form_data = {
+ 'username': username,
+ 'password': password,
+ }
+ payload = compat_urllib_parse.urlencode(form_data).encode('utf-8')
+ request = compat_urllib_request.Request(self._LOGIN_URL, payload)
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ login_page = self._download_webpage(
+ request, None, False, 'Wrong login info')
+ if not re.search(r'id="tubi-logout"', login_page):
+ raise ExtractorError(
+ 'Login failed (invalid username/password)', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
+ raise ExtractorError(
+ 'This video requires login, use --username and --password '
+ 'options to provide account credentials.', expected=True)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration'))
+
+ apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
+ m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 828c808a6..e6218808f 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -28,6 +28,17 @@ class TumblrIE(InfoExtractor):
'description': 'md5:dba62ac8639482759c8eb10ce474586a',
'thumbnail': 're:http://.*\.jpg',
}
+ }, {
+ 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
+ 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
+ 'info_dict': {
+ 'id': 'Wmur',
+ 'ext': 'mp4',
+ 'title': 'naked smoking & stretching',
+ 'upload_date': '20150506',
+ 'timestamp': 1430931613,
+ },
+ 'add_ie': ['Vidme'],
}]
def _real_extract(self, url):
@@ -38,6 +49,12 @@ class TumblrIE(InfoExtractor):
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage = self._download_webpage(url, video_id)
+ vid_me_embed_url = self._search_regex(
+ r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+ webpage, 'vid.me embed', default=None)
+ if vid_me_embed_url is not None:
+ return self.url_result(vid_me_embed_url, 'Vidme')
+
iframe_url = self._search_regex(
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
webpage, 'iframe url')
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index 102362b29..dc3a8334a 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -5,7 +5,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
float_or_none,
+ int_or_none,
parse_age_limit,
)
@@ -24,22 +26,24 @@ class TvigleIE(InfoExtractor):
'display_id': 'sokrat',
'ext': 'flv',
'title': 'Сократ',
- 'description': 'md5:a05bd01be310074d5833efc6743be95e',
+ 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
'duration': 6586,
- 'age_limit': 0,
+ 'age_limit': 12,
},
+ 'skip': 'georestricted',
},
{
'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
- 'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574',
+ 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
'info_dict': {
'id': '5142516',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
'description': 'md5:027f7dc872948f14c96d19b4178428a4',
'duration': 186.080,
'age_limit': 0,
},
+ 'skip': 'georestricted',
}, {
'url': 'https://cloud.tvigle.ru/video/5267604/',
'only_matching': True,
@@ -54,7 +58,7 @@ class TvigleIE(InfoExtractor):
if not video_id:
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
- r'<li class="video-preview current_playing" id="(\d+)">',
+ r'class="video-preview current_playing" id="(\d+)">',
webpage, 'video id')
video_data = self._download_json(
@@ -62,21 +66,34 @@ class TvigleIE(InfoExtractor):
item = video_data['playlist']['items'][0]
+ videos = item.get('videos')
+
+ error_message = item.get('errorMessage')
+ if not videos and error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
title = item['title']
- description = item['description']
- thumbnail = item['thumbnail']
+ description = item.get('description')
+ thumbnail = item.get('thumbnail')
duration = float_or_none(item.get('durationMilliseconds'), 1000)
age_limit = parse_age_limit(item.get('ageRestrictions'))
formats = []
for vcodec, fmts in item['videos'].items():
- for quality, video_url in fmts.items():
+ for format_id, video_url in fmts.items():
+ if format_id == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id=vcodec))
+ continue
+ height = self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)
formats.append({
'url': video_url,
- 'format_id': '%s-%s' % (vcodec, quality),
+ 'format_id': '%s-%s' % (vcodec, format_id),
'vcodec': vcodec,
- 'height': int(quality[:-1]),
- 'filesize': item['video_files_size'][vcodec][quality],
+ 'height': int_or_none(height),
+ 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index 67e8bfea0..c1ee1decc 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -15,7 +15,7 @@ class TwentyFourVideoIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.24video.net/video/view/1044982',
- 'md5': '48dd7646775690a80447a8dca6a2df76',
+ 'md5': 'd041af8b5b4246ea466226a0d6693345',
'info_dict': {
'id': '1044982',
'ext': 'mp4',
@@ -54,7 +54,7 @@ class TwentyFourVideoIE(InfoExtractor):
webpage, 'upload date'))
uploader = self._html_search_regex(
- r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>',
+ r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
webpage, 'uploader', fatal=False)
view_count = int_or_none(self._html_search_regex(
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index e6ee1e471..f38a72fde 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+)
class VGTVIE(InfoExtractor):
@@ -59,16 +62,16 @@ class VGTVIE(InfoExtractor):
},
{
# streamType: live
- 'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
+ 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
'info_dict': {
- 'id': '100015',
+ 'id': '113063',
'ext': 'flv',
- 'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
- 'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
+ 'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 0,
- 'timestamp': 1407423348,
- 'upload_date': '20140807',
+ 'timestamp': 1432975582,
+ 'upload_date': '20150530',
'view_count': int,
},
'params': {
@@ -97,7 +100,12 @@ class VGTVIE(InfoExtractor):
% (host, video_id, HOST_WEBSITES[host]),
video_id, 'Downloading media JSON')
+ if data.get('status') == 'inactive':
+ raise ExtractorError(
+ 'Video %s is no longer available' % video_id, expected=True)
+
streams = data['streamUrls']
+ stream_type = data.get('streamType')
formats = []
@@ -107,7 +115,8 @@ class VGTVIE(InfoExtractor):
hls_url, video_id, 'mp4', m3u8_id='hls'))
hds_url = streams.get('hds')
- if hds_url:
+ # wasLive hds are always 404
+ if hds_url and stream_type != 'wasLive':
formats.extend(self._extract_f4m_formats(
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds'))
@@ -135,13 +144,14 @@ class VGTVIE(InfoExtractor):
return {
'id': video_id,
- 'title': data['title'],
+ 'title': self._live_title(data['title']),
'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'],
'formats': formats,
+ 'is_live': True if stream_type == 'live' else False,
}
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index bd953fb4c..e0b55078b 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -10,7 +10,7 @@ from ..utils import (
class VidmeIE(InfoExtractor):
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://vid.me/QNB',
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
'info_dict': {
@@ -23,9 +23,14 @@ class VidmeIE(InfoExtractor):
'upload_date': '20140725',
'thumbnail': 're:^https?://.*\.jpg',
},
- }
+ }, {
+ # From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
+ 'url': 'https://vid.me/e/Wmur',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
+ url = url.replace('vid.me/e/', 'vid.me/')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0301682b8..aacb999ce 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1399,6 +1399,22 @@ class YoutubeChannelIE(InfoExtractor):
channel_id = self._match_id(url)
url = self._TEMPLATE_URL % channel_id
+
+ # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
+ # Workaround by extracting as a playlist if managed to obtain channel playlist URL
+ # otherwise fallback on channel by page extraction
+ channel_page = self._download_webpage(
+ url + '?view=57', channel_id,
+ 'Downloading channel page', fatal=False)
+ channel_playlist_id = self._search_regex(
+ [r'<meta itemprop="channelId" content="([^"]+)">',
+ r'data-channel-external-id="([^"]+)"'],
+ channel_page, 'channel id', default=None)
+ if channel_playlist_id and channel_playlist_id.startswith('UC'):
+ playlist_id = 'UU' + channel_playlist_id[2:]
+ return self.url_result(
+ compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
+
channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
autogenerated = re.search(r'''(?x)
class="[^"]*?(?:
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 8f825f785..774494efd 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -49,7 +49,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- elif info['ext'] == 'm4a':
+ elif info['ext'] in ['m4a', 'mp4']:
if not check_executable('AtomicParsley', ['-v']):
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
@@ -82,6 +82,6 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
else:
- raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.')
+ raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
return [], info
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index b33385153..653710131 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.05.20'
+__version__ = '2015.05.29'