aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md6
-rw-r--r--youtube_dl/extractor/arte.py2
-rw-r--r--youtube_dl/extractor/cloudy.py4
-rw-r--r--youtube_dl/extractor/crunchyroll.py31
-rw-r--r--youtube_dl/extractor/cwtv.py5
-rw-r--r--youtube_dl/extractor/extractors.py5
-rw-r--r--youtube_dl/extractor/malemotion.py46
-rw-r--r--youtube_dl/extractor/mwave.py26
-rw-r--r--youtube_dl/extractor/nuvid.py44
-rw-r--r--youtube_dl/extractor/ooyala.py2
-rw-r--r--youtube_dl/extractor/orf.py1
-rw-r--r--youtube_dl/extractor/scivee.py1
-rw-r--r--youtube_dl/extractor/theplatform.py6
-rw-r--r--youtube_dl/extractor/twentymin.py23
-rw-r--r--youtube_dl/extractor/viewster.py144
-rw-r--r--youtube_dl/extractor/watchindianporn.py (renamed from youtube_dl/extractor/sexykarma.py)63
-rw-r--r--youtube_dl/extractor/yandexmusic.py26
-rw-r--r--youtube_dl/extractor/youtube.py29
18 files changed, 227 insertions, 237 deletions
diff --git a/README.md b/README.md
index e062444b3..ecf737047 100644
--- a/README.md
+++ b/README.md
@@ -697,6 +697,10 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
+### I'm getting an error when trying to use an output template: `error: using output template conflicts with using title, video ID or auto number`
+
+Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number`, whether set on the command line or in a configuration file. Remove those conflicting options if any are present.
+
### Do I always have to pass `-citw`?
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
@@ -717,7 +721,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
### I have downloaded a video but how can I play it?
-Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index a9e3266dc..881cacfab 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -63,7 +63,7 @@ class ArteTvIE(InfoExtractor):
class ArteTVPlus7IE(InfoExtractor):
IE_NAME = 'arte.tv:+7'
- _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&+])'
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
@classmethod
def _extract_url_info(cls, url):
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
index 9e267e6c0..9a28ef354 100644
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dl/extractor/cloudy.py
@@ -19,7 +19,7 @@ from ..utils import (
class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec and videoraj.ch'
_VALID_URL = r'''(?x)
- https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
+ https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
(?:v/|embed\.php\?id=)
(?P<id>[A-Za-z0-9]+)
'''
@@ -37,7 +37,7 @@ class CloudyIE(InfoExtractor):
}
},
{
- 'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
+ 'url': 'http://www.videoraj.to/v/47f399fd8bb60',
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
'info_dict': {
'id': '47f399fd8bb60',
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 8ae3f2890..dd753c7c3 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -11,7 +11,6 @@ from math import pow, sqrt, floor
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
- compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
@@ -306,28 +305,24 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
'video_uploader', fatal=False)
- playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
- playerdata_req = sanitized_Request(playerdata_url)
- playerdata_req.data = urlencode_postdata({'current_page': webpage_url})
- playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
-
- stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
- video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
-
formats = []
- for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
+ video_encode_ids = []
+ for fmt in re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
- % (stream_id, stream_format, stream_quality),
+ % (video_id, stream_format, stream_quality),
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata = self._download_xml(
streamdata_req, video_id,
note='Downloading media info for %s' % video_format)
stream_info = streamdata.find('./{default}preload/stream_info')
+ video_encode_id = xpath_text(stream_info, './video_encode_id')
+ if video_encode_id in video_encode_ids:
+ continue
+ video_encode_ids.append(video_encode_id)
video_url = xpath_text(stream_info, './host')
video_play_path = xpath_text(stream_info, './file')
if not video_url or not video_play_path:
@@ -360,15 +355,25 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
})
formats.append(format_info)
+ metadata = self._download_xml(
+ 'http://www.crunchyroll.com/xml', video_id,
+ note='Downloading media info', query={
+ 'req': 'RpcApiVideoPlayer_GetMediaMetadata',
+ 'media_id': video_id,
+ })
+
subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,
'title': video_title,
'description': video_description,
- 'thumbnail': video_thumbnail,
+ 'thumbnail': xpath_text(metadata, 'episode_image_url'),
'uploader': video_uploader,
'upload_date': video_upload_date,
+ 'series': xpath_text(metadata, 'series_title'),
+ 'episode': xpath_text(metadata, 'episode_title'),
+ 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
'subtitles': subtitles,
'formats': formats,
}
diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py
index f5cefd966..ebd14cb16 100644
--- a/youtube_dl/extractor/cwtv.py
+++ b/youtube_dl/extractor/cwtv.py
@@ -9,7 +9,7 @@ from ..utils import (
class CWTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
+ _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
_TESTS = [{
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
'info_dict': {
@@ -48,6 +48,9 @@ class CWTVIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
+ }, {
+ 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 8b215c5ab..3adcd41c4 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -400,7 +400,6 @@ from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE
-from .malemotion import MalemotionIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
@@ -439,7 +438,7 @@ from .mtv import (
)
from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
-from .mwave import MwaveIE
+from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import MyviIE
@@ -658,7 +657,6 @@ from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .senateisvp import SenateISVPIE
from .servingsys import ServingSysIE
from .sexu import SexuIE
-from .sexykarma import SexyKarmaIE
from .shahid import ShahidIE
from .shared import SharedIE
from .sharesix import ShareSixIE
@@ -919,6 +917,7 @@ from .vulture import VultureIE
from .walla import WallaIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
+from .watchindianporn import WatchIndianPornIE
from .wdr import (
WDRIE,
WDRMobileIE,
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py
deleted file mode 100644
index 92511a671..000000000
--- a/youtube_dl/extractor/malemotion.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-
-
-class MalemotionIE(InfoExtractor):
- _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
- _TEST = {
- 'url': 'http://malemotion.com/video/bete-de-concours.ltc',
- 'md5': '3013e53a0afbde2878bc39998c33e8a5',
- 'info_dict': {
- 'id': 'ltc',
- 'ext': 'mp4',
- 'title': 'Bête de Concours',
- 'age_limit': 18,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_url = compat_urllib_parse_unquote(self._search_regex(
- r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
- video_title = self._html_search_regex(
- r'<title>(.*?)</title', webpage, 'title')
- video_thumbnail = self._search_regex(
- r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
-
- formats = [{
- 'url': video_url,
- 'ext': 'mp4',
- 'format_id': 'mp4',
- 'preference': 1,
- }]
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': video_title,
- 'thumbnail': video_thumbnail,
- 'age_limit': 18,
- }
diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py
index 5c3c8d464..a103e0323 100644
--- a/youtube_dl/extractor/mwave.py
+++ b/youtube_dl/extractor/mwave.py
@@ -10,6 +10,7 @@ from ..utils import (
class MwaveIE(InfoExtractor):
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
+ _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
_TEST = {
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
# md5 is unstable
@@ -56,3 +57,28 @@ class MwaveIE(InfoExtractor):
'view_count': int_or_none(vod_info.get('hit')),
'formats': formats,
}
+
+
+class MwaveMeetGreetIE(InfoExtractor):
+ _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://mwave.interest.me/meetgreet/view/256',
+ 'info_dict': {
+ 'id': '173294',
+ 'ext': 'flv',
+ 'title': '[MEET&GREET] Park BoRam',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'Mwave',
+ 'duration': 3634,
+ 'view_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ clip_id = self._html_search_regex(
+ r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)',
+ webpage, 'clip ID')
+ clip_url = MwaveIE._URL_TEMPLATE % clip_id
+ return self.url_result(clip_url, 'Mwave', clip_id)
diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py
index 9fa7cefad..ab6bfcd7f 100644
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -5,8 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
- sanitized_Request,
- unified_strdate,
)
@@ -20,7 +18,6 @@ class NuvidIE(InfoExtractor):
'ext': 'mp4',
'title': 'Horny babes show their awesome bodeis and',
'duration': 129,
- 'upload_date': '20140508',
'age_limit': 18,
}
}
@@ -28,28 +25,31 @@ class NuvidIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- formats = []
+ page_url = 'http://m.nuvid.com/video/%s' % video_id
+ webpage = self._download_webpage(
+ page_url, video_id, 'Downloading video page')
+ # When dwnld_speed exists and has a value larger than the MP4 file's
+ # bitrate, Nuvid returns the MP4 URL
+ # Its unit is 100 bytes/millisecond; see mobile-nuvid-min.js for the algorithm
+ self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
+ mp4_webpage = self._download_webpage(
+ page_url, video_id, 'Downloading video page for MP4 format')
- for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
- request = sanitized_Request(
- 'http://m.nuvid.com/play/%s' % video_id)
- request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
- webpage = self._download_webpage(
- request, video_id, 'Downloading %s page' % format_id)
- video_url = self._html_search_regex(
- r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
- if not video_url:
- continue
+ html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
+ video_url = self._html_search_regex(html5_video_re, webpage, video_id)
+ mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
+ formats = [{
+ 'url': video_url,
+ }]
+ if mp4_video_url != video_url:
formats.append({
- 'url': video_url,
- 'format_id': format_id,
+ 'url': mp4_video_url,
})
- webpage = self._download_webpage(
- 'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
title = self._html_search_regex(
[r'<span title="([^"]+)">',
- r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
+ r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
+ r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
thumbnails = [
{
'url': thumb_url,
@@ -57,9 +57,8 @@ class NuvidIE(InfoExtractor):
]
thumbnail = thumbnails[0]['url'] if thumbnails else None
duration = parse_duration(self._html_search_regex(
- r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
- upload_date = unified_strdate(self._html_search_regex(
- r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
+ [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
+ r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
return {
'id': video_id,
@@ -67,7 +66,6 @@ class NuvidIE(InfoExtractor):
'thumbnails': thumbnails,
'thumbnail': thumbnail,
'duration': duration,
- 'upload_date': upload_date,
'age_limit': 18,
'formats': formats,
}
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index 16f040191..95e982897 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -96,6 +96,8 @@ class OoyalaIE(OoyalaBaseIE):
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
'duration': 853.386,
},
+ # The video in the original webpage now uses PlayWire
+ 'skip': 'Ooyala said: movie expired',
}, {
# Only available for ipad
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 66c75f8b3..4e3864f0d 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -185,6 +185,7 @@ class ORFFM4IE(InfoExtractor):
'timestamp': 1452456073,
'upload_date': '20160110',
},
+ 'skip': 'Live streams on FM4 got deleted soon',
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/scivee.py b/youtube_dl/extractor/scivee.py
index 3bf93c870..b1ca12fde 100644
--- a/youtube_dl/extractor/scivee.py
+++ b/youtube_dl/extractor/scivee.py
@@ -18,6 +18,7 @@ class SciVeeIE(InfoExtractor):
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
'description': 'md5:81f1710638e11a481358fab1b11059d7',
},
+ 'skip': 'Not accessible from Travis CI server',
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 8272dd969..a25417f94 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -159,11 +159,11 @@ class ThePlatformIE(ThePlatformBaseIE):
def str_to_hex(str):
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
- def hex_to_str(hex):
- return binascii.a2b_hex(hex)
+ def hex_to_bytes(hex):
+ return binascii.a2b_hex(hex.encode('ascii'))
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
- clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
+ clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index ca7d953b8..b721ecb0a 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -32,7 +32,22 @@ class TwentyMinutenIE(InfoExtractor):
'title': '«Wir müssen mutig nach vorne schauen»',
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
- }
+ },
+ 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
+ }, {
+ # YouTube embed
+ 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
+ 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
+ 'info_dict': {
+ 'id': 'ivM7A7SpDOs',
+ 'ext': 'mp4',
+ 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
+ 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
+ 'upload_date': '20160424',
+ 'uploader': 'RTVCM Castilla-La Mancha',
+ 'uploader_id': 'RTVCM',
+ },
+ 'add_ie': ['Youtube'],
}, {
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
'only_matching': True,
@@ -48,6 +63,12 @@ class TwentyMinutenIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
+ youtube_url = self._html_search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
+ webpage, 'YouTube embed URL', default=None)
+ if youtube_url is not None:
+ return self.url_result(youtube_url, 'Youtube')
+
title = self._html_search_regex(
r'<h1>.*?<span>(.+?)</span></h1>',
webpage, 'title', default=None)
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index 6edc2c44e..a93196a07 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -6,7 +6,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
- compat_urllib_parse,
compat_urllib_parse_unquote,
)
from ..utils import (
@@ -78,11 +77,11 @@ class ViewsterIE(InfoExtractor):
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
- def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
+ def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
request = sanitized_Request(url)
request.add_header('Accept', self._ACCEPT_HEADER)
request.add_header('Auth-token', self._AUTH_TOKEN)
- return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
+ return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -117,72 +116,85 @@ class ViewsterIE(InfoExtractor):
return self.playlist_result(entries, video_id, title, description)
formats = []
- manifest_url = None
- m3u8_formats = []
- for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
- media = self._download_json(
- 'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
- % (entry_id, compat_urllib_parse.quote(media_type)),
- video_id, 'Downloading %s JSON' % media_type, fatal=False)
- if not media:
- continue
- video_url = media.get('Uri')
- if not video_url:
- continue
- ext = determine_ext(video_url)
- if ext == 'f4m':
- manifest_url = video_url
- video_url += '&' if '?' in video_url else '?'
- video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id='hds'))
- elif ext == 'm3u8':
- manifest_url = video_url
- m3u8_formats = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id='hls',
- fatal=False) # m3u8 sometimes fail
- if m3u8_formats:
- formats.extend(m3u8_formats)
- else:
- qualities_basename = self._search_regex(
- '/([^/]+)\.csmil/',
- manifest_url, 'qualities basename', default=None)
- if not qualities_basename:
+ for language_set in info.get('LanguageSets', []):
+ manifest_url = None
+ m3u8_formats = []
+ audio = language_set.get('Audio') or ''
+ subtitle = language_set.get('Subtitle') or ''
+ base_format_id = audio
+ if subtitle:
+ base_format_id += '-%s' % subtitle
+
+ def concat(suffix, sep='-'):
+ return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
+
+ for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
+ media = self._download_json(
+ 'https://public-api.viewster.com/movies/%s/video' % entry_id,
+ video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
+ 'mediaType': media_type,
+ 'language': audio,
+ 'subtitle': subtitle,
+ })
+ if not media:
continue
- QUALITIES_RE = r'((,\d+k)+,?)'
- qualities = self._search_regex(
- QUALITIES_RE, qualities_basename,
- 'qualities', default=None)
- if not qualities:
+ video_url = media.get('Uri')
+ if not video_url:
continue
- qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
- qualities.sort()
- http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
- http_url_basename = url_basename(video_url)
- if m3u8_formats:
- self._sort_formats(m3u8_formats)
- m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
- m3u8_formats))
- if len(qualities) == len(m3u8_formats):
- for q, m3u8_format in zip(qualities, m3u8_formats):
- f = m3u8_format.copy()
- f.update({
- 'url': video_url.replace(http_url_basename, http_template % q),
- 'format_id': f['format_id'].replace('hls', 'http'),
- 'protocol': 'http',
- })
- formats.append(f)
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ manifest_url = video_url
+ video_url += '&' if '?' in video_url else '?'
+ video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=concat('hds')))
+ elif ext == 'm3u8':
+ manifest_url = video_url
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id=concat('hls'),
+ fatal=False) # m3u8 sometimes fail
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
else:
- for q in qualities:
- formats.append({
- 'url': video_url.replace(http_url_basename, http_template % q),
- 'ext': 'mp4',
- 'format_id': 'http-%d' % q,
- 'tbr': q,
- })
-
- if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
+ qualities_basename = self._search_regex(
+ '/([^/]+)\.csmil/',
+ manifest_url, 'qualities basename', default=None)
+ if not qualities_basename:
+ continue
+ QUALITIES_RE = r'((,\d+k)+,?)'
+ qualities = self._search_regex(
+ QUALITIES_RE, qualities_basename,
+ 'qualities', default=None)
+ if not qualities:
+ continue
+ qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
+ qualities.sort()
+ http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
+ http_url_basename = url_basename(video_url)
+ if m3u8_formats:
+ self._sort_formats(m3u8_formats)
+ m3u8_formats = list(filter(
+ lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+ m3u8_formats))
+ if len(qualities) == len(m3u8_formats):
+ for q, m3u8_format in zip(qualities, m3u8_formats):
+ f = m3u8_format.copy()
+ f.update({
+ 'url': video_url.replace(http_url_basename, http_template % q),
+ 'format_id': f['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ formats.append(f)
+ else:
+ for q in qualities:
+ formats.append({
+ 'url': video_url.replace(http_url_basename, http_template % q),
+ 'ext': 'mp4',
+ 'format_id': 'http-%d' % q,
+ 'tbr': q,
+ })
+
+ if not formats and not info.get('VODSettings'):
self.raise_geo_restricted()
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/watchindianporn.py
index e33483674..5d3b5bdb4 100644
--- a/youtube_dl/extractor/sexykarma.py
+++ b/youtube_dl/extractor/watchindianporn.py
@@ -11,61 +11,27 @@ from ..utils import (
)
-class SexyKarmaIE(InfoExtractor):
- IE_DESC = 'Sexy Karma and Watch Indian Porn'
- _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
- _TESTS = [{
- 'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
- 'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
+class WatchIndianPornIE(InfoExtractor):
+ IE_DESC = 'Watch Indian Porn'
+ _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+ _TEST = {
+ 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
+ 'md5': '249589a164dde236ec65832bfce17440',
'info_dict': {
- 'id': 'yHI70cOyIHt',
- 'display_id': 'taking-a-quick-pee',
+ 'id': 'RZa2avywNPa',
+ 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
'ext': 'mp4',
- 'title': 'Taking a quick pee.',
+ 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
'thumbnail': 're:^https?://.*\.jpg$',
- 'uploader': 'wildginger7',
- 'upload_date': '20141008',
- 'duration': 22,
+ 'uploader': 'LoveJay',
+ 'upload_date': '20160428',
+ 'duration': 226,
'view_count': int,
'comment_count': int,
'categories': list,
'age_limit': 18,
}
- }, {
- 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
- 'md5': 'dd216c68d29b49b12842b9babe762a5d',
- 'info_dict': {
- 'id': '8Id6EZPbuHf',
- 'display_id': 'pot-pixie-tribute',
- 'ext': 'mp4',
- 'title': 'pot_pixie tribute',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'uploader': 'banffite',
- 'upload_date': '20141013',
- 'duration': 16,
- 'view_count': int,
- 'comment_count': int,
- 'categories': list,
- 'age_limit': 18,
- }
- }, {
- 'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
- 'md5': '9afb80675550406ed9a63ac2819ef69d',
- 'info_dict': {
- 'id': 'dW2mtctxJfs',
- 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
- 'ext': 'mp4',
- 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'uploader': 'Don',
- 'upload_date': '20140213',
- 'duration': 83,
- 'view_count': int,
- 'comment_count': int,
- 'categories': list,
- 'age_limit': 18,
- }
- }]
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -109,6 +75,9 @@ class SexyKarmaIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'url': video_url,
+ 'http_headers': {
+ 'Referer': url,
+ },
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
index 7a90cc60c..ce3723b55 100644
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -18,9 +18,23 @@ from ..utils import (
class YandexMusicBaseIE(InfoExtractor):
@staticmethod
def _handle_error(response):
- error = response.get('error')
- if error:
- raise ExtractorError(error, expected=True)
+ if isinstance(response, dict):
+ error = response.get('error')
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ def _download_webpage(self, *args, **kwargs):
+ webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
+ if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
+ raise ExtractorError(
+ 'YandexMusic has considered youtube-dl requests automated and '
+ 'asks you to solve a CAPTCHA. You can either wait for some '
+ 'time until unblocked and optionally use --sleep-interval '
+ 'in future or alternatively you can go to https://music.yandex.ru/ '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'youtube-dl with --cookies',
+ expected=True)
+ return webpage
def _download_json(self, *args, **kwargs):
response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
@@ -47,7 +61,8 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
'album_artist': 'Carlo Ambrosio',
'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
'release_year': '2009',
- }
+ },
+ 'skip': 'Travis CI servers blocked by YandexMusic',
}
def _get_track_url(self, storage_dir, track_id):
@@ -139,6 +154,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
},
'playlist_count': 50,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
}
def _real_extract(self, url):
@@ -171,6 +187,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
},
'playlist_count': 6,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
# https://github.com/rg3/youtube-dl/issues/6666)
@@ -180,6 +197,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'title': 'Музыка 90-х',
},
'playlist_count': 310,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 44f98d294..b7c3cb63f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2139,10 +2139,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-class YoutubeSearchURLIE(InfoExtractor):
+class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
@@ -2157,32 +2158,8 @@ class YoutubeSearchURLIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
-
webpage = self._download_webpage(url, query)
- result_code = self._search_regex(
- r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
-
- part_codes = re.findall(
- r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
- entries = []
- for part_code in part_codes:
- part_title = self._html_search_regex(
- [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
- part_url_snippet = self._html_search_regex(
- r'(?s)href="([^"]+)"', part_code, 'item URL')
- part_url = compat_urlparse.urljoin(
- 'https://www.youtube.com/', part_url_snippet)
- entries.append({
- '_type': 'url',
- 'url': part_url,
- 'title': part_title,
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'title': query,
- }
+ return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):