aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py22
-rw-r--r--youtube_dl/extractor/addanime.py15
-rw-r--r--youtube_dl/extractor/aftonbladet.py19
-rw-r--r--youtube_dl/extractor/bliptv.py1
-rw-r--r--youtube_dl/extractor/bloomberg.py25
-rw-r--r--youtube_dl/extractor/cnn.py8
-rw-r--r--youtube_dl/extractor/common.py2
-rw-r--r--youtube_dl/extractor/crunchyroll.py8
-rw-r--r--youtube_dl/extractor/dailymotion.py10
-rw-r--r--youtube_dl/extractor/dhm.py73
-rw-r--r--youtube_dl/extractor/douyutv.py57
-rw-r--r--youtube_dl/extractor/dreisat.py26
-rw-r--r--youtube_dl/extractor/drtv.py21
-rw-r--r--youtube_dl/extractor/dump.py6
-rw-r--r--youtube_dl/extractor/dumpert.py60
-rw-r--r--youtube_dl/extractor/eagleplatform.py1
-rw-r--r--youtube_dl/extractor/ellentv.py19
-rw-r--r--youtube_dl/extractor/eroprofile.py52
-rw-r--r--youtube_dl/extractor/francetv.py36
-rw-r--r--youtube_dl/extractor/gamersyde.py70
-rw-r--r--youtube_dl/extractor/generic.py68
-rw-r--r--youtube_dl/extractor/hitbox.py24
-rw-r--r--youtube_dl/extractor/libsyn.py59
-rw-r--r--youtube_dl/extractor/livestream.py5
-rw-r--r--youtube_dl/extractor/miomio.py93
-rw-r--r--youtube_dl/extractor/mixcloud.py2
-rw-r--r--youtube_dl/extractor/mlb.py6
-rw-r--r--youtube_dl/extractor/nbc.py53
-rw-r--r--youtube_dl/extractor/npo.py5
-rw-r--r--youtube_dl/extractor/nrk.py73
-rw-r--r--youtube_dl/extractor/phoenix.py40
-rw-r--r--youtube_dl/extractor/playfm.py87
-rw-r--r--youtube_dl/extractor/pornhub.py17
-rw-r--r--youtube_dl/extractor/pornovoisines.py96
-rw-r--r--youtube_dl/extractor/prosiebensat1.py6
-rw-r--r--youtube_dl/extractor/radiojavan.py67
-rw-r--r--youtube_dl/extractor/rai.py83
-rw-r--r--youtube_dl/extractor/redtube.py9
-rw-r--r--youtube_dl/extractor/rtve.py13
-rw-r--r--youtube_dl/extractor/safari.py157
-rw-r--r--youtube_dl/extractor/slideshare.py2
-rw-r--r--youtube_dl/extractor/soundcloud.py4
-rw-r--r--youtube_dl/extractor/spankbang.py60
-rw-r--r--youtube_dl/extractor/teamcoco.py54
-rw-r--r--youtube_dl/extractor/ted.py51
-rw-r--r--youtube_dl/extractor/theplatform.py38
-rw-r--r--youtube_dl/extractor/twentytwotracks.py86
-rw-r--r--youtube_dl/extractor/twitch.py6
-rw-r--r--youtube_dl/extractor/udn.py64
-rw-r--r--youtube_dl/extractor/ultimedia.py5
-rw-r--r--youtube_dl/extractor/varzesh3.py45
-rw-r--r--youtube_dl/extractor/vessel.py127
-rw-r--r--youtube_dl/extractor/vimeo.py10
-rw-r--r--youtube_dl/extractor/vine.py44
-rw-r--r--youtube_dl/extractor/xuite.py14
-rw-r--r--youtube_dl/extractor/yahoo.py15
-rw-r--r--youtube_dl/extractor/youporn.py2
-rw-r--r--youtube_dl/extractor/youtube.py142
58 files changed, 1931 insertions, 332 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index dc272af82..894aa5b43 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -107,6 +107,7 @@ from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
+from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
@@ -115,6 +116,7 @@ from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
+from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
@@ -176,6 +178,7 @@ from .gameone import (
GameOneIE,
GameOnePlaylistIE,
)
+from .gamersyde import GamersydeIE
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
@@ -251,6 +254,7 @@ from .letv import (
LetvTvIE,
LetvPlaylistIE
)
+from .libsyn import LibsynIE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
@@ -274,6 +278,7 @@ from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
@@ -309,6 +314,8 @@ from .nba import NBAIE
from .nbc import (
NBCIE,
NBCNewsIE,
+ NBCSportsIE,
+ NBCSportsVPlayerIE,
)
from .ndr import NDRIE
from .ndtv import NDTVIE
@@ -347,6 +354,7 @@ from .npo import (
)
from .nrk import (
NRKIE,
+ NRKPlaylistIE,
NRKTVIE,
)
from .ntvde import NTVDeIE
@@ -381,6 +389,7 @@ from .pornhub import (
PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
@@ -390,6 +399,7 @@ from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .r7 import R7IE
from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
@@ -419,6 +429,10 @@ from .rutube import (
)
from .rutv import RUTVIE
from .sandia import SandiaIE
+from .safari import (
+ SafariIE,
+ SafariCourseIE,
+)
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
@@ -458,6 +472,7 @@ from .southpark import (
SouthparkDeIE,
)
from .space import SpaceIE
+from .spankbang import SpankBangIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
@@ -525,6 +540,10 @@ from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+ TwentyTwoTracksIE,
+ TwentyTwoTracksGenreIE
+)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -539,13 +558,16 @@ from .udemy import (
UdemyIE,
UdemyCourseIE
)
+from .udn import UDNEmbedIE
from .ultimedia import UltimediaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
+from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
index 203936e54..e3e6d2113 100644
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -11,12 +11,13 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
+ qualities,
)
class AddAnimeIE(InfoExtractor):
- _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
- _TEST = {
+ _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+ _TESTS = [{
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0',
'info_dict': {
@@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor):
'description': 'One Piece 606',
'title': 'One Piece 606',
}
- }
+ }, {
+ 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor):
note='Confirming after redirect')
webpage = self._download_webpage(url, video_id)
+ FORMATS = ('normal', 'hq')
+ quality = qualities(FORMATS)
formats = []
- for format_id in ('normal', 'hq'):
+ for format_id in FORMATS:
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, 'video file URLx',
fatal=False)
@@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor):
formats.append({
'format_id': format_id,
'url': video_url,
+ 'quality': quality(format_id),
})
self._sort_formats(formats)
video_title = self._og_search_title(webpage)
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
index 8442019ea..a117502bc 100644
--- a/youtube_dl/extractor/aftonbladet.py
+++ b/youtube_dl/extractor/aftonbladet.py
@@ -2,10 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import int_or_none
class AftonbladetIE(InfoExtractor):
- _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+ _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
_TEST = {
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
'info_dict': {
@@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor):
formats.append({
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
'ext': 'mp4',
- 'width': fmt['width'],
- 'height': fmt['height'],
- 'tbr': fmt['bitrate'],
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'tbr': int_or_none(fmt.get('bitrate')),
'protocol': 'http',
})
self._sort_formats(formats)
@@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor):
'id': video_id,
'title': internal_meta_json['title'],
'formats': formats,
- 'thumbnail': internal_meta_json['imageUrl'],
- 'description': internal_meta_json['shortPreamble'],
- 'timestamp': internal_meta_json['timePublished'],
- 'duration': internal_meta_json['duration'],
- 'view_count': internal_meta_json['views'],
+ 'thumbnail': internal_meta_json.get('imageUrl'),
+ 'description': internal_meta_json.get('shortPreamble'),
+ 'timestamp': int_or_none(internal_meta_json.get('timePublished')),
+ 'duration': int_or_none(internal_meta_json.get('duration')),
+ 'view_count': int_or_none(internal_meta_json.get('views')),
}
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 8c7ba4b91..b632ce967 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor):
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, subtitles_urls)
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 4a88ccd13..0dca29b71 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -6,32 +6,39 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
- _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+ _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+ 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
- 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+ 'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
def _real_extract(self, url):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
-
- f4m_url = self._search_regex(
- r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
- 'f4m url')
+ video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
title = re.sub(': Video$', '', self._og_search_title(webpage))
+ embed_info = self._download_json(
+ 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ formats = []
+ for stream in embed_info['streams']:
+ if stream["muxing_format"] == "TS":
+ formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+ else:
+ formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+ self._sort_formats(formats)
+
return {
- 'id': name.split('-')[-1],
+ 'id': video_id,
'title': title,
- 'formats': self._extract_f4m_formats(f4m_url, name),
+ 'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 90ea07438..5efc5f4fe 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,12 @@ class CNNIE(InfoExtractor):
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
}
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e5245ec3f..530c449c1 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -822,7 +822,7 @@ class InfoExtractor(object):
(media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
- 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
+ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
'url': manifest_url,
'ext': 'flv',
'tbr': tbr,
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index e64b88fbc..6ded723c9 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -23,7 +23,6 @@ from ..utils import (
)
from ..aes import (
aes_cbc_decrypt,
- inc,
)
@@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
key = obfuscate_key(id)
- class Counter:
- __value = iv
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 4f67c3aac..7615ecd4b 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
- request.add_header('Cookie', 'family_filter=off')
- request.add_header('Cookie', 'ff=off')
+ request.add_header('Cookie', 'family_filter=off; ff=off')
return request
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
+ embed_request = self._build_request(embed_url)
+ embed_page = self._download_webpage(
+ embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
@@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py
new file mode 100644
index 000000000..3ed1f1663
--- /dev/null
+++ b/youtube_dl/extractor/dhm.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+ IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+ _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+ 'md5': '11c475f670209bf6acca0b2b7ef51827',
+ 'info_dict': {
+ 'id': 'the-marshallplan-at-work-in-west-germany',
+ 'ext': 'flv',
+ 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+ 'description': 'md5:1fabd480c153f97b07add61c44407c82',
+ 'duration': 660,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+ 'md5': '09890226332476a3e3f6f2cb74734aa5',
+ 'info_dict': {
+ 'id': 'rolle-1',
+ 'ext': 'flv',
+ 'title': 'ROLLE 1',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ playlist_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+ playlist = self._download_xml(playlist_url, video_id)
+
+ track = playlist.find(
+ './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+ video_url = xpath_text(
+ track, './{http://xspf.org/ns/0/}location',
+ 'video url', fatal=True)
+ thumbnail = xpath_text(
+ track, './{http://xspf.org/ns/0/}image',
+ 'thumbnail')
+
+ title = self._search_regex(
+ [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+ webpage, 'title').strip()
+ description = self._html_search_regex(
+ r'<p><strong>Description:</strong>(.+?)</p>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+ webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
index d7956e6e4..479430c51 100644
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -1,19 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
+import time
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
class DouyuTVIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'info_dict': {
- 'id': 'iseven',
+ 'id': '17732',
+ 'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 'md5:9e525642c25a0a24302869937cf69d17',
+ 'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
@@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor):
'params': {
'skip_download': True,
}
- }
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'douyu小漠',
+ 'uploader_id': '3769985',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if video_id.isdigit():
+ room_id = video_id
+ else:
+ page = self._download_webpage(url, video_id)
+ room_id = self._html_search_regex(
+ r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+ prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+ room_id, int(time.time()))
+
+ auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
config = self._download_json(
- 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
+ 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+ video_id)
data = config['data']
error_code = config.get('error', 0)
- show_status = data.get('show_status')
if error_code is not 0:
- raise ExtractorError(
- 'Server reported error %i' % error_code, expected=True)
+ error_desc = 'Server reported error %i' % error_code
+ if isinstance(data, (compat_str, compat_basestring)):
+ error_desc += ': ' + data
+ raise ExtractorError(error_desc, expected=True)
+ show_status = data.get('show_status')
# 1 = live, 2 = offline
if show_status == '2':
raise ExtractorError(
@@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor):
base_url = data['rtmp_url']
live_path = data['rtmp_live']
- title = self._live_title(data['room_name'])
+ title = self._live_title(unescapeHTML(data['room_name']))
description = data.get('show_details')
thumbnail = data.get('room_src')
@@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor):
self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': room_id,
+ 'display_id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 69ca75423..05bb22ddf 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -3,22 +3,25 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+)
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
- 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
- 'md5': '9dcfe344732808dbfcc901537973c922',
+ 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
+ 'md5': 'be37228896d30a88f315b638900a026e',
'info_dict': {
- 'id': '36983',
+ 'id': '45918',
'ext': 'mp4',
- 'title': 'Kaffeeland Schweiz',
- 'description': 'md5:cc4424b18b75ae9948b13929a0814033',
+ 'title': 'Waidmannsheil',
+ 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': '3sat',
- 'upload_date': '20130622'
+ 'upload_date': '20140913'
}
}
@@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor):
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
+ status_code = details_doc.find('./status/statuscode')
+ if status_code is not None and status_code.text != 'ok':
+ code = status_code.text
+ if code == 'notVisibleAnymore':
+ message = 'Video %s is not available' % video_id
+ else:
+ message = '%s returned error: %s' % (self.IE_NAME, code)
+ raise ExtractorError(message, expected=True)
+
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{
'width': int(te.attrib['key'].partition('x')[0]),
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index 8257e35a4..f25ab319e 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor, ExtractorError
@@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
- 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
- 'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
+ 'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
'info_dict': {
- 'id': 'partiets-mand-7-8',
+ 'id': 'panisk-paske-5',
'ext': 'mp4',
- 'title': 'Partiets mand (7:8)',
- 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
- 'timestamp': 1403047940,
- 'upload_date': '20140617',
- 'duration': 1299.040,
+ 'title': 'Panisk Påske (5)',
+ 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
+ 'timestamp': 1426984612,
+ 'upload_date': '20150322',
+ 'duration': 1455,
},
}
@@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
+ if '>Programmet er ikke længere tilgængeligt' in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
video_id = self._search_regex(
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
webpage, 'video id')
diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py
index 6b651778a..ff78d4fd2 100644
--- a/youtube_dl/extractor/dump.py
+++ b/youtube_dl/extractor/dump.py
@@ -28,12 +28,12 @@ class DumpIE(InfoExtractor):
video_url = self._search_regex(
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
- thumb = self._og_search_thumbnail(webpage)
- title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
return {
'id': video_id,
'title': title,
'url': video_url,
- 'thumbnail': thumb,
+ 'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
new file mode 100644
index 000000000..9c594b757
--- /dev/null
+++ b/youtube_dl/extractor/dumpert.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+ _TEST = {
+ 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ req = compat_urllib_request.Request(url)
+ req.add_header('Cookie', 'nsfw=1')
+ webpage = self._download_webpage(req, video_id)
+
+ files_base64 = self._search_regex(
+ r'data-files="([^"]+)"', webpage, 'data files')
+
+ files = self._parse_json(
+ base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+ video_id)
+
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+ formats = [{
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ } for format_id, video_url in files.items() if format_id != 'still']
+ self._sort_formats(formats)
+
+ title = self._html_search_meta(
+ 'title', webpage) or self._og_search_title(webpage)
+ description = self._html_search_meta(
+ 'description', webpage) or self._og_search_description(webpage)
+ thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats
+ }
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index 7173371ee..688dfc2f7 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor):
'duration': 216,
'view_count': int,
},
+ 'skip': 'Georestricted',
}]
def _handle_error(self, response):
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
index fc92ff825..5154bbd7f 100644
--- a/youtube_dl/extractor/ellentv.py
+++ b/youtube_dl/extractor/ellentv.py
@@ -13,15 +13,15 @@ from ..utils import (
class EllenTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
_TESTS = [{
- 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
- 'md5': 'e4af06f3bf0d5f471921a18db5764642',
+ 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
+ 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
'info_dict': {
- 'id': '0-7jqrsr18',
+ 'id': '0-ipq1gsai',
'ext': 'mp4',
- 'title': 'What\'s Wrong with These Photos? A Whole Lot',
- 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
- 'timestamp': 1406876400,
- 'upload_date': '20140801',
+ 'title': 'Fast Fingers of Fate',
+ 'description': 'md5:686114ced0a032926935e9015ee794ac',
+ 'timestamp': 1428033600,
+ 'upload_date': '20150403',
}
}, {
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
@@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_meta('VideoURL', webpage, 'url')
+
+ video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True)
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'description') or self._og_search_description(webpage)
timestamp = parse_iso8601(self._search_regex(
r'<span class="publish-date"><time datetime="([^"]+)">',
- webpage, 'timestamp'))
+ webpage, 'timestamp', fatal=False))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
index 79e2fbd39..0cbca90b0 100644
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dl/extractor/eroprofile.py
@@ -1,11 +1,17 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
class EroProfileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
- _TEST = {
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
'info_dict': {
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ query = compat_urllib_parse.urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
+
+ def _real_initialize(self):
+ self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ raise ExtractorError(
+ 'This video requires login. Please specify a username and password and try again.', expected=True)
+
video_id = self._search_regex(
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 170d68075..edf555b29 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -14,7 +14,9 @@ from ..utils import (
clean_html,
ExtractorError,
int_or_none,
+ float_or_none,
parse_duration,
+ determine_ext,
)
@@ -50,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
if not video_url:
continue
format_id = video['format']
- if video_url.endswith('.f4m'):
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
if georestricted:
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
@@ -60,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
- f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
- for f4m_format in f4m_formats:
- f4m_format['preference'] = 1
- formats.extend(f4m_formats)
- elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
@@ -86,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
'title': info['titre'],
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
- 'duration': parse_duration(info['duree']),
+ 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
'formats': formats,
}
@@ -260,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
- 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
- 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
+ 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
+ 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
'info_dict': {
- 'id': 'EV_22853',
+ 'id': 'EV_50111',
'ext': 'flv',
- 'title': 'Dans les jardins de William Christie - Le Camus',
- 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
- 'upload_date': '20140829',
- 'timestamp': 1409317200,
+ 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
+ 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
+ 'upload_date': '20150320',
+ 'timestamp': 1426892400,
+ 'duration': 2760.9,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
+
webpage = self._download_webpage(url, name)
+
+ if ">Ce live n'est plus disponible en replay<" in webpage:
+ raise ExtractorError('Video %s is not available' % name, expected=True)
+
video_id, catalogue = self._search_regex(
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py
new file mode 100644
index 000000000..d545e01bb
--- /dev/null
+++ b/youtube_dl/extractor/gamersyde.py
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ parse_duration,
+ remove_start,
+)
+
+
+class GamersydeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
+ _TEST = {
+ 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
+ 'md5': 'f38d400d32f19724570040d5ce3a505f',
+ 'info_dict': {
+ 'id': '34371',
+ 'ext': 'mp4',
+ 'duration': 372,
+ 'title': 'Bloodborne - Birth of a hero',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
+ display_id, transform_source=js_to_json)
+
+ formats = []
+ for source in playlist['sources']:
+ video_url = source.get('file')
+ if not video_url:
+ continue
+ format_id = source.get('label')
+ f = {
+ 'url': video_url,
+ 'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
+ if m:
+ f.update({
+ 'height': int(m.group('height')),
+ 'fps': int(m.group('fps')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ title = remove_start(playlist['title'], '%s - ' % video_id)
+ thumbnail = playlist.get('image')
+ duration = parse_duration(self._search_regex(
+ r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 8716e4503..6c212efac 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -29,10 +29,12 @@ from ..utils import (
xpath_text,
)
from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
class GenericIE(InfoExtractor):
@@ -527,6 +529,17 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Viddler'],
},
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ },
# jwplayer YouTube
{
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
@@ -609,6 +622,16 @@ class GenericIE(InfoExtractor):
'age_limit': 0,
},
},
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ },
+ },
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -618,6 +641,27 @@ class GenericIE(InfoExtractor):
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'http://www.udn.com/news/story/7314/822787',
+ 'md5': 'de06b4c90b042c128395a88f0384817e',
+ 'info_dict': {
+ 'id': '300040',
+ 'ext': 'mp4',
+ 'title': '生物老師男變女 全校挺"做自己"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
}
]
@@ -1013,6 +1057,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
@@ -1219,6 +1269,24 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'Pladform')
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # Look for UDN embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+ if mobj is not None:
+ return self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py
index 84bd7c080..d606429ca 100644
--- a/youtube_dl/extractor/hitbox.py
+++ b/youtube_dl/extractor/hitbox.py
@@ -10,6 +10,7 @@ from ..utils import (
float_or_none,
int_or_none,
compat_str,
+ determine_ext,
)
@@ -147,12 +148,27 @@ class HitboxLiveIE(HitboxIE):
servers.append(base_url)
for stream in cdn.get('bitrates'):
label = stream.get('label')
- if label != 'Auto':
+ if label == 'Auto':
+ continue
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ bitrate = int_or_none(stream.get('bitrate'))
+ if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+ if not stream_url.startswith('http'):
+ continue
formats.append({
- 'url': '%s/%s' % (base_url, stream.get('url')),
+ 'url': stream_url,
'ext': 'mp4',
- 'vbr': stream.get('bitrate'),
- 'resolution': label,
+ 'tbr': bitrate,
+ 'format_note': label,
+ 'rtmp_live': True,
+ })
+ else:
+ formats.append({
+ 'url': '%s/%s' % (base_url, stream_url),
+ 'ext': 'mp4',
+ 'tbr': bitrate,
'rtmp_live': True,
'format_note': host,
'page_url': url,
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py
new file mode 100644
index 000000000..9ab1416f5
--- /dev/null
+++ b/youtube_dl/extractor/libsyn.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LibsynIE(InfoExtractor):
+ _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
+ 'md5': '443360ee1b58007bc3dcf09b41d093bb',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = [{
+ 'url': media_url,
+ } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
+
+ podcast_title = self._search_regex(
+ r'<h2>([^<]+)</h2>', webpage, 'title')
+ episode_title = self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
+
+ title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+ description = self._html_search_regex(
+ r'<div id="info_text_body">(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ release_date = unified_strdate(self._search_regex(
+ r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': release_date,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 2467f8bdd..ec309dadd 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -21,7 +21,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
- _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+ _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -51,6 +51,9 @@ class LivestreamIE(InfoExtractor):
}, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
+ }, {
+ 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+ 'only_matching': True,
}]
def _parse_smil(self, video_id, smil_url):
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py
new file mode 100644
index 000000000..cc3f27194
--- /dev/null
+++ b/youtube_dl/extractor/miomio.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ int_or_none,
+)
+
+
+class MioMioIE(InfoExtractor):
+ IE_NAME = 'miomio.tv'
+ _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.miomio.tv/watch/cc179734/',
+ 'md5': '48de02137d0739c15b440a224ad364b9',
+ 'info_dict': {
+ 'id': '179734',
+ 'ext': 'flv',
+ 'title': '手绘动漫鬼泣但丁全程画法',
+ 'duration': 354,
+ },
+ }, {
+ 'url': 'http://www.miomio.tv/watch/cc184024/',
+ 'info_dict': {
+ 'id': '43729',
+ 'title': '《动漫同人插画绘制》',
+ },
+ 'playlist_mincount': 86,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(
+ 'description', webpage, 'title', fatal=True)
+
+ mioplayer_path = self._search_regex(
+ r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
+
+ xml_config = self._search_regex(
+ r'flashvars="type=sina&amp;(.+?)&amp;',
+ webpage, 'xml config')
+
+ # skipping the following page causes lags and eventually connection drop-outs
+ self._request_webpage(
+ 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (video_id, random.randint(100, 999)),
+ video_id)
+
+ # the following xml contains the actual configuration information on the video file(s)
+ vid_config = self._download_xml(
+ 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
+ video_id)
+
+ http_headers = {
+ 'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
+ }
+
+ entries = []
+ for f in vid_config.findall('./durl'):
+ segment_url = xpath_text(f, 'url', 'video url')
+ if not segment_url:
+ continue
+ order = xpath_text(f, 'order', 'order')
+ segment_id = video_id
+ segment_title = title
+ if order:
+ segment_id += '-%s' % order
+ segment_title += ' part %s' % order
+ entries.append({
+ 'id': segment_id,
+ 'url': segment_url,
+ 'title': segment_title,
+ 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
+ 'http_headers': http_headers,
+ })
+
+ if len(entries) == 1:
+ segment = entries[0]
+ segment['id'] = video_id
+ segment['title'] = title
+ return segment
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'entries': entries,
+ 'title': title,
+ 'http_headers': http_headers,
+ }
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 21aea0c55..84f291558 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
like_count = str_to_int(self._search_regex(
- r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
+ r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
webpage, 'like count', fatal=False))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py
index 1a241aca7..e369551c2 100644
--- a/youtube_dl/extractor/mlb.py
+++ b/youtube_dl/extractor/mlb.py
@@ -10,7 +10,7 @@ from ..utils import (
class MLBIE(InfoExtractor):
- _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+ _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
'only_matching': True,
},
+ {
+ 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 3645d3033..ecd0ac8b1 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -14,7 +14,7 @@ from ..utils import (
class NBCIE(InfoExtractor):
- _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+ _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
return self.url_result(theplatform_url)
+class NBCSportsVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
+ }, {
+ 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ iframe_m = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._og_search_video_url(webpage)
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+ # Does not include https because its certificate is invalid
+ _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'flv',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 557dffa46..5d8448571 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -231,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
stream_url = self._download_json(
stream_info['stream'], display_id,
'Downloading %s URL' % stream_type,
- transform_source=strip_jsonp)
+ 'Unable to download %s URL' % stream_type,
+ transform_source=strip_jsonp, fatal=False)
+ if not stream_url:
+ continue
if stream_type == 'hds':
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
# f4m downloader downloads only piece of live stream
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index bff36f9d3..e91d3a248 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -14,46 +14,48 @@ from ..utils import (
class NRKIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+ _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_TESTS = [
{
- 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
- 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'bccd850baebefe23b56d708a113229c2',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 263,
}
},
{
- 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
- 'md5': '3471f2a51718195164e88f46bf427668',
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ 'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
}
},
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id)
-
- video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+ video_id = self._match_id(url)
data = self._download_json(
- 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+ 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+ video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
- raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+ raise ExtractorError(
+ 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
+ expected=True)
+
+ video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
- video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+ duration = parse_duration(data.get('duration'))
images = data.get('images')
if images:
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
'ext': 'flv',
'title': data['title'],
'description': data['description'],
+ 'duration': duration,
'thumbnail': thumbnail,
}
+class NRKPlaylistIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, 'NRK')
+ for video_id in re.findall(
+ r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+ webpage)
+ ]
+
+ playlist_title = self._og_search_title(webpage)
+ playlist_description = self._og_search_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py
index a20672c0c..46cebc0d7 100644
--- a/youtube_dl/extractor/phoenix.py
+++ b/youtube_dl/extractor/phoenix.py
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
class PhoenixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.phoenix.de/content/884301',
- 'md5': 'ed249f045256150c92e72dbb70eadec6',
- 'info_dict': {
- 'id': '884301',
- 'ext': 'mp4',
- 'title': 'Michael Krons mit Hans-Werner Sinn',
- 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
- 'upload_date': '20141025',
- 'uploader': 'Im Dialog',
- }
- }
+ _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+ (?:
+ phoenix/die_sendungen/(?:[^/]+/)?
+ )?
+ (?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.phoenix.de/content/884301',
+ 'md5': 'ed249f045256150c92e72dbb70eadec6',
+ 'info_dict': {
+ 'id': '884301',
+ 'ext': 'mp4',
+ 'title': 'Michael Krons mit Hans-Werner Sinn',
+ 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+ 'upload_date': '20141025',
+ 'uploader': 'Im Dialog',
+ }
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py
index 9576aed0e..e766ccca3 100644
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- float_or_none,
int_or_none,
- str_to_int,
+ parse_iso8601,
)
class PlayFMIE(InfoExtractor):
IE_NAME = 'play.fm'
- _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
_TEST = {
- 'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+ 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
'md5': 'c505f8307825a245d0c7ad1850001f22',
'info_dict': {
- 'id': '137220',
+ 'id': '71276',
'ext': 'mp3',
- 'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
- 'uploader': 'Sven Tasnadi',
- 'uploader_id': 'sventasnadi',
- 'duration': 5627.428,
- 'upload_date': '20140712',
+ 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+ 'description': '',
+ 'duration': 5627,
+ 'timestamp': 1406033781,
+ 'upload_date': '20140722',
+ 'uploader': 'Dan Drastic',
+ 'uploader_id': '71170',
'view_count': int,
'comment_count': int,
- 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- upload_date = mobj.group('upload_date')
-
- rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
- req = compat_urllib_request.Request(
- 'http://www.play.fm/flexRead/recording', data=rec_data)
- req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- rec_doc = self._download_xml(req, video_id)
+ slug = mobj.group('slug')
- error_node = rec_doc.find('./error')
- if error_node is not None:
- raise ExtractorError('An error occured: %s (code %s)' % (
- error_node.text, rec_doc.find('./status').text))
+ recordings = self._download_json(
+ 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
- recording = rec_doc.find('./recording')
- title = recording.find('./title').text
- view_count = str_to_int(recording.find('./stats/playcount').text)
- comment_count = str_to_int(recording.find('./stats/comments').text)
- duration = float_or_none(recording.find('./duration').text, scale=1000)
- thumbnail = recording.find('./image').text
+ error = recordings.get('error')
+ if isinstance(error, dict):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+ expected=True)
- artist = recording.find('./artists/artist')
- uploader = artist.find('./name').text
- uploader_id = artist.find('./slug').text
-
- video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
- 'http:', recording.find('./url').text,
- recording.find('./_class').text, recording.find('./file_id').text,
- rec_doc.find('./uuid').text, video_id,
- rec_doc.find('./jingle/file_id').text,
- 'http%3A%2F%2Fwww.play.fm%2Fplayer',
- )
+ audio_url = recordings['audio']
+ video_id = compat_str(recordings.get('id') or video_id)
+ title = recordings['title']
+ description = recordings.get('description')
+ duration = int_or_none(recordings.get('recordingDuration'))
+ timestamp = parse_iso8601(recordings.get('created_at'))
+ uploader = recordings.get('page', {}).get('title')
+ uploader_id = compat_str(recordings.get('page', {}).get('id'))
+ view_count = int_or_none(recordings.get('playCount'))
+ comment_count = int_or_none(recordings.get('commentCount'))
+ categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
return {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp3',
- 'filesize': int_or_none(recording.find('./size').text),
+ 'url': audio_url,
'title': title,
- 'upload_date': upload_date,
- 'view_count': view_count,
- 'comment_count': comment_count,
+ 'description': description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
}
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3a27e3789..0c8b731cf 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
}
def _extract_count(self, pattern, webpage, name):
- count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
- if count:
- count = str_to_int(count)
- return count
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
- view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
- like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
- dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(
+ r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(
+ r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count(
- r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
new file mode 100644
index 000000000..9688ed948
--- /dev/null
+++ b/youtube_dl/extractor/pornovoisines.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+
+ _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
+ '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
+
+ _SERVER_NUMBERS = (1, 2)
+
+ _TEST = {
+ 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
+ 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+ 'info_dict': {
+ 'id': '1285',
+ 'display_id': 'recherche-appartement',
+ 'ext': 'mp4',
+ 'title': 'Recherche appartement',
+ 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20140925',
+ 'duration': 120,
+ 'view_count': int,
+ 'average_rating': float,
+ 'categories': ['Débutante', 'Scénario', 'Sodomie'],
+ 'age_limit': 18,
+ }
+ }
+
+ @classmethod
+ def build_video_url(cls, num):
+ return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self.build_video_url(video_id)
+
+ title = self._html_search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
+ description = self._html_search_regex(
+ r'<article id="descriptif">(.+?)</article>',
+ webpage, "description", fatal=False, flags=re.DOTALL)
+
+ thumbnail = self._search_regex(
+ r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
+ webpage, 'thumbnail', fatal=False)
+ if thumbnail:
+ thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
+ duration = int_or_none(self._search_regex(
+ 'Durée (\d+)', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'(\d+) vues', webpage, 'view count', fatal=False))
+ average_rating = self._search_regex(
+ r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+ if average_rating:
+ average_rating = float_or_none(average_rating.replace(',', '.'))
+
+ categories = self._html_search_meta(
+ 'keywords', webpage, 'categories', fatal=False)
+ if categories:
+ categories = [category.strip() for category in categories.split(',')]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'age_limit': 18,
+ }
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 385681d06..7cc799664 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -10,6 +10,7 @@ from ..compat import (
)
from ..utils import (
unified_strdate,
+ int_or_none,
)
@@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):
'info_dict': {
'id': '2104602',
'ext': 'mp4',
- 'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+ 'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
@@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):
urls_sources = urls_sources.values()
def fix_bitrate(bitrate):
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources:
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
new file mode 100644
index 000000000..884c28420
--- /dev/null
+++ b/youtube_dl/extractor/radiojavan.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import(
+ unified_strdate,
+ str_to_int,
+)
+
+
+class RadioJavanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+ 'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+ 'info_dict': {
+ 'id': 'chaartaar-ashoobam',
+ 'ext': 'mp4',
+ 'title': 'Chaartaar - Ashoobam',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'upload_date': '20150215',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = [{
+ 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class="date_added">Date added: ([^<]+)<',
+ webpage, 'upload date', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ r'class="views">Plays: ([\d,]+)',
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) likes',
+ webpage, 'like count', fatal=False))
+ dislike_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) dislikes',
+ webpage, 'dislike count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 144e33982..1631faf29 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -13,7 +13,7 @@ from ..utils import (
class RaiIE(InfoExtractor):
- _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+ _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -62,34 +62,78 @@ class RaiIE(InfoExtractor):
'description': 'Edizione delle ore 20:30 ',
}
},
+ {
+ 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+ 'md5': '02b64456f7cc09f96ff14e7dd489017e',
+ 'info_dict': {
+ 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+ 'ext': 'flv',
+ 'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+ 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+ 'uploader': 'RaiTre',
+ }
+ }
]
+ def _extract_relinker_url(self, webpage):
+ return self._proto_relative_url(self._search_regex(
+ [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+ webpage, 'relinker url', default=None))
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ host = mobj.group('host')
- media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+ webpage = self._download_webpage(url, video_id)
- title = media.get('name')
- description = media.get('desc')
- thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
- duration = parse_duration(media.get('length'))
- uploader = media.get('author')
- upload_date = unified_strdate(media.get('date'))
+ relinker_url = self._extract_relinker_url(webpage)
- formats = []
+ if not relinker_url:
+ iframe_path = self._search_regex(
+ r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+ webpage, 'iframe')
+ webpage = self._download_webpage(
+ '%s/%s' % (host, iframe_path), video_id)
+ relinker_url = self._extract_relinker_url(webpage)
- for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
- media_url = media.get(format_id)
- if not media_url:
- continue
- formats.append({
+ relinker = self._download_json(
+ '%s&output=47' % relinker_url, video_id)
+
+ media_url = relinker['video'][0]
+ ct = relinker.get('ct')
+ if ct == 'f4m':
+ formats = self._extract_f4m_formats(
+ media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+ else:
+ formats = [{
'url': media_url,
- 'format_id': format_id,
- 'ext': 'mp4',
- })
+ 'format_id': ct,
+ }]
- subtitles = self.extract_subtitles(video_id, url)
+ json_link = self._html_search_meta(
+ 'jsonlink', webpage, 'JSON link', default=None)
+ if json_link:
+ media = self._download_json(
+ host + json_link, video_id, 'Downloading video JSON')
+ title = media.get('name')
+ description = media.get('desc')
+ thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+ duration = parse_duration(media.get('length'))
+ uploader = media.get('author')
+ upload_date = unified_strdate(media.get('date'))
+ else:
+ title = (self._search_regex(
+ r'var\s+videoTitolo\s*=\s*"(.+?)";',
+ webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = None
+ uploader = self._html_search_meta('Editore', webpage, 'uploader')
+ upload_date = unified_strdate(self._html_search_meta(
+ 'item-date', webpage, 'upload date', default=None))
+
+ subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,
@@ -103,8 +147,7 @@ class RaiIE(InfoExtractor):
'subtitles': subtitles,
}
- def _get_subtitles(self, video_id, url):
- webpage = self._download_webpage(url, video_id)
+ def _get_subtitles(self, video_id, webpage):
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 846b76c81..d6054d717 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -1,17 +1,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import ExtractorError
class RedTubeIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.redtube.com/66418',
+ 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
'id': '66418',
'ext': 'mp4',
- "title": "Sucked on a toilet",
- "age_limit": 18,
+ 'title': 'Sucked on a toilet',
+ 'age_limit': 18,
}
}
@@ -19,6 +21,9 @@ class RedTubeIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+ raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
video_title = self._html_search_regex(
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index 13f071077..849300140 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -8,8 +8,10 @@ import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
float_or_none,
remove_end,
+ std_headers,
struct_unpack,
)
@@ -84,13 +86,22 @@ class RTVEALaCartaIE(InfoExtractor):
'only_matching': True,
}]
+ def _real_initialize(self):
+ user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+ manager_info = self._download_json(
+ 'http://www.rtve.es/odin/loki/' + user_agent_b64,
+ None, 'Fetching manager info')
+ self._manager = manager_info['manager']
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info = self._download_json(
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
video_id)['page']['items'][0]
- png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+ if info['state'] == 'DESPU':
+ raise ExtractorError('The video is no longer available', expected=True)
+ png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
new file mode 100644
index 000000000..10251f29e
--- /dev/null
+++ b/youtube_dl/extractor/safari.py
@@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+ _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+ _NETRC_MACHINE = 'safari'
+
+ _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+ _API_FORMAT = 'json'
+
+ LOGGED_IN = False
+
+ def _real_initialize(self):
+ # We only need to log in once for courses or individual videos
+ if not self.LOGGED_IN:
+ self._login()
+ SafariBaseIE.LOGGED_IN = True
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ raise ExtractorError(
+ self._ACCOUNT_CREDENTIALS_HINT,
+ expected=True)
+
+ headers = std_headers
+ if 'Referer' not in headers:
+ headers['Referer'] = self._LOGIN_URL
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ 'Downloading login form')
+
+ csrf = self._html_search_regex(
+ r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+ login_page, 'csrf token')
+
+ login_form = {
+ 'csrfmiddlewaretoken': csrf,
+ 'email': username,
+ 'password1': password,
+ 'login': 'Sign In',
+ 'next': '',
+ }
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
+ login_page = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+ raise ExtractorError(
+ 'Login failed; make sure your credentials are correct and try again.',
+ expected=True)
+
+ self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ _VALID_URL = r'''(?x)https?://
+ (?:www\.)?safaribooksonline\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book
+ )/
+ (?P<course_id>\d+)/
+ (?:chapter(?:-content)?/)?
+ (?P<part>part\d+)\.html
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+ 'info_dict': {
+ 'id': '2842601850001',
+ 'ext': 'mp4',
+ 'title': 'Introduction',
+ },
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_id = mobj.group('course_id')
+ part = mobj.group('part')
+
+ webpage = self._download_webpage(
+ '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+ part)
+
+ bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if not bc_url:
+ raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+ return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ entries = [
+ self.url_result(chapter, 'Safari')
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py
index 9f79ff5c1..0b717a1e4 100644
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
- r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+ r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 9d4505972..316b2c90f 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -242,7 +242,7 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
- _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -287,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE):
- _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
new file mode 100644
index 000000000..7f060b15b
--- /dev/null
+++ b/youtube_dl/extractor/spankbang.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SpankBangIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+ _TEST = {
+ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+ 'md5': '1cc433e1d6aa14bc376535b8679302f7',
+ 'info_dict': {
+ 'id': '3vvn',
+ 'ext': 'mp4',
+ 'title': 'fantasy solo',
+ 'description': 'dillion harper masturbates on a bed',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'silly2587',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ stream_key = self._html_search_regex(
+ r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
+ webpage, 'stream key')
+
+ formats = [{
+ 'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
+ 'ext': 'mp4',
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+ self._sort_formats(formats)
+
+ title = self._html_search_regex(
+ r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+ description = self._search_regex(
+ r'class="desc"[^>]*>([^<]+)',
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(webpage)
+ uploader = self._search_regex(
+ r'class="user"[^>]*>([^<]+)',
+ webpage, 'uploader', fatal=False)
+
+ age_limit = self._rta_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'age_limit': age_limit,
+ }
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 7cb06f351..1caf08cb7 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -4,7 +4,10 @@ import base64
import re
from .common import InfoExtractor
-from ..utils import qualities
+from ..utils import (
+ ExtractorError,
+ qualities,
+)
class TeamcocoIE(InfoExtractor):
@@ -18,6 +21,7 @@ class TeamcocoIE(InfoExtractor):
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'duration': 504,
'age_limit': 0,
}
}, {
@@ -28,6 +32,7 @@ class TeamcocoIE(InfoExtractor):
'ext': 'mp4',
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'duration': 288,
'age_limit': 0,
}
}
@@ -49,35 +54,37 @@ class TeamcocoIE(InfoExtractor):
video_id = self._html_search_regex(
self._VIDEO_ID_REGEXES, webpage, 'video id')
- embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
- embed = self._download_webpage(
- embed_url, video_id, 'Downloading embed page')
-
- player_data = self._parse_json(self._search_regex(
- r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
+ preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
+ if not preloads:
+ raise ExtractorError('Preload information could not be extracted')
+ preload = max([(len(p), p) for p in preloads])[1]
data = self._parse_json(
- base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
+ base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
for filed in data['files']:
- m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
- if m_format is not None:
- format_id = m_format.group(1)
+ if filed['type'] == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ filed['url'], video_id, ext='mp4'))
else:
- format_id = filed['bitrate']
- tbr = (
- int(filed['bitrate'])
- if filed['bitrate'].isdigit()
- else None)
+ m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+ if m_format is not None:
+ format_id = m_format.group(1)
+ else:
+ format_id = filed['bitrate']
+ tbr = (
+ int(filed['bitrate'])
+ if filed['bitrate'].isdigit()
+ else None)
- formats.append({
- 'url': filed['url'],
- 'ext': 'mp4',
- 'tbr': tbr,
- 'format_id': format_id,
- 'quality': get_quality(format_id),
- })
+ formats.append({
+ 'url': filed['url'],
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
self._sort_formats(formats)
@@ -88,5 +95,6 @@ class TeamcocoIE(InfoExtractor):
'title': data['title'],
'thumbnail': data.get('thumb', {}).get('href'),
'description': data.get('teaser'),
+ 'duration': data.get('duration'),
'age_limit': self._family_friendly_search(webpage),
}
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 4cec06f8b..a2dc14c2b 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -5,9 +5,8 @@ import re
from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import int_or_none
class TEDIE(InfoExtractor):
@@ -170,17 +169,41 @@ class TEDIE(InfoExtractor):
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
- else:
- # Use rtmp downloads
- formats = [{
- 'format_id': f['name'],
- 'url': talk_info['streamer'],
- 'play_path': f['file'],
- 'ext': 'flv',
- 'width': f['width'],
- 'height': f['height'],
- 'tbr': f['bitrate'],
- } for f in talk_info['resources']['rtmp']]
+
+ for format_id, resources in talk_info['resources'].items():
+ if format_id == 'h264':
+ for resource in resources:
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': resource['file'],
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ for resource in resources:
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ })
+ elif format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ resources.get('stream'), video_name, 'mp4', m3u8_id=format_id))
+
+ audio_download = talk_info.get('audioDownload')
+ if audio_download:
+ formats.append({
+ 'url': audio_download,
+ 'format_id': 'audio',
+ })
+
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index feac666f7..6a006b2d2 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -17,6 +17,7 @@ from ..utils import (
ExtractorError,
xpath_with_ns,
unsmuggle_url,
+ int_or_none,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@@ -28,7 +29,7 @@ class ThePlatformIE(InfoExtractor):
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
- _TEST = {
+ _TESTS = [{
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
'info_dict': {
@@ -42,7 +43,20 @@ class ThePlatformIE(InfoExtractor):
# rtmp download
'skip_download': True,
},
- }
+ }, {
+ # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+ 'info_dict': {
+ 'id': '22d_qsQ6MIRT',
+ 'ext': 'flv',
+ 'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+ 'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -92,7 +106,7 @@ class ThePlatformIE(InfoExtractor):
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
- if n.attrib.get('title') == 'Geographic Restriction')
+ if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
@@ -115,7 +129,7 @@ class ThePlatformIE(InfoExtractor):
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
- f4m_node = body.find(_x('smil:seq//smil:video'))
+ f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video'))
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
@@ -127,13 +141,17 @@ class ThePlatformIE(InfoExtractor):
else:
formats = []
switch = body.find(_x('smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:par'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')):
attr = f.attrib
- width = int(attr['width'])
- height = int(attr['height'])
- vbr = int(attr['system-bitrate']) // 1000
+ width = int_or_none(attr.get('width'))
+ height = int_or_none(attr.get('height'))
+ vbr = int_or_none(attr.get('system-bitrate'), 1000)
format_id = '%dx%d_%dk' % (width, height, vbr)
formats.append({
'format_id': format_id,
@@ -145,10 +163,10 @@ class ThePlatformIE(InfoExtractor):
'vbr': vbr,
})
else:
- switch = body.find(_x('smil:seq//smil:switch'))
+ switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
- vbr = int(attr['system-bitrate']) // 1000
+ vbr = int_or_none(attr.get('system-bitrate'), 1000)
ext = determine_ext(attr['src'])
if ext == 'once':
ext = 'mp4'
@@ -167,5 +185,5 @@ class ThePlatformIE(InfoExtractor):
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': info['duration'] // 1000,
+ 'duration': int_or_none(info.get('duration'), 1000),
}
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
new file mode 100644
index 000000000..d6c0ab184
--- /dev/null
+++ b/youtube_dl/extractor/twentytwotracks.py
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+# 22Tracks regularly replace the audio tracks that can be streamed on their
+# site. The tracks usually expire after 1 months, so we can't add tests.
+
+
+class TwentyTwoTracksIE(InfoExtractor):
+ _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
+ IE_NAME = '22tracks:track'
+
+ _API_BASE = 'http://22tracks.com/api'
+
+ def _extract_info(self, city, genre_name, track_id=None):
+ item_id = track_id if track_id else genre_name
+
+ cities = self._download_json(
+ '%s/cities' % self._API_BASE, item_id,
+ 'Downloading cities info',
+ 'Unable to download cities info')
+ city_id = [x['id'] for x in cities if x['slug'] == city][0]
+
+ genres = self._download_json(
+ '%s/genres/%s' % (self._API_BASE, city_id), item_id,
+ 'Downloading %s genres info' % city,
+ 'Unable to download %s genres info' % city)
+ genre = [x for x in genres if x['slug'] == genre_name][0]
+ genre_id = genre['id']
+
+ tracks = self._download_json(
+ '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
+ 'Downloading %s genre tracks info' % genre_name,
+ 'Unable to download track info')
+
+ return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
+
+ def _get_track_url(self, filename, track_id):
+ token = self._download_json(
+ 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
+ track_id, 'Downloading token', 'Unable to download token')
+ return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
+
+ def _extract_track_info(self, track_info, track_id):
+ download_url = self._get_track_url(track_info['filename'], track_id)
+ title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
+ return {
+ 'id': track_id,
+ 'url': download_url,
+ 'ext': 'mp3',
+ 'title': title,
+ 'duration': int_or_none(track_info.get('duration')),
+ 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ city = mobj.group('city')
+ genre = mobj.group('genre')
+ track_id = mobj.group('id')
+
+ track_info = self._extract_info(city, genre, track_id)
+ return self._extract_track_info(track_info, track_id)
+
+
+class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
+ _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
+ IE_NAME = '22tracks:genre'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ city = mobj.group('city')
+ genre = mobj.group('genre')
+
+ genre_title, tracks = self._extract_info(city, genre)
+
+ entries = [
+ self._extract_track_info(track_info, track_info['id'])
+ for track_info in tracks]
+
+ return self.playlist_result(entries, genre, genre_title)
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index aad2bf222..94bd6345d 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -149,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
class TwitchVideoIE(TwitchItemBaseIE):
IE_NAME = 'twitch:video'
- _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'video'
_ITEM_SHORTCUT = 'a'
@@ -165,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
class TwitchChapterIE(TwitchItemBaseIE):
IE_NAME = 'twitch:chapter'
- _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'chapter'
_ITEM_SHORTCUT = 'c'
@@ -184,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
class TwitchVodIE(TwitchItemBaseIE):
IE_NAME = 'twitch:vod'
- _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
new file mode 100644
index 000000000..bba25bb58
--- /dev/null
+++ b/youtube_dl/extractor/udn.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+from .common import InfoExtractor
+from ..utils import js_to_json
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+ _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://video.udn.com/embed/news/300040',
+ 'md5': 'de06b4c90b042c128395a88f0384817e',
+ 'info_dict': {
+ 'id': '300040',
+ 'ext': 'mp4',
+ 'title': '生物老師男變女 全校挺"做自己"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': '//video.udn.com/embed/news/300040',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ page = self._download_webpage(url, video_id)
+
+ options = json.loads(js_to_json(self._html_search_regex(
+ r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+
+ video_urls = options['video']
+
+ if video_urls.get('youtube'):
+ return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+ try:
+ del video_urls['youtube']
+ except KeyError:
+ pass
+
+ formats = [{
+ 'url': self._download_webpage(
+ compat_urlparse.urljoin(url, api_url), video_id,
+ 'retrieve url for %s video' % video_type),
+ 'format_id': video_type,
+ 'preference': 0 if video_type == 'mp4' else -1,
+ } for video_type, api_url in video_urls.items()]
+
+ self._sort_formats(formats)
+
+ thumbnail = None
+
+ if options.get('gallery') and len(options['gallery']):
+ thumbnail = options['gallery'][0].get('original')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': options['title'],
+ 'thumbnail': thumbnail
+ }
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
index 06554a1be..96c809eaf 100644
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -42,7 +42,6 @@ class UltimediaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
deliver_url = self._search_regex(
@@ -81,8 +80,8 @@ class UltimediaIE(InfoExtractor):
title = clean_html((
self._html_search_regex(
r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
- webpage, 'title', default=None)
- or self._search_regex(
+ webpage, 'title', default=None) or
+ self._search_regex(
r"var\s+nameVideo\s*=\s*'([^']+)'",
deliver_page, 'title')))
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py
new file mode 100644
index 000000000..9369abaf8
--- /dev/null
+++ b/youtube_dl/extractor/varzesh3.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Varzesh3IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+ 'md5': '2a933874cb7dce4366075281eb49e855',
+ 'info_dict': {
+ 'id': '76337',
+ 'ext': 'mp4',
+ 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
+ 'description': 'فصل ۲۰۱۵-۲۰۱۴',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="matn">(.+?)</div>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video_id = self._search_regex(
+ r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+ webpage, 'video id', default=display_id)
+
+ return {
+ 'url': video_url,
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
new file mode 100644
index 000000000..6215f0642
--- /dev/null
+++ b/youtube_dl/extractor/vessel.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+ _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+ _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+ _NETRC_MACHINE = 'vessel'
+ _TEST = {
+ 'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+ 'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+ 'info_dict': {
+ 'id': 'HDN7G5UMs',
+ 'ext': 'mp4',
+ 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150317',
+ 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+ 'timestamp': int,
+ },
+ }
+
+ @staticmethod
+ def make_json_request(url, data):
+ payload = json.dumps(data).encode('utf-8')
+ req = compat_urllib_request.Request(url, payload)
+ req.add_header('Content-Type', 'application/json; charset=utf-8')
+ return req
+
+ @staticmethod
+ def find_assets(data, asset_type):
+ for asset in data.get('assets', []):
+ if asset.get('type') == asset_type:
+ yield asset
+
+ def _check_access_rights(self, data):
+ access_info = data.get('__view', {})
+ if not access_info.get('allow_access', True):
+ err_code = access_info.get('error_code') or ''
+ if err_code == 'ITEM_PAID_ONLY':
+ raise ExtractorError(
+ 'This video requires subscription.', expected=True)
+ else:
+ raise ExtractorError(
+ 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+ self.report_login()
+ data = {
+ 'client_id': 'web',
+ 'type': 'password',
+ 'user_key': username,
+ 'password': password,
+ }
+ login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+ self._download_webpage(login_request, None, False, 'Wrong login info')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ data = self._parse_json(self._search_regex(
+ r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+ asset_id = data['model']['data']['id']
+
+ req = VesselIE.make_json_request(
+ self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+ data = self._download_json(req, video_id)
+
+ self._check_access_rights(data)
+
+ try:
+ video_asset = next(VesselIE.find_assets(data, 'video'))
+ except StopIteration:
+ raise ExtractorError('No video assets found')
+
+ formats = []
+ for f in video_asset.get('sources', []):
+ if f['name'] == 'hls-index':
+ formats.extend(self._extract_m3u8_formats(
+ f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+ else:
+ formats.append({
+ 'format_id': f['name'],
+ 'tbr': f.get('bitrate'),
+ 'height': f.get('height'),
+ 'width': f.get('width'),
+ 'url': f['location'],
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for im_asset in VesselIE.find_assets(data, 'image'):
+ thumbnails.append({
+ 'url': im_asset['location'],
+ 'width': im_asset.get('width', 0),
+ 'height': im_asset.get('height', 0),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': data.get('short_description'),
+ 'duration': data.get('duration'),
+ 'comment_count': data.get('comment_count'),
+ 'like_count': data.get('like_count'),
+ 'view_count': data.get('view_count'),
+ 'timestamp': parse_iso8601(data.get('released_at')),
+ }
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index bd09652cd..28bcc89cd 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -244,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*({.+?});', webpage,
+ 'vimeo config', default=None)
+ if vimeo_config:
+ seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
# Extract the config JSON
try:
try:
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index c3187cfeb..d4f5a991e 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -9,8 +9,8 @@ from ..utils import unified_strdate
class VineIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _TESTS = [{
'url': 'https://vine.co/v/b9KOOWX7HUx',
'md5': '2f36fed6235b16da96ce9b4dc890940d',
'info_dict': {
@@ -23,21 +23,53 @@ class VineIE(InfoExtractor):
'uploader': 'Jack Dorsey',
'uploader_id': '76',
},
- }
+ }, {
+ 'url': 'https://vine.co/v/MYxVapFvz2z',
+ 'md5': '7b9a7cbc76734424ff942eb52c8f1065',
+ 'info_dict': {
+ 'id': 'MYxVapFvz2z',
+ 'ext': 'mp4',
+ 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+ 'alt_title': 'Vine by Luna',
+ 'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+ 'upload_date': '20140815',
+ 'uploader': 'Luna',
+ 'uploader_id': '1102363502380728320',
+ },
+ }, {
+ 'url': 'https://vine.co/v/bxVjBbZlPUH',
+ 'md5': 'ea27decea3fa670625aac92771a96b73',
+ 'info_dict': {
+ 'id': 'bxVjBbZlPUH',
+ 'ext': 'mp4',
+ 'title': '#mw3 #ac130 #killcam #angelofdeath',
+ 'alt_title': 'Vine by Z3k3',
+ 'description': '#mw3 #ac130 #killcam #angelofdeath',
+ 'upload_date': '20130430',
+ 'uploader': 'Z3k3',
+ 'uploader_id': '936470460173008896',
+ },
+ }, {
+ 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
- data = json.loads(self._html_search_regex(
- r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+ data = self._parse_json(
+ self._html_search_regex(
+ r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+ webpage, 'vine data'),
+ video_id)
formats = [{
'format_id': '%(format)s-%(rate)s' % f,
'vcodec': f['format'],
'quality': f['rate'],
'url': f['videoUrl'],
- } for f in data['videoUrls'] if f.get('rate')]
+ } for f in data['videoUrls']]
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index 4971965f9..81d885fdc 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def base64_decode_utf8(data):
+ return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+ @staticmethod
+ def base64_encode_utf8(data):
+ return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
def _extract_flv_config(self, media_id):
- base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+ base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
- prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+ prop_id = self.base64_decode_utf8(prop.attrib['id'])
# CDATA may be empty in flv config
if not prop.text:
continue
- encoded_content = base64.b64decode(prop.text).decode('utf-8')
+ encoded_content = self.base64_decode_utf8(prop.text)
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
return prop_dict
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 97dbac4cc..b777159c5 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -17,6 +17,8 @@ from ..utils import (
int_or_none,
)
+from .nbc import NBCSportsVPlayerIE
+
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
@@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
}
]
@@ -151,6 +162,10 @@ class YahooIE(InfoExtractor):
items = json.loads(items_json)
video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage)
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index e4c855ee0..6abe72f73 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -52,7 +52,7 @@ class YouPornIE(InfoExtractor):
webpage, 'JSON parameters')
try:
params = json.loads(json_params)
- except:
+ except ValueError:
raise ExtractorError('Invalid JSON')
self.report_extraction(video_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 27c8c4453..2774ec30b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -495,7 +495,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': '孫艾倫',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
- }
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'mp4',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
]
def __init__(self, *args, **kwargs):
@@ -772,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
errnote='Could not download DASH manifest')
formats = []
- for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
- url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
- if url_el is None:
- continue
- format_id = r.attrib['id']
- video_url = url_el.text
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
- f = {
- 'format_id': format_id,
- 'url': video_url,
- 'width': int_or_none(r.attrib.get('width')),
- 'height': int_or_none(r.attrib.get('height')),
- 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
- 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
- 'filesize': filesize,
- 'fps': int_or_none(r.attrib.get('frameRate')),
- }
- try:
- existing_format = next(
- fo for fo in formats
- if fo['format_id'] == format_id)
- except StopIteration:
- full_info = self._formats.get(format_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- existing_format.update(f)
+ for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+ mime_type = a.attrib.get('mimeType')
+ for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+ url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+ if url_el is None:
+ continue
+ if mime_type == 'text/vtt':
+ # TODO implement WebVTT downloading
+ pass
+ elif mime_type and (mime_type.startswith('audio/') or mime_type.startswith('video/')):
+ format_id = r.attrib['id']
+ video_url = url_el.text
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': int_or_none(r.attrib.get('width')),
+ 'height': int_or_none(r.attrib.get('height')),
+ 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+ 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+ 'filesize': filesize,
+ 'fps': int_or_none(r.attrib.get('frameRate')),
+ }
+ try:
+ existing_format = next(
+ fo for fo in formats
+ if fo['format_id'] == format_id)
+ except StopIteration:
+ full_info = self._formats.get(format_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
+ else:
+ existing_format.update(f)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
def _real_extract(self, url):
@@ -855,7 +879,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
args = ytplayer_config['args']
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
- if 'url_encoded_fmt_stream_map' not in args:
+ if not args.get('url_encoded_fmt_stream_map'):
raise ValueError('No stream_map present') # caught below
except ValueError:
# We fallback to the get_video_info pages (used by the embed page)
@@ -1263,27 +1287,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, title)
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- if 'v' in query_dict:
- video_id = query_dict['v'][0]
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
- if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
+ def _extract_playlist(self, playlist_id):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
@@ -1327,6 +1331,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ if 'v' in query_dict:
+ video_id = query_dict['v'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ return self._extract_playlist(playlist_id)
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
@@ -1643,21 +1670,26 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:recommended'
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
- _FEED_NAME = 'watch_later'
- _PLAYLIST_TITLE = 'Youtube Watch Later'
- _PERSONAL_FEED = True
+ _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+ _TESTS = [] # override PlaylistIE tests
+
+ def _real_extract(self, url):
+ return self._extract_playlist('WL')
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:history'
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'