about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py | 2
-rw-r--r-- youtube_dl/extractor/abc.py | 10
-rw-r--r-- youtube_dl/extractor/allocine.py | 2
-rw-r--r-- youtube_dl/extractor/bandcamp.py | 9
-rw-r--r-- youtube_dl/extractor/byutv.py | 8
-rw-r--r-- youtube_dl/extractor/dailymotion.py | 2
-rw-r--r-- youtube_dl/extractor/funnyordie.py | 1
-rw-r--r-- youtube_dl/extractor/goldenmoustache.py | 48
-rw-r--r-- youtube_dl/extractor/grooveshark.py | 7
-rw-r--r-- youtube_dl/extractor/mtv.py | 2
-rw-r--r-- youtube_dl/extractor/myspass.py | 3
-rw-r--r-- youtube_dl/extractor/ndr.py | 2
-rw-r--r-- youtube_dl/extractor/niconico.py | 3
-rw-r--r-- youtube_dl/extractor/sexu.py | 61
-rw-r--r-- youtube_dl/extractor/spiegel.py | 62
-rw-r--r-- youtube_dl/extractor/streamcloud.py | 6
-rw-r--r-- youtube_dl/extractor/swrmediathek.py | 4
-rw-r--r-- youtube_dl/extractor/tapely.py | 1
-rw-r--r-- youtube_dl/extractor/ted.py | 3
-rw-r--r-- youtube_dl/extractor/tvplay.py | 9
-rw-r--r-- youtube_dl/extractor/wrzuta.py | 21
-rw-r--r-- youtube_dl/extractor/youjizz.py | 34
-rw-r--r-- youtube_dl/extractor/youtube.py | 3
23 files changed, 221 insertions, 82 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index db76e8870..f45ce05ab 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -142,6 +142,7 @@ from .generic import GenericIE
from .glide import GlideIE
from .globo import GloboIE
from .godtube import GodTubeIE
+from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
@@ -324,6 +325,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
+from .sexu import SexuIE
from .sexykarma import SexyKarmaIE
from .shared import SharedIE
from .sharesix import ShareSixIE
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 69f89320c..dc0fb85d6 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -11,13 +11,13 @@ class ABCIE(InfoExtractor):
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
- 'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+ 'md5': 'cb3dd03b18455a661071ee1e28344d9f',
'info_dict': {
- 'id': '5624716',
+ 'id': '5868334',
'ext': 'mp4',
- 'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
- 'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+ 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+ 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
}
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
index 7bd797884..398e93bfb 100644
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py
@@ -22,7 +22,7 @@ class AllocineIE(InfoExtractor):
'id': '19546517',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
- 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+ 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
'thumbnail': 're:http://.*\.jpg',
},
}, {
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index c13446665..1b8da43ca 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -110,20 +110,25 @@ class BandcampAlbumIE(InfoExtractor):
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
'playlist': [
{
- 'file': '1353101989.mp3',
'md5': '39bc1eded3476e927c724321ddf116cf',
'info_dict': {
+ 'id': '1353101989',
+ 'ext': 'mp3',
'title': 'Intro',
}
},
{
- 'file': '38097443.mp3',
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
'info_dict': {
+ 'id': '38097443',
+ 'ext': 'mp3',
'title': 'Kero One - Keep It Alive (Blazo remix)',
}
},
],
+ 'info_dict': {
+ 'title': 'Jazz Format Mixtape vol.1',
+ },
'params': {
'playlistend': 2
},
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index cf19b7b0c..6252be05b 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -10,12 +10,12 @@ from ..utils import ExtractorError
class BYUtvIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
- 'id': 'granite-flats-talking',
+ 'id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
- 'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
- 'title': 'Talking',
+ 'description': 'md5:5438d33774b6bdc662f9485a340401cc',
+ 'title': 'Season 5 Episode 5',
'thumbnail': 're:^https?://.*promo.*'
},
'params': {
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index dbcf5d6a7..fd4bc75b2 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -94,7 +94,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# It may just embed a vevo video:
m_vevo = re.search(
- r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
+ r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
webpage)
if m_vevo is not None:
vevo_id = m_vevo.group('id')
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 487d5d1b4..a49fc1151 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -21,7 +21,6 @@ class FunnyOrDieIE(InfoExtractor):
},
}, {
'url': 'http://www.funnyordie.com/embed/e402820827',
- 'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
'info_dict': {
'id': 'e402820827',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/goldenmoustache.py b/youtube_dl/extractor/goldenmoustache.py
new file mode 100644
index 000000000..f2e231bf4
--- /dev/null
+++ b/youtube_dl/extractor/goldenmoustache.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+)
+
+
+class GoldenMoustacheIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
+ 'md5': '0f904432fa07da5054d6c8beb5efb51a',
+ 'info_dict': {
+ 'id': '3700',
+ 'ext': 'mp4',
+ 'title': 'Suricate - Le Poker',
+ 'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'view_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._html_search_regex(
+ r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
+ title = self._html_search_regex(
+ r'<title>(.*?) - Golden Moustache</title>', webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(webpage)
+ view_count = int_or_none(self._html_search_regex(
+ r'<strong>([0-9]+)</strong>\s*VUES</span>',
+ webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ }
diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py
index 726adff77..fff74a70a 100644
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -8,12 +8,13 @@ import re
from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
-
-from ..utils import (
+from ..compat import (
+ compat_html_parser,
compat_urllib_parse,
+ compat_urllib_request,
compat_urlparse,
)
+from ..utils import ExtractorError
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 228b42d2b..f0e28e66a 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m:
return rtmp_video_url
- base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+ base = 'http://viacommtvstrmfs.fplive.net/'
return base + m.group('finalid')
def _get_feed_url(self, uri):
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 4fa0575f8..51e540814 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -13,9 +13,10 @@ class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
- 'file': '11741.mp4',
'md5': '0b49f4844a068f8b33f4b7c88405862b',
'info_dict': {
+ 'id': '11741',
+ 'ext': 'mp4',
"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
},
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index add4b3e5d..c31ff8160 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -67,7 +67,7 @@ class NDRIE(InfoExtractor):
thumbnail = None
- video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+ video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
if video_url:
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
if thumbnails:
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 45cbd4ee9..3b5784e8f 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -175,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor):
entries = [{
'_type': 'url',
'ie_key': NiconicoIE.ie_key(),
- 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+ 'url': ('http://www.nicovideo.jp/watch/%s' %
+ entry['item_data']['video_id']),
} for entry in entries]
return {
diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py
new file mode 100644
index 000000000..6365a8779
--- /dev/null
+++ b/youtube_dl/extractor/sexu.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SexuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://sexu.com/961791/',
+ 'md5': 'ff615aca9691053c94f8f10d96cd7884',
+ 'info_dict': {
+ 'id': '961791',
+ 'ext': 'mp4',
+ 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
+ 'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+ 'categories': list, # NSFW
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ quality_arr = self._search_regex(
+ r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+ formats = [{
+ 'url': fmt[0].replace('\\', ''),
+ 'format_id': fmt[1],
+ 'height': int(fmt[1][:3]),
+ } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+ self._sort_formats(formats)
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+
+ thumbnail = self._html_search_regex(
+ r'image:\s*"([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ categories_str = self._html_search_meta(
+ 'keywords', webpage, 'categories')
+ categories = (
+ None if categories_str is None
+ else categories_str.split(','))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py
index 9ed7d3b39..9586a7da2 100644
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import compat_urlparse
+from ..compat import compat_urlparse
class SpiegelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
+ _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
_TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': '2c2754212136f35fb4b19767d242f66e',
@@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
},
+ }, {
+ 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
+ 'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
+ 'info_dict': {
+ 'id': '1519126',
+ 'ext': 'mp4',
+ 'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
+ 'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
+ }
}]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('videoID')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(
- r'<div class="module-title">(.*?)</div>', webpage, 'title')
+ title = re.sub(r'\s+', ' ', self._html_search_regex(
+ r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
+ webpage, 'title'))
description = self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex(
@@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
_VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
IE_NAME = 'Spiegel:Article'
IE_DESC = 'Articles on spiegel.de'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
'info_dict': {
'id': '1516455',
@@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
'description': 're:^Patrick Kämnitz gehört.{100,}',
},
- }
+ }, {
+ 'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
+ 'info_dict': {
- def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
+ },
+ 'playlist_count': 6,
+ }]
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+
+ # Single video on top of the page
video_link = self._search_regex(
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
- 'video page URL')
- video_url = compat_urlparse.urljoin(
- self.http_scheme() + '//spiegel.de/', video_link)
-
- return {
- '_type': 'url',
- 'url': video_url,
- }
+ 'video page URL', default=None)
+ if video_link:
+ video_url = compat_urlparse.urljoin(
+ self.http_scheme() + '//spiegel.de/', video_link)
+ return self.url_result(video_url)
+
+ # Multiple embedded videos
+ embeds = re.findall(
+ r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
+ webpage)
+ entries = [
+ self.url_result(compat_urlparse.urljoin(
+ self.http_scheme() + '//spiegel.de/', embed_path))
+ for embed_path in embeds
+ ]
+ return self.playlist_result(entries)
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
index 172def221..c1178f26d 100644
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -13,7 +13,7 @@ from ..utils import (
class StreamcloudIE(InfoExtractor):
IE_NAME = 'streamcloud.eu'
- _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
+ _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
_TEST = {
'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
@@ -27,8 +27,8 @@ class StreamcloudIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+ url = 'http://streamcloud.eu/%s' % video_id
orig_webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py
index 13c6ea677..bf430d870 100644
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
if media_type == 'Video':
fmt.update({
- 'format_note': ['144p', '288p', '544p'][quality-1],
+ 'format_note': ['144p', '288p', '544p', '720p'][quality-1],
'vcodec': codec,
})
elif media_type == 'Audio':
@@ -101,4 +101,4 @@ class SWRMediathekIE(InfoExtractor):
'uploader': attr['channel_title'],
'uploader_id': attr['channel_idkey'],
'formats': formats,
- } \ No newline at end of file
+ }
diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py
index 77e056242..283e11350 100644
--- a/youtube_dl/extractor/tapely.py
+++ b/youtube_dl/extractor/tapely.py
@@ -50,6 +50,7 @@ class TapelyIE(InfoExtractor):
request = compat_urllib_request.Request(playlist_url)
request.add_header('X-Requested-With', 'XMLHttpRequest')
request.add_header('Accept', 'application/json')
+ request.add_header('Referer', url)
playlist = self._download_json(request, display_id)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index cd4af96fd..855038077 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -38,6 +38,7 @@ class TEDIE(SubtitlesInfoExtractor):
'actively fooling us.'),
'uploader': 'Dan Dennett',
'width': 854,
+ 'duration': 1308,
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):
'title': 'Be passionate. Be courageous. Be your best.',
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:5174aed4d0f16021b704120360f72b92',
+ 'duration': 1128,
},
}, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
@@ -178,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):
'description': self._og_search_description(webpage),
'subtitles': video_subtitles,
'formats': formats,
+ 'duration': talk_info.get('duration'),
}
def _get_available_subtitles(self, video_id, talk_info):
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index 445e0ec41..eb9473754 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -4,9 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- compat_str,
parse_iso8601,
qualities,
)
@@ -176,8 +176,7 @@ class TVPlayIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
video = self._download_json(
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
@@ -208,6 +207,10 @@ class TVPlayIE(InfoExtractor):
'app': m.group('app'),
'play_path': m.group('playpath'),
})
+ elif video_url.endswith('.f4m'):
+ formats.extend(self._extract_f4m_formats(
+ video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id))
+ continue
else:
fmt.update({
'url': video_url,
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py
index 41756784a..c42764921 100644
--- a/youtube_dl/extractor/wrzuta.py
+++ b/youtube_dl/extractor/wrzuta.py
@@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor):
'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
},
}, {
- 'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
- 'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
+ 'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
+ 'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
'info_dict': {
- 'id': '9oXJqdcndqv',
+ 'id': '063jOPX5ue2',
'ext': 'ogg',
- 'title': 'David Guetta & Showtek ft. Vassy - Bad',
- 'duration': 270,
- 'uploader_id': 'w729',
- 'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
+ 'title': 'Liber & Natalia Szroeder - Teraz Ty',
+ 'duration': 203,
+ 'uploader_id': 'jolka85',
+ 'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
},
}]
@@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor):
quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
- audio_table = {'flv': 'mp3', 'webm': 'ogg', 'mp3': 'mp3'}
+ audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
formats = []
for media in embedpage['url']:
+ fmt = media['type'].split('@')[0]
if typ == 'audio':
- ext = audio_table[media['type'].split('@')[0]]
+ ext = audio_table.get(fmt, fmt)
else:
- ext = media['type'].split('@')[0]
+ ext = fmt
formats.append({
'format_id': '%s_%s' % (ext, media['quality'].lower()),
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index b86331e3c..c642075dc 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -9,40 +9,30 @@ from ..utils import (
class YouJizzIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
+ _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
_TEST = {
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- 'file': '2189178.flv',
'md5': '07e15fa469ba384c7693fd246905547c',
'info_dict': {
+ 'id': '2189178',
+ 'ext': 'flv',
"title": "Zeichentrick 1",
"age_limit": 18,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('videoid')
-
- # Get webpage content
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
-
age_limit = self._rta_search(webpage)
-
- # Get the video title
- video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
- webpage, 'title').strip()
-
- # Get the embed page
- result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
- if result is None:
- raise ExtractorError('ERROR: unable to extract embed page')
-
- embed_page_url = result.group(0).strip()
- video_id = result.group('videoid')
-
- webpage = self._download_webpage(embed_page_url, video_id)
+ video_title = self._html_search_regex(
+ r'<title>\s*(.*)\s*</title>', webpage, 'title')
+
+ embed_page_url = self._search_regex(
+ r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
+ webpage, 'embed page')
+ webpage = self._download_webpage(
+ embed_page_url, video_id, note='downloading embed page')
# Get the video URL
m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c77d4056f..cea3d5c95 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -510,7 +510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
- r'signature=([$a-zA-Z]+)', jscode,
+ r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
'Initial JS player signature function name')
jsi = JSInterpreter(jscode)
@@ -1043,6 +1043,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
'info_dict': {
'title': 'ytdl test PL',
+ 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
},
'playlist_count': 3,
}, {