aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/abc.py10
-rw-r--r--youtube_dl/extractor/allocine.py2
-rw-r--r--youtube_dl/extractor/bandcamp.py9
-rw-r--r--youtube_dl/extractor/byutv.py8
-rw-r--r--youtube_dl/extractor/funnyordie.py1
-rw-r--r--youtube_dl/extractor/grooveshark.py7
-rw-r--r--youtube_dl/extractor/mtv.py2
-rw-r--r--youtube_dl/extractor/myspass.py3
-rw-r--r--youtube_dl/extractor/ndr.py2
-rw-r--r--youtube_dl/extractor/niconico.py3
-rw-r--r--youtube_dl/extractor/streamcloud.py6
-rw-r--r--youtube_dl/extractor/swrmediathek.py4
-rw-r--r--youtube_dl/extractor/tapely.py1
-rw-r--r--youtube_dl/extractor/ted.py3
-rw-r--r--youtube_dl/extractor/wrzuta.py21
-rw-r--r--youtube_dl/extractor/youjizz.py34
-rw-r--r--youtube_dl/extractor/youtube.py1
17 files changed, 60 insertions, 57 deletions
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 69f89320c..dc0fb85d6 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -11,13 +11,13 @@ class ABCIE(InfoExtractor):
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
- 'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+ 'md5': 'cb3dd03b18455a661071ee1e28344d9f',
'info_dict': {
- 'id': '5624716',
+ 'id': '5868334',
'ext': 'mp4',
- 'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
- 'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+ 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+ 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
}
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
index 7bd797884..398e93bfb 100644
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py
@@ -22,7 +22,7 @@ class AllocineIE(InfoExtractor):
'id': '19546517',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
- 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+ 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
'thumbnail': 're:http://.*\.jpg',
},
}, {
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index c13446665..1b8da43ca 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -110,20 +110,25 @@ class BandcampAlbumIE(InfoExtractor):
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
'playlist': [
{
- 'file': '1353101989.mp3',
'md5': '39bc1eded3476e927c724321ddf116cf',
'info_dict': {
+ 'id': '1353101989',
+ 'ext': 'mp3',
'title': 'Intro',
}
},
{
- 'file': '38097443.mp3',
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
'info_dict': {
+ 'id': '38097443',
+ 'ext': 'mp3',
'title': 'Kero One - Keep It Alive (Blazo remix)',
}
},
],
+ 'info_dict': {
+ 'title': 'Jazz Format Mixtape vol.1',
+ },
'params': {
'playlistend': 2
},
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index cf19b7b0c..6252be05b 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -10,12 +10,12 @@ from ..utils import ExtractorError
class BYUtvIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
- 'id': 'granite-flats-talking',
+ 'id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
- 'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
- 'title': 'Talking',
+ 'description': 'md5:5438d33774b6bdc662f9485a340401cc',
+ 'title': 'Season 5 Episode 5',
'thumbnail': 're:^https?://.*promo.*'
},
'params': {
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 487d5d1b4..a49fc1151 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -21,7 +21,6 @@ class FunnyOrDieIE(InfoExtractor):
},
}, {
'url': 'http://www.funnyordie.com/embed/e402820827',
- 'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
'info_dict': {
'id': 'e402820827',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py
index 726adff77..fff74a70a 100644
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -8,12 +8,13 @@ import re
from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
-
-from ..utils import (
+from ..compat import (
+ compat_html_parser,
compat_urllib_parse,
+ compat_urllib_request,
compat_urlparse,
)
+from ..utils import ExtractorError
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 228b42d2b..f0e28e66a 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m:
return rtmp_video_url
- base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+ base = 'http://viacommtvstrmfs.fplive.net/'
return base + m.group('finalid')
def _get_feed_url(self, uri):
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 4fa0575f8..51e540814 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -13,9 +13,10 @@ class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
- 'file': '11741.mp4',
'md5': '0b49f4844a068f8b33f4b7c88405862b',
'info_dict': {
+ 'id': '11741',
+ 'ext': 'mp4',
"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
},
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index add4b3e5d..c31ff8160 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -67,7 +67,7 @@ class NDRIE(InfoExtractor):
thumbnail = None
- video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+ video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
if video_url:
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
if thumbnails:
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 45cbd4ee9..3b5784e8f 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -175,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor):
entries = [{
'_type': 'url',
'ie_key': NiconicoIE.ie_key(),
- 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+ 'url': ('http://www.nicovideo.jp/watch/%s' %
+ entry['item_data']['video_id']),
} for entry in entries]
return {
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
index 172def221..c1178f26d 100644
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -13,7 +13,7 @@ from ..utils import (
class StreamcloudIE(InfoExtractor):
IE_NAME = 'streamcloud.eu'
- _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
+ _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
_TEST = {
'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
@@ -27,8 +27,8 @@ class StreamcloudIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+ url = 'http://streamcloud.eu/%s' % video_id
orig_webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py
index 13c6ea677..bf430d870 100644
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
if media_type == 'Video':
fmt.update({
- 'format_note': ['144p', '288p', '544p'][quality-1],
+ 'format_note': ['144p', '288p', '544p', '720p'][quality-1],
'vcodec': codec,
})
elif media_type == 'Audio':
@@ -101,4 +101,4 @@ class SWRMediathekIE(InfoExtractor):
'uploader': attr['channel_title'],
'uploader_id': attr['channel_idkey'],
'formats': formats,
- } \ No newline at end of file
+ }
diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py
index 77e056242..283e11350 100644
--- a/youtube_dl/extractor/tapely.py
+++ b/youtube_dl/extractor/tapely.py
@@ -50,6 +50,7 @@ class TapelyIE(InfoExtractor):
request = compat_urllib_request.Request(playlist_url)
request.add_header('X-Requested-With', 'XMLHttpRequest')
request.add_header('Accept', 'application/json')
+ request.add_header('Referer', url)
playlist = self._download_json(request, display_id)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index cd4af96fd..855038077 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -38,6 +38,7 @@ class TEDIE(SubtitlesInfoExtractor):
'actively fooling us.'),
'uploader': 'Dan Dennett',
'width': 854,
+ 'duration': 1308,
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):
'title': 'Be passionate. Be courageous. Be your best.',
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:5174aed4d0f16021b704120360f72b92',
+ 'duration': 1128,
},
}, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
@@ -178,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):
'description': self._og_search_description(webpage),
'subtitles': video_subtitles,
'formats': formats,
+ 'duration': talk_info.get('duration'),
}
def _get_available_subtitles(self, video_id, talk_info):
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py
index 34dd6d952..c42764921 100644
--- a/youtube_dl/extractor/wrzuta.py
+++ b/youtube_dl/extractor/wrzuta.py
@@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor):
'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
},
}, {
- 'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
- 'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
+ 'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
+ 'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
'info_dict': {
- 'id': '9oXJqdcndqv',
+ 'id': '063jOPX5ue2',
'ext': 'ogg',
- 'title': 'David Guetta & Showtek ft. Vassy - Bad',
- 'duration': 270,
- 'uploader_id': 'w729',
- 'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
+ 'title': 'Liber & Natalia Szroeder - Teraz Ty',
+ 'duration': 203,
+ 'uploader_id': 'jolka85',
+ 'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
},
}]
@@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor):
quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
- audio_table = {'flv': 'mp3', 'webm': 'ogg'}
+ audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
formats = []
for media in embedpage['url']:
+ fmt = media['type'].split('@')[0]
if typ == 'audio':
- ext = audio_table[media['type'].split('@')[0]]
+ ext = audio_table.get(fmt, fmt)
else:
- ext = media['type'].split('@')[0]
+ ext = fmt
formats.append({
'format_id': '%s_%s' % (ext, media['quality'].lower()),
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index b86331e3c..c642075dc 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -9,40 +9,30 @@ from ..utils import (
class YouJizzIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
+ _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
_TEST = {
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- 'file': '2189178.flv',
'md5': '07e15fa469ba384c7693fd246905547c',
'info_dict': {
+ 'id': '2189178',
+ 'ext': 'flv',
"title": "Zeichentrick 1",
"age_limit": 18,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('videoid')
-
- # Get webpage content
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
-
age_limit = self._rta_search(webpage)
-
- # Get the video title
- video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
- webpage, 'title').strip()
-
- # Get the embed page
- result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
- if result is None:
- raise ExtractorError('ERROR: unable to extract embed page')
-
- embed_page_url = result.group(0).strip()
- video_id = result.group('videoid')
-
- webpage = self._download_webpage(embed_page_url, video_id)
+ video_title = self._html_search_regex(
+ r'<title>\s*(.*)\s*</title>', webpage, 'title')
+
+ embed_page_url = self._search_regex(
+ r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
+ webpage, 'embed page')
+ webpage = self._download_webpage(
+ embed_page_url, video_id, note='downloading embed page')
# Get the video URL
m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c77d4056f..5406de50b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1043,6 +1043,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
'info_dict': {
'title': 'ytdl test PL',
+ 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
},
'playlist_count': 3,
}, {