aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py6
-rw-r--r--youtube_dl/extractor/cmt.py19
-rw-r--r--youtube_dl/extractor/comedycentral.py4
-rw-r--r--youtube_dl/extractor/common.py3
-rw-r--r--youtube_dl/extractor/dreisat.py6
-rw-r--r--youtube_dl/extractor/generic.py2
-rw-r--r--youtube_dl/extractor/imdb.py29
-rw-r--r--youtube_dl/extractor/mixcloud.py2
-rw-r--r--youtube_dl/extractor/mtv.py2
-rw-r--r--youtube_dl/extractor/soundcloud.py4
-rw-r--r--youtube_dl/extractor/wistia.py1
-rw-r--r--youtube_dl/extractor/youtube.py1
12 files changed, 69 insertions, 10 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b59110b15..9c1374373 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ from .channel9 import Channel9IE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
+from .cmt import CMTIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
-from .imdb import ImdbIE
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py
new file mode 100644
index 000000000..88e0e9aba
--- /dev/null
+++ b/youtube_dl/extractor/cmt.py
@@ -0,0 +1,19 @@
+from .mtv import MTVIE
+
+class CMTIE(MTVIE):
+ IE_NAME = u'cmt.com'
+ _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
+ _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
+
+ _TESTS = [
+ {
+ u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+ u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
+ u'info_dict': {
+ u'id': u'989124',
+ u'ext': u'mp4',
+ u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+ u'description': u'Blame It All On My Roots',
+ },
+ },
+ ]
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index a54ce3ee7..27bd8256e 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -12,7 +12,9 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+ _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+ (video-clips|episodes|cc-studios|video-collections)
+ /(?P<title>.*)'''
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f34d36cb0..f7247752e 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -69,7 +69,8 @@ class InfoExtractor(object):
download, lower-case.
"http", "https", "rtsp", "rtmp" or so.
* preference Order number of this format. If this field is
- present, the formats get sorted by this field.
+ present and not None, the formats get sorted
+ by this field.
-1 for default (order by other properties),
-2 or smaller for less than default.
url: Final video URL.
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 416e25156..0b11d1f10 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -10,11 +10,11 @@ from ..utils import (
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
- _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+ _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
- u'file': u'36983.webm',
- u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
+ u'file': u'36983.mp4',
+ u'md5': u'9dcfe344732808dbfcc901537973c922',
u'info_dict': {
u"title": u"Kaffeeland Schweiz",
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7a14c98f9..377ae91c4 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):
return self.url_result('http://' + url)
video_id = os.path.splitext(url.split('/')[-1])[0]
+ self.to_screen(u'%s: Requesting header' % video_id)
+
try:
response = self._send_head(url)
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index e5332cce8..16926b4d3 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
'description': descr,
'thumbnail': format_info['slate'],
}
+
+class ImdbListIE(InfoExtractor):
+ IE_NAME = u'imdb:list'
+ IE_DESC = u'Internet Movie Database lists'
+ _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ list_id = mobj.group('id')
+
+ # RSS XML is sometimes malformed
+ rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS')
+ list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')
+
+ # Export is independent of actual author_id, but returns 404 if no author_id is provided.
+ # However, passing dummy author_id seems to be enough.
+ csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
+ list_id, u'Downloading list CSV')
+
+ entries = []
+ for item in csv.split('\n')[1:]:
+ cols = item.split(',')
+ if len(cols) < 2:
+ continue
+ item_id = cols[1][1:-1]
+ if item_id.startswith('vi'):
+ entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
+
+ return self.playlist_result(entries, list_id, list_title) \ No newline at end of file
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 125d81551..7c54ea0f4 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):
info = json.loads(json_data)
preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
- song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+ song_url = preview_url.replace('/previews/', '/c/originals/')
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
final_song_url = self._get_url(template_url)
if final_song_url is None:
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index ed11f521a..f1cf41e2d 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
- uri = mobj.group('mgid')
+ uri = mobj.groupdict().get('mgid')
if uri is None:
webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index e22ff9c38..951e977bd 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
(?!sets/)(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
- |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
+ |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
)
'''
IE_NAME = u'soundcloud'
@@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
- elif mobj.group('widget'):
+ elif mobj.group('player'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
return self.url_result(query['url'][0], ie='Soundcloud')
else:
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
index 584550455..bc31c2e64 100644
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor):
'height': a['height'],
'filesize': a['size'],
'ext': a['ext'],
+ 'preference': 1 if atype == 'original' else None,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b0e29c2a8..9424d5e26 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
'138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
+ '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},