Diffstat (limited to 'youtube_dl')
-rw-r--r--   youtube_dl/extractor/__init__.py     1
-rw-r--r--   youtube_dl/extractor/generic.py      7
-rw-r--r--   youtube_dl/extractor/krasview.py     6
-rw-r--r--   youtube_dl/extractor/mixcloud.py    69
-rw-r--r--   youtube_dl/extractor/nytimes.py     40
-rw-r--r--   youtube_dl/extractor/ultimedia.py  104
-rw-r--r--   youtube_dl/extractor/videomega.py   45
-rw-r--r--   youtube_dl/extractor/vine.py        15
-rw-r--r--   youtube_dl/version.py                2
9 files changed, 208 insertions, 81 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 73c17aa84..7eb9b4fbb 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -538,6 +538,7 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
+from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index dc5755d12..8716e4503 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1006,6 +1006,13 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'))
 
+        # Look for NYTimes player
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index e46954b47..96f95979a 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
         description = self._og_search_description(webpage, default=None)
         thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
         duration = int_or_none(flashvars.get('duration'))
-        width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
-        height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+        width = int_or_none(self._og_search_property(
+            'video:width', webpage, 'video width', default=None))
+        height = int_or_none(self._og_search_property(
+            'video:height', webpage, 'video height', default=None))
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 1831c6749..21aea0c55 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import re
+import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -10,7 +11,6 @@ from ..utils import (
     ExtractorError,
     HEADRequest,
     str_to_int,
-    parse_iso8601,
 )
 
 
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
             'uploader': 'Daniel Holbach',
             'uploader_id': 'dholbach',
-            'upload_date': '20111115',
-            'timestamp': 1321359578,
             'thumbnail': 're:https?://.*\.jpg',
             'view_count': int,
             'like_count': int,
@@ -37,31 +35,30 @@
         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
         'info_dict': {
             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
-            'ext': 'm4a',
-            'title': 'Electric Relaxation vol. 3',
+            'ext': 'mp3',
+            'title': 'Caribou 7 inch Vinyl Mix & Chat',
             'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
-            'uploader': 'Daniel Drumz',
+            'uploader': 'Gilles Peterson Worldwide',
             'uploader_id': 'gillespeterson',
-            'thumbnail': 're:https?://.*\.jpg',
+            'thumbnail': 're:https?://.*/images/',
             'view_count': int,
             'like_count': int,
         },
     }]
 
-    def _get_url(self, track_id, template_url):
-        server_count = 30
-        for i in range(server_count):
-            url = template_url % i
+    def _get_url(self, track_id, template_url, server_number):
+        boundaries = (1, 30)
+        for nr in server_numbers(server_number, boundaries):
+            url = template_url % nr
             try:
                 # We only want to know if the request succeed
                 # don't download the whole file
                 self._request_webpage(
                     HEADRequest(url), track_id,
-                    'Checking URL %d/%d ...' % (i + 1, server_count + 1))
+                    'Checking URL %d/%d ...' % (nr, boundaries[-1]))
                 return url
             except ExtractorError:
                 pass
-
         return None
 
     def _real_extract(self, url):
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
+        server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(track_id, template_url)
+        final_song_url = self._get_url(track_id, template_url, server_number)
         if final_song_url is None:
             self.to_screen('Trying with m4a extension')
             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(track_id, template_url)
+            final_song_url = self._get_url(track_id, template_url, server_number)
             if final_song_url is None:
                 raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
-            r'<span class="play-button[^"]*?"'
+            r'm-play-on-spacebar[^>]+'
             r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
         title = self._html_search_regex(
             PREFIX + r'm-title="([^"]+)"', webpage, 'title')
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
         description = self._og_search_description(webpage)
         like_count = str_to_int(self._search_regex(
-            [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
-             r'/favorites/?">([0-9]+)<'],
+            r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
            webpage, 'like count', fatal=False))
         view_count = str_to_int(self._search_regex(
             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
              r'/listeners/?">([0-9,.]+)</a>'],
             webpage, 'play count', fatal=False))
-        timestamp = parse_iso8601(self._search_regex(
-            r'<time itemprop="dateCreated" datetime="([^"]+)">',
-            webpage, 'upload date', default=None))
 
         return {
             'id': track_id,
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
             'thumbnail': thumbnail,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'timestamp': timestamp,
             'view_count': view_count,
             'like_count': like_count,
         }
+
+
+def server_numbers(first, boundaries):
+    """ Server numbers to try in descending order of probable availability.
+    Starting from first (i.e. the number of the server hosting the preview file)
+    and going further and further up to the higher boundary and down to the
+    lower one in an alternating fashion. Namely:
+
+        server_numbers(2, (1, 5))
+
+        # Where the preview server is 2, min number is 1 and max is 5.
+        # Yields: 2, 3, 1, 4, 5
+
+    Why not random numbers or increasing sequences? Since from what I've seen,
+    full length files seem to be hosted on servers whose number is closer to
+    that of the preview; to be confirmed.
+    """
+    zip_longest = getattr(itertools, 'zip_longest', None)
+    if zip_longest is None:
+        # python 2.x
+        zip_longest = itertools.izip_longest
+
+    if len(boundaries) != 2:
+        raise ValueError("boundaries should be a two-element tuple")
+    min, max = boundaries
+    highs = range(first + 1, max + 1)
+    lows = range(first - 1, min - 1, -1)
+    rest = filter(
+        None, itertools.chain.from_iterable(zip_longest(highs, lows)))
+    yield first
+    for n in rest:
+        yield n
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 56e1cad3b..03f0a4de6 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -1,15 +1,17 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+)
 
 
 class NYTimesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
         'md5': '18a525a510f942ada2720db5f31644c0',
         'info_dict': {
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
             'uploader': 'Brett Weiner',
             'duration': 419,
         }
-    }
+    }, {
+        'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         video_data = self._download_json(
-            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
+            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
+            video_id, 'Downloading video JSON')
 
         title = video_data['headline']
-        description = video_data['summary']
-        duration = video_data['duration'] / 1000.0
+        description = video_data.get('summary')
+        duration = float_or_none(video_data.get('duration'), 1000)
 
         uploader = video_data['byline']
         timestamp = parse_iso8601(video_data['publication_date'][:-8])
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
         formats = [
             {
                 'url': video['url'],
-                'format_id': video['type'],
-                'vcodec': video['video_codec'],
-                'width': video['width'],
-                'height': video['height'],
-                'filesize': get_file_size(video['fileSize']),
+                'format_id': video.get('type'),
+                'vcodec': video.get('video_codec'),
+                'width': int_or_none(video.get('width')),
+                'height': int_or_none(video.get('height')),
+                'filesize': get_file_size(video.get('fileSize')),
             } for video in video_data['renditions']
         ]
         self._sort_formats(formats)
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
         thumbnails = [
             {
                 'url': 'http://www.nytimes.com/%s' % image['url'],
-                'resolution': '%dx%d' % (image['width'], image['height']),
+                'width': int_or_none(image.get('width')),
+                'height': int_or_none(image.get('height')),
             } for image in video_data['images']
         ]
 
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
new file mode 100644
index 000000000..06554a1be
--- /dev/null
+++ b/youtube_dl/extractor/ultimedia.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    qualities,
+    unified_strdate,
+    clean_html,
+)
+
+
+class UltimediaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
+    _TESTS = [{
+        # news
+        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+        'md5': '276a0e49de58c7e85d32b057837952a2',
+        'info_dict': {
+            'id': 's8uk0r',
+            'ext': 'mp4',
+            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20150317',
+        },
+    }, {
+        # music
+        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+        'info_dict': {
+            'id': 'xvpfp8',
+            'ext': 'mp4',
+            'title': "Two - C'est la vie (Clip)",
+            'description': 'Two',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20150224',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        deliver_url = self._search_regex(
+            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+            webpage, 'deliver URL')
+
+        deliver_page = self._download_webpage(
+            deliver_url, video_id, 'Downloading iframe page')
+
+        if '>This video is currently not available' in deliver_page:
+            raise ExtractorError(
+                'Video %s is currently not available' % video_id, expected=True)
+
+        player = self._parse_json(
+            self._search_regex(
+                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+            video_id)
+
+        quality = qualities(['flash', 'html5'])
+        formats = []
+        for mode in player['modes']:
+            video_url = mode.get('config', {}).get('file')
+            if not video_url:
+                continue
+            if re.match(r'https?://www\.youtube\.com/.+?', video_url):
+                return self.url_result(video_url, 'Youtube')
+            formats.append({
+                'url': video_url,
+                'format_id': mode.get('type'),
+                'quality': quality(mode.get('type')),
+            })
+        self._sort_formats(formats)
+
+        thumbnail = player.get('image')
+
+        title = clean_html((
+            self._html_search_regex(
+                r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
+                webpage, 'title', default=None)
+            or self._search_regex(
+                r"var\s+nameVideo\s*=\s*'([^']+)'",
+                deliver_page, 'title')))
+
+        description = clean_html(self._html_search_regex(
+            r'(?s)<span>Description</span>(.+?)</p>', webpage,
+            'description', fatal=False))
+
+        upload_date = unified_strdate(self._search_regex(
+            r'Ajouté le\s*<span>([^<]+)', webpage,
+            'upload date', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index 273030316..eb309a7cd 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
-from ..utils import (
-    ExtractorError,
-    remove_start,
-)
+from ..compat import compat_urllib_request
 
 
 class VideoMegaIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://
         (?:www\.)?videomega\.tv/
-        (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
+        (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
         '''
     _TEST = {
-        'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
+        'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
         'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
         'info_dict': {
-            'id': 'QR0HCUHI1661IHUCH0RQ',
+            'id': '4GNA688SU99US886ANG4',
             'ext': 'mp4',
-            'title': 'Big Buck Bunny',
+            'title': 'BigBuckBunny_320x180',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
@@ -33,34 +26,24 @@
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
+        iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
         req = compat_urllib_request.Request(iframe_url)
         req.add_header('Referer', url)
         webpage = self._download_webpage(req, video_id)
 
-        try:
-            escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
-        except IndexError:
-            raise ExtractorError('Unable to extract escaped data')
-
-        playlist = compat_urllib_parse.unquote(escaped_data)
-
+        title = self._html_search_regex(
+            r'<title>(.*?)</title>', webpage, 'title')
+        title = re.sub(
+            r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
         thumbnail = self._search_regex(
-            r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
-        video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
-        title = remove_start(self._html_search_regex(
-            r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
-
-        formats = [{
-            'format_id': 'sd',
-            'url': video_url,
-        }]
-        self._sort_formats(formats)
+            r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+        video_url = self._search_regex(
+            r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
 
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
+            'url': video_url,
             'thumbnail': thumbnail,
             'http_headers': {
                 'Referer': iframe_url,
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 0b58fe0fe..c3187cfeb 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -33,14 +33,13 @@ class VineIE(InfoExtractor):
             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
 
         formats = [{
-            'url': data['videoLowURL'],
-            'ext': 'mp4',
-            'format_id': 'low',
-        }, {
-            'url': data['videoUrl'],
-            'ext': 'mp4',
-            'format_id': 'standard',
-        }]
+            'format_id': '%(format)s-%(rate)s' % f,
+            'vcodec': f['format'],
+            'quality': f['rate'],
+            'url': f['videoUrl'],
+        } for f in data['videoUrls'] if f.get('rate')]
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 7ed07c375..51b4260aa 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.03.15'
+__version__ = '2015.03.18'
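
For readers skimming the mixcloud.py change above: the probing order produced by the new server_numbers() helper is easiest to see by running it. The sketch below is illustrative only and is not part of the commit; it restates the helper in Python 3 form (the committed version also covers Python 2 via itertools.izip_longest) and prints the order in which candidate stream servers would be checked.

# Illustrative sketch, not part of the commit above: mirrors the
# server_numbers() helper added to mixcloud.py (Python 3 only).
import itertools


def server_numbers(first, boundaries):
    # Start at the server hosting the preview, then alternate outwards
    # towards the upper and lower boundaries of the server range.
    low, high = boundaries
    highs = range(first + 1, high + 1)
    lows = range(first - 1, low - 1, -1)
    rest = filter(
        None, itertools.chain.from_iterable(itertools.zip_longest(highs, lows)))
    yield first
    for n in rest:
        yield n


print(list(server_numbers(2, (1, 5))))    # [2, 3, 1, 4, 5]
print(list(server_numbers(17, (1, 30))))  # [17, 18, 16, 19, 15, 20, 14, ...]

The alternating order reflects the observation recorded in the helper's docstring: full-length files tend to sit on a server numbered close to the one serving the preview, so near neighbours are tried before the far ends of the 1-30 range.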