[vevo] Some improvements (fixes #1580)

Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id}. Some videos don't have a SMIL manifest, so extract the video URLs directly from the JSON and use the last version of the video. Extract all the available formats and set the 'formats' field of the result.
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-10-08 21:23:55 +0200
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-10-08 21:25:38 +0200
commit: 88bd97e34c91a86dfe7dd01a9677b76ef43e1b66 (patch)
tree: 29cef9f7c5436ef7d807fce05fbcdf4eb20cfa60 /youtube_dl
parent: 2ae3edb1cfa5b0afc2e6b6e2d4ea470dadf655a0 (diff)
1 files changed, 47 insertions, 23 deletions
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 70408c4f0..1c1cc418d 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -1,11 +1,15 @@
 import re
 import json
+import xml.etree.ElementTree
+import datetime
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     ExtractorError,
 )
 
+
 class VevoIE(InfoExtractor):
     """
     Accepts urls from vevo.com or in the format 'vevo:{id}'
@@ -15,11 +19,11 @@ class VevoIE(InfoExtractor):
     _TEST = {
         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
         u'file': u'GB1101300280.mp4',
-        u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
         u'info_dict': {
             u"upload_date": u"20130624",
             u"uploader": u"Hurts",
-            u"title": u"Somebody to Die For"
+            u"title": u"Somebody to Die For",
+            u'duration': 230,
         }
     }
 
@@ -27,27 +31,47 @@ class VevoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        json_url = 'http://www.vevo.com/data/video/%s' % video_id
-        base_url = 'http://smil.lvl3.vevo.com'
-        videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
+        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
         info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-        links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')
 
         self.report_extraction(video_id)
-        video_info = json.loads(info_json)
-        m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
-        if m_urls is None or len(m_urls) == 0:
-            raise ExtractorError(u'Unable to extract video url')
-        # They are sorted from worst to best quality
-        m_url = m_urls[-1]
-        video_url = base_url + '/' + m_url.group('url')
-        ext = m_url.group('ext')
-
-        return {'url': video_url,
-                'ext': ext,
-                'id': video_id,
-                'title': video_info['title'],
-                'thumbnail': video_info['img'],
-                'upload_date': video_info['launchDate'].replace('/',''),
-                'uploader': video_info['Artists'][0]['title'],
-                }
+        video_info = json.loads(info_json)['video']
+        last_version = {'version': -1}
+        for version in video_info['videoVersions']:
+            # These are the HTTP downloads, other types are for different manifests
+            if version['sourceType'] == 2:
+                if version['version'] > last_version['version']:
+                    last_version = version
+        if last_version['version'] == -1:
+            raise ExtractorError(u'Unable to extract last version of the video')
+
+        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
+        formats = []
+        # Already sorted from worst to best quality
+        for rend in renditions.findall('rendition'):
+            attr = rend.attrib
+            f_url = attr['url']
+            formats.append({
+                'url': f_url,
+                'ext': determine_ext(f_url),
+                'height': int(attr['frameheight']),
+                'width': int(attr['frameWidth']),
+            })
+
+        date_epoch = int(self._search_regex(
+            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
+        upload_date = datetime.datetime.fromtimestamp(date_epoch)
+        info = {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'thumbnail': video_info['imageUrl'],
+            'upload_date': upload_date.strftime('%Y%m%d'),
+            'uploader': video_info['mainArtists'][0]['artistName'],
+            'duration': video_info['duration'],
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+
+        return info
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-10-08 21:23:55 +0200
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-10-08 21:25:38 +0200
commit	88bd97e34c91a86dfe7dd01a9677b76ef43e1b66 (patch)
tree	29cef9f7c5436ef7d807fce05fbcdf4eb20cfa60 /youtube_dl
parent	2ae3edb1cfa5b0afc2e6b6e2d4ea470dadf655a0 (diff)