diff options
| -rw-r--r-- | devscripts/youtube_genalgo.py | 21 | ||||
| -rw-r--r-- | youtube_dl/FileDownloader.py | 10 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/collegehumor.py | 52 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/funnyordie.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/jeuxvideo.py | 47 | ||||
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/rtlnow.py | 15 | ||||
| -rw-r--r-- | youtube_dl/extractor/slashdot.py | 23 | ||||
| -rw-r--r-- | youtube_dl/extractor/soundcloud.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/statigram.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/unistra.py | 32 | ||||
| -rw-r--r-- | youtube_dl/extractor/vevo.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 41 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 2 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
19 files changed, 270 insertions, 64 deletions
| diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 31d6ec952..dca963e8f 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -11,24 +11,27 @@ tests = [      # 90      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",       "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), +    # 89  +    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'", +     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),      # 88      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",       "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), -    # 87 - vflART1Nf 2013/07/24 +    # 87      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", -     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"), -    # 86 - vflm_D8eE 2013/07/31 +     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), +    # 86      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", -     ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"), -    # 85 - vflSAFCP9 2013/07/19 +     "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), +    # 85      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", -     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), +     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),      # 84      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", -     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), -    # 83 - vflTWC9KW 2013/08/01 +     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), +    # 83      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", -     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"), +     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),      # 82      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",       "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ea6b9d626..217c4a52f 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -79,9 +79,13 @@ class FileDownloader(object):          rate = float(current) / dif          eta = int((float(total) - float(current)) / rate)          (eta_mins, eta_secs) = divmod(eta, 60) -        if eta_mins > 99: -            return '--:--' -        return '%02d:%02d' % (eta_mins, eta_secs) +        (eta_hours, eta_mins) = divmod(eta_mins, 60) +        if eta_hours > 99: +            return '--:--:--' +        if eta_hours == 0: +            return '%02d:%02d' % (eta_mins, eta_secs) +        else: +            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)      @staticmethod      def calc_speed(start, now, bytes): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bf040aacd..3f77dba69 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -27,6 +27,7 @@ __authors__  = (      'Johny Mo Swag',      'Axel Noack',      'Albert Kim', +    'Pierre Rudloff',  )  __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5bb44e764..b4db8f0bf 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -36,6 +36,7 @@ from .ign import IGNIE, OneUPIE  from .ina import InaIE  from .infoq import InfoQIE  from .instagram import InstagramIE +from .jeuxvideo import JeuxVideoIE  from .jukebox import JukeboxIE  from .justintv import JustinTVIE  from .kankan import KankanIE @@ -50,6 +51,7 @@ from .myspass import MySpassIE  from .myvideo import MyVideoIE  from .nba import NBAIE  from .ooyala import OoyalaIE +from .pbs import PBSIE  from .photobucket import PhotobucketIE  from .pornotube import PornotubeIE  from .rbmaradio import RBMARadioIE @@ -58,6 +60,7 @@ from .ringtv import RingTVIE  from .roxwel import RoxwelIE  from .rtlnow import RTLnowIE  from .sina import SinaIE +from .slashdot import SlashdotIE  from .soundcloud import SoundcloudIE, SoundcloudSetIE  from .spiegel import SpiegelIE  from .stanfordoc import StanfordOpenClassroomIE @@ -72,6 +75,7 @@ from .tudou import TudouIE  from .tumblr import TumblrIE  from .tutv import TutvIE  from .ustream import UstreamIE +from .unistra import UnistraIE  from .vbox7 import Vbox7IE  from .veoh import VeohIE  from .vevo import VevoIE diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 30b9c7549..8d4c93d6d 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -4,6 +4,7 @@ import xml.etree.ElementTree  from .common import InfoExtractor  from ..utils import (      compat_urllib_parse_urlparse, +    determine_ext,      ExtractorError,  ) @@ -12,7 +13,7 @@ from ..utils import (  class CollegeHumorIE(InfoExtractor):      _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' -    _TEST = { +    _TESTS = [{          u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',          u'file': u'6902724.mp4',          u'md5': u'1264c12ad95dca142a9f0bf7968105a0', @@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):              u'title': u'Comic-Con Cosplay Catastrophe',              u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',          }, -    } +    }, +    { +        u'url': u'http://www.collegehumor.com/video/3505939/font-conference', +        u'file': u'3505939.mp4', +        u'md5': u'c51ca16b82bb456a4397987791a835f5', +        u'info_dict': { +            u'title': u'Font Conference', +            u'description': u'This video wasn\'t long enough, so we made it double-spaced.', +        }, +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):              info['description'] = videoNode.findall('./description')[0].text              info['title'] = videoNode.findall('./caption')[0].text              info['thumbnail'] = videoNode.findall('./thumbnail')[0].text -            manifest_url = videoNode.findall('./file')[0].text +            next_url = videoNode.findall('./file')[0].text          except IndexError:              raise ExtractorError(u'Invalid metadata XML file') -        manifest_url += '?hdcore=2.10.3' -        manifestXml = self._download_webpage(manifest_url, video_id, -                                             u'Downloading XML manifest', -                                             u'Unable to download video info XML') - -        adoc = xml.etree.ElementTree.fromstring(manifestXml) -        try: -            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] -            node_id = media_node.attrib['url'] -            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text -        except IndexError as err: -            raise ExtractorError(u'Invalid manifest file') +        if next_url.endswith(u'manifest.f4m'): +            manifest_url = next_url + '?hdcore=2.10.3' +            manifestXml = self._download_webpage(manifest_url, video_id, +                                         u'Downloading XML manifest', +                                         u'Unable to download video info XML') -        url_pr = compat_urllib_parse_urlparse(info['thumbnail']) +            adoc = xml.etree.ElementTree.fromstring(manifestXml) +            try: +                media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] +                node_id = media_node.attrib['url'] +                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text +            except IndexError as err: +                raise ExtractorError(u'Invalid manifest file') +            url_pr = compat_urllib_parse_urlparse(info['thumbnail']) +            info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') +            info['ext'] = 'mp4' +        else: +            # Old-style direct links +            info['url'] = next_url +            info['ext'] = determine_ext(info['url']) -        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') -        info['ext'] = 'mp4' -        return [info] +        return info diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da50abfc1..8009c2d85 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -77,7 +77,13 @@ class InfoExtractor(object):      @classmethod      def suitable(cls, url):          """Receives a URL and returns True if suitable for this IE.""" -        return re.match(cls._VALID_URL, url) is not None + +        # This does not use has/getattr intentionally - we want to know whether +        # we have cached the regexp for *this* class, whereas getattr would also +        # match the superclass +        if '_VALID_URL_RE' not in cls.__dict__: +            cls._VALID_URL_RE = re.compile(cls._VALID_URL) +        return cls._VALID_URL_RE.match(url) is not None      @classmethod      def working(cls): diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 67a7e5f76..4508f0dfa 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):          video_id = mobj.group('id')          webpage = self._download_webpage(url, video_id) -        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', +        video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',              webpage, u'video URL', flags=re.DOTALL) -        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", -            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL) -          info = {              'id': video_id,              'url': video_url,              'ext': 'mp4', -            'title': title, +            'title': self._og_search_title(webpage),              'description': self._og_search_description(webpage),          }          return [info] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b633e896c..da016f7ee 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):          return new_url      def _real_extract(self, url): -        new_url = self._test_redirect(url) -        if new_url: return [self.url_result(new_url)] +        try: +            new_url = self._test_redirect(url) +            if new_url: +                return [self.url_result(new_url)] +        except compat_urllib_error.HTTPError: +            # This may be a stupid server that doesn't like HEAD, our UA, or so +            pass          video_id = url.split('/')[-1]          try: @@ -145,6 +150,9 @@ class GenericIE(InfoExtractor):              if m_video_type is not None:                  mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)          if mobj is None: +            # HTML5 video +            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL) +        if mobj is None:              raise ExtractorError(u'Invalid URL: %s' % url)          # It's possible that one of the regexes diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py new file mode 100644 index 000000000..4327bc13d --- /dev/null +++ b/youtube_dl/extractor/jeuxvideo.py @@ -0,0 +1,47 @@ +# coding: utf-8 + +import json +import re +import xml.etree.ElementTree + +from .common import InfoExtractor + +class JeuxVideoIE(InfoExtractor): +    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' + +    _TEST = { +        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', +        u'file': u'5182.mp4', +        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0', +        u'info_dict': { +            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité', +            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        title = re.match(self._VALID_URL, url).group(1) +        webpage = self._download_webpage(url, title) +        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage) + +        xml_link = m_download.group(1) +         +        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1) + +        xml_config = self._download_webpage(xml_link, title, +                                                  'Downloading XML config') +        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) +        info = re.search(r'<format\.json>(.*?)</format\.json>', +                         xml_config, re.MULTILINE|re.DOTALL).group(1) +        info = json.loads(info)['versions'][0] +         +        video_url = 'http://video720.jeuxvideo.com/' + info['file'] + +        return {'id': id, +                'title' : config.find('titre_video').text, +                'ext' : 'mp4', +                'url' : video_url, +                'description': self._og_search_description(webpage), +                'thumbnail': config.find('image').text, +                } diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py new file mode 100644 index 000000000..65462d867 --- /dev/null +++ b/youtube_dl/extractor/pbs.py @@ -0,0 +1,34 @@ +import re +import json + +from .common import InfoExtractor + + +class PBSIE(InfoExtractor): +    _VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?' + +    _TEST = { +        u'url': u'http://video.pbs.org/video/2365006249/', +        u'file': u'2365006249.mp4', +        u'md5': 'ce1888486f0908d555a8093cac9a7362', +        u'info_dict': { +            u'title': u'A More Perfect Union', +            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a', +            u'duration': 3190, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id +        info_page = self._download_webpage(info_url, video_id) +        info =json.loads(info_page) +        return {'id': video_id, +                'title': info['title'], +                'url': info['alternate_encoding']['url'], +                'ext': 'mp4', +                'description': info['program'].get('description'), +                'thumbnail': info.get('image_url'), +                'duration': info.get('duration'), +                } diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index d993a990a..2f134e6a7 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -2,7 +2,10 @@  import re  from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( +    clean_html, +    ExtractorError, +)  class RTLnowIE(InfoExtractor):      """Information Extractor for RTLnow, RTL2now and VOXnow""" @@ -18,6 +21,7 @@ class RTLnowIE(InfoExtractor):          u'params': {              u'skip_download': True,          }, +        u'skip': u'Only works from Germany',      },      {          u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', @@ -31,6 +35,7 @@ class RTLnowIE(InfoExtractor):          u'params': {              u'skip_download': True,          }, +        u'skip': u'Only works from Germany',      },      {          u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', @@ -53,6 +58,14 @@ class RTLnowIE(InfoExtractor):          video_id = mobj.group(u'video_id')          webpage = self._download_webpage(webpage_url, video_id) + +        note_m = re.search(r'''(?sx) +            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?) +            <div[ ]id="playerteaser">''', webpage) +        if note_m: +            msg = clean_html(note_m.group(1)) +            raise ExtractorError(msg) +          video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',              webpage, u'title')          playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py new file mode 100644 index 000000000..2cba53076 --- /dev/null +++ b/youtube_dl/extractor/slashdot.py @@ -0,0 +1,23 @@ +import re + +from .common import InfoExtractor + + +class SlashdotIE(InfoExtractor): +    _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)' + +    _TEST = { +        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', +        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', +        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', +        u'info_dict': { +            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) +        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url') +        return self.url_result(ooyala_url, 'Ooyala') diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7c9f1c6b6..5f3a5540d 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -4,6 +4,7 @@ import re  from .common import InfoExtractor  from ..utils import (      compat_str, +    compat_urlparse,      ExtractorError,      unified_strdate, @@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor):      _VALID_URL = r'''^(?:https?://)?                      (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) +                       |(?P<widget>w.soundcloud.com/player/?.*?url=.*)                      )                      '''      IE_NAME = u'soundcloud' @@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor):          if track_id is not None:              info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID              full_title = track_id +        elif mobj.group('widget'): +            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +            return self.url_result(query['url'][0], ie='Soundcloud')          else:              # extract uploader (which is in the url)              uploader = mobj.group(1) diff --git a/youtube_dl/extractor/statigram.py b/youtube_dl/extractor/statigram.py index b8e6b3bf9..1ea4a9f2f 100644 --- a/youtube_dl/extractor/statigram.py +++ b/youtube_dl/extractor/statigram.py @@ -5,13 +5,13 @@ from .common import InfoExtractor  class StatigramIE(InfoExtractor):      _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'      _TEST = { -        u'url': u'http://statigr.am/p/484091715184808010_284179915', -        u'file': u'484091715184808010_284179915.mp4', -        u'md5': u'deda4ff333abe2e118740321e992605b', +        u'url': u'http://statigr.am/p/522207370455279102_24101272', +        u'file': u'522207370455279102_24101272.mp4', +        u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',          u'info_dict': { -            u"uploader_id": u"videoseconds",  -            u"title": u"Instagram photo by @videoseconds" -        } +            u'uploader_id': u'aguynamedpatrick', +            u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)', +        },      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py new file mode 100644 index 000000000..5ba0a9061 --- /dev/null +++ b/youtube_dl/extractor/unistra.py @@ -0,0 +1,32 @@ +import re + +from .common import InfoExtractor + +class UnistraIE(InfoExtractor): +    _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)' + +    _TEST = { +        u'url': u'http://utv.unistra.fr/video.php?id_video=154', +        u'file': u'154.mp4', +        u'md5': u'736f605cfdc96724d55bb543ab3ced24', +        u'info_dict': { +            u'title': u'M!ss Yella', +            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc', +        }, +    } + +    def _real_extract(self, url): +        id = re.match(self._VALID_URL, url).group(1) +        webpage = self._download_webpage(url, id) +        file = re.search(r'file: "(.*?)",', webpage).group(1) +        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title') + +        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file + +        return {'id': id, +                'title': title, +                'ext': 'mp4', +                'url': video_url, +                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL), +                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'), +                } diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 14abd58e8..70408c4f0 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -11,14 +11,14 @@ class VevoIE(InfoExtractor):      Accepts urls from vevo.com or in the format 'vevo:{id}'      (currently used by MTVIE)      """ -    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$' +    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'      _TEST = {          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',          u'file': u'GB1101300280.mp4',          u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',          u'info_dict': { -            u"upload_date": u"20130624",  -            u"uploader": u"Hurts",  +            u"upload_date": u"20130624", +            u"uploader": u"Hurts",              u"title": u"Somebody to Die For"          }      } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cc9c8d018..512e06e2a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,18 +20,31 @@ class VimeoIE(InfoExtractor):      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'      _NETRC_MACHINE = 'vimeo'      IE_NAME = u'vimeo' -    _TEST = { -        u'url': u'http://vimeo.com/56015672', -        u'file': u'56015672.mp4', -        u'md5': u'8879b6cc097e987f02484baf890129e5', -        u'info_dict': { -            u"upload_date": u"20121220",  -            u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  -            u"uploader_id": u"user7108434",  -            u"uploader": u"Filippo Valsorda",  -            u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550" -        } -    } +    _TESTS = [ +        { +            u'url': u'http://vimeo.com/56015672', +            u'file': u'56015672.mp4', +            u'md5': u'8879b6cc097e987f02484baf890129e5', +            u'info_dict': { +                u"upload_date": u"20121220",  +                u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  +                u"uploader_id": u"user7108434",  +                u"uploader": u"Filippo Valsorda",  +                u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", +            }, +        }, +        { +            u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', +            u'file': u'68093876.mp4', +            u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82', +            u'note': u'Vimeo Pro video (#1197)', +            u'info_dict': { +                u'uploader_id': u'openstreetmapus',  +                u'uploader': u'OpenStreetMap US',  +                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography', +            }, +        }, +    ]      def _login(self):          (username, password) = self._get_login_info() @@ -83,7 +96,9 @@ class VimeoIE(InfoExtractor):          video_id = mobj.group('id')          if not mobj.group('proto'):              url = 'https://' + url -        if mobj.group('direct_link') or mobj.group('pro'): +        elif mobj.group('pro'): +            url = 'http://player.vimeo.com/video/' + video_id +        elif mobj.group('direct_link'):              url = 'https://vimeo.com/' + video_id          # Retrieve video webpage to extract further information diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1599dd484..7d6d07b17 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -333,7 +333,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  u"upload_date": u"20120506",                  u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",                  u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", -                u"uploader": u"IconaPop", +                u"uploader": u"Icona Pop",                  u"uploader_id": u"IconaPop"              }          }, diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8c93a275c..58e26bc49 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.17' +__version__ = '2013.08.21' | 
