diff options
| author | Allan Zhou <allanzp@gmail.com> | 2013-08-22 10:43:51 -0700 | 
|---|---|---|
| committer | Allan Zhou <allanzp@gmail.com> | 2013-08-22 10:43:51 -0700 | 
| commit | b4e60dac233c791e53281bea4014354b8280421e (patch) | |
| tree | bd3cc773a31942f7cbc6e2a918b052ebfea13133 | |
| parent | 5af7e056a746ad9d0dbfd82470fb216addaf9dbd (diff) | |
| parent | 83390b83d9652042203abd91473e5c764908cbd9 (diff) | |
Merge remote-tracking branch 'upstream/master'
| -rw-r--r-- | devscripts/youtube_genalgo.py | 21 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/jeuxvideo.py | 47 | ||||
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/slashdot.py | 23 | ||||
| -rw-r--r-- | youtube_dl/extractor/unistra.py | 32 | 
7 files changed, 153 insertions, 9 deletions
| diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 31d6ec952..dca963e8f 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -11,24 +11,27 @@ tests = [      # 90      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",       "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), +    # 89  +    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'", +     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),      # 88      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",       "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), -    # 87 - vflART1Nf 2013/07/24 +    # 87      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", -     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"), -    # 86 - vflm_D8eE 2013/07/31 +     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), +    # 86      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", -     ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"), -    # 85 - vflSAFCP9 2013/07/19 +     "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), +    # 85      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", -     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), +     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),      # 84      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", -     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), -    # 83 - vflTWC9KW 2013/08/01 +     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), +    # 83      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", -     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"), +     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),      # 82      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",       "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bf040aacd..3f77dba69 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -27,6 +27,7 @@ __authors__  = (      'Johny Mo Swag',      'Axel Noack',      'Albert Kim', +    'Pierre Rudloff',  )  __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5bb44e764..b4db8f0bf 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -36,6 +36,7 @@ from .ign import IGNIE, OneUPIE  from .ina import InaIE  from .infoq import InfoQIE  from .instagram import InstagramIE +from .jeuxvideo import JeuxVideoIE  from .jukebox import JukeboxIE  from .justintv import JustinTVIE  from .kankan import KankanIE @@ -50,6 +51,7 @@ from .myspass import MySpassIE  from .myvideo import MyVideoIE  from .nba import NBAIE  from .ooyala import OoyalaIE +from .pbs import PBSIE  from .photobucket import PhotobucketIE  from .pornotube import PornotubeIE  from .rbmaradio import RBMARadioIE @@ -58,6 +60,7 @@ from .ringtv import RingTVIE  from .roxwel import RoxwelIE  from .rtlnow import RTLnowIE  from .sina import SinaIE +from .slashdot import SlashdotIE  from .soundcloud import SoundcloudIE, SoundcloudSetIE  from .spiegel import SpiegelIE  from .stanfordoc import StanfordOpenClassroomIE @@ -72,6 +75,7 @@ from .tudou import TudouIE  from .tumblr import TumblrIE  from .tutv import TutvIE  from .ustream import UstreamIE +from .unistra import UnistraIE  from .vbox7 import Vbox7IE  from .veoh import VeohIE  from .vevo import VevoIE diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py new file mode 100644 index 000000000..4327bc13d --- /dev/null +++ b/youtube_dl/extractor/jeuxvideo.py @@ -0,0 +1,47 @@ +# coding: utf-8 + +import json +import re +import xml.etree.ElementTree + +from .common import InfoExtractor + +class JeuxVideoIE(InfoExtractor): +    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' + +    _TEST = { +        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', +        u'file': u'5182.mp4', +        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0', +        u'info_dict': { +            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité', +            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        title = re.match(self._VALID_URL, url).group(1) +        webpage = self._download_webpage(url, title) +        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage) + +        xml_link = m_download.group(1) +         +        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1) + +        xml_config = self._download_webpage(xml_link, title, +                                                  'Downloading XML config') +        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) +        info = re.search(r'<format\.json>(.*?)</format\.json>', +                         xml_config, re.MULTILINE|re.DOTALL).group(1) +        info = json.loads(info)['versions'][0] +         +        video_url = 'http://video720.jeuxvideo.com/' + info['file'] + +        return {'id': id, +                'title' : config.find('titre_video').text, +                'ext' : 'mp4', +                'url' : video_url, +                'description': self._og_search_description(webpage), +                'thumbnail': config.find('image').text, +                } diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py new file mode 100644 index 000000000..65462d867 --- /dev/null +++ b/youtube_dl/extractor/pbs.py @@ -0,0 +1,34 @@ +import re +import json + +from .common import InfoExtractor + + +class PBSIE(InfoExtractor): +    _VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?' + +    _TEST = { +        u'url': u'http://video.pbs.org/video/2365006249/', +        u'file': u'2365006249.mp4', +        u'md5': 'ce1888486f0908d555a8093cac9a7362', +        u'info_dict': { +            u'title': u'A More Perfect Union', +            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a', +            u'duration': 3190, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id +        info_page = self._download_webpage(info_url, video_id) +        info =json.loads(info_page) +        return {'id': video_id, +                'title': info['title'], +                'url': info['alternate_encoding']['url'], +                'ext': 'mp4', +                'description': info['program'].get('description'), +                'thumbnail': info.get('image_url'), +                'duration': info.get('duration'), +                } diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py new file mode 100644 index 000000000..2cba53076 --- /dev/null +++ b/youtube_dl/extractor/slashdot.py @@ -0,0 +1,23 @@ +import re + +from .common import InfoExtractor + + +class SlashdotIE(InfoExtractor): +    _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)' + +    _TEST = { +        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', +        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', +        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', +        u'info_dict': { +            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) +        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url') +        return self.url_result(ooyala_url, 'Ooyala') diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py new file mode 100644 index 000000000..5ba0a9061 --- /dev/null +++ b/youtube_dl/extractor/unistra.py @@ -0,0 +1,32 @@ +import re + +from .common import InfoExtractor + +class UnistraIE(InfoExtractor): +    _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)' + +    _TEST = { +        u'url': u'http://utv.unistra.fr/video.php?id_video=154', +        u'file': u'154.mp4', +        u'md5': u'736f605cfdc96724d55bb543ab3ced24', +        u'info_dict': { +            u'title': u'M!ss Yella', +            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc', +        }, +    } + +    def _real_extract(self, url): +        id = re.match(self._VALID_URL, url).group(1) +        webpage = self._download_webpage(url, id) +        file = re.search(r'file: "(.*?)",', webpage).group(1) +        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title') + +        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file + +        return {'id': id, +                'title': title, +                'ext': 'mp4', +                'url': video_url, +                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL), +                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'), +                } | 
