diff options
| -rw-r--r-- | devscripts/youtube_genalgo.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/c56.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/dailymotion.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/hark.py | 35 | ||||
| -rw-r--r-- | youtube_dl/extractor/ro220.py | 42 | ||||
| -rw-r--r-- | youtube_dl/extractor/rtlnow.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 2 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 25 | 
10 files changed, 127 insertions, 18 deletions
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 014324439..6f1d6ef99 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -26,9 +26,9 @@ tests = [      # 85      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",       ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), -    # 84 +    # 84 - vflh9ybst 2013/08/23 (sporadic)      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", -     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), +     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),      # 83      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",       ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b4db8f0bf..f71ae2713 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE  from .generic import GenericIE  from .googleplus import GooglePlusIE  from .googlesearch import GoogleSearchIE +from .hark import HarkIE  from .hotnewhiphop import HotNewHipHopIE  from .howcast import HowcastIE  from .hypem import HypemIE @@ -57,6 +58,7 @@ from .pornotube import PornotubeIE  from .rbmaradio import RBMARadioIE  from .redtube import RedTubeIE  from .ringtv import RingTVIE +from .ro220 import Ro220IE  from .roxwel import RoxwelIE  from .rtlnow import RTLnowIE  from .sina import SinaIE @@ -116,12 +118,14 @@ _ALL_CLASSES = [  ]  _ALL_CLASSES.append(GenericIE) +  def gen_extractors():      """ Return a list of an instance of every supported extractor.      The order does matter; the first extractor matched is the one handling the URL.      """      return [klass() for klass in _ALL_CLASSES] +  def get_info_extractor(ie_name):      """Returns the info extractor class with the given ie_name"""      return globals()[ie_name+'IE'] diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py index 4c8a8af09..dc3a8d47d 100644 --- a/youtube_dl/extractor/c56.py +++ b/youtube_dl/extractor/c56.py @@ -12,8 +12,8 @@ class C56IE(InfoExtractor):      _TEST ={          u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', -        u'file': u'93440716.mp4', -        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e', +        u'file': u'93440716.flv', +        u'md5': u'e59995ac63d0457783ea05f93f12a866',          u'info_dict': {              u'title': u'网事知多少 第32期:车怒',          }, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index fa8c630d0..1ea449ca8 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):          u'file': u'x33vw9.mp4',          u'md5': u'392c4b85a60a90dc4792da41ce3144eb',          u'info_dict': { -            u"uploader": u"Alex and Van .",  +            u"uploader": u"Amphora Alex and Van .",               u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""          }      } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee..d034a11bb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,12 +7,14 @@ from .common import InfoExtractor  from ..utils import (      compat_urllib_error,      compat_urllib_parse, +    compat_urllib_parse_urlparse,      compat_urllib_request,      ExtractorError,  )  from .brightcove import BrightcoveIE +  class GenericIE(InfoExtractor):      IE_DESC = u'Generic downloader that works on some sites'      _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):              u'file': u'13601338388002.mp4',              u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',              u'info_dict': { -                u"uploader": u"www.hodiho.fr",  +                u"uploader": u"www.hodiho.fr",                  u"title": u"R\u00e9gis plante sa Jeep"              }          }, @@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          self.report_extraction(video_id) -        # Look for BrigthCove: +        # Look for BrightCove:          m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)          if m_brightcove is not None:              self.to_screen(u'Brightcove video detected.') @@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          video_url = compat_urllib_parse.unquote(mobj.group(1)) +        if video_url.startswith('//'): +            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url +        if '://' not in video_url: +            video_url = url + ('' if url.endswith('/') else '/') + video_url          video_id = os.path.basename(video_url)          # here's a fun little line of code for you: diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py new file mode 100644 index 000000000..ab0a69697 --- /dev/null +++ b/youtube_dl/extractor/hark.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +import re + +from .common import InfoExtractor +from ..utils import determine_ext + +class HarkIE(InfoExtractor): +    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+' +    _TEST = { +        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', +        u'file': u'mmbzyhkgny.mp3', +        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee', +        u'info_dict': { +            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ", +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group(1) +        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id) +        webpage = self._download_webpage(embed_url, video_id) + +        final_url = self._search_regex(r'src="(.+?).mp3"', +                                webpage, 'video url')+'.mp3' +        title = self._html_search_regex(r'<title>(.+?)</title>', +                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace( +                                'Sound Clip , Quote, MP3, and Ringtone - Hark','') + +        return {'id': video_id, +                'url' : final_url, +                'title': title, +                'ext': determine_ext(final_url), +                } diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py new file mode 100644 index 000000000..c32f64d99 --- /dev/null +++ b/youtube_dl/extractor/ro220.py @@ -0,0 +1,42 @@ +import re + +from .common import InfoExtractor +from ..utils import ( +    clean_html, +    compat_parse_qs, +) + + +class Ro220IE(InfoExtractor): +    IE_NAME = '220.ro' +    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)' +    _TEST = { +        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", +        u'file': u'LYV6doKo7f.mp4', +        u'md5': u'03af18b73a07b4088753930db7a34add', +        u'info_dict': { +            u"title": u"Luati-le Banii sez 4 ep 1", +            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('video_id') + +        webpage = self._download_webpage(url, video_id) +        flashVars_str = self._search_regex( +            r'<param name="flashVars" value="([^"]+)"', +            webpage, u'flashVars') +        flashVars = compat_parse_qs(flashVars_str) + +        info = { +            '_type': 'video', +            'id': video_id, +            'ext': 'mp4', +            'url': flashVars['videoURL'][0], +            'title': flashVars['title'][0], +            'description': clean_html(flashVars['desc'][0]), +            'thumbnail': flashVars['preview'][0], +        } +        return info diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 2f134e6a7..7bb236c2b 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -8,8 +8,8 @@ from ..utils import (  )  class RTLnowIE(InfoExtractor): -    """Information Extractor for RTLnow, RTL2now and VOXnow""" -    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' +    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW""" +    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'      _TESTS = [{          u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',          u'file': u'90419.flv', @@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):          u'params': {              u'skip_download': True,          }, +    }, +    { +        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', +        u'file': u'99205.flv', +        u'info_dict': { +            u'upload_date': u'20080928',  +            u'title': u'Medicopter 117 - Angst!', +            u'description': u'Angst!', +            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' +        }, +        u'params': { +            u'skip_download': True, +        },      }]      def _real_extract(self,url): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e4987b2b3..af01c9da0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          elif len(s) == 85:              return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]          elif len(s) == 84: -            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] +            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]          elif len(s) == 83:              return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]          elif len(s) == 82: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52cfb8a6d..ab1049cc0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -476,7 +476,7 @@ def formatSeconds(secs):  def make_HTTPS_handler(opts):      if sys.version_info < (3,2):          # Python's 2.x handler is very simplistic -        return compat_urllib_request.HTTPSHandler() +        return YoutubeDLHandlerHTTPS()      else:          import ssl          context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) @@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):          context.verify_mode = (ssl.CERT_NONE                                 if opts.no_check_certificate                                 else ssl.CERT_REQUIRED) -        return compat_urllib_request.HTTPSHandler(context=context) +        return YoutubeDLHandlerHTTPS(context=context)  class ExtractorError(Exception):      """Error during info extraction.""" @@ -569,7 +569,8 @@ class ContentTooShortError(Exception):          self.downloaded = downloaded          self.expected = expected -class YoutubeDLHandler(compat_urllib_request.HTTPHandler): + +class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler      """Handler for HTTP requests and responses.      This class, when installed with an OpenerDirector, automatically adds @@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):          ret.code = code          return ret -    def http_request(self, req): -        for h,v in std_headers.items(): +    def _http_request(self, req): +        for h, v in std_headers.items():              if h in req.headers:                  del req.headers[h]              req.add_header(h, v) @@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):              del req.headers['Youtubedl-user-agent']          return req -    def http_response(self, req, resp): +    def _http_response(self, req, resp):          old_resp = resp          # gzip          if resp.headers.get('Content-encoding', '') == 'gzip': @@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):              resp.msg = old_resp.msg          return resp -    https_request = http_request -    https_response = http_response + +class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler): +    http_request = YoutubeDLHandler_Template._http_request +    http_response = YoutubeDLHandler_Template._http_response + + +class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler): +    https_request = YoutubeDLHandler_Template._http_request +    https_response = YoutubeDLHandler_Template._http_response +  def unified_strdate(date_str):      """Return a string with the date in the format YYYYMMDD"""  | 
