about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author: Allan Zhou <allanzp@gmail.com> 2013-08-26 15:16:13 -0700
committer: Allan Zhou <allanzp@gmail.com> 2013-08-26 15:16:13 -0700
commit: 99859d436cdee9acc9c869254e734eba5b748260 (patch)
tree: 5d3a425aa0e6fdc65890c46713ce901fa5b5489b /youtube_dl/extractor
parent: 39c6f507df5f69e5d9b41b054205ec310f6427a5 (diff)
parent: 1b01e2b085987b06bd7b360d779a6cb537d4752c (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 4
-rw-r--r-- youtube_dl/extractor/c56.py 4
-rw-r--r-- youtube_dl/extractor/dailymotion.py 2
-rw-r--r-- youtube_dl/extractor/generic.py 10
-rw-r--r-- youtube_dl/extractor/hark.py 35
-rw-r--r-- youtube_dl/extractor/ro220.py 42
-rw-r--r-- youtube_dl/extractor/rtlnow.py 17
-rw-r--r-- youtube_dl/extractor/youtube.py 2
8 files changed, 108 insertions, 8 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b4db8f0bf..f71ae2713 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
@@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
+from .ro220 import Ro220IE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
@@ -116,12 +118,14 @@ _ALL_CLASSES = [
]
_ALL_CLASSES.append(GenericIE)
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
"""
return [klass() for klass in _ALL_CLASSES]
+
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
return globals()[ie_name+'IE']
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py
index 4c8a8af09..dc3a8d47d 100644
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
_TEST ={
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
- u'file': u'93440716.mp4',
- u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+ u'file': u'93440716.flv',
+ u'md5': u'e59995ac63d0457783ea05f93f12a866',
u'info_dict': {
u'title': u'网事知多少 第32期:车怒',
},
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index fa8c630d0..1ea449ca8 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
- u"uploader": u"Alex and Van .",
+ u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index da016f7ee..d034a11bb 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -7,12 +7,14 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_error,
compat_urllib_parse,
+ compat_urllib_parse_urlparse,
compat_urllib_request,
ExtractorError,
)
from .brightcove import BrightcoveIE
+
class GenericIE(InfoExtractor):
IE_DESC = u'Generic downloader that works on some sites'
_VALID_URL = r'.*'
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
u'file': u'13601338388002.mp4',
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
u'info_dict': {
- u"uploader": u"www.hodiho.fr",
+ u"uploader": u"www.hodiho.fr",
u"title": u"R\u00e9gis plante sa Jeep"
}
},
@@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
self.report_extraction(video_id)
- # Look for BrigthCove:
+ # Look for BrightCove:
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.')
@@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1))
+ if video_url.startswith('//'):
+ video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
+ if '://' not in video_url:
+ video_url = url + ('' if url.endswith('/') else '/') + video_url
video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py
new file mode 100644
index 000000000..ab0a69697
--- /dev/null
+++ b/youtube_dl/extractor/hark.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class HarkIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
+ _TEST = {
+ u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
+ u'file': u'mmbzyhkgny.mp3',
+ u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
+ u'info_dict': {
+ u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+ embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
+ webpage = self._download_webpage(embed_url, video_id)
+
+ final_url = self._search_regex(r'src="(.+?).mp3"',
+ webpage, 'video url')+'.mp3'
+ title = self._html_search_regex(r'<title>(.+?)</title>',
+ webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
+ 'Sound Clip , Quote, MP3, and Ringtone - Hark','')
+
+ return {'id': video_id,
+ 'url' : final_url,
+ 'title': title,
+ 'ext': determine_ext(final_url),
+ }
diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py
new file mode 100644
index 000000000..c32f64d99
--- /dev/null
+++ b/youtube_dl/extractor/ro220.py
@@ -0,0 +1,42 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ compat_parse_qs,
+)
+
+
+class Ro220IE(InfoExtractor):
+ IE_NAME = '220.ro'
+ _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
+ _TEST = {
+ u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
+ u'file': u'LYV6doKo7f.mp4',
+ u'md5': u'03af18b73a07b4088753930db7a34add',
+ u'info_dict': {
+ u"title": u"Luati-le Banii sez 4 ep 1",
+ u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+
+ webpage = self._download_webpage(url, video_id)
+ flashVars_str = self._search_regex(
+ r'<param name="flashVars" value="([^"]+)"',
+ webpage, u'flashVars')
+ flashVars = compat_parse_qs(flashVars_str)
+
+ info = {
+ '_type': 'video',
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'url': flashVars['videoURL'][0],
+ 'title': flashVars['title'][0],
+ 'description': clean_html(flashVars['desc'][0]),
+ 'thumbnail': flashVars['preview'][0],
+ }
+ return info
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index 2f134e6a7..7bb236c2b 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -8,8 +8,8 @@ from ..utils import (
)
class RTLnowIE(InfoExtractor):
- """Information Extractor for RTLnow, RTL2now and VOXnow"""
- _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+ """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
+ _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
@@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
u'params': {
u'skip_download': True,
},
+ },
+ {
+ u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
+ u'file': u'99205.flv',
+ u'info_dict': {
+ u'upload_date': u'20080928',
+ u'title': u'Medicopter 117 - Angst!',
+ u'description': u'Angst!',
+ u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
+ },
+ u'params': {
+ u'skip_download': True,
+ },
}]
def _real_extract(self,url):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e4987b2b3..af01c9da0 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 85:
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
elif len(s) == 84:
- return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+ return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82: