aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2014-11-09 08:30:12 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2014-11-09 08:30:12 +0100
commit8bb9b97c97d7114dbbc3f7b602130e82d2cfd16f (patch)
tree18cf24dafa9751ea40fcbbda9c5f88320046be6b /youtube_dl/extractor
parent4164f0117ece057a84e67df2239b7fdaa69e27a7 (diff)
parentc2b61af54827373780415edce92b971b43ceead1 (diff)
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/generic.py1
-rw-r--r--youtube_dl/extractor/goshgay.py43
-rw-r--r--youtube_dl/extractor/niconico.py3
-rw-r--r--youtube_dl/extractor/vice.py38
-rw-r--r--youtube_dl/extractor/wrzuta.py2
6 files changed, 57 insertions, 31 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3f85c99cd..3c1807f15 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -421,6 +421,7 @@ from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
from .vh1 import VH1IE
+from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 8dd11e202..01d6a57f8 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -576,6 +576,7 @@ class GenericIE(InfoExtractor):
return {
'id': video_id,
'title': os.path.splitext(url_basename(url))[0],
+ 'direct': True,
'formats': [{
'format_id': m.group('format_id'),
'url': url,
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
index 7bca21ad0..18474cbb7 100644
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dl/extractor/goshgay.py
@@ -1,15 +1,11 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
- str_to_int,
ExtractorError,
)
-import json
class GoshgayIE(InfoExtractor):
@@ -27,36 +23,27 @@ class GoshgayIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', webpage, default='false')
+ config_url = self._search_regex(
+ r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
- player_config = self._search_regex(
- r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
- player_vars = json.loads(player_config.replace("'", '"'))
- width = str_to_int(player_vars.get('width'))
- height = str_to_int(player_vars.get('height'))
- config_uri = player_vars.get('config')
+ config = self._download_xml(
+ config_url, video_id, 'Downloading player config XML')
- if config_uri is None:
- raise ExtractorError('Missing config URI')
- node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
- errnote='Unable to download XML')
- if node is None:
+ if config is None:
raise ExtractorError('Missing config XML')
- if node.tag != 'config':
+ if config.tag != 'config':
raise ExtractorError('Missing config attribute')
- fns = node.findall('file')
- imgs = node.findall('image')
- if len(fns) != 1:
+ fns = config.findall('file')
+ if len(fns) < 1:
raise ExtractorError('Missing media URI')
video_url = fns[0].text
- if len(imgs) < 1:
- thumbnail = None
- else:
- thumbnail = imgs[0].text
url_comp = compat_urlparse.urlparse(url)
ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
@@ -65,9 +52,7 @@ class GoshgayIE(InfoExtractor):
'id': video_id,
'url': video_url,
'title': title,
- 'width': width,
- 'height': height,
'thumbnail': thumbnail,
'http_referer': ref,
- 'age_limit': 18,
+ 'age_limit': 0 if family_friendly == 'true' else 18,
}
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 45cbd4ee9..3b5784e8f 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -175,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor):
entries = [{
'_type': 'url',
'ie_key': NiconicoIE.ie_key(),
- 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+ 'url': ('http://www.nicovideo.jp/watch/%s' %
+ entry['item_data']['video_id']),
} for entry in entries]
return {
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
new file mode 100644
index 000000000..f11ca8217
--- /dev/null
+++ b/youtube_dl/extractor/vice.py
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
+class ViceIE(InfoExtractor):
+ _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
+
+ _TEST = {
+ 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+ 'info_dict': {
+ 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+ 'ext': 'mp4',
+ 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ },
+ 'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ try:
+ embed_code = self._search_regex(
+ r'embedCode=([^&\'"]+)', webpage,
+ 'ooyala embed code')
+ ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+ print(ooyala_url)
+ except ExtractorError:
+ raise ExtractorError('The page doesn\'t contain a video', expected=True)
+ return self.url_result(ooyala_url, ie='Ooyala')
+
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py
index 34dd6d952..41756784a 100644
--- a/youtube_dl/extractor/wrzuta.py
+++ b/youtube_dl/extractor/wrzuta.py
@@ -49,7 +49,7 @@ class WrzutaIE(InfoExtractor):
quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
- audio_table = {'flv': 'mp3', 'webm': 'ogg'}
+ audio_table = {'flv': 'mp3', 'webm': 'ogg', 'mp3': 'mp3'}
embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)