about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Andrew "Akari" Alexeyew <akari@dbc.1gb.ua> 2015-12-02 06:00:47 +0200
committer: Sergey M․ <dstftw@gmail.com> 2016-01-22 23:29:24 +0600
commit: d570746e45cff3c0f89654bf748e44a5da75a924 (patch)
tree: 2100d351de4a4e310f7a9a454894369cd2a41de7
parent: 4fcd9d147df9b06d954b8f8a1749b50609529ed4 (diff)
download: youtube-dl-d570746e45cff3c0f89654bf748e44a5da75a924.tar.xz
[nuevo] Generalize nuevo extractor and add support for trollvids
Supports only the nuevo player for now (most common).

Squashed commits:
[trollvids] Convert duration to an int.
[trollvids] Added a test.
[trollvids] Made flake8 shut up.
Generalized the Nuevo extractor. Affects: anitube, trollvids, trutube.
[nuevo] Complied with the code comments.
-rw-r--r-- youtube_dl/extractor/__init__.py | 1
-rw-r--r-- youtube_dl/extractor/anitube.py | 34
-rw-r--r-- youtube_dl/extractor/nuevo.py | 37
-rw-r--r-- youtube_dl/extractor/trollvids.py | 49
-rw-r--r-- youtube_dl/extractor/trutube.py | 23
5 files changed, 98 insertions, 46 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index cee5cfe7c..6f2b35cf1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -726,6 +726,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
+from .trollvids import TrollvidsIE
from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE
diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py
index 23f942ae2..73690df82 100644
--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@@ -2,10 +2,10 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
-class AnitubeIE(InfoExtractor):
+class AnitubeIE(NuevoBaseIE):
IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
@@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
key = self._search_regex(
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
- config_xml = self._download_xml(
- 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
-
- video_title = config_xml.find('title').text
- thumbnail = config_xml.find('image').text
- duration = float(config_xml.find('duration').text)
-
- formats = []
- video_url = config_xml.find('file')
- if video_url is not None:
- formats.append({
- 'format_id': 'sd',
- 'url': video_url.text,
- })
- video_url = config_xml.find('filehd')
- if video_url is not None:
- formats.append({
- 'format_id': 'hd',
- 'url': video_url.text,
- })
-
- return {
- 'id': video_id,
- 'title': video_title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats
- }
+ config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
+ return self._extract_nuevo(config_url, video_id)
diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py
new file mode 100644
index 000000000..ccc697e4f
--- /dev/null
+++ b/youtube_dl/extractor/nuevo.py
@@ -0,0 +1,37 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ float_or_none,
+ xpath_text
+)
+
+
+class NuevoBaseIE(InfoExtractor):
+ def _extract_nuevo(self, config_url, video_id):
+ tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip())
+
+ title = xpath_text(tree, './title')
+ if title:
+ title = title.strip()
+
+ thumbnail = xpath_text(tree, './image')
+ duration = float_or_none(xpath_text(tree, './duration'))
+
+ formats = []
+ for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
+ video_url = tree.find(element_name)
+ video_url is None or formats.append({
+ 'format_id': format_id,
+ 'url': video_url.text
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats
+ }
diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py
new file mode 100644
index 000000000..e4fe620f7
--- /dev/null
+++ b/youtube_dl/extractor/trollvids.py
@@ -0,0 +1,49 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .nuevo import NuevoBaseIE
+
+from ..compat import (
+ compat_urllib_parse_unquote
+)
+
+import re
+
+
+class TrollvidsIE(NuevoBaseIE):
+ _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)'
+ IE_NAME = 'trollvids'
+
+ def _real_extract(self, url):
+ match = re.match(self._VALID_URL, url)
+
+ video_id = match.group('id')
+ raw_video_title = match.group('title')
+ url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title)
+ config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id
+
+ info = self._extract_nuevo(config_url, video_id)
+
+ info.update({
+ 'webpage_url': url,
+ 'age_limit': 18
+ })
+
+ if 'title' not in info:
+ info['title'] = compat_urllib_parse_unquote(raw_video_title)
+
+ return info
+
+ _TESTS = [
+ {
+ 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
+ 'md5': '1d53866b2c514b23ed69e4352fdc9839',
+ 'info_dict': {
+ 'id': '2349002',
+ 'ext': 'mp4',
+ 'title': "【MMD R-18】ガールフレンド carry_me_off",
+ 'age_limit': 18,
+ 'duration': 216.78,
+ },
+ },
+ ]
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
index e7b79243a..d7ec2ec26 100644
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@@ -1,10 +1,9 @@
from __future__ import unicode_literals
-from .common import InfoExtractor
-from ..utils import xpath_text
+from .nuevo import NuevoBaseIE
-class TruTubeIE(InfoExtractor):
+class TruTubeIE(NuevoBaseIE):
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+ config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id
- config = self._download_xml(
- 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
- video_id, transform_source=lambda s: s.strip())
+ info = self._extract_nuevo(config_url, video_id)
- # filehd is always 404
- video_url = xpath_text(config, './file', 'video URL', fatal=True)
- title = xpath_text(config, './title', 'title').strip()
- thumbnail = xpath_text(config, './image', ' thumbnail')
+ # filehd always 404s
+ info['formats'] = info['formats'][:1]
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'thumbnail': thumbnail,
- }
+ return info