aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/cbs.py30
-rw-r--r--youtube_dl/extractor/common.py3
-rw-r--r--youtube_dl/extractor/dailymotion.py4
-rw-r--r--youtube_dl/extractor/theplatform.py12
-rw-r--r--youtube_dl/extractor/youtube.py4
6 files changed, 49 insertions, 5 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bd16f0f3e..41b6289a5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -20,6 +20,7 @@ from .brightcove import BrightcoveIE
from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
+from .cbs import CBSIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
new file mode 100644
index 000000000..ac0315853
--- /dev/null
+++ b/youtube_dl/extractor/cbs.py
@@ -0,0 +1,30 @@
+import re
+
+from .common import InfoExtractor
+
+
+class CBSIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
+
+ _TEST = {
+ u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+ u'file': u'4JUVEwq3wUT7.flv',
+ u'info_dict': {
+ u'title': u'Connect Chat feat. Garth Brooks',
+ u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
+ u'duration': 1495,
+ },
+ u'params': {
+ # rtmp download
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ real_id = self._search_regex(
+ r"video\.settings\.pid\s*=\s*'([^']+)';",
+ webpage, u'real video ID')
+ return self.url_result(u'theplatform:%s' % real_id)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 69a083b68..fe8ce9e6c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -34,8 +34,8 @@ class InfoExtractor(object):
The dictionaries must include the following fields:
id: Video identifier.
- url: Final video URL.
title: Video title, unescaped.
+ url: Final video URL.
ext: Video filename extension.
Instead of url and ext, formats can also specified.
@@ -54,6 +54,7 @@ class InfoExtractor(object):
player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
+ duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index aea7e557e..6685c94a3 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -28,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
- _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
+ _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = u'dailymotion'
_FORMATS = [
@@ -81,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1).split('_')[0].split('?')[0]
+ video_id = mobj.group('id')
url = 'http://www.dailymotion.com/video/%s' % video_id
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 61452e47d..cec65261b 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -3,6 +3,7 @@ import json
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
xpath_with_ns,
)
@@ -32,6 +33,17 @@ class ThePlatformIE(InfoExtractor):
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id)
+
+ try:
+ error_msg = next(
+ n.attrib['abstract']
+ for n in meta.findall(_x('.//smil:ref'))
+ if n.attrib.get('title') == u'Geographic Restriction')
+ except StopIteration:
+ pass
+ else:
+ raise ExtractorError(error_msg, expected=True)
+
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 874429b78..a68a214ca 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1377,9 +1377,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if 'length_seconds' not in video_info:
self._downloader.report_warning(u'unable to extract video duration')
- video_duration = ''
+ video_duration = None
else:
- video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
+ video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
# annotations
video_annotations = None