diff options
author | Matthew Rayfield <matthewrayfield@gmail.com> | 2014-11-25 02:54:13 -0500 |
---|---|---|
committer | Matthew Rayfield <matthewrayfield@gmail.com> | 2014-11-25 02:54:13 -0500 |
commit | f0c3d729d7acb3af64f21b6042d20127ce7149ae (patch) | |
tree | 600d9111147f8e23a9cf49a9a0e7944683c7d970 | |
parent | 93129d9442cf96b71a6f0d191ed0e80de38382d1 (diff) |
[tmz] Add new extractor
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/tmz.py | 39 |
2 files changed, 40 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a680973de..7e91db601 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -393,6 +393,7 @@ from .thesixtyone import TheSixtyOneIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
+from .tmz import TMZIE
 from .tnaflix import TNAFlixIE
 from .thvideo import (
     THVideoIE,
diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py
new file mode 100644
index 000000000..b82dc8233
--- /dev/null
+++ b/youtube_dl/extractor/tmz.py
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TMZIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://www.tmz.com/videos/0_okj015ty/',
+        'md5': '791204e3bf790b1426cb2db0706184c0',
+        'info_dict': {
+            'id': '0_okj015ty',
+            'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
+            'ext': 'mp4',
+            'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
+            'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
+            'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        url = self._html_search_regex(r'<meta name="VideoURL" content="(.+)"', webpage, 'url')
+        title = self._html_search_regex(r'<meta property="og:title" content="(.+)"', webpage, 'title')
+        description = self._html_search_regex(r'<meta property="og:description" content="(.+)"', webpage, 'description')
+        thumbnail_url = self._html_search_regex(r'<meta name="ThumbURL" content="(.+)"', webpage, 'thumbnail url')
+
+        return {
+            'id': video_id,
+            'url': url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail_url,
+        }
```