aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author MikeCol <MikeCol@gmx.net> 2014-03-10 14:41:19 +0100
committer MikeCol <MikeCol@gmx.net> 2014-03-10 20:16:49 +0100
commit 4ea3137e41e17fc29b29541990162fca877df7b4 (patch)
tree 9b8635f866dca404b1f1b742367f17db9499ccea
parent ae7ed92057dcb7b9b582416d6dcf584843eca460 (diff)
download youtube-dl-4ea3137e41e17fc29b29541990162fca877df7b4.tar.xz
Playvid extractor
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/playvid.py 85
2 files changed, 86 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d43e6b764..4a14a2be6 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -176,6 +176,7 @@ from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
+from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py
new file mode 100644
index 000000000..a74f8f675
--- /dev/null
+++ b/youtube_dl/extractor/playvid.py
@@ -0,0 +1,85 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    determine_ext,
+    ExtractorError,
+)
+
+class PlayvidIE(InfoExtractor):
+    """Information extractor for videos hosted on www.playvid.com."""
+
+    _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
+    _TEST = {
+        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
+        'file': 'agbDDi7WZTV.mp4',
+        'md5': '44930f8afa616efdf9482daf4fe53e1e',
+        'info_dict': {
+            'title': 'Michelle Lewin in Miami Beach',
+            'duration': 240,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the watch page for *url* and build its info dict.
+
+        Title, duration, thumbnail and the per-resolution download URLs
+        are read from the ``video_vars[...]`` entries of the page's
+        ``flashvars`` attribute.  If the flashvars carry no title, the
+        HTML ``<title>`` element is used instead.
+
+        Returns a dict with the keys ``id``, ``formats``, ``title``,
+        ``thumbnail``, ``duration``, ``description`` and ``age_limit``.
+
+        Raises ExtractorError if no video URL could be found.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        self.report_extraction(video_id)
+
+        video_title = None
+        duration = None
+        video_thumbnail = None
+        formats = []
+
+        # most of the information is stored in the flashvars
+        flashvars_match = re.search(r'flashvars="(.+?)"', webpage)
+
+        if flashvars_match:
+            flashvars = compat_urllib_parse.unquote(flashvars_match.group(1))
+            for info in flashvars.split(r'&amp;'):
+                videovars_match = re.match(
+                    r'^video_vars\[(.+?)\]=(.+?)$', info)
+                if not videovars_match:
+                    continue
+                key, val = videovars_match.groups()
+
+                if key == 'title':
+                    video_title = val.replace('+', ' ')
+                elif key == 'duration':
+                    # The value arrives as text; store it as an integer
+                    # and drop it if it is not a valid integer.
+                    try:
+                        duration = int(val)
+                    except ValueError:
+                        duration = None
+                elif key == 'big_thumb':
+                    video_thumbnail = val
+                else:
+                    # The lazy key group stops at the first "]=", so a
+                    # nested entry such as video_vars[video_urls][360p]
+                    # yields the key "video_urls][360p".
+                    videourl_match = re.match(
+                        r'^video_urls\]\[(?P<resolution>\d+)p', key)
+                    if videourl_match:
+                        resolution = int(videourl_match.group('resolution'))
+                        formats.append({
+                            'resolution': resolution,  # 360, 480, ...
+                            'ext': determine_ext(val),
+                            'url': val,
+                        })
+
+        # fatal error, if no download url is found
+        if not formats:
+            raise ExtractorError('no video url found')
+
+        # Extract title - should be in the flashvars; if not, look elsewhere
+        if video_title is None:
+            video_title = self._html_search_regex(
+                r'<title>(.*?)</title', webpage, 'title')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'duration': duration,
+            'description': None,
+            'age_limit': 18,
+        }