about summary refs log tree commit diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-01-22 03:43:27 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-01-22 03:43:27 +0700
commit 271808b6b2bd75ec9bdf943a55dbc4737bfa6f81 (patch)
tree afaa13e61101a131d4759fcd30d6a9cea97c9c90 /youtube_dl/extractor
parent 8d1fbe0cb20fdfab8487bb478c2a002f12c1a5d9 (diff)
[pornflip] Improve and extract dash formats (closes #11795)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/pornflip.py 77
1 files changed, 55 insertions, 22 deletions
diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py
index b6077f7cb..a4a5d390e 100644
--- a/youtube_dl/extractor/pornflip.py
+++ b/youtube_dl/extractor/pornflip.py
@@ -4,56 +4,89 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
+ compat_str,
)
from ..utils import (
int_or_none,
try_get,
- RegexNotFoundError,
+ unified_timestamp,
)
class PornFlipIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
+ _TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
'info_dict': {
'id': 'wz7DfNhMmep',
'ext': 'mp4',
'title': '2 Amateurs swallow make his dream cumshots true',
- 'uploader': 'figifoto',
'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 112,
+ 'timestamp': 1481655502,
+ 'upload_date': '20161213',
+ 'uploader_id': '106786',
+ 'uploader': 'figifoto',
+ 'view_count': int,
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- uploader = self._html_search_regex(
- r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False)
- flashvars = compat_parse_qs(self._html_search_regex(
- r'<embed.+?flashvars="([^"]+)"',
- webpage, 'flashvars'))
+
+ webpage = self._download_webpage(
+ 'https://www.pornflip.com/v/%s' % video_id, video_id)
+
+ flashvars = compat_parse_qs(self._search_regex(
+ r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
+ webpage, 'flashvars', group='flashvars'))
+
title = flashvars['video_vars[title]'][0]
- thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0])
- formats = []
- for k, v in flashvars.items():
- height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None)
- if height:
- url = v[0]
- formats.append({
- 'height': int_or_none(height),
- 'url': url
- })
+ def flashvar(kind):
+ return try_get(
+ flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
+
+ formats = []
+ for key, value in flashvars.items():
+ if not (value and isinstance(value, list)):
+ continue
+ format_url = value[0]
+ if key == 'video_vars[hds_manifest]':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ continue
+ height = self._search_regex(
+ r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
+ if not height:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': 'http-%s' % height,
+ 'height': int_or_none(height),
+ })
self._sort_formats(formats)
+ uploader = self._html_search_regex(
+ (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
+ r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
+ webpage, 'uploader', fatal=False, group='uploader')
+
return {
'id': video_id,
'formats': formats,
'title': title,
+ 'thumbnail': flashvar('big_thumb'),
+ 'duration': int_or_none(flashvar('duration')),
+ 'timestamp': unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp')),
+ 'uploader_id': flashvar('author_id'),
'uploader': uploader,
- 'thumbnail': thumbnail,
+ 'view_count': int_or_none(flashvar('views')),
'age_limit': 18,
}