diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-06-18 05:42:20 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-06-18 05:42:20 +0700 |
commit | 667d96480b4f9c78ceace063415c3424d4d562ea (patch) | |
tree | 5b5f77b08f42956e70555fe2e56abb36caf8668e /youtube_dl/extractor | |
parent | e6fe993c318738fee5a4a2ce7a86c4512e42653a (diff) |
[pornhd] Detect removed videos and modernize
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/pornhd.py | 28 |
1 file changed, 18 insertions, 10 deletions
```diff
diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py
index e7721b013..7a5f00fe0 100644
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     js_to_json,
 )
@@ -37,17 +37,17 @@ class PornHdIE(InfoExtractor):
         title = self._html_search_regex(
             [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
              r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
-        description = self._html_search_regex(
-            r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
-            webpage, 'description', fatal=False, group='value')
-        view_count = int_or_none(self._html_search_regex(
-            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
-        thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
-        sources = json.loads(js_to_json(self._search_regex(
+        sources = self._parse_json(js_to_json(self._search_regex(
             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
-            webpage, 'sources')))
+            webpage, 'sources', default='{}')), video_id)
+
+        if not sources:
+            message = self._html_search_regex(
+                r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+                webpage, 'error message', group='value')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
         formats = []
         for format_id, video_url in sources.items():
             if not video_url:
@@ -61,6 +61,14 @@ class PornHdIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
+        description = self._html_search_regex(
+            r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
+            webpage, 'description', fatal=False, group='value')
+        view_count = int_or_none(self._html_search_regex(
+            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+        thumbnail = self._search_regex(
+            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
         return {
             'id': video_id,
             'display_id': display_id,
```