diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-02-03 06:05:56 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-02-03 06:05:56 +0700 | 
| commit | 7d55b62ff2e9f3666e6cc4daa969647b917331cd (patch) | |
| tree | a4a9c38b7563c8a434366cebb7e7e5c63b26eac2 | |
| parent | 0d006fac5c925f95f8ba0d58ee6d7e844e216d29 (diff) | |
[popcorntimes] Add extractor (closes #23949)
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/popcorntimes.py | 99 | 
2 files changed, 100 insertions, 0 deletions
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_chr,
)
from ..utils import int_or_none


# Registered in youtube_dl/extractor/extractors.py (between the polskieradio
# and popcorntv entries) with:
#     from .popcorntimes import PopcorntimesIE


# Extractor for popcorntimes.tv movie pages.  URLs carry a language/section
# segment, a short movie id and a human-readable slug (used as display_id).
class PopcorntimesIE(InfoExtractor):
    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
        'md5': '93f210991ad94ba8c3485950a2453257',
        'info_dict': {
            'id': 'A1XCFvz',
            'display_id': 'haensel-und-gretel-opera-fantasy',
            'ext': 'mp4',
            'title': 'Hänsel und Gretel',
            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'John Paul',
            'release_date': '19541009',
            'duration': 4260,
            'tbr': 5380,
            'width': 720,
            'height': 540,
        },
    }

    def _real_extract(self, url):
        """Download the movie page at ``url`` and build its info dict.

        The media URL is read from the page's ``PCTMLOC`` string: its ASCII
        letters are rotated by 13 places, and the result is base64-decoded
        and interpreted as UTF-8.  The remaining fields come from the page
        markup and its meta tags.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        display_id = match.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # Prefer the visible <h1> heading; the meta tag is the mandatory
        # fallback (fatal when it is missing as well).
        title = self._search_regex(
            r'<h1>([^<]+)', webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'ya:ovs:original_name', webpage, 'title', fatal=True)

        obfuscated = self._search_regex(
            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
            group='value')

        def rotate13(char):
            # Rotate ASCII letters by 13 places within their own case;
            # every other character (digits, '+', '/', '=') is kept as is.
            code = ord(char)
            if ord('A') <= code <= ord('Z'):
                first = ord('A')
            elif ord('a') <= code <= ord('z'):
                first = ord('a')
            else:
                return compat_chr(code)
            return compat_chr(first + (code - first + 13) % 26)

        encoded_url = ''.join(rotate13(char) for char in obfuscated)
        video_url = compat_b64decode(encoded_url).decode('utf-8')

        description = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
            'description', fatal=False)

        # Use the preview image from the page, else the OpenGraph thumbnail.
        thumbnail = self._search_regex(
            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None,
            group='value')
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        creator = self._html_search_meta(
            'video:director', webpage, 'creator', default=None)

        # Strip the dashes from the meta value (the test page yields
        # '19541009'); a missing/empty value is passed through untouched.
        raw_date = self._html_search_meta(
            'video:release_date', webpage, default=None)
        release_date = raw_date.replace('-', '') if raw_date else raw_date

        def meta_as_int(meta_name):
            # Optional numeric meta tag, converted with int_or_none.
            value = self._html_search_meta(meta_name, webpage, default=None)
            return int_or_none(value)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'creator': creator,
            'release_date': release_date,
            'duration': meta_as_int('video:duration'),
            'tbr': meta_as_int('ya:ovs:bitrate'),
            'width': meta_as_int('og:video:width'),
            'height': meta_as_int('og:video:height'),
            # The page URL is sent as Referer with the media request.
            'http_headers': {
                'Referer': url,
            },
        }
