diff options
| -rw-r--r-- | youtube_dl/extractor/theintercept.py | 67 | 
1 files changed, 24 insertions, 43 deletions
diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py
index b096a28de..8cb3c3669 100644
--- a/youtube_dl/extractor/theintercept.py
+++ b/youtube_dl/extractor/theintercept.py
@@ -1,24 +1,28 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-
+from ..compat import compat_str
 from ..utils import (
+    parse_iso8601,
+    int_or_none,
     ExtractorError,
 )
 
+
 class TheInterceptIE(InfoExtractor):
-    _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>.+?)/'
+    _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
+        'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
         'info_dict': {
-            'id': 'thisisacoup-episode-four-surrender-or-die',
+            'id': '46214',
             'ext': 'mp4',
             'title': '#ThisIsACoup – Episode Four: Surrender or Die',
-            'upload_date': '20151218',
             'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
+            'timestamp': 1450429239,
+            'upload_date': '20151218',
+            'comment_count': int,
         }
     }]
 
@@ -26,43 +30,20 @@ class TheInterceptIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        mobj = re.search(r'initialStoreTree =(?P<json_data>.+})', webpage)
-        if mobj is None:
-            raise ExtractorError('Unable to extract initialStoreTree')
-        json_data = self._parse_json(mobj.group('json_data'), display_id)
+        json_data = self._parse_json(self._search_regex(
+            r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
+            'initialStoreTree'), display_id)
 
-        info = None
         for post in json_data['resources']['posts'].values():
             if post['slug'] == display_id:
-                info = post
-                break
-        if info is None:
-            raise ExtractorError('Unable to find info for %s'%display_id)
-
-        title = info['title']
-        description = info['excerpt']
-        upload_date = info['date'][:10].replace('-', '')
-        video_id = info['fov_videoid']
-        creator = ','.join([a['display_name'] for a in info['authors']])
-        thumbnail = self._og_search_property('image', webpage)
-        content_id = thumbnail.split('/')[-1].split('.')[0]
-        content_url = 'https://content.jwplatform.com/jw6/{content_id}.xml'.format(content_id=content_id)
-        content = self._download_xml(content_url, video_id)
-
-        formats = []
-        for source in content.findall('.//{http://rss.jwpcdn.com/}source'):
-            if source.attrib['file'].endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(
-                    source.attrib['file'], video_id, 'mp4', preference=1, m3u8_id='hls'))
-
-        return {
-            'creator': creator,
-            'description': description,
-            'display_id': display_id,
-            'formats': formats,
-            'id': video_id,
-            'id': video_id,
-            'thumbnail': thumbnail,
-            'title': title,
-            'upload_date': upload_date,
-        }
+                return {
+                    '_type': 'url_transparent',
+                    'url': 'jwplatform:%s' % post['fov_videoid'],
+                    'id': compat_str(post['ID']),
+                    'display_id': display_id,
+                    'title': post['title'],
+                    'description': post.get('excerpt'),
+                    'timestamp': parse_iso8601(post.get('date')),
+                    'comment_count': int_or_none(post.get('comments_number')),
+                }
+        raise ExtractorError('Unable to find the current post')
