diff options
| -rw-r--r-- | youtube_dl/extractor/meipai.py | 153 | 
1 files changed, 79 insertions, 74 deletions
| diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index 2ea592055..35914fd4b 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -1,99 +1,104 @@  # coding: utf-8  from __future__ import unicode_literals -from ..utils import parse_iso8601  from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    parse_duration, +    unified_timestamp, +)  class MeipaiIE(InfoExtractor):      IE_DESC = '美拍'      _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)' -    _TESTS = [ -        { -            'url': 'http://www.meipai.com/media/531697625', -            'md5': 'e3e9600f9e55a302daecc90825854b4f', -            'info_dict': { -                'id': '531697625', -                'ext': 'mp4', -                'title': '#葉子##阿桑##余姿昀##超級女聲#', -                'description': '#葉子##阿桑##余姿昀##超級女聲#', -                'thumbnail': 're:^https?://.*\.jpg$', -                'creator': '她她-TATA', -                'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], -                'release_date': 1465492420, -            } -        }, -        { -            'url': 'http://www.meipai.com/media/576409659', -            'md5': '2e807c16ebe67b8b6b3c8dcacbc32f48', -            'info_dict': { -                'id': '576409659', -                'ext': 'mp4', -                'title': '#失語者##蔡健雅##吉他彈唱#', -                'description': '#失語者##蔡健雅##吉他彈唱#', -                'thumbnail': 're:^https?://.*\.jpg$', -                'creator': '她她-TATA', -                'tags': ['失語者', '蔡健雅', '吉他彈唱'], -                'release_date': 1472534847, -            } -        }, +    _TESTS = [{ +        # regular uploaded video +        'url': 'http://www.meipai.com/media/531697625', +        'md5': 'e3e9600f9e55a302daecc90825854b4f', +        'info_dict': { +            'id': '531697625', +            'ext': 'mp4', +            'title': '#葉子##阿桑##余姿昀##超級女聲#', +            'description': '#葉子##阿桑##余姿昀##超級女聲#', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 152, +            'timestamp': 1465492420, +            'upload_date': '20160609', +            'view_count': 35511, +            'creator': '她她-TATA', +            'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], +        } +    }, {          # record of live streaming -        { -            'url': 'http://www.meipai.com/media/585526361', -            'md5': 'ff7d6afdbc6143342408223d4f5fb99a', -            'info_dict': { -                'id': '585526361', -                'ext': 'mp4', -                'title': '姿昀和善願 練歌練琴啦😁😁😁', -                'description': '姿昀和善願 練歌練琴啦😁😁😁', -                'thumbnail': 're:^https?://.*\.jpg$', -                'creator': '她她-TATA', -                'release_date': 1474311799, -            } -        }, -    ] +        'url': 'http://www.meipai.com/media/585526361', +        'md5': 'ff7d6afdbc6143342408223d4f5fb99a', +        'info_dict': { +            'id': '585526361', +            'ext': 'mp4', +            'title': '姿昀和善願 練歌練琴啦😁😁😁', +            'description': '姿昀和善願 練歌練琴啦😁😁😁', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 5975, +            'timestamp': 1474311799, +            'upload_date': '20160919', +            'view_count': 1215, +            'creator': '她她-TATA', +        } +    }]      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        title = self._og_search_title(webpage, default=None) -        if title is None: -            # fall back to text used in title -            title = self._html_search_regex( -                r'<title[^>]*>(.+)</title>', webpage, 'title') +        title = self._og_search_title( +            webpage, default=None) or self._html_search_regex( +            r'<title[^>]*>([^<]+)</title>', webpage, 'title') -        release_date = self._og_search_property( -            'video:release_date', webpage, 'release date', fatal=False) -        release_date = parse_iso8601(release_date) +        formats = [] + +        # recorded playback of live streaming +        m3u8_url = self._html_search_regex( +            r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)', +            webpage, 'm3u8 url', group='url', default=None) +        if m3u8_url: +            formats.extend(self._extract_m3u8_formats( +                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', +                m3u8_id='hls', fatal=False)) + +        if not formats: +            # regular uploaded video +            video_url = self._search_regex( +                r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url', +                group='url', default=None) +            if video_url: +                formats.append({ +                    'url': video_url, +                    'format_id': 'http', +                }) + +        timestamp = unified_timestamp(self._og_search_property( +            'video:release_date', webpage, 'release date', fatal=False))          tags = self._og_search_property(              'video:tag', webpage, 'tags', default='').split(',') -        info = { +        view_count = int_or_none(self._html_search_meta( +            'interactionCount', webpage, 'view count')) +        duration = parse_duration(self._html_search_meta( +            'duration', webpage, 'duration')) +        creator = self._og_search_property( +            'video:director', webpage, 'creator', fatal=False) + +        return {              'id': video_id,              'title': title, -            'thumbnail': self._og_search_thumbnail(webpage),              'description': self._og_search_description(webpage), -            'release_date': release_date, -            'creator': self._og_search_property( -                'video:director', webpage, 'creator', fatal=False), +            'thumbnail': self._og_search_thumbnail(webpage), +            'duration': duration, +            'timestamp': timestamp, +            'view_count': view_count, +            'creator': creator,              'tags': tags, +            'formats': formats,          } - -        keywords = self._html_search_meta( -            'keywords', webpage, 'keywords', default=[]) - -        if '直播回放' in keywords: -            # recorded playback of live streaming -            m3u8_url = self._html_search_regex( -                r'file:\s*encodeURIComponent\(["\'](.+)["\']\)', -                webpage, -                'm3u8_url') -            info['formats'] = self._extract_m3u8_formats( -                m3u8_url, video_id, 'mp4', 'm3u8_native') -        else: -            # regular uploaded video -            info['url'] = self._og_search_video_url(webpage) - -        return info | 
