diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/amp.py | 84 | ||||
| -rw-r--r-- | youtube_dl/extractor/bleacherreport.py | 106 | ||||
| -rw-r--r-- | youtube_dl/extractor/dramafever.py | 69 | ||||
| -rw-r--r-- | youtube_dl/extractor/foxnews.py | 73 | 
5 files changed, 218 insertions, 118 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index add1df023..760b65441 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -61,6 +61,10 @@ from .beatportpro import BeatportProIE  from .bet import BetIE  from .bild import BildIE  from .bilibili import BiliBiliIE +from .bleacherreport import ( +    BleacherReportIE, +    BleacherReportCMSIE, +)  from .blinkx import BlinkxIE  from .bloomberg import BloombergIE  from .bpb import BpbIE diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py new file mode 100644 index 000000000..dcc3c97f1 --- /dev/null +++ b/youtube_dl/extractor/amp.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    parse_iso8601, +) + + +class AMPIE(InfoExtractor): +    # parse Akamai Adaptive Media Player feed +    def _extract_feed_info(self, url): +        item = self._download_json( +            url, None, 'Downloading Akamai AMP feed', +            'Unable to download Akamai AMP feed')['channel']['item'] + +        video_id = item['guid'] + +        def get_media_node(name, default=None): +            media_name = 'media-%s' % name +            media_group = item.get('media-group') or item +            return media_group.get(media_name) or item.get(media_name) or item.get(name, default) + +        thumbnails = [] +        media_thumbnail = get_media_node('thumbnail') +        if media_thumbnail: +            if isinstance(media_thumbnail, dict): +                media_thumbnail = [media_thumbnail] +            for thumbnail_data in media_thumbnail: +                thumbnail = thumbnail_data['@attributes'] +                thumbnails.append({ +                    'url': self._proto_relative_url(thumbnail['url'], 'http:'), +                    'width': int_or_none(thumbnail.get('width')), +                    'height': int_or_none(thumbnail.get('height')), +                }) + +        subtitles = {} +        media_subtitle = get_media_node('subTitle') +        if media_subtitle: +            if isinstance(media_subtitle, dict): +                media_subtitle = [media_subtitle] +            for subtitle_data in media_subtitle: +                subtitle = subtitle_data['@attributes'] +                lang = subtitle.get('lang') or 'en' +                subtitles[lang] = [{'url': subtitle['href']}] + +        formats = [] +        media_content = get_media_node('content') +        if isinstance(media_content, dict): +            media_content = [media_content] +        for media_data in media_content: +            media = media_data['@attributes'] +            media_type = media['type'] +            if media_type == 'video/f4m': +                f4m_formats = self._extract_f4m_formats( +                    media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', +                    video_id, f4m_id='hds', fatal=False) +                if f4m_formats: +                    formats.extend(f4m_formats) +            elif media_type == 'application/x-mpegURL': +                m3u8_formats = self._extract_m3u8_formats( +                    media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False) +                if m3u8_formats: +                    formats.extend(m3u8_formats) +            else: +                formats.append({ +                    'format_id': media_data['media-category']['@attributes']['label'], +                    'url': media['url'], +                    'tbr': int_or_none(media.get('bitrate')), +                    'filesize': int_or_none(media.get('fileSize')), +                }) + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': get_media_node('title'), +            'description': get_media_node('description'), +            'thumbnails': thumbnails, +            'timestamp': parse_iso8601(item.get('pubDate'), ' '), +            'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py new file mode 100644 index 000000000..bd2a6340b --- /dev/null +++ b/youtube_dl/extractor/bleacherreport.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .amp import AMPIE +from ..utils import ( +    ExtractorError, +    int_or_none, +    parse_iso8601, +) + + +class BleacherReportIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)' +    _TESTS = [{ +        'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football', +        'md5': 'a3ffc3dc73afdbc2010f02d98f990f20', +        'info_dict': { +            'id': '2496438', +            'ext': 'mp4', +            'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?', +            'uploader_id': 3992341, +            'description': 'CFB, ACC, Florida State', +            'timestamp': 1434380212, +            'upload_date': '20150615', +            'uploader': 'Team Stream Now ', +        }, +        'add_ie': ['Ooyala'], +    }, { +        'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', +        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50', +        'info_dict': { +            'id': '2586817', +            'ext': 'mp4', +            'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', +            'timestamp': 1446839961, +            'uploader': 'Sean Fay', +            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20', +            'uploader_id': 6466954, +            'upload_date': '20151011', +        }, +        'add_ie': ['Youtube'], +    }] + +    def _real_extract(self, url): +        article_id = self._match_id(url) + +        article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + +        thumbnails = [] +        primary_photo = article_data.get('primaryPhoto') +        if primary_photo: +            thumbnails = [{ +                'url': primary_photo['url'], +                'width': primary_photo.get('width'), +                'height': primary_photo.get('height'), +            }] + +        info = { +            '_type': 'url_transparent', +            'id': article_id, +            'title': article_data['title'], +            'uploader': article_data.get('author', {}).get('name'), +            'uploader_id': article_data.get('authorId'), +            'timestamp': parse_iso8601(article_data.get('createdAt')), +            'thumbnails': thumbnails, +            'comment_count': int_or_none(article_data.get('commentsCount')), +            'view_count': int_or_none(article_data.get('hitCount')), +        } + +        video = article_data.get('video') +        if video: +            video_type = video['type'] +            if video_type == 'cms.bleacherreport.com': +                info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] +            elif video_type == 'ooyala.com': +                info['url'] = 'ooyala:%s' % video['id'] +            elif video_type == 'youtube.com': +                info['url'] = video['id'] +            elif video_type == 'vine.co': +                info['url'] = 'https://vine.co/v/%s' % video['id'] +            else: +                info['url'] = video_type + video['id'] +            return info +        else: +            raise ExtractorError('no video in the article', expected=True) + + +class BleacherReportCMSIE(AMPIE): +    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' +    _TESTS = [{ +        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', +        'md5': 'f0ca220af012d4df857b54f792c586bb', +        'info_dict': { +            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', +            'ext': 'flv', +            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division', +            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', +        }, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id) +        info['id'] = video_id +        return info diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index d836c1a6c..60ed438f8 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import itertools -from .common import InfoExtractor +from .amp import AMPIE  from ..compat import (      compat_HTTPError,      compat_urllib_parse, @@ -12,14 +12,11 @@ from ..compat import (  from ..utils import (      ExtractorError,      clean_html, -    determine_ext, -    int_or_none, -    parse_iso8601,      sanitized_Request,  ) -class DramaFeverBaseIE(InfoExtractor): +class DramaFeverBaseIE(AMPIE):      _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'      _NETRC_MACHINE = 'dramafever' @@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):              'timestamp': 1404336058,              'upload_date': '20140702',              'duration': 343, -        } +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        },      }      def _real_extract(self, url):          video_id = self._match_id(url).replace('/', '.')          try: -            feed = self._download_json( -                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, -                video_id, 'Downloading episode JSON')['channel']['item'] +            info = self._extract_feed_info( +                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)          except ExtractorError as e:              if isinstance(e.cause, compat_HTTPError):                  raise ExtractorError(                      'Currently unavailable in your country.', expected=True)              raise -        media_group = feed.get('media-group', {}) - -        formats = [] -        for media_content in media_group['media-content']: -            src = media_content.get('@attributes', {}).get('url') -            if not src: -                continue -            ext = determine_ext(src) -            if ext == 'f4m': -                formats.extend(self._extract_f4m_formats( -                    src, video_id, f4m_id='hds')) -            elif ext == 'm3u8': -                formats.extend(self._extract_m3u8_formats( -                    src, video_id, 'mp4', m3u8_id='hls')) -            else: -                formats.append({ -                    'url': src, -                }) -        self._sort_formats(formats) - -        title = media_group.get('media-title') -        description = media_group.get('media-description') -        duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration')) -        thumbnail = self._proto_relative_url( -            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url')) -        timestamp = parse_iso8601(feed.get('pubDate'), ' ') - -        subtitles = {} -        for media_subtitle in media_group.get('media-subTitle', []): -            lang = media_subtitle.get('@attributes', {}).get('lang') -            href = media_subtitle.get('@attributes', {}).get('href') -            if not lang or not href: -                continue -            subtitles[lang] = [{ -                'ext': 'ttml', -                'url': href, -            }] -          series_id, episode_number = video_id.split('.')          episode_info = self._download_json(              # We only need a single episode info, so restricting page size to one episode @@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):              if value:                  subfile = value[0].get('subfile') or value[0].get('new_subfile')                  if subfile and subfile != 'http://www.dramafever.com/st/': -                    subtitles.setdefault('English', []).append({ +                    info['subtitiles'].setdefault('English', []).append({                          'ext': 'srt',                          'url': subfile,                      }) -        return { -            'id': video_id, -            'title': title, -            'description': description, -            'thumbnail': thumbnail, -            'timestamp': timestamp, -            'duration': duration, -            'formats': formats, -            'subtitles': subtitles, -        } +        return info  class DramaFeverSeriesIE(DramaFeverBaseIE): diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 3a4a59135..318ac013d 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -2,14 +2,10 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor -from ..utils import ( -    parse_iso8601, -    int_or_none, -) +from .amp import AMPIE -class FoxNewsIE(InfoExtractor): +class FoxNewsIE(AMPIE):      IE_DESC = 'Fox News and Fox Business Video'      _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'      _TESTS = [ @@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):                  'id': '3937480',                  'ext': 'flv',                  'title': 'Frozen in Time', -                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', +                'description': '16-year-old girl is size of toddler',                  'duration': 265, -                'timestamp': 1304411491, -                'upload_date': '20110503', +                # 'timestamp': 1304411491, +                # 'upload_date': '20110503',                  'thumbnail': 're:^https?://.*\.jpg$',              },          }, @@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):                  'id': '3922535568001',                  'ext': 'mp4',                  'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", -                'description': "Congressman discusses the president's executive action", +                'description': "Congressman discusses president's plan",                  'duration': 292, -                'timestamp': 1417662047, -                'upload_date': '20141204', +                # 'timestamp': 1417662047, +                # 'upload_date': '20141204',                  'thumbnail': 're:^https?://.*\.jpg$',              },          }, @@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        host = mobj.group('host') +        host, video_id = re.match(self._VALID_URL, url).groups() -        video = self._download_json( -            'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) - -        item = video['channel']['item'] -        title = item['title'] -        description = item['description'] -        timestamp = parse_iso8601(item['dc-date']) - -        media_group = item['media-group'] -        duration = None -        formats = [] -        for media in media_group['media-content']: -            attributes = media['@attributes'] -            video_url = attributes['url'] -            if video_url.endswith('.f4m'): -                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id)) -            elif video_url.endswith('.m3u8'): -                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv')) -            elif not video_url.endswith('.smil'): -                duration = int_or_none(attributes.get('duration')) -                formats.append({ -                    'url': video_url, -                    'format_id': media['media-category']['@attributes']['label'], -                    'preference': 1, -                    'vbr': int_or_none(attributes.get('bitrate')), -                    'filesize': int_or_none(attributes.get('fileSize')) -                }) -        self._sort_formats(formats) - -        media_thumbnail = media_group['media-thumbnail']['@attributes'] -        thumbnails = [{ -            'url': media_thumbnail['url'], -            'width': int_or_none(media_thumbnail.get('width')), -            'height': int_or_none(media_thumbnail.get('height')), -        }] if media_thumbnail else [] - -        return { -            'id': video_id, -            'title': title, -            'description': description, -            'duration': duration, -            'timestamp': timestamp, -            'formats': formats, -            'thumbnails': thumbnails, -        } +        info = self._extract_feed_info( +            'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) +        info['id'] = video_id +        return info | 
