diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-12-30 19:41:04 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-12-30 19:41:04 +0100 | 
| commit | b3013681ff49712d5b5437efdff117ca544caadb (patch) | |
| tree | 442d601fca73f070e9ac0e6ef2b9770c71d03f88 | |
| parent | 416c7fcbce86324587afae11414c71ff603ad296 (diff) | |
| parent | e83eebb12f984c1614204e53c09dc5124b52b45c (diff) | |
Merge remote-tracking branch 'origin/master'
| -rw-r--r-- | AUTHORS | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/atresplayer.py | 114 | ||||
| -rw-r--r-- | youtube_dl/extractor/cnn.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/daum.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/hellporno.py | 71 | ||||
| -rw-r--r-- | youtube_dl/extractor/hitbox.py | 166 | ||||
| -rw-r--r-- | youtube_dl/extractor/xxxymovies.py | 81 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 26 | 
9 files changed, 469 insertions, 8 deletions
| @@ -96,3 +96,4 @@ Mathias Rav  Petr Kutalek  Will Glynn  Max Reimann +Cédric Luthi diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ab0f76862..c15786ad7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -25,6 +25,7 @@ from .arte import (      ArteTVDDCIE,      ArteTVEmbedIE,  ) +from .atresplayer import AtresPlayerIE  from .audiomack import AudiomackIE  from .auengine import AUEngineIE  from .azubu import AzubuIE @@ -169,8 +170,10 @@ from .grooveshark import GroovesharkIE  from .groupon import GrouponIE  from .hark import HarkIE  from .heise import HeiseIE +from .hellporno import HellPornoIE  from .helsinki import HelsinkiIE  from .hentaistigma import HentaiStigmaIE +from .hitbox import HitboxIE, HitboxLiveIE  from .hornbunny import HornBunnyIE  from .hostingbulk import HostingBulkIE  from .hotnewhiphop import HotNewHipHopIE @@ -515,6 +518,7 @@ from .xminus import XMinusIE  from .xnxx import XNXXIE  from .xvideos import XVideosIE  from .xtube import XTubeUserIE, XTubeIE +from .xxxymovies import XXXYMoviesIE  from .yahoo import (      YahooIE,      YahooSearchIE, diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py new file mode 100644 index 000000000..72e83bfc2 --- /dev/null +++ b/youtube_dl/extractor/atresplayer.py @@ -0,0 +1,114 @@ +from __future__ import unicode_literals + +import time +import hmac + +from .common import InfoExtractor +from ..utils import ( +    compat_str, +    compat_urllib_request, +    int_or_none, +    float_or_none, +    xpath_text, +    ExtractorError, +) + + +class AtresPlayerIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' +    _TESTS = [ +        { +            'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', +            'md5': 'efd56753cda1bb64df52a3074f62e38a', +            'info_dict': { +                'id': 'capitulo-10-especial-solidario-nochebuena', +                'ext': 'mp4', +                'title': 'Especial Solidario de Nochebuena', +                'description': 'md5:e2d52ff12214fa937107d21064075bf1', +                'duration': 5527.6, +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +        }, +        { +            'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', +            'only_matching': True, +        }, +    ] + +    _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' +    _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)' +    _TIMESTAMP_SHIFT = 30000 + +    _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json' +    _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json' +    _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s' +    _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s' + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        episode_id = self._search_regex( +            r'episode="([^"]+)"', webpage, 'episode id') + +        timestamp = int_or_none(self._download_webpage( +            self._TIME_API_URL, +            video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) +        timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) +        token = hmac.new( +            self._MAGIC.encode('ascii'), +            (episode_id + timestamp_shifted).encode('utf-8') +        ).hexdigest() + +        formats = [] +        for fmt in ['windows', 'android_tablet']: +            request = compat_urllib_request.Request( +                self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) +            request.add_header('Youtubedl-user-agent', self._USER_AGENT) + +            fmt_json = self._download_json( +                request, video_id, 'Downloading %s video JSON' % fmt) + +            result = fmt_json.get('resultDes') +            if result.lower() != 'ok': +                raise ExtractorError( +                    '%s returned error: %s' % (self.IE_NAME, result), expected=True) + +            for _, video_url in fmt_json['resultObject'].items(): +                if video_url.endswith('/Manifest'): +                    formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id)) +                else: +                    formats.append({ +                        'url': video_url, +                        'format_id': 'android', +                        'preference': 1, +                    }) +        self._sort_formats(formats) + +        player = self._download_json( +            self._PLAYER_URL_TEMPLATE % episode_id, +            episode_id) + +        path_data = player.get('pathData') + +        episode = self._download_xml( +            self._EPISODE_URL_TEMPLATE % path_data, +            video_id, 'Downloading episode XML') + +        duration = float_or_none(xpath_text( +            episode, './media/asset/info/technical/contentDuration', 'duration')) + +        art = episode.find('./media/asset/info/art') +        title = xpath_text(art, './name', 'title') +        description = xpath_text(art, './description', 'description') +        thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 1bff005d6..93e8d0de3 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import (  class CNNIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ -        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(?:-ap)?|(?=&)))''' +        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''      _TESTS = [{          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', @@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):              "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",              "upload_date": "20130821",          } +    }, { +        'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', +        'md5': 'f14d02ebd264df951feb2400e2c25a1b', +        'info_dict': { +            'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', +            'ext': 'mp4', +            'title': 'Nashville Ep. 1: Hand crafted skateboards', +            'description': 'md5:e7223a503315c9f150acac52e76de086', +            'upload_date': '20141222', +        }      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index c6b813f58..934da765e 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -38,7 +38,7 @@ class DaumIE(InfoExtractor):          canonical_url = 'http://tvpot.daum.net/v/%s' % video_id          webpage = self._download_webpage(canonical_url, video_id)          full_id = self._search_regex( -            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', +            r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',              webpage, 'full id')          query = compat_urllib_parse.urlencode({'vid': full_id})          info = self._download_xml( diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py new file mode 100644 index 000000000..7a1c75b65 --- /dev/null +++ b/youtube_dl/extractor/hellporno.py @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    js_to_json, +    remove_end, +) + + +class HellPornoIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)' +    _TEST = { +        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', +        'md5': '1fee339c610d2049699ef2aa699439f1', +        'info_dict': { +            'id': '149116', +            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic', +            'ext': 'mp4', +            'title': 'Dixie is posing with naked ass very erotic', +            'thumbnail': 're:https?://.*\.jpg$', +            'age_limit': 18, +        } +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        title = remove_end(self._html_search_regex( +            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno') + +        flashvars = self._parse_json(self._search_regex( +            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), +            display_id, transform_source=js_to_json) + +        video_id = flashvars.get('video_id') +        thumbnail = flashvars.get('preview_url') +        ext = flashvars.get('postfix', '.mp4')[1:] + +        formats = [] +        for video_url_key in ['video_url', 'video_alt_url']: +            video_url = flashvars.get(video_url_key) +            if not video_url: +                continue +            video_text = flashvars.get('%s_text' % video_url_key) +            fmt = { +                'url': video_url, +                'ext': ext, +                'format_id': video_text, +            } +            m = re.search(r'^(?P<height>\d+)[pP]', video_text) +            if m: +                fmt['height'] = int(m.group('height')) +            formats.append(fmt) +        self._sort_formats(formats) + +        categories = self._html_search_meta( +            'keywords', webpage, 'categories', default='').split(',') + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'thumbnail': thumbnail, +            'categories': categories, +            'age_limit': 18, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py new file mode 100644 index 000000000..84bd7c080 --- /dev/null +++ b/youtube_dl/extractor/hitbox.py @@ -0,0 +1,166 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    clean_html, +    parse_iso8601, +    float_or_none, +    int_or_none, +    compat_str, +) + + +class HitboxIE(InfoExtractor): +    IE_NAME = 'hitbox' +    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://www.hitbox.tv/video/203213', +        'info_dict': { +            'id': '203213', +            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy', +            'alt_title': 'hitboxlive - Aug 9th #6', +            'description': '', +            'ext': 'mp4', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 215.1666, +            'resolution': 'HD 720p', +            'uploader': 'hitboxlive', +            'view_count': int, +            'timestamp': 1407576133, +            'upload_date': '20140809', +            'categories': ['Live Show'], +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    } + +    def _extract_metadata(self, url, video_id): +        thumb_base = 'https://edge.sf.hitbox.tv' +        metadata = self._download_json( +            '%s/%s' % (url, video_id), video_id) + +        date = 'media_live_since' +        media_type = 'livestream' +        if metadata.get('media_type') == 'video': +            media_type = 'video' +            date = 'media_date_added' + +        video_meta = metadata.get(media_type, [])[0] +        title = video_meta.get('media_status') +        alt_title = video_meta.get('media_title') +        description = clean_html( +            video_meta.get('media_description') or +            video_meta.get('media_description_md')) +        duration = float_or_none(video_meta.get('media_duration')) +        uploader = video_meta.get('media_user_name') +        views = int_or_none(video_meta.get('media_views')) +        timestamp = parse_iso8601(video_meta.get(date), ' ') +        categories = [video_meta.get('category_name')] +        thumbs = [ +            {'url': thumb_base + video_meta.get('media_thumbnail'), +             'width': 320, +             'height': 180}, +            {'url': thumb_base + video_meta.get('media_thumbnail_large'), +             'width': 768, +             'height': 432}, +        ] + +        return { +            'id': video_id, +            'title': title, +            'alt_title': alt_title, +            'description': description, +            'ext': 'mp4', +            'thumbnails': thumbs, +            'duration': duration, +            'uploader': uploader, +            'view_count': views, +            'timestamp': timestamp, +            'categories': categories, +        } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        metadata = self._extract_metadata( +            'https://www.hitbox.tv/api/media/video', +            video_id) + +        player_config = self._download_json( +            'https://www.hitbox.tv/api/player/config/video/%s' % video_id, +            video_id) + +        clip = player_config.get('clip') +        video_url = clip.get('url') +        res = clip.get('bitrates', [])[0].get('label') + +        metadata['resolution'] = res +        metadata['url'] = video_url +        metadata['protocol'] = 'm3u8' + +        return metadata + + +class HitboxLiveIE(HitboxIE): +    IE_NAME = 'hitbox:live' +    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)' +    _TEST = { +        'url': 'http://www.hitbox.tv/dimak', +        'info_dict': { +            'id': 'dimak', +            'ext': 'mp4', +            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e', +            'timestamp': int, +            'upload_date': compat_str, +            'title': compat_str, +            'uploader': 'Dimak', +        }, +        'params': { +            # live +            'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        metadata = self._extract_metadata( +            'https://www.hitbox.tv/api/media/live', +            video_id) + +        player_config = self._download_json( +            'https://www.hitbox.tv/api/player/config/live/%s' % video_id, +            video_id) + +        formats = [] +        cdns = player_config.get('cdns') +        servers = [] +        for cdn in cdns: +            base_url = cdn.get('netConnectionUrl') +            host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) +            if base_url not in servers: +                servers.append(base_url) +                for stream in cdn.get('bitrates'): +                    label = stream.get('label') +                    if label != 'Auto': +                        formats.append({ +                            'url': '%s/%s' % (base_url, stream.get('url')), +                            'ext': 'mp4', +                            'vbr': stream.get('bitrate'), +                            'resolution': label, +                            'rtmp_live': True, +                            'format_note': host, +                            'page_url': url, +                            'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf', +                        }) + +        self._sort_formats(formats) +        metadata['formats'] = formats +        metadata['is_live'] = True +        metadata['title'] = self._live_title(metadata.get('title')) +        return metadata diff --git a/youtube_dl/extractor/xxxymovies.py b/youtube_dl/extractor/xxxymovies.py new file mode 100644 index 000000000..5c8f17eb2 --- /dev/null +++ b/youtube_dl/extractor/xxxymovies.py @@ -0,0 +1,81 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    parse_duration, +    int_or_none, +) + + +class XXXYMoviesIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)' +    _TEST = { +        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/', +        'md5': '810b1bdbbffff89dd13bdb369fe7be4b', +        'info_dict': { +            'id': '138669', +            'display_id': 'ecstatic-orgasm-sofcore', +            'ext': 'mp4', +            'title': 'Ecstatic Orgasm Sofcore', +            'duration': 931, +            'categories': list, +            'view_count': int, +            'like_count': int, +            'dislike_count': int, +            'age_limit': 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') + +        webpage = self._download_webpage(url, display_id) + +        video_url = self._search_regex( +            r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') + +        title = self._html_search_regex( +            [r'<div class="block_header">\s*<h1>([^<]+)</h1>', +             r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'], +            webpage, 'title') + +        thumbnail = self._search_regex( +            r"preview_url\s*:\s*'([^']+)'", +            webpage, 'thumbnail', fatal=False) + +        categories = self._html_search_meta( +            'keywords', webpage, 'categories', default='').split(',') + +        duration = parse_duration(self._search_regex( +            r'<span>Duration:</span>\s*(\d+:\d+)', +            webpage, 'duration', fatal=False)) + +        view_count = int_or_none(self._html_search_regex( +            r'<div class="video_views">\s*(\d+)', +            webpage, 'view count', fatal=False)) +        like_count = int_or_none(self._search_regex( +            r'>\s*Likes? <b>\((\d+)\)', +            webpage, 'like count', fatal=False)) +        dislike_count = int_or_none(self._search_regex( +            r'>\s*Dislike <b>\((\d+)\)</b>', +            webpage, 'dislike count', fatal=False)) + +        age_limit = self._rta_search(webpage) + +        return { +            'id': video_id, +            'display_id': display_id, +            'url': video_url, +            'title': title, +            'thumbnail': thumbnail, +            'categories': categories, +            'duration': duration, +            'view_count': view_count, +            'like_count': like_count, +            'dislike_count': dislike_count, +            'age_limit': age_limit, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 550e18733..3da83e3a8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -418,6 +418,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'upload_date': '20140605',              },          }, +        # Age-gate video with encrypted signature +        { +            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU', +            'info_dict': { +                'id': '6kLq3WMV1nU', +                'ext': 'mp4', +                'title': 'Dedication To My Ex (Miss That) (Lyric Video)', +                'description': 'md5:33765bb339e1b47e7e72b5490139bb41', +                'uploader': 'LloydVEVO', +                'uploader_id': 'LloydVEVO', +                'upload_date': '20110629', +            }, +        },          # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)          {              'url': '__2ABJjxzNo', @@ -766,11 +779,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              age_gate = True              # We simulate the access to the video from www.youtube.com/v/{video_id}              # this can be viewed without login into Youtube +            url = proto + '://www.youtube.com/embed/%s' % video_id +            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')              data = compat_urllib_parse.urlencode({                  'video_id': video_id,                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,                  'sts': self._search_regex( -                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''), +                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),              })              video_info_url = proto + '://www.youtube.com/get_video_info?' + data              video_info_webpage = self._download_webpage( @@ -968,11 +983,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  elif 's' in url_data:                      encrypted_sig = url_data['s'][0] -                    if not age_gate: -                        jsplayer_url_json = self._search_regex( -                            r'"assets":.+?"js":\s*("[^"]+")', -                            video_webpage, 'JS player URL') -                        player_url = json.loads(jsplayer_url_json) +                    jsplayer_url_json = self._search_regex( +                        r'"assets":.+?"js":\s*("[^"]+")', +                        embed_webpage if age_gate else video_webpage, 'JS player URL') +                    player_url = json.loads(jsplayer_url_json)                      if player_url is None:                          player_url_json = self._search_regex(                              r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', | 
