diff options
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/ninegag.py | 41 | 
3 files changed, 73 insertions, 3 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b68b110a4..8ad7bd1da 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -405,7 +405,8 @@ class YoutubeDL(object):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
-    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+                     process=True):
         '''
         Returns a list with a dictionary for each video we find.
         If 'download', also downloads the videos.
@@ -441,7 +442,10 @@ class YoutubeDL(object):
                         'webpage_url': url,
                         'extractor_key': ie.ie_key(),
                     })
-                return self.process_ie_result(ie_result, download, extra_info)
+                if process:
+                    return self.process_ie_result(ie_result, download, extra_info)
+                else:
+                    return ie_result
             except ExtractorError as de: # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
                 break
@@ -474,8 +478,32 @@ class YoutubeDL(object):
                                      download,
                                      ie_key=ie_result.get('ie_key'),
                                      extra_info=extra_info)
+        elif result_type == 'url_transparent':
+            # Use the information from the embedding page
+            info = self.extract_info(
+                ie_result['url'], ie_key=ie_result.get('ie_key'),
+                extra_info=extra_info, download=False, process=False)
+
+            def make_result(embedded_info):
+                new_result = ie_result.copy()
+                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
+                          'entries', 'urlhandle', 'ie_key', 'duration',
+                          'subtitles', 'annotations', 'format'):
+                    if f in new_result:
+                        del new_result[f]
+                    if f in embedded_info:
+                        new_result[f] = embedded_info[f]
+                return new_result
+            new_result = make_result(info)
+
+            assert new_result.get('_type') != 'url_transparent'
+            if new_result.get('_type') == 'compat_list':
+                new_result['entries'] = [
+                    make_result(e) for e in new_result['entries']]
+
+            return self.process_ie_result(
+                new_result, download=download, extra_info=extra_info)
         elif result_type == 'playlist':
-
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bd996483b..a77e98d49 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -102,6 +102,7 @@ from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
+from .ninegag import NineGagIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py
new file mode 100644
index 000000000..cc00ffbcc
--- /dev/null
+++ b/youtube_dl/extractor/ninegag.py
@@ -0,0 +1,41 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class NineGagIE(InfoExtractor):
+    IE_NAME = '9gag'
+    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+
+    _TEST = {
+        u"url": u"http://9gag.tv/v/1912",
+        u"file": u"1912.mp4",
+        u"info_dict": {
+            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+        },
+        u'add_ie': [u'Youtube']
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        data_json = self._html_search_regex(r'''(?x)
+            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
+                data-video-meta="([^"]+)"''', webpage, u'video metadata')
+
+        data = json.loads(data_json)
+
+        return {
+            '_type': 'url_transparent',
+            'url': data['youtubeVideoId'],
+            'ie_key': 'Youtube',
+            'id': video_id,
+            'title': data['title'],
+            'description': data['description'],
+            'view_count': int(data['view_count']),
+            'thumbnail': data['thumbnail_url'],
+        }
