diff options
| -rw-r--r-- | test/tests.json | 6 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 49 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 1 | 
3 files changed, 56 insertions, 0 deletions
diff --git a/test/tests.json b/test/tests.json
index b573affc5..dbff62676 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -102,5 +102,11 @@
     "name": "GooglePlus",
     "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
     "file": "ZButuJc6CtH.flv"
+  },
+  {
+    "name": "FunnyOrDie",
+    "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
+    "file": "0732f586d7.mp4",
+    "md5": "f647e9e90064b53b6e046e75d0241fbd"
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 5a9032331..697c031c5 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3644,3 +3644,52 @@ class JustinTVIE(InfoExtractor):
                 break
             offset += limit
         return info
+
+class FunnyOrDieIE(InfoExtractor):
+    """Information extractor for funnyordie.com video pages."""
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+    IE_NAME = u'FunnyOrDie'
+
+    def report_extraction(self, video_id):
+        """Print a status line announcing that extraction of video_id has started."""
+        self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for url, or None once an error was reported."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group('id')
+        self.report_extraction(video_id)
+        try:
+            urlh = compat_urllib_request.urlopen(url)
+            webpage = urlh.read().decode('utf-8', 'ignore')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+            return
+
+        # The pattern skips the first <source> tag inside <video> and captures the src of the second one.
+        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
+        if not m:
+            self._downloader.trouble(u'ERROR: unable to find video information')
+            return  # stop like the other error paths: m is None, so m.group() below would raise
+        video_url = unescapeHTML(m.group('url'))
+
+        m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
+        if not m:
+            self._downloader.trouble(u'Cannot find video title')
+            return  # same reason: no match object to read the title from
+        title = unescapeHTML(m.group('title'))
+
+        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
+        desc = unescapeHTML(m.group('desc')) if m else None  # the description is optional
+
+        return [{
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': desc,
+        }]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1102b2fce..c7a0bb959 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -400,6 +400,7 @@ def gen_extractors():
         ArteTvIE(),
         NBAIE(),
         JustinTVIE(),
+        FunnyOrDieIE(),
         GenericIE()
     ]
 
