diff options
Diffstat (limited to 'youtube_dl/extractor/collegehumor.py')
| -rw-r--r-- | youtube_dl/extractor/collegehumor.py | 52 | 
1 file changed, 33 insertions, 19 deletions
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py
index 30b9c7549..8d4c93d6d 100644
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -4,6 +4,7 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse_urlparse,
+    determine_ext,
     ExtractorError,
 )
@@ -12,7 +13,7 @@ from ..utils import (
 class CollegeHumorIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
-    _TEST = {
+    _TESTS = [{
         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
         u'file': u'6902724.mp4',
         u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
             u'title': u'Comic-Con Cosplay Catastrophe',
             u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
         },
-    }
+    },
+    {
+        u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
+        u'file': u'3505939.mp4',
+        u'md5': u'c51ca16b82bb456a4397987791a835f5',
+        u'info_dict': {
+            u'title': u'Font Conference',
+            u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
+        },
+    }]
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):
             info['description'] = videoNode.findall('./description')[0].text
             info['title'] = videoNode.findall('./caption')[0].text
             info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
-            manifest_url = videoNode.findall('./file')[0].text
+            next_url = videoNode.findall('./file')[0].text
         except IndexError:
             raise ExtractorError(u'Invalid metadata XML file')
-        manifest_url += '?hdcore=2.10.3'
-        manifestXml = self._download_webpage(manifest_url, video_id,
-                                             u'Downloading XML manifest',
-                                             u'Unable to download video info XML')
-
-        adoc = xml.etree.ElementTree.fromstring(manifestXml)
-        try:
-            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
-            node_id = media_node.attrib['url']
-            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
-        except IndexError as err:
-            raise ExtractorError(u'Invalid manifest file')
+        if next_url.endswith(u'manifest.f4m'):
+            manifest_url = next_url + '?hdcore=2.10.3'
+            manifestXml = self._download_webpage(manifest_url, video_id,
+                                         u'Downloading XML manifest',
+                                         u'Unable to download video info XML')
-        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            adoc = xml.etree.ElementTree.fromstring(manifestXml)
+            try:
+                media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
+                node_id = media_node.attrib['url']
+                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
+            except IndexError as err:
+                raise ExtractorError(u'Invalid manifest file')
+            url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+            info['ext'] = 'mp4'
+        else:
+            # Old-style direct links
+            info['url'] = next_url
+            info['ext'] = determine_ext(info['url'])
-        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
-        info['ext'] = 'mp4'
-        return [info]
+        return info
