diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-03-13 17:22:23 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-03-13 17:22:23 +0600 | 
| commit | 0a477f873115a11dbaadae674c2aaabe15ec4249 (patch) | |
| tree | 675fe779cc19099129cdb91415c099ebbafe0ad7 | |
| parent | 7f4173ae7c0dae870250018cd4915df36ccf0f54 (diff) | |
[vice:show] Add extractor (Closes #8847)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/vice.py | 78 | 
2 files changed, 62 insertions, 21 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index dc5ecea30..c5b80f4aa 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -836,7 +836,10 @@ from .vgtv import (
     VGTVIE,
 )
 from .vh1 import VH1IE
-from .vice import ViceIE
+from .vice import (
+    ViceIE,
+    ViceShowIE,
+)
 from .viddler import ViddlerIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 3db6286e4..46c785ae1 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,31 +1,37 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 from ..utils import ExtractorError
 
 
 class ViceIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
-
-    _TESTS = [
-        {
-            'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
-            'info_dict': {
-                'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
-                'ext': 'mp4',
-                'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
-                'duration': 725.983,
-            },
-            'params': {
-                # Requires ffmpeg (m3u8 manifest)
-                'skip_download': True,
-            },
-        }, {
-            'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
-            'only_matching': True,
-        }
-    ]
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
+        'info_dict': {
+            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+            'ext': 'mp4',
+            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+            'duration': 725.983,
+        },
+        'params': {
+            # Requires ffmpeg (m3u8 manifest)
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
+        'only_matching': True,
+    }, {
+        'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -38,3 +44,35 @@ class ViceIE(InfoExtractor):
         except ExtractorError:
             raise ExtractorError('The page doesn\'t contain a video', expected=True)
         return self.url_result(ooyala_url, ie='Ooyala')
+
+
+class ViceShowIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
+
+    _TEST = {
+        'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
+        'info_dict': {
+            'id': 'fuck-thats-delicious-2',
+            'title': "Fuck, That's Delicious",
+            'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
+        },
+        'playlist_count': 17,
+    }
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        webpage = self._download_webpage(url, show_id)
+
+        entries = [
+            self.url_result(video_url, ViceIE.ie_key())
+            for video_url, _ in re.findall(
+                r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
+                % ViceIE._VALID_URL, webpage)]
+
+        title = self._search_regex(
+            r'<title>(.+?)</title>', webpage, 'title', default=None)
+        if title:
+            title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
+        description = self._html_search_meta('description', webpage, 'description')
+
+        return self.playlist_result(entries, show_id, title, description)
```
