diff options
| -rw-r--r-- | AUTHORS | 1 | ||||
| -rw-r--r-- | docs/supportedsites.md | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/beeg.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/canvas.py | 65 | ||||
| -rw-r--r-- | youtube_dl/extractor/ntvde.py | 33 | ||||
| -rw-r--r-- | youtube_dl/extractor/orf.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/prosiebensat1.py | 30 | ||||
| -rw-r--r-- | youtube_dl/extractor/unistra.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/vodlocker.py | 26 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
11 files changed, 157 insertions, 34 deletions
| @@ -152,3 +152,4 @@ Evan Lu  flatgreen  Brian Foley  Vignesh Venkat +Tom Gijselinck diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8d0c7b97a..eb160bd2f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -65,6 +65,7 @@   - **Beeg**   - **BehindKink**   - **Bet** + - **Bigflix**   - **Bild**: Bild.de   - **BiliBili**   - **BleacherReport** @@ -251,7 +252,7 @@   - **Instagram**   - **instagram:user**: Instagram user profile   - **InternetVideoArchive** - - **IPrima** + - **IPrima** (Currently broken)   - **iqiyi**: 爱奇艺   - **Ir90Tv**   - **ivi**: ivi.ru @@ -602,7 +603,9 @@   - **TruTube**   - **Tube8**   - **TubiTv** - - **Tudou** + - **tudou** + - **tudou:album** + - **tudou:playlist**   - **Tumblr**   - **tunein:clip**   - **tunein:program** @@ -655,12 +658,12 @@   - **video.mit.edu**   - **VideoDetective**   - **videofy.me** - - **VideoMega** + - **VideoMega** (Currently broken)   - **videomore**   - **videomore:season**   - **videomore:video**   - **VideoPremium** - - **VideoTt**: video.tt - Your True Tube + - **VideoTt**: video.tt - Your True Tube (Currently broken)   - **videoweed**: VideoWeed   - **Vidme**   - **Vidzi** diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e4ae9332d..5621c9eb0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -86,6 +86,7 @@ from .camdemy import (  )  from .canalplus import CanalplusIE  from .canalc2 import Canalc2IE +from .canvas import CanvasIE  from .cbs import CBSIE  from .cbsnews import CBSNewsIE  from .cbssports import CBSSportsIE diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c8d921daf..34c2a756f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):          video_id = self._match_id(url)          video = self._download_json( -            'http://beeg.com/api/v5/video/%s' % video_id, video_id) +            'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)          def split(o, e):              def cut(s, x): @@ -60,7 +60,7 @@ class BeegIE(InfoExtractor):          def decrypt_url(encrypted_url):              encrypted_url = self._proto_relative_url( -                encrypted_url.replace('{DATA_MARKERS}', ''), 'http:') +                encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')              key = self._search_regex(                  r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)              if not key: diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py new file mode 100644 index 000000000..ee19ff836 --- /dev/null +++ b/youtube_dl/extractor/canvas.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import float_or_none + + +class CanvasIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)' +    _TEST = { +        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', +        'md5': 'ea838375a547ac787d4064d8c7860a6c', +        'info_dict': { +            'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', +            'display_id': 'de-afspraak-veilt-voor-de-warmste-week', +            'ext': 'mp4', +            'title': 'De afspraak veilt voor de Warmste Week', +            'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 49.02, +        } +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        title = self._search_regex( +            r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>', +            webpage, 'title', default=None) or self._og_search_title(webpage) + +        video_id = self._html_search_regex( +            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id') + +        data = self._download_json( +            'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) + +        formats = [] +        for target in data['targetUrls']: +            format_url, format_type = target.get('url'), target.get('type') +            if not format_url or not format_type: +                continue +            if format_type == 'HLS': +                formats.extend(self._extract_m3u8_formats( +                    format_url, display_id, entry_protocol='m3u8_native', +                    ext='mp4', preference=0, fatal=False, m3u8_id=format_type)) +            elif format_type == 'HDS': +                formats.extend(self._extract_f4m_formats( +                    format_url, display_id, f4m_id=format_type, fatal=False)) +            else: +                formats.append({ +                    'format_id': format_type, +                    'url': format_url, +                }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': self._og_search_description(webpage), +            'formats': formats, +            'duration': float_or_none(data.get('duration'), 1000), +            'thumbnail': data.get('posterImageUrl'), +        } diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index d2cfe0961..a83e85cb8 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -2,6 +2,7 @@  from __future__ import unicode_literals  from .common import InfoExtractor +from ..compat import compat_urlparse  from ..utils import (      int_or_none,      js_to_json, @@ -34,7 +35,7 @@ class NTVDeIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          info = self._parse_json(self._search_regex( -            r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'), +            r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),              video_id, transform_source=js_to_json)          timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))          vdata = self._parse_json(self._search_regex( @@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor):              webpage, 'player data'),              video_id, transform_source=js_to_json)          duration = parse_duration(vdata.get('duration')) -        formats = [{ -            'format_id': 'flash', -            'url': 'rtmp://fms.n-tv.de/' + vdata['video'], -        }, { -            'format_id': 'mobile', -            'url': 'http://video.n-tv.de' + vdata['videoMp4'], -            'tbr': 400,  # estimation -        }] -        m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8'] -        formats.extend(self._extract_m3u8_formats( -            m3u8_url, video_id, ext='mp4', -            entry_protocol='m3u8_native', preference=0)) + +        formats = [] +        if vdata.get('video'): +            formats.append({ +                'format_id': 'flash', +                'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'], +            }) +        if vdata.get('videoMp4'): +            formats.append({ +                'format_id': 'mobile', +                'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']), +                'tbr': 400,  # estimation +            }) +        if vdata.get('videoM3u8'): +            m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8']) +            formats.extend(self._extract_m3u8_formats( +                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', +                preference=0, m3u8_id='hls', fatal=False))          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 2e6c9872b..c54775d54 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -170,7 +170,21 @@ class ORFOE1IE(InfoExtractor):  class ORFFM4IE(InfoExtractor):      IE_NAME = 'orf:fm4'      IE_DESC = 'radio FM4' -    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' +    _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' + +    _TEST = { +        'url': 'http://fm4.orf.at/player/20160110/IS/', +        'md5': '01e736e8f1cef7e13246e880a59ad298', +        'info_dict': { +            'id': '2016-01-10_2100_tl_54_7DaysSun13_11244', +            'ext': 'mp3', +            'title': 'Im Sumpf', +            'description': 'md5:384c543f866c4e422a55f66a62d669cd', +            'duration': 7173, +            'timestamp': 1452456073, +            'upload_date': '20160110', +        }, +    }      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index baa54a3af..670e6950f 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -20,7 +20,7 @@ from ..utils import (  class ProSiebenSat1IE(InfoExtractor):      IE_NAME = 'prosiebensat1'      IE_DESC = 'ProSiebenSat.1 Digital' -    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' +    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'      _TESTS = [          { @@ -32,7 +32,7 @@ class ProSiebenSat1IE(InfoExtractor):              'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',              'info_dict': {                  'id': '2104602', -                'ext': 'mp4', +                'ext': 'flv',                  'title': 'Episode 18 - Staffel 2',                  'description': 'md5:8733c81b702ea472e069bc48bb658fc1',                  'upload_date': '20131231', @@ -138,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor):              'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',              'info_dict': {                  'id': '2572814', -                'ext': 'mp4', +                'ext': 'flv',                  'title': 'Andreas Kümmert: Rocket Man',                  'description': 'md5:6ddb02b0781c6adf778afea606652e38',                  'upload_date': '20131017',                  'duration': 469.88,              },              'params': { -                # rtmp download                  'skip_download': True,              },          }, @@ -153,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor):              'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',              'info_dict': {                  'id': '2156342', -                'ext': 'mp4', +                'ext': 'flv',                  'title': 'Kurztrips zum Valentinstag', -                'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.', +                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',                  'duration': 307.24,              },              'params': { -                # rtmp download                  'skip_download': True,              },          }, @@ -172,12 +170,26 @@ class ProSiebenSat1IE(InfoExtractor):              },              'playlist_count': 2,          }, +        { +            'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', +            'info_dict': { +                'id': '4187506', +                'ext': 'flv', +                'title': 'Best of Circus HalliGalli', +                'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', +                'upload_date': '20151229', +            }, +            'params': { +                'skip_download': True, +            }, +        },      ]      _CLIPID_REGEXES = [          r'"clip_id"\s*:\s+"(\d+)"',          r'clipid: "(\d+)"',          r'clip[iI]d=(\d+)', +        r'clip[iI]d\s*=\s*["\'](\d+)',          r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",      ]      _TITLE_REGEXES = [ @@ -186,12 +198,16 @@ class ProSiebenSat1IE(InfoExtractor):          r'<!-- start video -->\s*<h1>(.+?)</h1>',          r'<h1 class="att-name">\s*(.+?)</h1>',          r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', +        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', +        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',      ]      _DESCRIPTION_REGEXES = [          r'<p itemprop="description">\s*(.+?)</p>',          r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',          r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',          r'<p class="att-description">\s*(.+?)\s*</p>', +        r'<p class="video-description" itemprop="description">\s*(.+?)</p>', +        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',      ]      _UPLOAD_DATE_REGEXES = [          r'<meta property="og:published_time" content="(.+?)">', diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index f70978299..594bee4f9 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -38,7 +38,7 @@ class UnistraIE(InfoExtractor):          webpage = self._download_webpage(url, video_id) -        files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage)) +        files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))          quality = qualities(['SD', 'HD'])          formats = [] diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 357594a11..a97995a6d 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -5,12 +5,13 @@ from .common import InfoExtractor  from ..compat import compat_urllib_parse  from ..utils import (      ExtractorError, +    NO_DEFAULT,      sanitized_Request,  )  class VodlockerIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' +    _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'      _TESTS = [{          'url': 'http://vodlocker.com/e8wvyzz4sl42', @@ -43,16 +44,31 @@ class VodlockerIE(InfoExtractor):              webpage = self._download_webpage(                  req, video_id, 'Downloading video page') +        def extract_file_url(html, default=NO_DEFAULT): +            return self._search_regex( +                r'file:\s*"(http[^\"]+)",', html, 'file url', default=default) + +        video_url = extract_file_url(webpage, default=None) + +        if not video_url: +            embed_url = self._search_regex( +                r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1', +                webpage, 'embed url', group='url') +            embed_webpage = self._download_webpage( +                embed_url, video_id, 'Downloading embed webpage') +            video_url = extract_file_url(embed_webpage) +            thumbnail_webpage = embed_webpage +        else: +            thumbnail_webpage = webpage +          title = self._search_regex(              r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')          thumbnail = self._search_regex( -            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') -        url = self._search_regex( -            r'file:\s*"(http[^\"]+)",', webpage, 'file url') +            r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)          formats = [{              'format_id': 'sd', -            'url': url, +            'url': video_url,          }]          return { diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7030903c0..4d433b667 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2016.01.09' +__version__ = '2016.01.14' | 
