diff options
155 files changed, 811 insertions, 296 deletions
@@ -163,3 +163,6 @@ Patrick Griffis  Aidan Rowe  mutantmonkey  Ben Congdon +Kacper Michajłow +José Joaquín Atria +Viťas Strádal @@ -12,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share  PYTHON ?= /usr/bin/env python  # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -ifeq ($(PREFIX),/usr) -	SYSCONFDIR=/etc -else -	ifeq ($(PREFIX),/usr/local) -		SYSCONFDIR=/etc -	else -		SYSCONFDIR=$(PREFIX)/etc -	endif -endif +SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi  install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish  	install -d $(DESTDIR)$(BINDIR) @@ -831,7 +831,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file  If you want to create a build of youtube-dl yourself, you'll need  * python -* make +* make (both GNU make and BSD make are supported)  * pandoc  * zip  * nosetests diff --git a/test/test_http.py b/test/test_http.py index fc59b1aed..15e0ad369 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -1,4 +1,5 @@  #!/usr/bin/env python +# coding: utf-8  from __future__ import unicode_literals  # Allow direct execution @@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):          response = ydl.urlopen(req).read().decode('utf-8')          self.assertEqual(response, 'cn: {0}'.format(url)) +    def test_proxy_with_idn(self): +        ydl = YoutubeDL({ +            'proxy': 'localhost:{0}'.format(self.port), +        }) +        url = 'http://中文.tw/' +        response = ydl.urlopen(url).read().decode('utf-8') +        # b'xn--fiq228c' is '中文'.encode('idna') +        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') +  if __name__ == '__main__':      unittest.main() @@ -8,6 +8,6 @@ deps =  passenv = HOME  defaultargs = test --exclude test_download.py --exclude test_age_restriction.py      --exclude test_subtitles.py --exclude test_write_annotations.py -    --exclude test_youtube_lists.py +    --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py  commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html                                                 # test.test_download:TestDownload.test_NowVideo diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 93b6ca54d..29d7a3106 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1836,7 +1836,7 @@ class YoutubeDL(object):          if fdict.get('language'):              if res:                  res += ' ' -            res += '[%s]' % fdict['language'] +            res += '[%s] ' % fdict['language']          if fdict.get('format_note') is not None:              res += fdict['format_note'] + ' '          if fdict.get('tbr') is not None: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 529051a93..8f7df4d12 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -72,6 +72,7 @@ from .bet import BetIE  from .bigflix import BigflixIE  from .bild import BildIE  from .bilibili import BiliBiliIE +from .biobiochiletv import BioBioChileTVIE  from .bleacherreport import (      BleacherReportIE,      BleacherReportCMSIE, @@ -108,6 +109,7 @@ from .cbsnews import (  )  from .cbssports import CBSSportsIE  from .ccc import CCCIE +from .cda import CDAIE  from .ceskatelevize import CeskaTelevizeIE  from .channel9 import Channel9IE  from .chaturbate import ChaturbateIE @@ -533,6 +535,7 @@ from .ooyala import (      OoyalaIE,      OoyalaExternalIE,  ) +from .openload import OpenloadIE  from .ora import OraTVIE  from .orf import (      ORFTVthekIE, @@ -628,6 +631,7 @@ from .ruutu import RuutuIE  from .sandia import SandiaIE  from .safari import (      SafariIE, +    SafariApiIE,      SafariCourseIE,  )  from .sapo import SapoIE @@ -739,6 +743,7 @@ from .theplatform import (      ThePlatformIE,      ThePlatformFeedIE,  ) +from .thescene import TheSceneIE  from .thesixtyone import TheSixtyOneIE  from .thestar import TheStarIE  from .thisamericanlife import ThisAmericanLifeIE diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 6a29e587f..b584277be 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -12,7 +12,7 @@ from ..utils import (  class ABCIE(InfoExtractor):      IE_NAME = 'abc.net.au' -    _VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' +    _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index e3e6d2113..fb1cc02e1 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -16,7 +16,7 @@ from ..utils import (  class AddAnimeIE(InfoExtractor): -    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' +    _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'      _TESTS = [{          'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',          'md5': '72954ea10bc979ab5e2eb288b21425a0', diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index e0518cf26..d548592fe 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -6,7 +6,7 @@ from ..utils import int_or_none  class AftonbladetIE(InfoExtractor): -    _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)' +    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'      _TEST = {          'url': 'http://tv.aftonbladet.se/abtv/articles/36015',          'info_dict': { diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index cddcaa489..b081695d8 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -4,7 +4,7 @@ from .common import InfoExtractor  class AlJazeeraIE(InfoExtractor): -    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html' +    _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'      _TEST = {          'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 0158407f6..2cede55a7 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -3,10 +3,14 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( +    compat_urlparse, +    compat_str, +)  from ..utils import (      determine_ext,      encode_dict, +    extract_attributes,      ExtractorError,      sanitized_Request,      urlencode_postdata, @@ -34,6 +38,10 @@ class AnimeOnDemandIE(InfoExtractor):          # Episodes without titles          'url': 'https://www.anime-on-demand.de/anime/162',          'only_matching': True, +    }, { +        # ger/jap, Dub/OmU, account required +        'url': 'https://www.anime-on-demand.de/anime/169', +        'only_matching': True,      }]      def _login(self): @@ -44,6 +52,10 @@ class AnimeOnDemandIE(InfoExtractor):          login_page = self._download_webpage(              self._LOGIN_URL, None, 'Downloading login page') +        if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page: +            self.raise_geo_restricted( +                '%s is only available in German-speaking countries of Europe' % self.IE_NAME) +          login_form = self._form_hidden_inputs('new_user', login_page)          login_form.update({ @@ -126,33 +138,86 @@ class AnimeOnDemandIE(InfoExtractor):              formats = [] -            playlist_url = self._search_regex( -                r'data-playlist=(["\'])(?P<url>.+?)\1', -                episode_html, 'data playlist', default=None, group='url') -            if playlist_url: -                request = sanitized_Request( -                    compat_urlparse.urljoin(url, playlist_url), -                    headers={ -                        'X-Requested-With': 'XMLHttpRequest', -                        'X-CSRF-Token': csrf_token, -                        'Referer': url, -                        'Accept': 'application/json, text/javascript, */*; q=0.01', -                    }) - -                playlist = self._download_json( -                    request, video_id, 'Downloading playlist JSON', fatal=False) -                if playlist: -                    playlist = playlist['playlist'][0] -                    title = playlist['title'] +            for input_ in re.findall( +                    r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html): +                attributes = extract_attributes(input_) +                playlist_urls = [] +                for playlist_key in ('data-playlist', 'data-otherplaylist'): +                    playlist_url = attributes.get(playlist_key) +                    if isinstance(playlist_url, compat_str) and re.match( +                            r'/?[\da-zA-Z]+', playlist_url): +                        playlist_urls.append(attributes[playlist_key]) +                if not playlist_urls: +                    continue + +                lang = attributes.get('data-lang') +                lang_note = attributes.get('value') + +                for playlist_url in playlist_urls: +                    kind = self._search_regex( +                        r'videomaterialurl/\d+/([^/]+)/', +                        playlist_url, 'media kind', default=None) +                    format_id_list = [] +                    if lang: +                        format_id_list.append(lang) +                    if kind: +                        format_id_list.append(kind) +                    if not format_id_list: +                        format_id_list.append(compat_str(num)) +                    format_id = '-'.join(format_id_list) +                    format_note = ', '.join(filter(None, (kind, lang_note))) +                    request = sanitized_Request( +                        compat_urlparse.urljoin(url, playlist_url), +                        headers={ +                            'X-Requested-With': 'XMLHttpRequest', +                            'X-CSRF-Token': csrf_token, +                            'Referer': url, +                            'Accept': 'application/json, text/javascript, */*; q=0.01', +                        }) +                    playlist = self._download_json( +                        request, video_id, 'Downloading %s playlist JSON' % format_id, +                        fatal=False) +                    if not playlist: +                        continue +                    start_video = playlist.get('startvideo', 0) +                    playlist = playlist.get('playlist') +                    if not playlist or not isinstance(playlist, list): +                        continue +                    playlist = playlist[start_video] +                    title = playlist.get('title') +                    if not title: +                        continue                      description = playlist.get('description')                      for source in playlist.get('sources', []):                          file_ = source.get('file') -                        if file_ and determine_ext(file_) == 'm3u8': -                            formats = self._extract_m3u8_formats( +                        if not file_: +                            continue +                        ext = determine_ext(file_) +                        format_id_list = [lang, kind] +                        if ext == 'm3u8': +                            format_id_list.append('hls') +                        elif source.get('type') == 'video/dash' or ext == 'mpd': +                            format_id_list.append('dash') +                        format_id = '-'.join(filter(None, format_id_list)) +                        if ext == 'm3u8': +                            file_formats = self._extract_m3u8_formats(                                  file_, video_id, 'mp4', -                                entry_protocol='m3u8_native', m3u8_id='hls') +                                entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) +                        elif source.get('type') == 'video/dash' or ext == 'mpd': +                            continue +                            file_formats = self._extract_mpd_formats( +                                file_, video_id, mpd_id=format_id, fatal=False) +                        else: +                            continue +                        for f in file_formats: +                            f.update({ +                                'language': lang, +                                'format_note': format_note, +                            }) +                        formats.extend(file_formats)              if formats: +                self._sort_formats(formats)                  f = common_info.copy()                  f.update({                      'title': title, @@ -161,16 +226,18 @@ class AnimeOnDemandIE(InfoExtractor):                  })                  entries.append(f) -            m = re.search( -                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', -                episode_html) -            if m: -                f = common_info.copy() -                f.update({ -                    'id': '%s-teaser' % f['id'], -                    'title': m.group('title'), -                    'url': compat_urlparse.urljoin(url, m.group('href')), -                }) -                entries.append(f) +            # Extract teaser only when full episode is not available +            if not formats: +                m = re.search( +                    r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', +                    episode_html) +                if m: +                    f = common_info.copy() +                    f.update({ +                        'id': '%s-teaser' % f['id'], +                        'title': m.group('title'), +                        'url': compat_urlparse.urljoin(url, m.group('href')), +                    }) +                    entries.append(f)          return self.playlist_result(entries, anime_id, anime_title, anime_description) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index b761b2cc4..95a99c6b0 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -5,7 +5,7 @@ from .common import InfoExtractor  class AolIE(InfoExtractor):      IE_NAME = 'on.aol.com' -    _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)' +    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'      _TESTS = [{          'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', @@ -25,7 +25,7 @@ class AolIE(InfoExtractor):  class AolFeaturesIE(InfoExtractor):      IE_NAME = 'features.aol.com' -    _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)' +    _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'      _TESTS = [{          'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts', diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 3e119e21b..ae0f27dcb 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -23,7 +23,7 @@ from ..utils import (  class ArteTvIE(InfoExtractor): -    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html' +    _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'      IE_NAME = 'arte.tv'      def _real_extract(self, url): diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 011edf128..1805b7312 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -98,7 +98,7 @@ class AzubuIE(InfoExtractor):  class AzubuLiveIE(InfoExtractor): -    _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$' +    _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'      _TEST = {          'url': 'http://www.azubu.tv/MarsTVMDLen', diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index 76b21e596..234a661d3 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -9,7 +9,7 @@ from ..utils import unescapeHTML  class BaiduVideoIE(InfoExtractor):      IE_DESC = '百度视频' -    _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm' +    _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'      _TESTS = [{          'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',          'info_dict': { diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index e62b3860e..2dfcee98d 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -942,7 +942,7 @@ class BBCIE(BBCCoUkIE):  class BBCCoUkArticleIE(InfoExtractor): -    _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' +    _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'      IE_NAME = 'bbc.co.uk:article'      IE_DESC = 'BBC articles' diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index 1bdc25812..9bca853b3 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -8,7 +8,7 @@ from ..utils import url_basename  class BehindKinkIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' +    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'      _TEST = {          'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',          'md5': '507b57d8fdcd75a41a9a7bdb7989c762', diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 59beb11bc..8baff2041 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -14,7 +14,7 @@ from ..utils import (  class BiliBiliIE(InfoExtractor): -    _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' +    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'      _TESTS = [{          'url': 'http://www.bilibili.tv/video/av1074402/', diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py new file mode 100644 index 000000000..133228133 --- /dev/null +++ b/youtube_dl/extractor/biobiochiletv.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class BioBioChileTVIE(InfoExtractor): +    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml' + +    _TESTS = [{ +        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', +        'md5': '26f51f03cf580265defefb4518faec09', +        'info_dict': { +            'id': 'sobre-camaras-y-camarillas-parlamentarias', +            'ext': 'mp4', +            'title': 'Sobre Cámaras y camarillas parlamentarias', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'Fernando Atria', +        }, +    }, { +        # different uploader layout +        'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml', +        'md5': 'edc2e6b58974c46d5b047dea3c539ff3', +        'info_dict': { +            'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades', +            'ext': 'mp4', +            'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'Piangella Obrador', +        }, +        'params': { +            'skip_download': True, +        }, +    }, { +        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', +        'only_matching': True, +    }, { +        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') + +        file_url = self._search_regex( +            r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1', +            webpage, 'file url', group='url') + +        base_url = self._search_regex( +            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage, +            'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/', +            group='url') + +        formats = self._extract_m3u8_formats( +            '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4', +            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) +        f = { +            'url': '%s%s' % (base_url, file_url), +            'format_id': 'http', +            'protocol': 'http', +            'preference': 1, +        } +        if formats: +            f_copy = formats[-1].copy() +            f_copy.update(f) +            f = f_copy +        formats.append(f) +        self._sort_formats(formats) + +        thumbnail = self._og_search_thumbnail(webpage) +        uploader = self._html_search_regex( +            r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>', +            webpage, 'uploader', fatal=False) + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py index 122a1cbb6..86a7f4d7d 100644 --- a/youtube_dl/extractor/bokecc.py +++ b/youtube_dl/extractor/bokecc.py @@ -33,7 +33,7 @@ class BokeCCBaseIE(InfoExtractor):  class BokeCCIE(BokeCCBaseIE):      _IE_DESC = 'CC视频' -    _VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' +    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'      _TESTS = [{          'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index c28e72927..6ad45a1e6 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -12,7 +12,7 @@ from ..utils import (  class BpbIE(InfoExtractor):      IE_DESC = 'Bundeszentrale für politische Bildung' -    _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/' +    _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'      _TEST = {          'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index aa08051b1..725859b4d 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -11,7 +11,7 @@ from ..utils import (  class BreakIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',          'info_dict': { diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 897f3a104..dd4d96cec 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -16,7 +16,7 @@ from ..utils import (  class CamdemyIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'      _TESTS = [{          # single file          'url': 'http://www.camdemy.com/media/5181/', @@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor):  class CamdemyFolderIE(InfoExtractor): -    _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)' +    _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'      _TESTS = [{          # links with trailing slash          'url': 'http://www.camdemy.com/folder/450', diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 8ddcc5097..f23bac9a1 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -11,7 +11,7 @@ from ..utils import (  class CBSNewsIE(ThePlatformIE):      IE_DESC = 'CBS News' -    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' +    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'      _TESTS = [          { @@ -96,7 +96,7 @@ class CBSNewsIE(ThePlatformIE):  class CBSNewsLiveVideoIE(InfoExtractor):      IE_DESC = 'CBS News Live Videos' -    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' +    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'      _TEST = {          'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index ae47e74cc..549ae32f3 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -6,7 +6,7 @@ from .common import InfoExtractor  class CBSSportsIE(InfoExtractor): -    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' +    _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'      _TEST = {          'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py new file mode 100755 index 000000000..498d2c0d8 --- /dev/null +++ b/youtube_dl/extractor/cda.py @@ -0,0 +1,96 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    decode_packed_codes, +    ExtractorError, +    parse_duration +) + + +class CDAIE(InfoExtractor): +    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' +    _TESTS = [{ +        'url': 'http://www.cda.pl/video/5749950c', +        'md5': '6f844bf51b15f31fae165365707ae970', +        'info_dict': { +            'id': '5749950c', +            'ext': 'mp4', +            'height': 720, +            'title': 'Oto dlaczego przed zakrętem należy zwolnić.', +            'duration': 39 +        } +    }, { +        'url': 'http://www.cda.pl/video/57413289', +        'md5': 'a88828770a8310fc00be6c95faf7f4d5', +        'info_dict': { +            'id': '57413289', +            'ext': 'mp4', +            'title': 'Lądowanie na lotnisku na Maderze', +            'duration': 137 +        } +    }, { +        'url': 'http://ebd.cda.pl/0x0/5749950c', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) + +        if 'Ten film jest dostępny dla użytkowników premium' in webpage: +            raise ExtractorError('This video is only available for premium users.', expected=True) + +        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') + +        formats = [] + +        info_dict = { +            'id': video_id, +            'title': title, +            'formats': formats, +            'duration': None, +        } + +        def extract_format(page, version): +            unpacked = decode_packed_codes(page) +            format_url = self._search_regex( +                r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False) +            if not format_url: +                return +            f = { +                'url': format_url, +            } +            m = re.search( +                r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p', +                page) +            if m: +                f.update({ +                    'format_id': m.group('format_id'), +                    'height': int(m.group('height')), +                }) +            info_dict['formats'].append(f) +            if not info_dict['duration']: +                info_dict['duration'] = parse_duration(self._search_regex( +                    r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)) + +        extract_format(webpage, 'default') + +        for href, resolution in re.findall( +                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', +                webpage): +            webpage = self._download_webpage( +                href, video_id, 'Downloading %s version information' % resolution, fatal=False) +            if not webpage: +                # Manually report warning because empty page is returned when +                # invalid version is requested. +                self.report_warning('Unable to download %s version information' % resolution) +                continue +            extract_format(webpage, resolution) + +        self._sort_formats(formats) + +        return info_dict diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b27b4e670..b355111cb 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -129,7 +129,8 @@ class CeskaTelevizeIE(InfoExtractor):              formats = []              for format_id, stream_url in item['streamUrls'].items():                  formats.extend(self._extract_m3u8_formats( -                    stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native')) +                    stream_url, playlist_id, 'mp4', +                    entry_protocol='m3u8_native', fatal=False))              self._sort_formats(formats)              item_id = item.get('id') or item['assetId'] diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 2996b6b09..19f8b397e 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -19,7 +19,7 @@ def _decode(s):  class CliphunterIE(InfoExtractor):      IE_NAME = 'cliphunter' -    _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/ +    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/          (?P<id>[0-9]+)/          (?P<seo>.+?)(?:$|[#\?])      ''' diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index 8306d6fb7..0b6ad895f 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -8,7 +8,7 @@ from ..utils import (  class ClipsyndicateIE(InfoExtractor): -    _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 1dfa7c12e..2fba93543 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -12,7 +12,7 @@ from ..utils import (  class ClubicIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' +    _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'      _TESTS = [{          'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 7dff68492..e697d1410 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -11,7 +11,7 @@ from ..utils import (  class ComCarCoffIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)' +    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'      _TESTS = [{          'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',          'info_dict': { diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 6f92ae2ed..054978ff2 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor):          'wmagazine': 'W Magazine',      } -    _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) +    _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())      IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))      EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys()) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index b8b9d058d..84b36f44c 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE  class CSpanIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' +    _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'      IE_DESC = 'C-SPAN'      _TESTS = [{          'url': 'http://www.c-span.org/video/?313572-1/HolderonV', diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index 45049bf37..1622fc844 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError  class CtsNewsIE(InfoExtractor):      IE_DESC = '華視新聞'      # https connection failed (Connection reset) -    _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' +    _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'      _TESTS = [{          'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',          'md5': 'a9875cb790252b08431186d741beaabe', diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index aa2c09eb6..9099f5046 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -6,7 +6,7 @@ from ..compat import compat_str  class DctpTvIE(InfoExtractor): -    _VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' +    _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'      _TEST = {          'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',          'info_dict': { diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index 98e3aedfd..9fe144e14 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -5,7 +5,7 @@ from .common import InfoExtractor  class DefenseGouvFrIE(InfoExtractor):      IE_NAME = 'defense.gouv.fr' -    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' +    _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'      _TEST = {          'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index ce680a9f3..fdce1429a 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -9,7 +9,7 @@ from ..compat import compat_str  class DiscoveryIE(InfoExtractor): -    _VALID_URL = r'''(?x)http://(?:www\.)?(?: +    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:              discovery|              investigationdiscovery|              discoverylife| diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index bdc768c78..bcb670945 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring)  class DouyuTVIE(InfoExtractor):      IE_DESC = '斗鱼' -    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'      _TESTS = [{          'url': 'http://www.douyutv.com/iseven',          'info_dict': { diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index a638c827c..1e7dcada6 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -10,7 +10,7 @@ from ..utils import int_or_none  class DPlayIE(InfoExtractor): -    _VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' +    _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'      _TESTS = [{          'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 028144f20..0040e70d4 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -7,7 +7,7 @@ from .zdf import ZDFIE  class DreiSatIE(ZDFIE):      IE_NAME = '3sat' -    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' +    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'      _TESTS = [          {              'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index c1a4bc757..974c69dbc 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor):      IE_NAME = 'dvtv'      IE_DESC = 'http://video.aktualne.cz/' -    _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' +    _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'      _TESTS = [{          'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', diff --git a/youtube_dl/extractor/echomsk.py b/youtube_dl/extractor/echomsk.py index d2d94049d..6b7cc652f 100644 --- a/youtube_dl/extractor/echomsk.py +++ b/youtube_dl/extractor/echomsk.py @@ -7,7 +7,7 @@ from .common import InfoExtractor  class EchoMskIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'      _TEST = {          'url': 'http://www.echo.msk.ru/sounds/1464134.html',          'md5': '2e44b3b78daff5b458e4dbc37f191f7c', diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index 0c0fe6d65..09ed4f2b5 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -8,7 +8,7 @@ from .common import InfoExtractor  class ExfmIE(InfoExtractor):      IE_NAME = 'exfm'      IE_DESC = 'ex.fm' -    _VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' +    _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'      _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'      _TESTS = [          { diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 9580f5c0c..508684d2e 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -17,7 +17,7 @@ from ..utils import (  class FC2IE(InfoExtractor): -    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' +    _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'      IE_NAME = 'fc2'      _NETRC_MACHINE = 'fc2'      _TESTS = [{ diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py index 298227d57..e8936cb24 100644 --- a/youtube_dl/extractor/firstpost.py +++ b/youtube_dl/extractor/firstpost.py @@ -4,7 +4,7 @@ from .common import InfoExtractor  class FirstpostIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' +    _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'      _TEST = {          'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 510d4b108..98b165143 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -8,7 +8,7 @@ from ..utils import int_or_none  class FirstTVIE(InfoExtractor):      IE_NAME = '1tv'      IE_DESC = 'Первый канал' -    _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' +    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'      _TESTS = [{          'url': 'http://www.1tv.ru/videoarchive/73390', diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 5f6e65dae..a3a291599 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -10,7 +10,7 @@ from ..utils import (  class FKTVIE(InfoExtractor):      IE_NAME = 'fernsehkritik.tv' -    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?' +    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'      _TEST = {          'url': 'http://fernsehkritik.tv/folge-1', diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index 370fd006f..d2503ae2e 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -5,7 +5,7 @@ from .common import InfoExtractor  class FootyRoomIE(InfoExtractor): -    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' +    _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'      _TESTS = [{          'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',          'info_dict': { diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py index 08b8ea362..70c1a815d 100644 --- a/youtube_dl/extractor/foxgay.py +++ b/youtube_dl/extractor/foxgay.py @@ -4,7 +4,7 @@ from .common import InfoExtractor  class FoxgayIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' +    _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'      _TEST = {          'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',          'md5': '80d72beab5d04e1655a56ad37afe6841', diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 0388ba00c..2369f868d 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -6,7 +6,7 @@ from ..utils import int_or_none  class FranceInterIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'      _TEST = {          'url': 'http://www.franceinter.fr/player/reecouter?play=793962',          'md5': '4764932e466e6f6c79c317d2e74f6884', diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3f4ac3093..ad94e31f3 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -60,28 +60,31 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                      video_id, 'Downloading f4m manifest token', fatal=False)                  if f4m_url:                      formats.extend(self._extract_f4m_formats( -                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id)) +                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', +                        video_id, f4m_id=format_id, fatal=False))              elif ext == 'm3u8': -                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) +                formats.extend(self._extract_m3u8_formats( +                    video_url, video_id, 'mp4', entry_protocol='m3u8_native', +                    m3u8_id=format_id, fatal=False))              elif video_url.startswith('rtmp'):                  formats.append({                      'url': video_url,                      'format_id': 'rtmp-%s' % format_id,                      'ext': 'flv', -                    'preference': 1,                  })              else: -                formats.append({ -                    'url': video_url, -                    'format_id': format_id, -                    'preference': -1, -                }) +                if self._is_valid_url(video_url, video_id, format_id): +                    formats.append({ +                        'url': video_url, +                        'format_id': format_id, +                    })          self._sort_formats(formats)          title = info['titre']          subtitle = info.get('sous_titre')          if subtitle:              title += ' - %s' % subtitle +        title = title.strip()          subtitles = {}          subtitles_list = [{ @@ -125,13 +128,13 @@ class PluzzIE(FranceTVBaseInfoExtractor):  class FranceTvInfoIE(FranceTVBaseInfoExtractor):      IE_NAME = 'francetvinfo.fr' -    _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html' +    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'      _TESTS = [{          'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',          'info_dict': {              'id': '84981923', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Soir 3',              'upload_date': '20130826',              'timestamp': 1377548400, @@ -139,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):                  'fr': 'mincount:2',              },          }, +        'params': { +            # m3u8 downloads +            'skip_download': True, +        },      }, {          'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',          'info_dict': { @@ -155,11 +162,32 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):          'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',          'md5': 'f485bda6e185e7d15dbc69b72bae993e',          'info_dict': { -            'id': '556e03339473995ee145930c', +            'id': 'NI_173343',              'ext': 'mp4',              'title': 'Les entreprises familiales : le secret de la réussite',              'thumbnail': 're:^https?://.*\.jpe?g$', -        } +            'timestamp': 1433273139, +            'upload_date': '20150602', +        }, +        'params': { +            # m3u8 downloads +            'skip_download': True, +        }, +    }, { +        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html', +        'md5': 'f485bda6e185e7d15dbc69b72bae993e', +        'info_dict': { +            'id': 'NI_657393', +            'ext': 'mp4', +            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"', +            'description': 'md5:a3264114c9d29aeca11ced113c37b16c', +            'thumbnail': 're:^https?://.*\.jpe?g$', +            'timestamp': 1458300695, +            'upload_date': '20160318', +        }, +        'params': { +            'skip_download': True, +        },      }]      def _real_extract(self, url): @@ -172,7 +200,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):              return self.url_result(dmcloud_url, 'DailymotionCloud')          video_id, catalogue = self._search_regex( -            r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@') +            (r'id-video=([^@]+@[^"]+)', +             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), +            webpage, 'video id').split('@')          return self._extract_video(video_id, catalogue) diff --git a/youtube_dl/extractor/freevideo.py b/youtube_dl/extractor/freevideo.py index c7bec027b..cd8423a6f 100644 --- a/youtube_dl/extractor/freevideo.py +++ b/youtube_dl/extractor/freevideo.py @@ -5,7 +5,7 @@ from ..utils import ExtractorError  class FreeVideoIE(InfoExtractor): -    _VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])' +    _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'      _TEST = {          'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html', diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index f6b9046f9..cbcddcb7c 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -10,7 +10,7 @@ from .youtube import YoutubeIE  class GamekingsIE(InfoExtractor): -    _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' +    _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'      _TESTS = [{          # YouTube embed video          'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index b3f1bafcc..4ffdd7515 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import (  class GameSpotIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' +    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'      _TESTS = [{          'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',          'md5': 'b2a30deaa8654fcccd43713a6b6a4825', diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 590ccf526..69058a583 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -13,7 +13,7 @@ from ..utils import (  class GameStarIE(InfoExtractor): -    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' +    _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'      _TEST = {          'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',          'md5': '96974ecbb7fd8d0d20fca5a00810cea7', diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index c3f031d9c..1e7948ab8 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -9,7 +9,7 @@ from ..utils import (  class GametrailersIE(InfoExtractor): -    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' +    _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'      _TEST = {          'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 31e219945..efc3e8429 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -12,7 +12,7 @@ from ..utils import (  class HotNewHipHopIE(InfoExtractor): -    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' +    _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'      _TEST = {          'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',          'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index b3706fe6d..e0ab31802 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -12,7 +12,7 @@ from ..utils import (  class HypemIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/' +    _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'      _TEST = {          'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',          'md5': 'b9cc91b5af8995e9f0c1cee04c575828', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index b61b2dc4e..8bed8ccd0 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -12,7 +12,7 @@ from ..utils import (  class ImdbIE(InfoExtractor):      IE_NAME = 'imdb'      IE_DESC = 'Internet Movie Database trailers' -    _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'      _TEST = {          'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -70,7 +70,7 @@ class ImdbIE(InfoExtractor):  class ImdbListIE(InfoExtractor):      IE_NAME = 'imdb:list'      IE_DESC = 'Internet Movie Database lists' -    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' +    _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'      _TEST = {          'url': 'http://www.imdb.com/list/JFs9NWw6XI0',          'info_dict': { diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index e7c0cb3f6..1a4c64713 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):      IE_NAME = 'iqiyi'      IE_DESC = '爱奇艺' -    _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' +    _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'      _NETRC_MACHINE = 'iqiyi' diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py index 063e86de4..158c09a33 100644 --- a/youtube_dl/extractor/jadorecettepub.py +++ b/youtube_dl/extractor/jadorecettepub.py @@ -9,7 +9,7 @@ from .youtube import YoutubeIE  class JadoreCettePubIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html' +    _VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'      _TEST = {          'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html', diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 137db873c..1a4227f6b 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -8,7 +8,7 @@ from .common import InfoExtractor  class JeuxVideoIE(InfoExtractor): -    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' +    _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'      _TESTS = [{          'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index 06daf5a89..b4c30b7f3 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -9,7 +9,7 @@ from ..utils import (  class KaraoketvIE(InfoExtractor): -    _VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)' +    _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'      _TEST = {          'url': 'http://karaoketv.co.il/?container=songs&id=171568',          'info_dict': { diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index bed94bc93..2cb04e533 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -12,7 +12,7 @@ from ..utils import (  class KarriereVideosIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)' +    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'      _TESTS = [{          'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',          'info_dict': { diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index a59c529f4..704bd7b34 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -13,7 +13,7 @@ from ..utils import (  class KontrTubeIE(InfoExtractor):      IE_NAME = 'kontrtube'      IE_DESC = 'KontrTube.ru - Труба зовёт' -    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' +    _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'      _TEST = {          'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py index a602980a1..a574408e5 100644 --- a/youtube_dl/extractor/ku6.py +++ b/youtube_dl/extractor/ku6.py @@ -4,7 +4,7 @@ from .common import InfoExtractor  class Ku6IE(InfoExtractor): -    _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' +    _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'      _TEST = {          'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',          'md5': '01203549b9efbb45f4b87d55bdea1ed1', diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py index 931f34c9b..12cc56e44 100644 --- a/youtube_dl/extractor/kusi.py +++ b/youtube_dl/extractor/kusi.py @@ -16,7 +16,7 @@ from ..utils import (  class KUSIIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' +    _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'      _TESTS = [{          'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',          'md5': 'f926e7684294cf8cb7bdf8858e1b3988', diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index f94804d06..a586308b2 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -2,13 +2,13 @@  from __future__ import unicode_literals  import re -import itertools  from .common import InfoExtractor  from ..utils import (      get_element_by_id,      clean_html,      ExtractorError, +    InAdvancePagedList,      remove_start,  ) @@ -55,7 +55,7 @@ class KuwoBaseIE(InfoExtractor):  class KuwoIE(KuwoBaseIE):      IE_NAME = 'kuwo:song'      IE_DESC = '酷我音乐' -    _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/' +    _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)'      _TESTS = [{          'url': 'http://www.kuwo.cn/yinyue/635632/',          'info_dict': { @@ -80,6 +80,9 @@ class KuwoIE(KuwoBaseIE):          'params': {              'format': 'mp3-320'          }, +    }, { +        'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', +        'only_matching': True,      }]      def _real_extract(self, url): @@ -131,7 +134,7 @@ class KuwoIE(KuwoBaseIE):  class KuwoAlbumIE(InfoExtractor):      IE_NAME = 'kuwo:album'      IE_DESC = '酷我音乐 - 专辑' -    _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/' +    _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/'      _TEST = {          'url': 'http://www.kuwo.cn/album/502294/',          'info_dict': { @@ -167,13 +170,11 @@ class KuwoAlbumIE(InfoExtractor):  class KuwoChartIE(InfoExtractor):      IE_NAME = 'kuwo:chart'      IE_DESC = '酷我音乐 - 排行榜' -    _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' +    _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'      _TEST = {          'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',          'info_dict': {              'id': '香港中文龙虎榜', -            'title': '香港中文龙虎榜', -            'description': 're:\d{4}第\d{2}期',          },          'playlist_mincount': 10,      } @@ -184,30 +185,24 @@ class KuwoChartIE(InfoExtractor):              url, chart_id, note='Download chart info',              errnote='Unable to get chart info') -        chart_name = self._html_search_regex( -            r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name') - -        chart_desc = self._html_search_regex( -            r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc') -          entries = [              self.url_result(song_url, 'Kuwo') for song_url in re.findall( -                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage) +                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)          ] -        return self.playlist_result(entries, chart_id, chart_name, chart_desc) +        return self.playlist_result(entries, chart_id)  class KuwoSingerIE(InfoExtractor):      IE_NAME = 'kuwo:singer'      IE_DESC = '酷我音乐 - 歌手' -    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' +    _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'      _TESTS = [{          'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',          'info_dict': {              'id': 'bruno+mars',              'title': 'Bruno Mars',          }, -        'playlist_count': 10, +        'playlist_mincount': 329,      }, {          'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',          'info_dict': { @@ -218,6 +213,8 @@ class KuwoSingerIE(InfoExtractor):          'skip': 'Regularly stalls travis build',  # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540      }] +    PAGE_SIZE = 15 +      def _real_extract(self, url):          singer_id = self._match_id(url)          webpage = self._download_webpage( @@ -225,25 +222,28 @@ class KuwoSingerIE(InfoExtractor):              errnote='Unable to get singer info')          singer_name = self._html_search_regex( -            r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name' -        ) +            r'<h1>([^<]+)</h1>', webpage, 'singer name') + +        artist_id = self._html_search_regex( +            r'data-artistid="(\d+)"', webpage, 'artist id') + +        page_count = int(self._html_search_regex( +            r'data-page="(\d+)"', webpage, 'page count')) -        entries = [] -        first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True -        for page_num in itertools.count(1): +        def page_func(page_num):              webpage = self._download_webpage( -                'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num), -                singer_id, note='Download song list page #%d' % page_num, -                errnote='Unable to get song list page #%d' % page_num) +                'http://www.kuwo.cn/artist/contentMusicsAjax', +                singer_id, note='Download song list page #%d' % (page_num + 1), +                errnote='Unable to get song list page #%d' % (page_num + 1), +                query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE}) -            entries.extend([ +            return [                  self.url_result(song_url, 'Kuwo') for song_url in re.findall( -                    r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/', +                    r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',                      webpage) -            ][:10 if first_page_only else None]) +            ] -            if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage): -                break +        entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)          return self.playlist_result(entries, singer_id, singer_name) @@ -251,7 +251,7 @@ class KuwoSingerIE(InfoExtractor):  class KuwoCategoryIE(InfoExtractor):      IE_NAME = 'kuwo:category'      IE_DESC = '酷我音乐 - 分类' -    _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' +    _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'      _TEST = {          'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',          'info_dict': { @@ -288,7 +288,7 @@ class KuwoCategoryIE(InfoExtractor):  class KuwoMvIE(KuwoBaseIE):      IE_NAME = 'kuwo:mv'      IE_DESC = '酷我音乐 - MV' -    _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/' +    _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/'      _TEST = {          'url': 'http://www.kuwo.cn/mv/6480076/',          'info_dict': { diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 5d8ebbeb3..41d80bc12 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -19,7 +19,7 @@ from ..utils import (  class Laola1TvIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)' +    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'      _TESTS = [{          'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',          'info_dict': { @@ -33,7 +33,7 @@ class Laola1TvIE(InfoExtractor):          },          'params': {              'skip_download': True, -        } +        },      }, {          'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',          'info_dict': { @@ -47,12 +47,28 @@ class Laola1TvIE(InfoExtractor):          },          'params': {              'skip_download': True, -        } +        }, +    }, { +        'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde', +        'info_dict': { +            'id': '487850', +            'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde', +            'ext': 'flv', +            'title': 'Belogorie BELGOROD - TRENTINO Diatec', +            'upload_date': '20160322', +            'uploader': 'CEV - Europäischer Volleyball Verband', +            'is_live': True, +            'categories': ['Volleyball'], +        }, +        'params': { +            'skip_download': True, +        },      }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          display_id = mobj.group('slug') +        kind = mobj.group('kind')          lang = mobj.group('lang')          portal = mobj.group('portal') @@ -85,12 +101,17 @@ class Laola1TvIE(InfoExtractor):          _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)          title = _v('title', fatal=True) +        VS_TARGETS = { +            'video': '2', +            'livestream': '17', +        } +          req = sanitized_Request(              'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %              compat_urllib_parse.urlencode({                  'videoId': video_id, -                'target': '2', -                'label': 'laola1tv', +                'target': VS_TARGETS.get(kind, '2'), +                'label': _v('label'),                  'area': _v('area'),              }),              urlencode_postdata( diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index df47e88ba..462b752dd 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -28,7 +28,7 @@ from ..utils import (  class LeIE(InfoExtractor):      IE_DESC = '乐视网' -    _VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' +    _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'      _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -196,7 +196,7 @@ class LeIE(InfoExtractor):  class LePlaylistIE(InfoExtractor): -    _VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' +    _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'      _TESTS = [{          'url': 'http://www.le.com/tv/46177.html', diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index a8fd639cc..ba2f80a75 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -17,7 +17,7 @@ from ..utils import (  class LifeNewsIE(InfoExtractor):      IE_NAME = 'lifenews'      IE_DESC = 'LIFE | NEWS' -    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' +    _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'      _TESTS = [{          # single video embedded via video/source @@ -159,7 +159,7 @@ class LifeNewsIE(InfoExtractor):  class LifeEmbedIE(InfoExtractor):      IE_NAME = 'life:embed' -    _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})' +    _VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'      _TEST = {          'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 1a0625ac3..2599d45c3 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor):  class LimelightMediaIE(LimelightBaseIE):      IE_NAME = 'limelight' -    _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' +    _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'      _TESTS = [{          'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',          'info_dict': { @@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE):  class LimelightChannelIE(LimelightBaseIE):      IE_NAME = 'limelight:channel' -    _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' +    _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'      _TEST = {          'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',          'info_dict': { @@ -207,7 +207,7 @@ class LimelightChannelIE(LimelightBaseIE):  class LimelightChannelListIE(LimelightBaseIE):      IE_NAME = 'limelight:channel_list' -    _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' +    _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'      _TEST = {          'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',          'info_dict': { diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index 7e025831b..d5945ad66 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -8,7 +8,7 @@ from .common import InfoExtractor  class M6IE(InfoExtractor):      IE_NAME = 'm6' -    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html' +    _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'      _TEST = {          'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html', diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 71085f279..46eb00492 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -13,7 +13,7 @@ from ..utils import (  class MailRuIE(InfoExtractor):      IE_NAME = 'mailru'      IE_DESC = 'Видео@Mail.Ru' -    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' +    _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'      _TESTS = [          { diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 67d6271e1..c31e8798a 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -17,7 +17,7 @@ from ..utils import (  class MetacafeIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' +    _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'      _DISCLAIMER = 'http://www.metacafe.com/family_filter/'      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'      IE_NAME = 'metacafe' diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 819c1b90b..1aea78d11 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -91,7 +91,7 @@ class MITIE(TechTVMITIE):  class OCWMITIE(InfoExtractor):      IE_NAME = 'ocw.mit.edu' -    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' +    _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'      _BASE_URL = 'http://ocw.mit.edu/'      _TESTS = [ diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index c595f2077..9e584860a 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -14,7 +14,7 @@ from ..utils import (  class MiTeleIE(InfoExtractor):      IE_DESC = 'mitele.es' -    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' +    _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'      _TESTS = [{          'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 7cc7f054f..f010f52d5 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -13,7 +13,7 @@ from ..utils import (  class MooshareIE(InfoExtractor):      IE_NAME = 'mooshare'      IE_DESC = 'Mooshare.biz' -    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})' +    _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'      _TESTS = [          { diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 0b4787c1d..5e1a8a71a 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -12,7 +12,7 @@ from ..utils import (  class MotherlessIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'      _TESTS = [{          'url': 'http://motherless.com/AC3FFE1',          'md5': '310f62e325a9fafe64f68c0bccb6e75f', @@ -69,6 +69,9 @@ class MotherlessIE(InfoExtractor):                  ">The page you're looking for cannot be found.<")):              raise ExtractorError('Video %s does not exist' % video_id, expected=True) +        if '>The content you are trying to view is for friends only.' in webpage: +            raise ExtractorError('Video %s is for friends only' % video_id, expected=True) +          title = self._html_search_regex(              r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')          video_url = self._html_search_regex( diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index c1a482dba..370328b36 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -9,7 +9,7 @@ from ..compat import (  class MotorsportIE(InfoExtractor):      IE_DESC = 'motorsport.com' -    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' +    _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'      _TEST = {          'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',          'info_dict': { diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index f936b92bb..1ca7b1a9e 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,7 +11,7 @@ from ..utils import (  class MySpassIE(InfoExtractor): -    _VALID_URL = r'http://www\.myspass\.de/.*' +    _VALID_URL = r'https?://www\.myspass\.de/.*'      _TEST = {          'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',          'md5': '0b49f4844a068f8b33f4b7c88405862b', diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 1e21cf98a..c83a1eab5 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -20,7 +20,7 @@ from ..utils import (  class MyVideoIE(InfoExtractor):      _WORKING = False -    _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*' +    _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'      IE_NAME = 'myvideo'      _TEST = {          'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py index a94ab8358..731c24542 100644 --- a/youtube_dl/extractor/myvidster.py +++ b/youtube_dl/extractor/myvidster.py @@ -4,7 +4,7 @@ from .common import InfoExtractor  class MyVidsterIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' +    _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'      _TEST = {          'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making', diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 7ce8d9b18..d5e53365c 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -8,7 +8,7 @@ from ..utils import (  class NationalGeographicIE(InfoExtractor): -    _VALID_URL = r'http://video\.nationalgeographic\.com/.*?' +    _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'      _TESTS = [          { diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index bb0817e34..a622f2212 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -115,7 +115,7 @@ class NBCSportsVPlayerIE(InfoExtractor):  class NBCSportsIE(InfoExtractor):      # Does not include https because its certificate is invalid -    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' +    _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'      _TEST = {          'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', @@ -295,7 +295,7 @@ class NBCNewsIE(ThePlatformIE):  class MSNBCIE(InfoExtractor):      # https URLs redirect to corresponding http ones -    _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' +    _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'      _TEST = {          'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',          'md5': '6d236bf4f3dddc226633ce6e2c3f814d', diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index d1688457f..aae7aeeeb 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601  class NextMediaIE(InfoExtractor):      IE_DESC = '蘋果日報' -    _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' +    _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'      _TESTS = [{          'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',          'md5': 'dff9fad7009311c421176d1ac90bfe4f', @@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor):  class NextMediaActionNewsIE(NextMediaIE):      IE_DESC = '蘋果日報 - 動新聞' -    _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' +    _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'      _TESTS = [{          'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',          'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):  class AppleDailyIE(NextMediaIE):      IE_DESC = '臺灣蘋果日報' -    _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' +    _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'      _TESTS = [{          'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',          'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index d440313d5..ec7317a2f 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -22,7 +22,7 @@ from ..utils import (  class NocoIE(InfoExtractor): -    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' +    _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'      _LOGIN_URL = 'http://noco.tv/do.php'      _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'      _SUB_LANG_TEMPLATE = '&sub_lang=%s' diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 5952d136f..77e091072 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -9,7 +9,7 @@ from ..utils import (  class NormalbootsIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' +    _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'      _TEST = {          'url': 'http://normalboots.com/video/home-alone-games-jontron/',          'md5': '8bf6de238915dd501105b44ef5f1e0f6', diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 3f9c776ef..17671ad39 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -12,7 +12,7 @@ from ..utils import (  class NovaIE(InfoExtractor):      IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' -    _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' +    _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'      _TESTS = [{          'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',          'info_dict': { diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index 125c7010b..a3f0abb4e 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -9,7 +9,7 @@ from ..utils import (  class NprIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',          'info_dict': { diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index 2cd924d05..0895d7ea4 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -11,7 +11,7 @@ from ..utils import (  class NTVRuIE(InfoExtractor):      IE_NAME = 'ntv.ru' -    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)' +    _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)'      _TESTS = [          { diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 080045d4c..5db949b17 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -20,6 +20,10 @@ class OnceIE(InfoExtractor):              media_item_id, 'mp4', m3u8_id='hls', fatal=False)          progressive_formats = []          for adaptive_format in formats: +            # Prevent advertisement from embedding into m3u8 playlist (see +            # https://github.com/rg3/youtube-dl/issues/8893#issuecomment-199912684) +            adaptive_format['url'] = re.sub( +                r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])              rendition_id = self._search_regex(                  r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',                  adaptive_format['url'], 'redition id', default=None) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py new file mode 100644 index 000000000..4468f31fc --- /dev/null +++ b/youtube_dl/extractor/openload.py @@ -0,0 +1,107 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_chr +from ..utils import ( +    encode_base_n, +    ExtractorError, +) + + +class OpenloadIE(InfoExtractor): +    _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)' + +    _TESTS = [{ +        'url': 'https://openload.co/f/kUEfGclsU9o', +        'md5': 'bf1c059b004ebc7a256f89408e65c36e', +        'info_dict': { +            'id': 'kUEfGclsU9o', +            'ext': 'mp4', +            'title': 'skyrim_no-audio_1080.mp4', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +    }, { +        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', +        'only_matching': True, +    }, { +        'url': 'https://openload.io/f/ZAn6oz-VZGE/', +        'only_matching': True, +    }] + +    @staticmethod +    def openload_level2_debase(m): +        radix, num = int(m.group(1)) + 27, int(m.group(2)) +        return '"' + encode_base_n(num, radix) + '"' + +    @classmethod +    def openload_level2(cls, txt): +        # The function name is ǃ \u01c3 +        # Using escaped unicode literals does not work in Python 3.2 +        return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, re.UNICODE).replace('"+"', '') + +    # Openload uses a variant of aadecode +    # openload_decode and related functions are originally written by +    # vitas@matfyz.cz and released with public domain +    # See https://github.com/rg3/youtube-dl/issues/8489 +    @classmethod +    def openload_decode(cls, txt): +        symbol_table = [ +            ('_', '(゚Д゚) [゚Θ゚]'), +            ('a', '(゚Д゚) [゚ω゚ノ]'), +            ('b', '(゚Д゚) [゚Θ゚ノ]'), +            ('c', '(゚Д゚) [\'c\']'), +            ('d', '(゚Д゚) [゚ー゚ノ]'), +            ('e', '(゚Д゚) [゚Д゚ノ]'), +            ('f', '(゚Д゚) [1]'), + +            ('o', '(゚Д゚) [\'o\']'), +            ('u', '(o゚ー゚o)'), +            ('c', '(゚Д゚) [\'c\']'), + +            ('7', '((゚ー゚) + (o^_^o))'), +            ('6', '((o^_^o) +(o^_^o) +(c^_^o))'), +            ('5', '((゚ー゚) + (゚Θ゚))'), +            ('4', '(-~3)'), +            ('3', '(-~-~1)'), +            ('2', '(-~1)'), +            ('1', '(-~0)'), +            ('0', '((c^_^o)-(c^_^o))'), +        ] +        delim = '(゚Д゚)[゚ε゚]+' +        ret = '' +        for aachar in txt.split(delim): +            for val, pat in symbol_table: +                aachar = aachar.replace(pat, val) +            aachar = aachar.replace('+ ', '') +            m = re.match(r'^\d+', aachar) +            if m: +                ret += compat_chr(int(m.group(0), 8)) +            else: +                m = re.match(r'^u([\da-f]+)', aachar) +                if m: +                    ret += compat_chr(int(m.group(1), 16)) +        return cls.openload_level2(ret) + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        if 'File not found' in webpage: +            raise ExtractorError('File not found', expected=True) + +        code = self._search_regex( +            r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>', +            webpage, 'JS code') + +        video_url = self._search_regex( +            r'return\s+"(https?://[^"]+)"', self.openload_decode(code), 'video URL') + +        return { +            'id': video_id, +            'title': self._og_search_title(webpage), +            'thumbnail': self._og_search_thumbnail(webpage), +            'url': video_url, +        } diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 958eb398b..66c75f8b3 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -137,7 +137,7 @@ class ORFTVthekIE(InfoExtractor):  class ORFOE1IE(InfoExtractor):      IE_NAME = 'orf:oe1'      IE_DESC = 'Radio Österreich 1' -    _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)' +    _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'      # Audios on ORF radio are only available for 7 days, so we can't add tests.      _TEST = { @@ -171,7 +171,7 @@ class ORFOE1IE(InfoExtractor):  class ORFFM4IE(InfoExtractor):      IE_NAME = 'orf:fm4'      IE_DESC = 'radio FM4' -    _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' +    _VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'      _TEST = {          'url': 'http://fm4.orf.at/player/20160110/IS/', @@ -222,7 +222,7 @@ class ORFFM4IE(InfoExtractor):  class ORFIPTVIE(InfoExtractor):      IE_NAME = 'orf:iptv'      IE_DESC = 'iptv.ORF.at' -    _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' +    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'      _TEST = {          'url': 'http://iptv.orf.at/stories/2275236/', diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index 6e60e5fe9..f1008ae51 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -12,7 +12,7 @@ from ..utils import (  class PhilharmonieDeParisIE(InfoExtractor):      IE_DESC = 'Philharmonie de Paris' -    _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' +    _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'      _TESTS = [{          'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',          'info_dict': { diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index 788411ccc..6c8bbe1d9 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -8,7 +8,7 @@ from ..compat import compat_urllib_parse_unquote  class PhotobucketIE(InfoExtractor): -    _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' +    _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'      _TEST = {          'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',          'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 3e15533e9..63ce87ee3 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -1,7 +1,10 @@  # encoding: utf-8  from __future__ import unicode_literals -from ..compat import compat_urllib_parse +from ..compat import ( +    compat_urllib_parse, +    compat_urllib_parse_unquote, +)  from .common import InfoExtractor  from ..utils import (      parse_duration, @@ -28,9 +31,10 @@ class Porn91IE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) -        url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id          self._set_cookie('91porn.com', 'language', 'cn_CN') -        webpage = self._download_webpage(url, video_id, 'get HTML content') + +        webpage = self._download_webpage( +            'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)          if '作为游客,你每天只可观看10个视频' in webpage:              raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) @@ -54,8 +58,9 @@ class Porn91IE(InfoExtractor):          })          info_cn = self._download_webpage(              'http://91porn.com/getfile.php?' + url_params, video_id, -            'get real video url') -        video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url') +            'Downloading real video url') +        video_url = compat_urllib_parse_unquote(self._search_regex( +            r'file=([^&]+)&', info_cn, 'url'))          duration = parse_duration(self._search_regex(              r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False)) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 57c78ba52..39b53ecf6 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -12,7 +12,7 @@ from ..utils import (  class PornHdIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' +    _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'      _TEST = {          'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',          'md5': '956b8ca569f7f4d8ec563e2c41598441', diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 1a53fd71c..6b51e5c54 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -13,7 +13,7 @@ from ..utils import (  class PornoVoisinesIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' +    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'      _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \          '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 30a5f2de4..cc0416cb8 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -7,7 +7,7 @@ from .common import InfoExtractor  class PyvideoIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' +    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'      _TESTS = [          { diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 45a3c41c5..ff0af9543 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -18,7 +18,7 @@ from ..utils import (  class QQMusicIE(InfoExtractor):      IE_NAME = 'qqmusic'      IE_DESC = 'QQ音乐' -    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' +    _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'      _TESTS = [{          'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',          'md5': '9ce1c1c8445f561506d2e3cfb0255705', @@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor):  class QQMusicSingerIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:singer'      IE_DESC = 'QQ音乐 - 歌手' -    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' +    _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'      _TEST = {          'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',          'info_dict': { @@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):  class QQMusicAlbumIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:album'      IE_DESC = 'QQ音乐 - 专辑' -    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' +    _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'      _TESTS = [{          'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', @@ -260,7 +260,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):  class QQMusicToplistIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:toplist'      IE_DESC = 'QQ音乐 - 排行榜' -    _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' +    _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'      _TESTS = [{          'url': 'http://y.qq.com/#type=toplist&p=global_123', @@ -314,7 +314,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):  class QQMusicPlaylistIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:playlist'      IE_DESC = 'QQ音乐 - 歌单' -    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' +    _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'      _TESTS = [{          'url': 'http://y.qq.com/#type=taoge&id=3462654915', diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index a4dc5c335..e36ce1aa1 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -18,7 +18,7 @@ from ..utils import (  class RaiTVIE(InfoExtractor): -    _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' +    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'      _TESTS = [          {              'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', @@ -175,7 +175,7 @@ class RaiTVIE(InfoExtractor):  class RaiIE(InfoExtractor): -    _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' +    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'      _TESTS = [          {              'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index d6054d717..7ba41ba59 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -5,7 +5,7 @@ from ..utils import ExtractorError  class RedTubeIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'      _TEST = {          'url': 'http://www.redtube.com/66418',          'md5': '7b8c22b5e7098a3e1c09709df1126d2d', diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py index 508758075..2c2c707bd 100644 --- a/youtube_dl/extractor/ringtv.py +++ b/youtube_dl/extractor/ringtv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor  class RingTVIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' +    _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'      _TEST = {          'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',          'md5': 'd25945f5df41cdca2d2587165ac28720', diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 8a8c5d2a0..08cd1ae6c 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -62,7 +62,7 @@ def _decrypt_url(png):  class RTVEALaCartaIE(InfoExtractor):      IE_NAME = 'rtve.es:alacarta'      IE_DESC = 'RTVE a la carta' -    _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' +    _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -179,7 +179,7 @@ class RTVEInfantilIE(InfoExtractor):  class RTVELiveIE(InfoExtractor):      IE_NAME = 'rtve.es:live'      IE_DESC = 'RTVE.es live streams' -    _VALID_URL = r'http://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' +    _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'      _TESTS = [{          'url': 'http://www.rtve.es/directo/la-1/', diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 0e470e73f..1f7c26299 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -5,7 +5,7 @@ from .common import InfoExtractor  class RUHDIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'      _TEST = {          'url': 'http://www.ruhd.ru/play.php?vid=207',          'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index c5c47d01e..9ca4ae147 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -122,7 +122,7 @@ class RutubeEmbedIE(InfoExtractor):  class RutubeChannelIE(InfoExtractor):      IE_NAME = 'rutube:channel'      IE_DESC = 'Rutube channels' -    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' +    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'      _TESTS = [{          'url': 'http://rutube.ru/tags/video/1800/',          'info_dict': { @@ -156,7 +156,7 @@ class RutubeChannelIE(InfoExtractor):  class RutubeMovieIE(RutubeChannelIE):      IE_NAME = 'rutube:movie'      IE_DESC = 'Rutube movies' -    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' +    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'      _TESTS = []      _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' @@ -174,7 +174,7 @@ class RutubeMovieIE(RutubeChannelIE):  class RutubePersonIE(RutubeChannelIE):      IE_NAME = 'rutube:person'      IE_DESC = 'Rutube person videos' -    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' +    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'      _TESTS = [{          'url': 'http://rutube.ru/video/person/313878/',          'info_dict': { diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index f7fe1fece..a2379eb04 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -14,7 +14,7 @@ class RUTVIE(InfoExtractor):      IE_DESC = 'RUTV.RU'      _VALID_URL = r'''(?x)          https?://player\.(?:rutv\.ru|vgtrk\.com)/ -            (?P<path>flash2v/container\.swf\?id= +            (?P<path>flash\d+v/container\.swf\?id=              |iframe/(?P<type>swf|video|live)/id/              |index/iframe/cast_id/)              (?P<id>\d+)''' @@ -109,7 +109,7 @@ class RUTVIE(InfoExtractor):              return mobj.group('url')          mobj = re.search( -            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', +            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',              webpage)          if mobj:              return mobj.group('url') @@ -119,7 +119,7 @@ class RUTVIE(InfoExtractor):          video_id = mobj.group('id')          video_path = mobj.group('path') -        if video_path.startswith('flash2v'): +        if re.match(r'flash\d+v', video_path):              video_type = 'video'          elif video_path.startswith('iframe'):              video_type = mobj.group('type') @@ -168,7 +168,7 @@ class RUTVIE(InfoExtractor):                          'play_path': mobj.group('playpath'),                          'app': mobj.group('app'),                          'page_url': 'http://player.rutv.ru', -                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', +                        'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',                          'rtmp_live': True,                          'ext': 'flv',                          'vbr': int(quality), diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 256396bb8..6ba91f202 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor):  class SafariIE(SafariBaseIE):      IE_NAME = 'safari'      IE_DESC = 'safaribooksonline.com online video' -    _VALID_URL = r'''(?x)https?:// -                            (?:www\.)?safaribooksonline\.com/ -                                (?: -                                    library/view/[^/]+| -                                    api/v1/book -                                )/ -                                (?P<course_id>[^/]+)/ -                                    (?:chapter(?:-content)?/)? -                                (?P<part>part\d+)\.html -    ''' +    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'      _TESTS = [{          'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -98,9 +89,6 @@ class SafariIE(SafariBaseIE):              'uploader_id': 'stork',          },      }, { -        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', -        'only_matching': True, -    }, {          # non-digits in course id          'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',          'only_matching': True, @@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        course_id = mobj.group('course_id') -        part = mobj.group('part') - -        webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) -        reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') -        partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') -        ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') +        video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) + +        webpage = self._download_webpage(url, video_id) +        reference_id = self._search_regex( +            r'data-reference-id=(["\'])(?P<id>.+?)\1', +            webpage, 'kaltura reference id', group='id') +        partner_id = self._search_regex( +            r'data-partner-id=(["\'])(?P<id>.+?)\1', +            webpage, 'kaltura widget id', group='id') +        ui_id = self._search_regex( +            r'data-ui-id=(["\'])(?P<id>.+?)\1', +            webpage, 'kaltura uiconf id', group='id')          query = {              'wid': '_%s' % partner_id, @@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE):          if self.LOGGED_IN:              kaltura_session = self._download_json(                  '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), -                course_id, 'Downloading kaltura session JSON', +                video_id, 'Downloading kaltura session JSON',                  'Unable to download kaltura session JSON', fatal=False)              if kaltura_session:                  session = kaltura_session.get('session') @@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE):              'Kaltura') +class SafariApiIE(SafariBaseIE): +    IE_NAME = 'safari:api' +    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html' + +    _TEST = { +        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', +        'only_matching': True, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        part = self._download_json( +            url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), +            'Downloading part JSON') +        return self.url_result(part['web_url'], SafariIE.ie_key()) + +  class SafariCourseIE(SafariBaseIE):      IE_NAME = 'safari:course'      IE_DESC = 'safaribooksonline.com online courses' @@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE):                  'No chapters found for course %s' % course_id, expected=True)          entries = [ -            self.url_result(chapter, 'Safari') +            self.url_result(chapter, SafariApiIE.ie_key())              for chapter in course_json['chapters']]          course_title = course_json['title'] diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index f2af15f6b..dd0a6ba19 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -11,7 +11,7 @@ from ..utils import (  class ScreenJunkiesIE(InfoExtractor): -    _VALID_URL = r'http://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' +    _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'      _TESTS = [{          'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',          'md5': '5c2b686bec3d43de42bde9ec047536b0', diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 4d3b58522..c5f474dd1 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor):          ['arch', '', 'http://ussenate-f.akamaihd.net/']      ]      _IE_NAME = 'senate.gov' -    _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)' +    _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)'      _TESTS = [{          'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',          'info_dict': { diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 8eda3c864..96fe0b90d 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -13,7 +13,7 @@ from ..utils import (  class SharedIE(InfoExtractor):      IE_DESC = 'shared.sx and vivo.sx' -    _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})' +    _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'      _TESTS = [{          'url': 'http://shared.sx/0060718775', diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py index dfe50ed45..7e6783306 100644 --- a/youtube_dl/extractor/sport5.py +++ b/youtube_dl/extractor/sport5.py @@ -8,7 +8,7 @@ from ..utils import ExtractorError  class Sport5IE(InfoExtractor): -    _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'      _TESTS = [          {              'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py index 13101c714..54d1843f2 100644 --- a/youtube_dl/extractor/ssa.py +++ b/youtube_dl/extractor/ssa.py @@ -8,7 +8,7 @@ from ..utils import (  class SSAIE(InfoExtractor): -    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)' +    _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'      _TEST = {          'url': 'http://ssa.nls.uk/film/3561',          'info_dict': { diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py index aa5964acb..f562aa6d3 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -5,7 +5,7 @@ from .common import InfoExtractor  class SztvHuIE(InfoExtractor): -    _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'      _TEST = {          'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',          'md5': 'a6df607b11fb07d0e9f2ad94613375cb', diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index d1b7264b4..b49ab5f5b 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -16,7 +16,7 @@ from ..compat import compat_ord  class TeamcocoIE(InfoExtractor): -    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' +    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'      _TESTS = [          {              'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py index 4e860db0a..a29a64b6d 100644 --- a/youtube_dl/extractor/tele13.py +++ b/youtube_dl/extractor/tele13.py @@ -11,7 +11,7 @@ from ..utils import (  class Tele13IE(InfoExtractor): -    _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)' +    _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'      _TESTS = [          {              'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 9ee844684..3f54b2744 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor  class TF1IE(InfoExtractor):      """TF1 uses the wat.tv player.""" -    _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html' +    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'      _TESTS = [{          'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',          'info_dict': { diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py new file mode 100644 index 000000000..08d666eaf --- /dev/null +++ b/youtube_dl/extractor/thescene.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import compat_urllib_parse +from ..utils import qualities + + +class TheSceneIE(InfoExtractor): +    _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' + +    _TEST = { +        'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', +        'info_dict': { +            'id': '520e8faac2b4c00e3c6e5f43', +            'ext': 'mp4', +            'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', +            'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', +        }, +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) +        player_url = compat_urllib_parse.urljoin( +            url, +            self._html_search_regex( +                r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url')) + +        self.to_screen(player_url) +        player = self._download_webpage(player_url, player_url) +        info = self._parse_json(self._search_regex(r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), display_id) + +        qualities_order = qualities(['low', 'high']) +        formats = [{ +            'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), +            'url': f['src'], +            'quality': qualities_order(f['quality']), +        } for f in info['sources'][0]] +        self._sort_formats(formats) + +        return { +            'id': info['id'], +            'title': info['title'], +            'formats': formats, +            'thumbnail': info.get('poster_frame'), +            'display_id': display_id, +        } diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py index 496f15d80..406f4a826 100644 --- a/youtube_dl/extractor/thvideo.py +++ b/youtube_dl/extractor/thvideo.py @@ -10,7 +10,7 @@ from ..utils import (  class THVideoIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'      _TEST = {          'url': 'http://thvideo.tv/v/th1987/',          'md5': 'fa107b1f73817e325e9433505a70db50', diff --git a/youtube_dl/extractor/tinypic.py b/youtube_dl/extractor/tinypic.py index e036b8cdf..c43cace24 100644 --- a/youtube_dl/extractor/tinypic.py +++ b/youtube_dl/extractor/tinypic.py @@ -9,7 +9,7 @@ from ..utils import ExtractorError  class TinyPicIE(InfoExtractor):      IE_NAME = 'tinypic'      IE_DESC = 'tinypic.com videos' -    _VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+' +    _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'      _TESTS = [          { diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 17add9543..abad3ff64 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -9,7 +9,7 @@ from ..compat import compat_parse_qs  class TlcDeIE(InfoExtractor):      IE_NAME = 'tlc.de' -    _VALID_URL = r'http://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' +    _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'      _TEST = {          'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 2756f56d3..2579ba8c6 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -41,7 +41,7 @@ class ToypicsIE(InfoExtractor):  class ToypicsUserIE(InfoExtractor):      IE_DESC = 'Toypics user profile' -    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])' +    _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'      _TEST = {          'url': 'http://videos.toypics.net/Mikey',          'info_dict': { diff --git a/youtube_dl/extractor/traileraddict.py b/youtube_dl/extractor/traileraddict.py index 0e01b15fc..747370d12 100644 --- a/youtube_dl/extractor/traileraddict.py +++ b/youtube_dl/extractor/traileraddict.py @@ -7,7 +7,7 @@ from .common import InfoExtractor  class TrailerAddictIE(InfoExtractor):      _WORKING = False -    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' +    _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'      _TEST = {          'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',          'md5': '41365557f3c8c397d091da510e73ceb4', diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index d239949a6..657705623 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -7,7 +7,7 @@ from .nuevo import NuevoBaseIE  class TrollvidsIE(NuevoBaseIE): -    _VALID_URL = r'http://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' +    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'      IE_NAME = 'trollvids'      _TEST = {          'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index f56b66d06..9892e8a62 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -6,6 +6,7 @@ from .common import InfoExtractor  from ..compat import compat_str  from ..utils import (      int_or_none, +    InAdvancePagedList,      float_or_none,      unescapeHTML,  ) @@ -75,15 +76,16 @@ class TudouIE(InfoExtractor):          quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),                           key=lambda k: int(k))[-1]          parts = segments[quality] -        result = []          len_parts = len(parts)          if len_parts > 1:              self.to_screen('%s: found %s parts' % (video_id, len_parts)) -        for part in parts: + +        def part_func(partnum): +            part = parts[partnum]              part_id = part['k']              final_url = self._url_for_id(part_id, quality)              ext = (final_url.split('?')[0]).split('.')[-1] -            part_info = { +            return [{                  'id': '%s' % part_id,                  'url': final_url,                  'ext': ext, @@ -97,12 +99,13 @@ class TudouIE(InfoExtractor):                  'http_headers': {                      'Referer': self._PLAYER_URL,                  }, -            } -            result.append(part_info) +            }] + +        entries = InAdvancePagedList(part_func, len_parts, 1)          return {              '_type': 'multi_video', -            'entries': result, +            'entries': entries,              'id': video_id,              'title': title,          } diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 4f844706d..cea117c79 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -8,7 +8,7 @@ from ..utils import int_or_none  class TumblrIE(InfoExtractor): -    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' +    _VALID_URL = r'https?://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'      _TESTS = [{          'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',          'md5': '479bb068e5b16462f5176a6828829767', diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py index 8322cc14d..ae4cfaec2 100644 --- a/youtube_dl/extractor/tunein.py +++ b/youtube_dl/extractor/tunein.py @@ -1,7 +1,7 @@  # coding: utf-8  from __future__ import unicode_literals -import json +import re  from .common import InfoExtractor  from ..utils import ExtractorError @@ -27,10 +27,9 @@ class TuneInBaseIE(InfoExtractor):          if not streams_url.startswith('http://'):              streams_url = compat_urlparse.urljoin(url, streams_url) -        stream_data = self._download_webpage( -            streams_url, content_id, note='Downloading stream data') -        streams = json.loads(self._search_regex( -            r'\((.*)\);', stream_data, 'stream info'))['Streams'] +        streams = self._download_json( +            streams_url, content_id, note='Downloading stream data', +            transform_source=lambda s: re.sub(r'^\s*\((.*)\);\s*$', r'\1', s))['Streams']          is_live = None          formats = [] diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index 1457e524e..86bb7915d 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -14,7 +14,7 @@ from ..utils import (  class TV2IE(InfoExtractor): -    _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'      _TEST = {          'url': 'http://www.tv2.no/v/916509/',          'info_dict': { @@ -100,7 +100,7 @@ class TV2IE(InfoExtractor):  class TV2ArticleIE(InfoExtractor): -    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',          'info_dict': { diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 3a4f393fc..4065354dd 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -11,7 +11,7 @@ from ..utils import (  class TVCIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'      _TEST = {          'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',          'md5': 'bbc5ff531d1e90e856f60fc4b3afd708', @@ -64,7 +64,7 @@ class TVCIE(InfoExtractor):  class TVCArticleIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)' +    _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'      _TESTS = [{          'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',          'info_dict': { diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index b4683de54..df70a6b23 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -13,7 +13,7 @@ from ..utils import (  class TVPlayIE(InfoExtractor):      IE_DESC = 'TV3Play and related services' -    _VALID_URL = r'''(?x)http://(?:www\.)? +    _VALID_URL = r'''(?x)https?://(?:www\.)?          (?:tvplay\.lv/parraides|             tv3play\.lt/programos|             play\.tv3\.lt/programos| diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py index d50237758..1d52cbc98 100644 --- a/youtube_dl/extractor/ubu.py +++ b/youtube_dl/extractor/ubu.py @@ -10,7 +10,7 @@ from ..utils import (  class UbuIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' +    _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'      _TEST = {          'url': 'http://ubu.com/film/her_noise.html',          'md5': '138d5652618bf0f03878978db9bef1ee', diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 594bee4f9..66d9f1bf3 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -7,7 +7,7 @@ from ..utils import qualities  class UnistraIE(InfoExtractor): -    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)' +    _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'      _TESTS = [          { diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 3794bcded..b755dda90 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -13,7 +13,7 @@ from ..utils import (  class Vbox7IE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)' +    _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'      _TEST = {          'url': 'http://vbox7.com/play:249bb972c2',          'md5': '99f65c0c9ef9b682b97313e052734c3f', diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 9633f7ffe..23ce0a0d1 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -12,7 +12,7 @@ from ..utils import (  class VeohIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)' +    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'      _TESTS = [          { diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index a0c59a2e0..cb64ae0bd 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -10,7 +10,7 @@ from .rutv import RUTVIE  class VestiIE(InfoExtractor):      IE_DESC = 'Вести.Ru' -    _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' +    _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P<id>.+)'      _TESTS = [          { diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index e148b1ef5..b11cd254c 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -214,7 +214,7 @@ class VGTVIE(XstreamIE):  class BTArticleIE(InfoExtractor):      IE_NAME = 'bt:article'      IE_DESC = 'Bergens Tidende Articles' -    _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' +    _VALID_URL = r'https?://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'      _TEST = {          'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',          'md5': '2acbe8ad129b3469d5ae51b1158878df', @@ -241,7 +241,7 @@ class BTArticleIE(InfoExtractor):  class BTVestlendingenIE(InfoExtractor):      IE_NAME = 'bt:vestlendingen'      IE_DESC = 'Bergens Tidende - Vestlendingen' -    _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'      _TESTS = [{          'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',          'md5': 'd7d17e3337dc80de6d3a540aefbe441b', diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index 2cd36508a..0f798711b 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -14,7 +14,7 @@ class VideoTtIE(InfoExtractor):      _WORKING = False      ID_NAME = 'video.tt'      IE_DESC = 'video.tt - Your True Tube' -    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})' +    _VALID_URL = r'https?://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'      _TESTS = [{          'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8', diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 315984bf9..03b9f1353 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -15,7 +15,7 @@ from ..utils import (  class ViideaIE(InfoExtractor): -    _VALID_URL = r'''(?x)http://(?:www\.)?(?: +    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:              videolectures\.net|              flexilearn\.viidea\.net|              presentations\.ocwconsortium\.org| diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 9e2aa58bd..bd5545173 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -64,7 +64,7 @@ class VLiveIE(InfoExtractor):          thumbnail = self._og_search_thumbnail(webpage)          creator = self._html_search_regex( -            r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>', +            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',              webpage, 'creator', fatal=False)          view_count = int_or_none(playinfo.get('meta', {}).get('count')) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 149e36467..10ca6acb1 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -15,7 +15,7 @@ from ..utils import (  class VubeIE(InfoExtractor):      IE_NAME = 'vube'      IE_DESC = 'Vube.com' -    _VALID_URL = r'http://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b' +    _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'      _TESTS = [          { diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index a6d9b5fee..eaa888f00 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -14,7 +14,7 @@ from ..utils import (  class VuClipIE(InfoExtractor): -    _VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'      _TEST = {          'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html', diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py index 24efbd6e6..8b9488340 100644 --- a/youtube_dl/extractor/walla.py +++ b/youtube_dl/extractor/walla.py @@ -11,7 +11,7 @@ from ..utils import (  class WallaIE(InfoExtractor): -    _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' +    _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'      _TEST = {          'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',          'info_dict': { diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 37cf3d309..5227bb5ad 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -12,7 +12,7 @@ from ..utils import (  class WatIE(InfoExtractor): -    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)' +    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'      IE_NAME = 'wat.tv'      _TESTS = [          { diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index a851578e0..31c904303 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -244,7 +244,7 @@ class WDRMobileIE(InfoExtractor):  class WDRMausIE(InfoExtractor): -    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))' +    _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'      IE_DESC = 'Sendung mit der Maus'      _TESTS = [{          'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index e333ae345..3dafbeec2 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor  class WeiqiTVIE(InfoExtractor):      IE_DESC = 'WQTV' -    _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' +    _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'      _TESTS = [{          'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index fb0accac7..828c03dc3 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -5,7 +5,7 @@ from .youtube import YoutubeIE  class WimpIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)' +    _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'      _TESTS = [{          'url': 'http://www.wimp.com/maruexhausted/',          'md5': 'ee21217ffd66d058e8b16be340b74883', diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index 4ff99e5ca..e4a2baad2 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -5,7 +5,7 @@ from ..compat import compat_urllib_parse_unquote  class XBefIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'      _TEST = {          'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',          'md5': 'a478b565baff61634a98f5e5338be995', diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index fd43e8854..b3547174d 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -4,6 +4,7 @@ import re  from .common import InfoExtractor  from ..utils import ( +    dict_get,      float_or_none,      int_or_none,      unified_strdate, @@ -170,6 +171,12 @@ class XHamsterEmbedIE(InfoExtractor):          video_url = self._search_regex(              r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, -            webpage, 'xhamster url') +            webpage, 'xhamster url', default=None) + +        if not video_url: +            vars = self._parse_json( +                self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'), +                video_id) +            video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))          return self.url_result(video_url, 'XHamster') diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 001ee17b6..63bbc0634 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -15,7 +15,7 @@ from ..utils import (  class YamIE(InfoExtractor):      IE_DESC = '蕃薯藤yam天空部落' -    _VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)' +    _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)'      _TESTS = [{          # An audio hosted on Yam diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 869f3e819..2522551dc 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -9,7 +9,7 @@ from ..compat import compat_urllib_parse_unquote_plus  class YnetIE(InfoExtractor): -    _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' +    _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'      _TESTS = [          {              'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 822728afc..7819f14ab 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -724,7 +724,7 @@ def parseOpts(overrideArguments=None):      postproc.add_option(          '--embed-subs',          action='store_true', dest='embedsubtitles', default=False, -        help='Embed subtitles in the video (only for mkv and mp4 videos)') +        help='Embed subtitles in the video (only for mp4, webm and mkv videos)')      postproc.add_option(          '--embed-thumbnail',          action='store_true', dest='embedthumbnail', default=False, diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index a8819f258..06b8c0548 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -331,17 +331,34 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):  class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):      def run(self, information): -        if information['ext'] not in ['mp4', 'mkv']: -            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files') +        if information['ext'] not in ('mp4', 'webm', 'mkv'): +            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')              return [], information          subtitles = information.get('requested_subtitles')          if not subtitles:              self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')              return [], information -        sub_langs = list(subtitles.keys())          filename = information['filepath'] -        sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] + +        ext = information['ext'] +        sub_langs = [] +        sub_filenames = [] +        webm_vtt_warn = False + +        for lang, sub_info in subtitles.items(): +            sub_ext = sub_info['ext'] +            if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': +                sub_langs.append(lang) +                sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) +            else: +                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': +                    webm_vtt_warn = True +                    self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') + +        if not sub_langs: +            return [], information +          input_files = [filename] + sub_filenames          opts = [ diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 067b8a184..03bb7782f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1746,6 +1746,7 @@ def escape_url(url):      """Escape URL as suggested by RFC 3986"""      url_parsed = compat_urllib_parse_urlparse(url)      return url_parsed._replace( +        netloc=url_parsed.netloc.encode('idna').decode('ascii'),          path=escape_rfc3986(url_parsed.path),          params=escape_rfc3986(url_parsed.params),          query=escape_rfc3986(url_parsed.query),  | 
