diff options
| -rw-r--r-- | test/test_subtitles.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/bilibili.py | 20 | ||||
| -rw-r--r-- | youtube_dl/extractor/cinemassacre.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/dailymotion.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/facebook.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/firedrive.py | 80 | ||||
| -rw-r--r-- | youtube_dl/extractor/naver.py | 24 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/sockshare.py | 83 | ||||
| -rw-r--r-- | youtube_dl/extractor/spankwire.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/teamcoco.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/tf1.py | 9 | 
13 files changed, 70 insertions, 187 deletions
| diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 891ee620b..c4e3adb67 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -266,7 +266,7 @@ class TestNRKSubtitles(BaseTestSubtitles):          self.DL.params['allsubtitles'] = True          subtitles = self.getSubtitles()          self.assertEqual(set(subtitles.keys()), set(['no'])) -        self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a') +        self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')  class TestRaiSubtitles(BaseTestSubtitles): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 17248ccea..3c1e8d526 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -149,7 +149,6 @@ from .extremetube import ExtremeTubeIE  from .facebook import FacebookIE  from .faz import FazIE  from .fc2 import FC2IE -from .firedrive import FiredriveIE  from .firstpost import FirstpostIE  from .firsttv import FirstTVIE  from .fivemin import FiveMinIE @@ -480,7 +479,6 @@ from .smotri import (      SmotriBroadcastIE,  )  from .snotr import SnotrIE -from .sockshare import SockshareIE  from .sohu import SohuIE  from .soundcloud import (      SoundcloudIE, diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 7ca835e31..2103ed73a 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals  import re  import itertools +import json +import xml.etree.ElementTree as ET  from .common import InfoExtractor  from ..utils import ( @@ -67,11 +69,19 @@ class BiliBiliIE(InfoExtractor):          entries = [] -        lq_doc = self._download_xml( +        lq_page = self._download_webpage(              'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,              video_id,              note='Downloading LQ video info'          ) +        try: +            err_info = json.loads(lq_page) +            raise ExtractorError( +                'BiliBili said: ' + err_info['error_text'], expected=True) +        except ValueError: +            pass + +        lq_doc = ET.fromstring(lq_page)          lq_durls = lq_doc.findall('./durl')          hq_doc = self._download_xml( @@ -80,9 +90,11 @@ class BiliBiliIE(InfoExtractor):              note='Downloading HQ video info',              fatal=False,          ) -        hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None) - -        assert len(lq_durls) == len(hq_durls) +        if hq_doc is not False: +            hq_durls = hq_doc.findall('./durl') +            assert len(lq_durls) == len(hq_durls) +        else: +            hq_durls = itertools.repeat(None)          i = 1          for lq_durl, hq_durl in zip(lq_durls, hq_durls): diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index cf0a7551b..c949a4814 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -60,6 +60,17 @@ class CinemassacreIE(InfoExtractor):                  'uploader_id': 'Cinemassacre',                  'title': 'AVGN: McKids',              } +        }, +        { +            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/', +            'md5': '1376908e49572389e7b06251a53cdd08', +            'info_dict': { +                'id': 'Cinemassacre-555779690c440', +                'ext': 'mp4', +                'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', +                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', +                'upload_date': '20150525', +            }          }      ] @@ -72,7 +83,7 @@ class CinemassacreIE(InfoExtractor):          playerdata_url = self._search_regex(              [ -                r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', +                r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',                  r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',              ],              webpage, 'player data URL', default=None) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index db10b8d00..70aa4333c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -225,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):  class DailymotionUserIE(DailymotionPlaylistIE):      IE_NAME = 'dailymotion:user' -    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)' +    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P<user>[^/]+)$'      _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'      _TESTS = [{          'url': 'https://www.dailymotion.com/user/nqtv', @@ -239,7 +239,8 @@ class DailymotionUserIE(DailymotionPlaylistIE):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          user = mobj.group('user') -        webpage = self._download_webpage(url, user) +        webpage = self._download_webpage( +            'https://www.dailymotion.com/user/%s' % user, user)          full_user = unescapeHTML(self._html_search_regex(              r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),              webpage, 'user')) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 937b28fcc..82dc27bc6 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -50,7 +50,10 @@ class FacebookIE(InfoExtractor):              'id': '274175099429670',              'ext': 'mp4',              'title': 'Facebook video #274175099429670', -        } +        }, +        'expected_warnings': [ +            'title' +        ]      }, {          'url': 'https://www.facebook.com/video.php?v=10204634152394104',          'only_matching': True, @@ -149,12 +152,12 @@ class FacebookIE(InfoExtractor):              raise ExtractorError('Cannot find video formats')          video_title = self._html_search_regex( -            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', -            fatal=False) +            r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title', +            default=None)          if not video_title:              video_title = self._html_search_regex(                  r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', -                webpage, 'alternative title', default=None) +                webpage, 'alternative title', fatal=False)              video_title = limit_length(video_title, 80)          if not video_title:              video_title = 'Facebook video #%s' % video_id diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py deleted file mode 100644 index 3191116d9..000000000 --- a/youtube_dl/extractor/firedrive.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( -    compat_urllib_parse, -    compat_urllib_request, -) -from ..utils import ( -    ExtractorError, -) - - -class FiredriveIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ -                 '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)' -    _FILE_DELETED_REGEX = r'<div class="removed_file_image">' - -    _TESTS = [{ -        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', -        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', -        'info_dict': { -            'id': 'FEB892FA160EBD01', -            'ext': 'flv', -            'title': 'bbb_theora_486kbit.flv', -            'thumbnail': 're:^http://.*\.jpg$', -        }, -    }] - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        url = 'http://firedrive.com/file/%s' % video_id -        webpage = self._download_webpage(url, video_id) - -        if re.search(self._FILE_DELETED_REGEX, webpage) is not None: -            raise ExtractorError('Video %s does not exist' % video_id, -                                 expected=True) - -        fields = dict(re.findall(r'''(?x)<input\s+ -            type="hidden"\s+ -            name="([^"]+)"\s+ -            value="([^"]*)" -            ''', webpage)) - -        post = compat_urllib_parse.urlencode(fields) -        req = compat_urllib_request.Request(url, post) -        req.add_header('Content-type', 'application/x-www-form-urlencoded') - -        # Apparently, this header is required for confirmation to work. -        req.add_header('Host', 'www.firedrive.com') - -        webpage = self._download_webpage(req, video_id, -                                         'Downloading video page') - -        title = self._search_regex(r'class="external_title_left">(.+)</div>', -                                   webpage, 'title') -        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, -                                       'thumbnail', fatal=False) -        if thumbnail is not None: -            thumbnail = 'http:' + thumbnail - -        ext = self._search_regex(r'type:\s?\'([^\']+)\',', -                                 webpage, 'extension', fatal=False) -        video_url = self._search_regex( -            r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url') - -        formats = [{ -            'format_id': 'sd', -            'url': video_url, -            'ext': ext, -        }] - -        return { -            'id': video_id, -            'title': title, -            'thumbnail': thumbnail, -            'formats': formats, -        } diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index c10405f04..925967753 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -6,6 +6,7 @@ import re  from .common import InfoExtractor  from ..compat import (      compat_urllib_parse, +    compat_urlparse,  )  from ..utils import (      ExtractorError, @@ -16,7 +17,7 @@ from ..utils import (  class NaverIE(InfoExtractor):      _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)' -    _TEST = { +    _TESTS = [{          'url': 'http://tvcast.naver.com/v/81652',          'info_dict': {              'id': '81652', @@ -25,7 +26,18 @@ class NaverIE(InfoExtractor):              'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',              'upload_date': '20130903',          }, -    } +    }, { +        'url': 'http://tvcast.naver.com/v/395837', +        'md5': '638ed4c12012c458fefcddfd01f173cd', +        'info_dict': { +            'id': '395837', +            'ext': 'mp4', +            'title': '9년이 지나도 아픈 기억, 전효성의 아버지', +            'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7', +            'upload_date': '20150519', +        }, +        'skip': 'Georestricted', +    }]      def _real_extract(self, url):          video_id = self._match_id(url) @@ -35,7 +47,7 @@ class NaverIE(InfoExtractor):                           webpage)          if m_id is None:              m_error = re.search( -                r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', +                r'(?s)<div class="(?:nation_error|nation_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',                  webpage)              if m_error:                  raise ExtractorError(clean_html(m_error.group('msg')), expected=True) @@ -58,14 +70,18 @@ class NaverIE(InfoExtractor):          formats = []          for format_el in urls.findall('EncodingOptions/EncodingOption'):              domain = format_el.find('Domain').text +            uri = format_el.find('uri').text              f = { -                'url': domain + format_el.find('uri').text, +                'url': compat_urlparse.urljoin(domain, uri),                  'ext': 'mp4',                  'width': int(format_el.find('width').text),                  'height': int(format_el.find('height').text),              }              if domain.startswith('rtmp'): +                # urlparse does not support custom schemes +                # https://bugs.python.org/issue18828                  f.update({ +                    'url': domain + uri,                      'ext': 'flv',                      'rtmp_protocol': '1',  # rtmpt                  }) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0c8b731cf..daa284ea2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -71,7 +71,8 @@ class PornHubIE(InfoExtractor):          video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))          if webpage.find('"encrypted":true') != -1: -            password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) +            password = compat_urllib_parse.unquote_plus( +                self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))              video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))          formats = [] diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py deleted file mode 100644 index b5fa6f1da..000000000 --- a/youtube_dl/extractor/sockshare.py +++ /dev/null @@ -1,83 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from ..compat import ( -    compat_urllib_parse, -    compat_urllib_request, -) -from ..utils import ( -    determine_ext, -    ExtractorError, -) - -from .common import InfoExtractor - - -class SockshareIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)' -    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>' -    _TEST = { -        'url': 'http://www.sockshare.com/file/437BE28B89D799D7', -        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd', -        'info_dict': { -            'id': '437BE28B89D799D7', -            'title': 'big_buck_bunny_720p_surround.avi', -            'ext': 'avi', -        } -    } - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        url = 'http://sockshare.com/file/%s' % video_id -        webpage = self._download_webpage(url, video_id) - -        if re.search(self._FILE_DELETED_REGEX, webpage) is not None: -            raise ExtractorError('Video %s does not exist' % video_id, -                                 expected=True) - -        confirm_hash = self._html_search_regex(r'''(?x)<input\s+ -            type="hidden"\s+ -            value="([^"]*)"\s+ -            name="hash" -            ''', webpage, 'hash') - -        fields = { -            "hash": confirm_hash.encode('utf-8'), -            "confirm": "Continue as Free User" -        } - -        post = compat_urllib_parse.urlencode(fields) -        req = compat_urllib_request.Request(url, post) -        # Apparently, this header is required for confirmation to work. -        req.add_header('Host', 'www.sockshare.com') -        req.add_header('Content-type', 'application/x-www-form-urlencoded') - -        webpage = self._download_webpage( -            req, video_id, 'Downloading video page') - -        video_url = self._html_search_regex( -            r'<a href="([^"]*)".+class="download_file_link"', -            webpage, 'file url') -        video_url = "http://www.sockshare.com" + video_url -        title = self._html_search_regex(( -            r'<h1>(.+)<strong>', -            r'var name = "([^"]+)";'), -            webpage, 'title', default=None) -        thumbnail = self._html_search_regex( -            r'<img\s+src="([^"]*)".+?name="bg"', -            webpage, 'thumbnail', default=None) - -        formats = [{ -            'format_id': 'sd', -            'url': video_url, -            'ext': determine_ext(title), -        }] - -        return { -            'id': video_id, -            'title': title, -            'thumbnail': thumbnail, -            'formats': formats, -        } diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index b936202f6..06d6e6640 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -71,7 +71,7 @@ class SpankwireIE(InfoExtractor):              compat_urllib_parse.unquote,              re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))          if webpage.find('flashvars\.encrypted = "true"') != -1: -            password = self._html_search_regex( +            password = self._search_regex(                  r'flashvars\.video_title = "([^"]+)',                  webpage, 'password').replace('+', ' ')              video_urls = list(map( diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 56be52638..b2a4b1fc0 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -10,6 +10,7 @@ from .common import InfoExtractor  from ..utils import (      ExtractorError,      qualities, +    determine_ext,  )  from ..compat import compat_ord @@ -108,7 +109,7 @@ class TeamcocoIE(InfoExtractor):          formats = []          get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])          for filed in data['files']: -            if filed['type'] == 'hls': +            if determine_ext(filed['url']) == 'm3u8':                  formats.extend(self._extract_m3u8_formats(                      filed['url'], video_id, ext='mp4'))              else: diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 025d0877c..656410528 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,8 +6,8 @@ from .common import InfoExtractor  class TF1IE(InfoExtractor):      """TF1 uses the wat.tv player.""" -    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' -    _TESTS = { +    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' +    _TESTS = [{          'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',          'info_dict': {              'id': '10635995', @@ -32,7 +32,10 @@ class TF1IE(InfoExtractor):              # Sometimes wat serves the whole file with the --test option              'skip_download': True,          }, -    } +    }, { +        'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) | 
