diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/downloader/http.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/dailymotion.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/nowtv.py | 68 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/screenwavemedia.py | 90 | 
5 files changed, 94 insertions, 95 deletions
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index b7f144af9..a29f5cf31 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,6 +4,7 @@ import errno  import os  import socket  import time +import re  from .common import FileDownloader  from ..compat import ( @@ -57,6 +58,24 @@ class HttpFD(FileDownloader):              # Establish connection              try:                  data = self.ydl.urlopen(request) +                # When trying to resume, Content-Range HTTP header of response has to be checked +                # to match the value of requested Range HTTP header. This is due to webservers +                # that don't support resuming and serve a whole file with no Content-Range +                # set in response despite the requested Range (see +                # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) +                if resume_len > 0: +                    content_range = data.headers.get('Content-Range') +                    if content_range: +                        content_range_m = re.search(r'bytes (\d+)-', content_range) +                        # Content-Range is present and matches requested Range, resume is possible +                        if content_range_m and resume_len == int(content_range_m.group(1)): +                            break +                    # Content-Range is either not present or invalid. 
Assuming remote webserver is +                    # trying to send the whole file, resume is not possible, so wiping the local file +                    # and performing entire redownload +                    self.report_unable_to_resume() +                    resume_len = 0 +                    open_mode = 'wb'                  break              except (compat_urllib_error.HTTPError, ) as err:                  if (err.code < 500 or err.code >= 600) and err.code != 416: diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 85d945509..2d90b2224 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -15,7 +15,6 @@ from ..utils import (      ExtractorError,      determine_ext,      int_or_none, -    orderedSet,      parse_iso8601,      str_to_int,      unescapeHTML, @@ -278,7 +277,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):      }]      def _extract_entries(self, id): -        video_ids = [] +        video_ids = set()          processed_urls = set()          for pagenum in itertools.count(1):              page_url = self._PAGE_TEMPLATE % (id, pagenum) @@ -291,12 +290,13 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):              processed_urls.add(urlh.geturl()) -            video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage)) +            for video_id in re.findall(r'data-xid="(.+?)"', webpage): +                if video_id not in video_ids: +                    yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') +                    video_ids.add(video_id)              if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:                  break -        return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') -                for video_id in orderedSet(video_ids)]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) diff --git 
a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 0b5ff4760..ad938fb62 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -1,12 +1,11 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..compat import compat_str  from ..utils import (      ExtractorError, +    determine_ext,      int_or_none,      parse_iso8601,      parse_duration, @@ -15,7 +14,7 @@ from ..utils import (  class NowTVIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player' +    _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'      _TESTS = [{          # rtl @@ -23,7 +22,7 @@ class NowTVIE(InfoExtractor):          'info_dict': {              'id': '203519',              'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Die neuen Bauern und eine Hochzeit',              'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',              'thumbnail': 're:^https?://.*\.jpg$', @@ -32,7 +31,7 @@ class NowTVIE(InfoExtractor):              'duration': 2786,          },          'params': { -            # m3u8 download +            # rtmp download              'skip_download': True,          },      }, { @@ -41,7 +40,7 @@ class NowTVIE(InfoExtractor):          'info_dict': {              'id': '203481',              'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Berlin - Tag & Nacht (Folge 934)',              'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',              'thumbnail': 're:^https?://.*\.jpg$', @@ -50,7 +49,7 @@ class NowTVIE(InfoExtractor):              'duration': 2641,          },          'params': { -            # m3u8 download +            # 
rtmp download              'skip_download': True,          },      }, { @@ -59,7 +58,7 @@ class NowTVIE(InfoExtractor):          'info_dict': {              'id': '165780',              'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Hals- und Beinbruch',              'description': 'md5:b50d248efffe244e6f56737f0911ca57',              'thumbnail': 're:^https?://.*\.jpg$', @@ -68,7 +67,7 @@ class NowTVIE(InfoExtractor):              'duration': 2742,          },          'params': { -            # m3u8 download +            # rtmp download              'skip_download': True,          },      }, { @@ -77,7 +76,7 @@ class NowTVIE(InfoExtractor):          'info_dict': {              'id': '99205',              'display_id': 'medicopter-117/angst', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Angst!',              'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',              'thumbnail': 're:^https?://.*\.jpg$', @@ -86,7 +85,7 @@ class NowTVIE(InfoExtractor):              'duration': 3025,          },          'params': { -            # m3u8 download +            # rtmp download              'skip_download': True,          },      }, { @@ -95,7 +94,7 @@ class NowTVIE(InfoExtractor):          'info_dict': {              'id': '203521',              'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',              'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',              'thumbnail': 're:^https?://.*\.jpg$', @@ -104,7 +103,7 @@ class NowTVIE(InfoExtractor):              'duration': 1083,          },          'params': { -            # m3u8 download +            # rtmp download              'skip_download': True,          },      }, { @@ -113,7 +112,7 @@ class 
NowTVIE(InfoExtractor):          'info_dict': {              'id': '128953',              'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel', -            'ext': 'mp4', +            'ext': 'flv',              'title': "Büro-Fall / Chihuahua 'Joel'",              'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',              'thumbnail': 're:^https?://.*\.jpg$', @@ -122,15 +121,16 @@ class NowTVIE(InfoExtractor):              'duration': 3092,          },          'params': { -            # m3u8 download +            # rtmp download              'skip_download': True,          }, +    }, { +        'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview', +        'only_matching': True,      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        display_id = mobj.group('id') -        station = mobj.group('station') +        display_id = self._match_id(url)          info = self._download_json(              'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id, @@ -148,29 +148,19 @@ class NowTVIE(InfoExtractor):                  raise ExtractorError(                      'Video %s is not available for free' % video_id, expected=True) -        f = info.get('format', {}) -        station = f.get('station') or station - -        STATIONS = { -            'rtl': 'rtlnow', -            'rtl2': 'rtl2now', -            'vox': 'voxnow', -            'nitro': 'rtlnitronow', -            'ntv': 'n-tvnow', -            'superrtl': 'superrtlnow' -        } -          formats = []          for item in files['items']: -            item_path = remove_start(item['path'], '/') -            tbr = int_or_none(item['bitrate']) -            m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path) -            m3u8_url = m3u8_url.replace('now/', 'now/videos/') +            if 
determine_ext(item['path']) != 'f4v': +                continue +            app, play_path = remove_start(item['path'], '/').split('/', 1)              formats.append({ -                'url': m3u8_url, -                'format_id': '%s-%sk' % (item['id'], tbr), -                'ext': 'mp4', -                'tbr': tbr, +                'url': 'rtmpe://fms.rtl.de', +                'app': app, +                'play_path': 'mp4:%s' % play_path, +                'ext': 'flv', +                'page_url': url, +                'player_url': 'http://rtl-now.rtl.de/includes/nc_player.swf', +                'tbr': int_or_none(item.get('bitrate')),              })          self._sort_formats(formats) @@ -178,6 +168,8 @@ class NowTVIE(InfoExtractor):          description = info.get('articleLong') or info.get('articleShort')          timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')          duration = parse_duration(info.get('duration')) + +        f = info.get('format', {})          thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')          return { diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0b7886840..fec493046 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -81,7 +81,7 @@ class PornHubIE(InfoExtractor):          comment_count = self._extract_count(              r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') -        video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) +        video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))          if webpage.find('"encrypted":true') != -1:              password = compat_urllib_parse_unquote_plus(                  self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 
d1ab66b32..3bc84989e 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -1,12 +1,11 @@  # encoding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      int_or_none,      unified_strdate, +    js_to_json,  ) @@ -22,59 +21,48 @@ class ScreenwaveMediaIE(InfoExtractor):          video_id = self._match_id(url)          playerdata = self._download_webpage( -            'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id, +            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,              video_id, 'Downloading player webpage')          vidtitle = self._search_regex(              r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') -        vidurl = self._search_regex( -            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/') - -        videolist_url = None - -        mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata) -        if mobj: -            videoserver = mobj.group('videoserver') -            mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) -            vidid = mobj.group('vidid') if mobj else video_id -            videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) -        else: -            mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) -            if mobj: -                videolist_url = mobj.group('smil') - -        if videolist_url: -            videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') -            formats = [] -            baseurl = vidurl[:vidurl.rfind('/') + 1] -            for video in videolist.findall('.//video'): -                src = video.get('src') -                if not src: -                    continue -                file_ = src.partition(':')[-1] -                width = 
int_or_none(video.get('width')) -                height = int_or_none(video.get('height')) -                bitrate = int_or_none(video.get('system-bitrate'), scale=1000) -                format = { -                    'url': baseurl + file_, -                    'format_id': src.rpartition('.')[0].rpartition('_')[-1], -                } -                if width or height: -                    format.update({ -                        'tbr': bitrate, -                        'width': width, -                        'height': height, -                    }) -                else: -                    format.update({ -                        'abr': bitrate, -                        'vcodec': 'none', -                    }) -                formats.append(format) -        else: -            formats = [{ -                'url': vidurl, -            }] + +        playerconfig = self._download_webpage( +            'http://player.screenwavemedia.com/player.js', +            video_id, 'Downloading playerconfig webpage') + +        videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver') + +        sources = self._parse_json( +            js_to_json( +                self._search_regex( +                    r"sources\s*:\s*(\[[^\]]+?\])", playerconfig, +                    'sources', +                ).replace( +                    "' + thisObj.options.videoserver + '", +                    videoserver +                ).replace( +                    "' + playerVidId + '", +                    video_id +                ) +            ), +            video_id +        ) + +        formats = [] +        for source in sources: +            if source['type'] == 'hls': +                formats.extend(self._extract_m3u8_formats(source['file'], video_id)) +            else: +                format_label = source.get('label') +                height = int_or_none(self._search_regex( +                    r'^(\d+)[pP]', format_label, 'height', 
default=None)) +                formats.append({ +                    'url': source['file'], +                    'format': format_label, +                    'ext': source.get('type'), +                    'height': height, +                })          self._sort_formats(formats)          return {  | 
