diff options
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 34 | 
1 files changed, 26 insertions, 8 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7d8ce1808..87fce9cd8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -27,7 +27,6 @@ from ..utils import (      compiled_regex_type,      ExtractorError,      float_or_none, -    HEADRequest,      int_or_none,      RegexNotFoundError,      sanitize_filename, @@ -398,6 +397,16 @@ class InfoExtractor(object):              if blocked_iframe:                  msg += ' Visit %s for more details' % blocked_iframe              raise ExtractorError(msg, expected=True) +        if '<title>The URL you requested has been blocked</title>' in content[:512]: +            msg = ( +                'Access to this webpage has been blocked by Indian censorship. ' +                'Use a VPN or proxy server (with --proxy) to route around it.') +            block_msg = self._html_search_regex( +                r'</h1><p>(.*?)</p>', +                content, 'block message', default=None) +            if block_msg: +                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') +            raise ExtractorError(msg, expected=True)          return content @@ -735,6 +744,7 @@ class InfoExtractor(object):                  f.get('language_preference') if f.get('language_preference') is not None else -1,                  f.get('quality') if f.get('quality') is not None else -1,                  f.get('tbr') if f.get('tbr') is not None else -1, +                f.get('filesize') if f.get('filesize') is not None else -1,                  f.get('vbr') if f.get('vbr') is not None else -1,                  f.get('height') if f.get('height') is not None else -1,                  f.get('width') if f.get('width') is not None else -1, @@ -742,7 +752,6 @@ class InfoExtractor(object):                  f.get('abr') if f.get('abr') is not None else -1,                  audio_ext_preference,                  f.get('fps') if f.get('fps') is not None else -1, -                f.get('filesize') if f.get('filesize') is not None else -1,                  f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,                  f.get('source_preference') if f.get('source_preference') is not None else -1,                  f.get('format_id'), @@ -759,9 +768,7 @@ class InfoExtractor(object):      def _is_valid_url(self, url, video_id, item='video'):          try: -            self._request_webpage( -                HEADRequest(url), video_id, -                'Checking %s URL' % item) +            self._request_webpage(url, video_id, 'Checking %s URL' % item)              return True          except ExtractorError as e:              if isinstance(e.cause, compat_HTTPError): @@ -807,8 +814,8 @@ class InfoExtractor(object):              media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')          for i, media_el in enumerate(media_nodes):              if manifest_version == '2.0': -                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' -                                + (media_el.attrib.get('href') or media_el.attrib.get('url'))) +                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + +                                (media_el.attrib.get('href') or media_el.attrib.get('url')))              tbr = int_or_none(media_el.attrib.get('bitrate'))              formats.append({                  'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), @@ -832,7 +839,7 @@ class InfoExtractor(object):              'url': m3u8_url,              'ext': ext,              'protocol': 'm3u8', -            'preference': -1, +            'preference': preference - 1 if preference else -1,              'resolution': 'multiple',              'format_note': 'Quality selection URL',          }] @@ -847,6 +854,7 @@ class InfoExtractor(object):              note='Downloading m3u8 information',              errnote='Failed to download m3u8 information')          last_info = None +        last_media = None          kv_rex = re.compile(              r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')          for line in m3u8_doc.splitlines(): @@ -857,6 +865,13 @@ class InfoExtractor(object):                      if v.startswith('"'):                          v = v[1:-1]                      last_info[m.group('key')] = v +            elif line.startswith('#EXT-X-MEDIA:'): +                last_media = {} +                for m in kv_rex.finditer(line): +                    v = m.group('val') +                    if v.startswith('"'): +                        v = v[1:-1] +                    last_media[m.group('key')] = v              elif line.startswith('#') or not line.strip():                  continue              else: @@ -885,6 +900,9 @@ class InfoExtractor(object):                      width_str, height_str = resolution.split('x')                      f['width'] = int(width_str)                      f['height'] = int(height_str) +                if last_media is not None: +                    f['m3u8_media'] = last_media +                    last_media = None                  formats.append(f)                  last_info = {}          self._sort_formats(formats) | 
