diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 59 | 
1 files changed, 26 insertions, 33 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d7c9b38f9..9053f3ead 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))              return False -        galx = None -        dsh = None -        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) -        if match: -          galx = match.group(1) -        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) -        if match: -          dsh = match.group(1) +        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', +                                  login_page, u'Login GALX parameter')          # Log in          login_form_strs = { @@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):                  u'checkConnection': u'',                  u'checkedDomains': u'youtube',                  u'dnConn': u'', -                u'dsh': dsh,                  u'pstMsg': u'0',                  u'rmShown': u'1',                  u'secTok': u'', @@ -236,11 +229,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '136': 'mp4',          '137': 'mp4',          '138': 'mp4', -        '139': 'mp4', -        '140': 'mp4', -        '141': 'mp4',          '160': 'mp4', +        # Dash mp4 audio +        '139': 'm4a', +        '140': 'm4a', +        '141': 'm4a', +          # Dash webm          '171': 'webm',          '172': 'webm', @@ -346,7 +341,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          },          {              u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U", -            u"file":  u"1ltcDfZMA3U.flv", +            u"file":  u"1ltcDfZMA3U.mp4",              u"note": u"Test VEVO video (#897)",              u"info_dict": {                  u"upload_date": u"20070518", @@ -1116,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'lang': lang,                  'v': video_id,                  'fmt': self._downloader.params.get('subtitlesformat'), -                'name': l[0], +                'name': l[0].encode('utf-8'),              })              url = u'http://www.youtube.com/api/timedtext?' + params              sub_lang_list[lang] = url @@ -1150,7 +1145,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              list_page = self._download_webpage(list_url, video_id)              caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))              original_lang_node = caption_list.find('track') -            if original_lang_node.attrib.get('kind') != 'asr' : +            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :                  self._downloader.report_warning(u'Video doesn\'t have automatic captions')                  return {}              original_lang = original_lang_node.attrib['lang_code'] @@ -1403,32 +1398,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              # this signatures are encrypted              if 'url_encoded_fmt_stream_map' not in args:                  raise ValueError(u'No stream_map present')  # caught below -            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) +            re_signature = re.compile(r'[&,]s=') +            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])              if m_s is not None:                  self.to_screen(u'%s: Encrypted signatures detected.' % video_id)                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] -            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) +            m_s = re_signature.search(args.get('adaptive_fmts', u''))              if m_s is not None: -                if 'url_encoded_fmt_stream_map' in video_info: -                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts'] -                else: -                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']] -            elif 'adaptive_fmts' in video_info: -                if 'url_encoded_fmt_stream_map' in video_info: -                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0] +                if 'adaptive_fmts' in video_info: +                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']                  else: -                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts'] +                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]          except ValueError:              pass          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):              self.report_rtmp_download()              video_url_list = [(None, video_info['conn'][0])] -        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: -            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]: +        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: +            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] +            if 'rtmpe%3Dyes' in encoded_url_map:                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)              url_map = {} -            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): +            for url_data_str in encoded_url_map.split(','):                  url_data = compat_parse_qs(url_data_str)                  if 'itag' in url_data and 'url' in url_data:                      url = url_data['url'][0] @@ -1481,13 +1473,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')          results = [] -        for format_param, video_real_url in video_url_list: +        for itag, video_real_url in video_url_list:              # Extension -            video_extension = self._video_extensions.get(format_param, 'flv') +            video_extension = self._video_extensions.get(itag, 'flv') -            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, -                                              self._video_dimensions.get(format_param, '???'), -                                              ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '') +            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension, +                                              self._video_dimensions.get(itag, '???'), +                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')              results.append({                  'id':       video_id, @@ -1498,6 +1490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'title':    video_title,                  'ext':      video_extension,                  'format':   video_format, +                'format_id': itag,                  'thumbnail':    video_thumbnail,                  'description':  video_description,                  'player_url':   player_url,  | 
