diff options
| -rw-r--r-- | test/test_download.py | 3 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 3 | 
5 files changed, 24 insertions, 11 deletions
| diff --git a/test/test_download.py b/test/test_download.py index dfb04d010..d6cc9ec33 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -148,6 +148,9 @@ def generator(test_case):                  # Check for the presence of mandatory fields                  for key in ('id', 'url', 'title', 'ext'):                      self.assertTrue(key in info_dict.keys() and info_dict[key]) +                # Check for mandatory fields that are automatically set by YoutubeDL +                for key in ['webpage_url', 'extractor']: +                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)          finally:              try_rm_tcs_files() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a3e0a700f..8938a2cd3 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -354,8 +354,11 @@ class YoutubeDL(object):                          '_type': 'compat_list',                          'entries': ie_result,                      } -                if 'extractor' not in ie_result: -                    ie_result['extractor'] = ie.IE_NAME +                self.add_extra_info(ie_result, +                    { +                        'extractor': ie.IE_NAME, +                        'webpage_url': url +                    })                  return self.process_ie_result(ie_result, download, extra_info)              except ExtractorError as de: # An error we somewhat expected                  self.report_error(compat_str(de), de.format_traceback()) @@ -417,6 +420,7 @@ class YoutubeDL(object):                      'playlist': playlist,                      'playlist_index': i + playliststart,                      'extractor': ie_result['extractor'], +                    'webpage_url': ie_result['webpage_url'],                  }                  entry_result = self.process_ie_result(entry,                                                        download=download, @@ -427,7 +431,10 @@ class YoutubeDL(object):          elif result_type == 'compat_list':              def _fixup(r):                  self.add_extra_info(r, -                    {'extractor': ie_result['extractor']}) +                    { +                        'extractor': ie_result['extractor'], +                        'webpage_url': ie_result['webpage_url'], +                    })                  return r              ie_result['entries'] = [                  self.process_ie_result(_fixup(r), download, extra_info) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cef4dce85..e0ccba533 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -71,6 +71,9 @@ class InfoExtractor(object):                                  ("3D" or "DASH video")                      * width     Width of the video, if known                      * height    Height of the video, if known +    webpage_url:    The url to the video webpage, if given to youtube-dl it +                    should allow to get the same result again. (It will be set +                    by YoutubeDL if it's missing)      Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c7d864a2b..62273fd33 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):      """Information extractor for vimeo.com."""      # _VALID_URL matches Vimeo URLs -    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' +    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'      _NETRC_MACHINE = 'vimeo'      IE_NAME = u'vimeo'      _TESTS = [ @@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          video_id = mobj.group('id') -        if not mobj.group('proto'): -            url = 'https://' + url -        elif mobj.group('pro'): +        if mobj.group('pro') or mobj.group('player'):              url = 'http://player.vimeo.com/video/' + video_id -        elif mobj.group('direct_link'): +        else:              url = 'https://vimeo.com/' + video_id          # Retrieve video webpage to extract further information @@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):          if len(formats) == 0:              raise ExtractorError(u'No known codec found') -        return [{ +        return {              'id':       video_id,              'uploader': video_uploader,              'uploader_id': video_uploader_id, @@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):              'thumbnail':    video_thumbnail,              'description':  video_description,              'formats': formats, -        }] +            'webpage_url': url, +        }  class VimeoChannelIE(InfoExtractor): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a19abe1f0..6ddd6ef06 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'subtitles':    video_subtitles,                  'duration':     video_duration,                  'age_limit':    18 if age_gate else 0, -                'annotations':  video_annotations +                'annotations':  video_annotations, +                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,              })          return results | 
