diff options
| author | Remita Amine <remitamine@gmail.com> | 2017-10-17 10:07:37 +0000 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2017-10-17 10:07:37 +0000 | 
| commit | fa4bc6e71261613cf530437a2407ff7b61ea6cb5 (patch) | |
| tree | 155f03bc6059132bdc779e07d3581045f8c2633c | |
| parent | 6b9cbd023f1206f90e60cbed4497e6b107438542 (diff) | |
[youtube] replace youtube redirect urls in description (fixes #14517)
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 13 | 
1 file changed, 12 insertions, 1 deletion
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4e8db240d..5aef555fb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          # description          description_original = video_description = get_element_by_id("eow-description", video_webpage)          if video_description: + +            def replace_url(m): +                redir_url = compat_urlparse.urljoin(url, m.group(1)) +                parsed_redir_url = compat_urllib_parse_urlparse(redir_url) +                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect': +                    qs = compat_parse_qs(parsed_redir_url.query) +                    q = qs.get('q') +                    if q and q[0]: +                        return q[0] +                return redir_url +              description_original = video_description = re.sub(r'''(?x)                  <a\s+                      (?:[a-zA-Z-]+="[^"]*"\s+)*? @@ -1630,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                      class="[^"]*"[^>]*>                  [^<]+\.{3}\s*                  </a> -            ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description) +            ''', replace_url, video_description)              video_description = clean_html(video_description)          else:              fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) | 
