diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-04-08 23:16:02 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-04-08 23:16:02 +0600 | 
| commit | 3c6c7e7d7e5a7cbf87385bf3c649342c3ee4327d (patch) | |
| tree | e118b1d7dde6aa3eec9bdf6064014e9a1f863922 | |
| parent | fb38aa8b53d25606d2582e1043d09ad1a077bf61 (diff) | |
[gdcvault] Fix extraction (Closes #9107, closes #9114)
| -rw-r--r-- | youtube_dl/extractor/gdcvault.py | 16 | 
1 file changed, 9 insertions, 7 deletions
```diff
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index 59ed4c38f..25e93c9a4 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -159,9 +159,10 @@ class GDCVaultIE(InfoExtractor):
                 'title': title,
             }
 
+        PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
+
         xml_root = self._html_search_regex(
-            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
-            start_page, 'xml root', default=None)
+            PLAYER_REGEX, start_page, 'xml root', default=None)
         if xml_root is None:
             # Probably need to authenticate
             login_res = self._login(webpage_url, display_id)
@@ -171,18 +172,19 @@ class GDCVaultIE(InfoExtractor):
                 start_page = login_res
                 # Grab the url from the authenticated page
                 xml_root = self._html_search_regex(
-                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
-                    start_page, 'xml root')
+                    PLAYER_REGEX, start_page, 'xml root')
 
         xml_name = self._html_search_regex(
             r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
             start_page, 'xml filename', default=None)
         if xml_name is None:
             # Fallback to the older format
-            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
+            xml_name = self._html_search_regex(
+                r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
+                start_page, 'xml filename')
 
-        xml_description_url = xml_root + 'xml/' + xml_name
-        xml_description = self._download_xml(xml_description_url, display_id)
+        xml_description = self._download_xml(
+            '%s/xml/%s' % (xml_root, xml_name), display_id)
 
         video_title = xml_description.find('./metadata/title').text
         video_formats = self._parse_mp4(xml_description)
```
