diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-04-08 23:16:02 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-04-08 23:16:02 +0600 |
commit | 3c6c7e7d7e5a7cbf87385bf3c649342c3ee4327d (patch) | |
tree | e118b1d7dde6aa3eec9bdf6064014e9a1f863922 /youtube_dl | |
parent | fb38aa8b53d25606d2582e1043d09ad1a077bf61 (diff) |
[gdcvault] Fix extraction (Closes #9107, closes #9114)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/gdcvault.py | 16 |
1 file changed, 9 insertions, 7 deletions
```diff
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index 59ed4c38f..25e93c9a4 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -159,9 +159,10 @@ class GDCVaultIE(InfoExtractor):
                 'title': title,
             }
 
+        PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
+
         xml_root = self._html_search_regex(
-            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
-            start_page, 'xml root', default=None)
+            PLAYER_REGEX, start_page, 'xml root', default=None)
         if xml_root is None:
             # Probably need to authenticate
             login_res = self._login(webpage_url, display_id)
@@ -171,18 +172,19 @@ class GDCVaultIE(InfoExtractor):
                 start_page = login_res
                 # Grab the url from the authenticated page
                 xml_root = self._html_search_regex(
-                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
-                    start_page, 'xml root')
+                    PLAYER_REGEX, start_page, 'xml root')
 
         xml_name = self._html_search_regex(
             r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
             start_page, 'xml filename', default=None)
         if xml_name is None:
             # Fallback to the older format
-            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
+            xml_name = self._html_search_regex(
+                r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
+                start_page, 'xml filename')
 
-        xml_description_url = xml_root + 'xml/' + xml_name
-        xml_description = self._download_xml(xml_description_url, display_id)
+        xml_description = self._download_xml(
+            '%s/xml/%s' % (xml_root, xml_name), display_id)
 
         video_title = xml_description.find('./metadata/title').text
         video_formats = self._parse_mp4(xml_description)
```