aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-08-26 15:49:15 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2014-08-26 15:51:42 +0200
commit 8f1ea7cbb6cb365e4ffd75bdc2d901afcbfdf72f (patch)
tree 7534633750e8f4e1035b9790feb9341894cf3757
parent a204c854083bd5aed79e41191f613275960eb600 (diff)
[empflix] Revert to XML parser
Don't rely on the XML being broken (if they fix it, our code wouldn't work anymore). Instead, use the transform function we already have :) This partially reverts commit c7bee2a7254d31b7c478c0ac33bf23bdeba1c53c.
-rw-r--r-- youtube_dl/extractor/empflix.py 18
1 files changed, 8 insertions, 10 deletions
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py
index 3c2289e57..1c498d8c8 100644
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import fix_xml_ampersands
class EmpflixIE(InfoExtractor):
@@ -35,20 +36,17 @@ class EmpflixIE(InfoExtractor):
r'flashvars\.config = escape\("([^"]+)"',
webpage, 'flashvars.config')
- # XML is malformed
- cfg_xml = self._download_webpage(
- cfg_url, video_id, note='Downloading metadata')
+ cfg_xml = self._download_xml(
+ cfg_url, video_id, note='Downloading metadata',
+ transform_source=fix_xml_ampersands)
formats = [
{
- 'url': item[1],
- 'format_id': item[0],
- } for item in re.findall(
- r'<item>\s*<res>([^>]+)</res>\s*<videoLink>([^<]+)</videoLink>\s*</item>', cfg_xml)
+ 'url': item.find('videoLink').text,
+ 'format_id': item.find('res').text,
+ } for item in cfg_xml.findall('./quality/item')
]
-
- thumbnail = self._html_search_regex(
- r'<startThumb>([^<]+)</startThumb>', cfg_xml, 'thumbnail', fatal=False)
+ thumbnail = cfg_xml.find('./startThumb').text
return {
'id': video_id,