diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-08-29 19:16:07 +0200 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-08-29 19:16:07 +0200 | 
| commit | 545434670b7b055a7f0ff82b76ee7acbb3d07dd3 (patch) | |
| tree | f516e750374ae936e87f5b86675b7ccffbdfc781 | |
| parent | 54fda45bace9d37135a4d29a03130b22ed272ce5 (diff) | |
Add an extractor for orf.at (closes #1346)
Make find_xpath_attr also accept numbers in the value
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/orf.py | 65 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | 
3 files changed, 67 insertions, 1 deletion
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6b5037c8c..90f1a4418 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -59,6 +59,7 @@ from .myvideo import MyVideoIE  from .nba import NBAIE  from .nbc import NBCNewsIE  from .ooyala import OoyalaIE +from .orf import ORFIE  from .pbs import PBSIE  from .photobucket import PhotobucketIE  from .pornotube import PornotubeIE diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py new file mode 100644 index 000000000..8da0a2c8e --- /dev/null +++ b/youtube_dl/extractor/orf.py @@ -0,0 +1,65 @@ +import re +import xml.etree.ElementTree +import json + +from .common import InfoExtractor +from ..utils import ( +    compat_urlparse, +    ExtractorError, +    find_xpath_attr, +) + +class ORFIE(InfoExtractor): +    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' + +    _TEST = { +        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter', +        u'file': u'6566957.flv', +        u'info_dict': { +            u'title': u'Wetter', +            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at', +        }, +        u'params': { +            # It uses rtmp +            u'skip_download': True, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        playlist_id = mobj.group('id') +        webpage = self._download_webpage(url, playlist_id) + +        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml') +        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0] +        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8')) +        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"') +    
    playlist = json.loads(playlist_json) + +        videos = [] +        ns = '{http://tempuri.org/XMLSchema.xsd}' +        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} +        webpage_description = self._og_search_description(webpage) +        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): +            # Get best quality url +            rtmp_url = None +            for q in ['Q6A', 'Q4A', 'Q1A']: +                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) +                if video_url is not None: +                    rtmp_url = video_url.text +                    break +            if rtmp_url is None: +                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) +            description = self._html_search_regex( +                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage, +                u'description', default=webpage_description, flags=re.DOTALL) +            videos.append({ +                '_type': 'video', +                'id': info['id'], +                'title': info['title'], +                'url': rtmp_url, +                'ext': 'flv', +                'description': description, +                }) + +        return videos diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b3d0f64ea..201802cee 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -213,7 +213,7 @@ if sys.version_info >= (2,7):      def find_xpath_attr(node, xpath, key, val):          """ Find the xpath xpath[@key=val] """          assert re.match(r'^[a-zA-Z]+$', key) -        assert re.match(r'^[a-zA-Z@\s]*$', val) +        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)          expr = xpath + u"[@%s='%s']" % (key, val)          return node.find(expr)  else: | 
