diff options
| author | Allan Zhou <allanzp@gmail.com> | 2013-08-30 17:51:59 -0700 | 
|---|---|---|
| committer | Allan Zhou <allanzp@gmail.com> | 2013-08-30 17:51:59 -0700 | 
| commit | 85f03346ebe38c0b4cd46ab64bf9b5825d12895a (patch) | |
| tree | 2aa3c34c3380174cbb10df809c7607f26c19b018 | |
| parent | bdc6b3fc64a03045b8130cdc824ee3f6c15eeff1 (diff) | |
| parent | 10f5c016ec6262e5d29327e97fe4f3d1127ccdff (diff) | |
Merge remote-tracking branch 'upstream/master'
| -rw-r--r-- | devscripts/bash-completion.in | 6 | ||||
| -rw-r--r-- | devscripts/youtube_genalgo.py | 8 | ||||
| -rw-r--r-- | test/test_download.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/ign.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/mit.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/orf.py | 67 | ||||
| -rw-r--r-- | youtube_dl/extractor/unistra.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 8 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
12 files changed, 101 insertions, 28 deletions
| diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in index 3b99a9614..bd10f63c2 100644 --- a/devscripts/bash-completion.in +++ b/devscripts/bash-completion.in @@ -4,8 +4,12 @@ __youtube-dl()      COMPREPLY=()      cur="${COMP_WORDS[COMP_CWORD]}"      opts="{{flags}}" +    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater" -    if [[ ${cur} == * ]] ; then +    if [[ ${cur} =~ : ]]; then +        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) +        return 0 +    elif [[ ${cur} == * ]] ; then          COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )          return 0      fi diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 917e8f79d..97a0d7290 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -20,15 +20,15 @@ tests = [      # 87      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",       "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), -    # 86 - vflh9ybst 2013/08/23 +    # 86 - vflHOr_nV 2013/08/30      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", -     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), +     "?;}|[{=+._)(*&^%$#@!MNBqCXZASDFGHJKLPOIUYTREWQ<987654321mnbvcxzasdfghjklpoiuytrew"),      # 85      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",       ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), -    # 84 - vflh9ybst 2013/08/23 (sporadic) +    # 84 - vflg0g8PQ 2013/08/29 (sporadic)      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", -     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"), +     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),      # 83      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",       ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), diff --git a/test/test_download.py b/test/test_download.py index 21cb2e694..23a66254d 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -127,12 +127,11 @@ def generator(test_case):                      info_dict = json.load(infof)                  for (info_field, expected) in tc.get('info_dict', {}).items():                      if isinstance(expected, compat_str) and expected.startswith('md5:'): -                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field))) +                        got = 'md5:' + md5(info_dict.get(info_field))                      else:                          got = info_dict.get(info_field) -                        self.assertEqual( -                            expected, got, -                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) +                    self.assertEqual(expected, got, +                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))                  # If checkable fields are missing from the test case, print the info_dict                  test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6b5037c8c..90f1a4418 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -59,6 +59,7 @@ from .myvideo import MyVideoIE  from .nba import NBAIE  from .nbc import NBCNewsIE  from .ooyala import OoyalaIE +from .orf import ORFIE  from .pbs import PBSIE  from .photobucket import PhotobucketIE  from .pornotube import PornotubeIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a2986cebe..77726ee24 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -150,7 +150,7 @@ class InfoExtractor(object):          if m:              encoding = m.group(1)          else: -            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]', +            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',                            webpage_bytes[:1024])              if m:                  encoding = m.group(1).decode('ascii') diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 62abab655..b1c84278a 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):      Some videos of it.ign.com are also supported      """ -    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)' +    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'      IE_NAME = u'ign.com'      _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' @@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          name_or_id = mobj.group('name_or_id') +        page_type = mobj.group('type')          webpage = self._download_webpage(url, name_or_id) +        if page_type == 'articles': +            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url') +            return self.url_result(video_url, ie='IGN')          video_id = self._find_video_id(webpage)          result = self._get_video_info(video_id)          description = self._html_search_regex(self._DESCRIPTION_RE, @@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):  class OneUPIE(IGNIE):      """Extractor for 1up.com, it uses the ign videos system.""" -    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)' +    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'      IE_NAME = '1up.com'      _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index d09d03e36..52be9232f 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        webpage = self._download_webpage( +        raw_page = self._download_webpage(              'http://techtv.mit.edu/videos/%s' % video_id, video_id) -        embed_page = self._download_webpage( -            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id, -            note=u'Downloading embed page') +        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)          base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', -            embed_page, u'base url') -        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page, +            raw_page, u'base url') +        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,              u'video formats')          formats = json.loads(formats_json)          formats = sorted(formats, key=lambda f: f['bitrate']) -        title = get_element_by_id('edit-title', webpage) -        description = clean_html(get_element_by_id('edit-description', webpage)) +        title = get_element_by_id('edit-title', clean_page) +        description = clean_html(get_element_by_id('edit-description', clean_page))          thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', -            embed_page, u'thumbnail', flags=re.DOTALL) +            raw_page, u'thumbnail', flags=re.DOTALL)          return {'id': video_id,                  'title': title, diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py new file mode 100644 index 000000000..41ef8e992 --- /dev/null +++ b/youtube_dl/extractor/orf.py @@ -0,0 +1,67 @@ +# coding: utf-8 + +import re +import xml.etree.ElementTree +import json + +from .common import InfoExtractor +from ..utils import ( +    compat_urlparse, +    ExtractorError, +    find_xpath_attr, +) + +class ORFIE(InfoExtractor): +    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' + +    _TEST = { +        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter', +        u'file': u'6566957.flv', +        u'info_dict': { +            u'title': u'Wetter', +            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at', +        }, +        u'params': { +            # It uses rtmp +            u'skip_download': True, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        playlist_id = mobj.group('id') +        webpage = self._download_webpage(url, playlist_id) + +        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml') +        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0] +        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8')) +        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"') +        playlist = json.loads(playlist_json) + +        videos = [] +        ns = '{http://tempuri.org/XMLSchema.xsd}' +        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} +        webpage_description = self._og_search_description(webpage) +        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): +            # Get best quality url +            rtmp_url = None +            for q in ['Q6A', 'Q4A', 'Q1A']: +                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) +                if video_url is not None: +                    rtmp_url = video_url.text +                    break +            if rtmp_url is None: +                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) +            description = self._html_search_regex( +                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage, +                u'description', default=webpage_description, flags=re.DOTALL) +            videos.append({ +                '_type': 'video', +                'id': info['id'], +                'title': info['title'], +                'url': rtmp_url, +                'ext': 'flv', +                'description': description, +                }) + +        return videos diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 5ba0a9061..516e18914 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):          u'md5': u'736f605cfdc96724d55bb543ab3ced24',          u'info_dict': {              u'title': u'M!ss Yella', -            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc', +            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',          },      } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bd2b986ce..d331aa01b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -346,7 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              u"info_dict": {                  u"upload_date": u"20120506",                  u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", -                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", +                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",                  u"uploader": u"Icona Pop",                  u"uploader_id": u"IconaPop"              } @@ -434,11 +434,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          elif len(s) == 87:              return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]          elif len(s) == 86: -            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86] +            return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]          elif len(s) == 85:              return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]          elif len(s) == 84: -            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84] +            return s[81:36:-1] + s[0] + s[35:2:-1]          elif len(s) == 83:              return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]          elif len(s) == 82: @@ -1184,7 +1184,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):      IE_NAME = u'youtube:favorites'      IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' -    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?' +    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'      _LOGIN_REQUIRED = True      def _real_extract(self, url): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b3d0f64ea..201802cee 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -213,7 +213,7 @@ if sys.version_info >= (2,7):      def find_xpath_attr(node, xpath, key, val):          """ Find the xpath xpath[@key=val] """          assert re.match(r'^[a-zA-Z]+$', key) -        assert re.match(r'^[a-zA-Z@\s]*$', val) +        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)          expr = xpath + u"[@%s='%s']" % (key, val)          return node.find(expr)  else: diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2ba75258d..b6284c6d6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.28.1' +__version__ = '2013.08.30' | 
