diff options
Diffstat (limited to 'youtube_dl')
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/instagram.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/letv.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/qqmusic.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/sbs.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/sohu.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/tv2.py | 126 | ||||
| -rw-r--r-- | youtube_dl/extractor/ultimedia.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/vier.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/vuclip.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/vulture.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/wimp.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/xminus.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/yahoo.py | 18 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 10 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
18 files changed, 188 insertions, 33 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5df889945..58b34e087 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1368,7 +1368,7 @@ class YoutubeDL(object):                          postprocessors = []                          self.report_warning('You have requested multiple '                                              'formats but ffmpeg or avconv are not installed.' -                                            ' The formats won\'t be merged') +                                            ' The formats won\'t be merged.')                      else:                          postprocessors = [merger] @@ -1395,8 +1395,8 @@ class YoutubeDL(object):                      requested_formats = info_dict['requested_formats']                      if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):                          info_dict['ext'] = 'mkv' -                        self.report_warning('You have requested formats incompatible for merge. ' -                                            'The formats will be merged into mkv') +                        self.report_warning( +                            'Requested formats are incompatible for merge and will be merged into mkv.')                      # Ensure filename always has a correct extension for successful merge                      filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])                      if os.path.exists(encodeFilename(filename)): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 173e9a155..24efb7ce5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -572,6 +572,10 @@ from .tumblr import TumblrIE  from .tunein import TuneInIE  from .turbo import TurboIE  from .tutv import TutvIE +from .tv2 import ( +    TV2IE, +    TV2ArticleIE, +)  from .tv4 import TV4IE  from .tvigle import TvigleIE  from .tvp import TvpIE, TvpSeriesIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 65bb77086..cecf917ff 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -786,8 +786,8 @@ class InfoExtractor(object):              return True          except ExtractorError as e:              if isinstance(e.cause, compat_HTTPError): -                self.report_warning( -                    '%s URL is invalid, skipping' % item, video_id) +                self.to_screen( +                    '%s: %s URL is invalid, skipping' % (video_id, item))                  return False              raise diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 65f6ca103..b10755788 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -7,9 +7,9 @@ from ..utils import int_or_none  class InstagramIE(InfoExtractor): -    _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)' +    _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'      _TEST = { -        'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', +        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',          'md5': '0d2da106a9d2631273e192b372806516',          'info_dict': {              'id': 'aye83DjauH', @@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor):  class InstagramUserIE(InfoExtractor): -    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])' +    _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'      IE_DESC = 'Instagram user profile'      IE_NAME = 'instagram:user'      _TEST = { -        'url': 'http://instagram.com/porsche', +        'url': 'https://instagram.com/porsche',          'info_dict': {              'id': 'porsche',              'title': 'porsche', diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 1484ac0d2..da896caf1 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -50,9 +50,7 @@ class LetvIE(InfoExtractor):              'title': '与龙共舞 完整版',              'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',          }, -        'params': { -            'cn_verification_proxy': 'http://proxy.uku.im:8888' -        }, +        'skip': 'Only available in China',      }]      @staticmethod diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 13113820b..b540033e2 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor):              'title': '可惜没如果',              'upload_date': '20141227',              'creator': '林俊杰', -            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30', +            'description': 'md5:d327722d0361576fde558f1ac68a7065',          }      }] @@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor):          lrc_content = self._html_search_regex(              r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',              detail_info_page, 'LRC lyrics', default=None) +        if lrc_content: +            lrc_content = lrc_content.replace('\\n', '\n')          guid = self.m_r_get_ruin() diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 3073e5e86..d4bd1a0d7 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -1,7 +1,6 @@  # -*- coding: utf-8 -*-  from __future__ import unicode_literals -import json  import re  from .common import InfoExtractor  from ..utils import ( diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index eab4adfca..29bd9ce6f 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -23,9 +23,7 @@ class SohuIE(InfoExtractor):              'ext': 'mp4',              'title': 'MV:Far East Movement《The Illest》',          }, -        'params': { -            'cn_verification_proxy': 'proxy.uku.im:8888' -        } +        'skip': 'On available in China',      }, {          'url': 'http://tv.sohu.com/20150305/n409385080.shtml',          'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a', diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py new file mode 100644 index 000000000..fa338b936 --- /dev/null +++ b/youtube_dl/extractor/tv2.py @@ -0,0 +1,126 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    int_or_none, +    float_or_none, +    parse_iso8601, +    remove_end, +) + + +class TV2IE(InfoExtractor): +    _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' +    _TEST = { +        'url': 'http://www.tv2.no/v/916509/', +        'md5': '9cb9e3410b18b515d71892f27856e9b1', +        'info_dict': { +            'id': '916509', +            'ext': 'flv', +            'title': 'Se Gryttens hyllest av Steven Gerrard', +            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', +            'timestamp': 1431715610, +            'upload_date': '20150515', +            'duration': 156.967, +            'view_count': int, +            'categories': list, +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        formats = [] +        format_urls = [] +        for protocol in ('HDS', 'HLS'): +            data = self._download_json( +                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), +                video_id, 'Downloading play JSON')['playback'] +            for item in data['items']['item']: +                video_url = item.get('url') +                if not video_url or video_url in format_urls: +                    continue +                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat')) +                if not self._is_valid_url(video_url, video_id, format_id): +                    continue +                format_urls.append(video_url) +                ext = determine_ext(video_url) +                if ext == 'f4m': +                    formats.extend(self._extract_f4m_formats( +                        video_url, video_id, f4m_id=format_id)) +                elif ext == 'm3u8': +                    formats.extend(self._extract_m3u8_formats( +                        video_url, video_id, 'mp4', m3u8_id=format_id)) +                elif ext == 'ism' or video_url.endswith('.ism/Manifest'): +                    pass +                else: +                    formats.append({ +                        'url': video_url, +                        'format_id': format_id, +                        'tbr': int_or_none(item.get('bitrate')), +                        'filesize': int_or_none(item.get('fileSize')), +                    }) +        self._sort_formats(formats) + +        asset = self._download_json( +            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id, +            video_id, 'Downloading metadata JSON')['asset'] + +        title = asset['title'] +        description = asset.get('description') +        timestamp = parse_iso8601(asset.get('createTime')) +        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration')) +        view_count = int_or_none(asset.get('views')) +        categories = asset.get('keywords', '').split(',') + +        thumbnails = [{ +            'id': thumbnail.get('@type'), +            'url': thumbnail.get('url'), +        } for _, thumbnail in asset.get('imageVersions', {}).items()] + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'description': description, +            'thumbnails': thumbnails, +            'timestamp': timestamp, +            'duration': duration, +            'view_count': view_count, +            'categories': categories, +            'formats': formats, +        } + + +class TV2ArticleIE(InfoExtractor): +    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' +    _TESTS = [{ +        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', +        'info_dict': { +            'id': '6930542', +            'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', +            'description': 'md5:339573779d3eea3542ffe12006190954', +        }, +        'playlist_count': 2, +    }, { +        'url': 'http://www.tv2.no/a/6930542', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        playlist_id = self._match_id(url) + +        webpage = self._download_webpage(url, playlist_id) + +        entries = [ +            self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2') +            for video_id in re.findall(r'data-assetid="(\d+)"', webpage)] + +        title = remove_end(self._og_search_title(webpage), ' - TV2.no') +        description = remove_end(self._og_search_description(webpage), ' - TV2.no') + +        return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 96c809eaf..c4751050e 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse  from ..utils import (      ExtractorError,      qualities, @@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        deliver_url = self._search_regex( -            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"', -            webpage, 'deliver URL') +        deliver_url = self._proto_relative_url(self._search_regex( +            r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"', +            webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')          deliver_page = self._download_webpage(              deliver_url, video_id, 'Downloading iframe page') @@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor):          player = self._parse_json(              self._search_regex( -                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'), +                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", +                deliver_page, 'player'),              video_id)          quality = qualities(['flash', 'html5']) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 619039e51..15377097e 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -38,11 +38,14 @@ class VierIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          video_id = self._search_regex( -            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id') +            [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], +            webpage, 'video id')          application = self._search_regex( -            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod') +            [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], +            webpage, 'application', default='vier_vod')          filename = self._search_regex( -            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename') +            [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], +            webpage, 'filename')          playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)          formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index c3fde53f5..a6d9b5fee 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor):          links_code = self._search_regex(              r'''(?xs)                  (?: -                    <img\s+src="/im/play.gif".*?>| +                    <img\s+src="[^"]*/play.gif".*?>|                      <!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->                  )                  (.*?) diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py index 1eb24a3d6..faa167e65 100644 --- a/youtube_dl/extractor/vulture.py +++ b/youtube_dl/extractor/vulture.py @@ -44,7 +44,7 @@ class VultureIE(InfoExtractor):          query_webpage = self._download_webpage(              query_url, display_id, note='Downloading query page')          params_json = self._search_regex( -            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n', +            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',              query_webpage,              'player params')          params = json.loads(params_json) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index d6dec25ca..f69d46a28 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -37,7 +37,8 @@ class WimpIE(InfoExtractor):          video_id = mobj.group(1)          webpage = self._download_webpage(url, video_id)          video_url = self._search_regex( -            r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL') +            [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"], +            webpage, 'video URL')          if YoutubeIE.suitable(video_url):              self.to_screen('Found YouTube video')              return { diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py index 8c6241aed..7c9d8af6f 100644 --- a/youtube_dl/extractor/xminus.py +++ b/youtube_dl/extractor/xminus.py @@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor):              r'minus_track\.dur_sec=\'([0-9]*?)\'',              webpage, 'duration', fatal=False))          filesize_approx = parse_filesize(self._html_search_regex( -            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])', +            r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',              webpage, 'approximate filesize', fatal=False))          tbr = int_or_none(self._html_search_regex(              r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps', @@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor):              description = re.sub(' *\r *', '\n', description)          enc_token = self._html_search_regex( -            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token') +            r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')          token = ''.join(              c if pos == 3 else compat_chr(compat_ord(c) - 1)              for pos, c in enumerate(reversed(enc_token))) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index bf4e659ac..f9afbdbab 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -15,6 +15,7 @@ from ..utils import (      unescapeHTML,      ExtractorError,      int_or_none, +    mimetype2ext,  )  from .nbc import NBCSportsVPlayerIE @@ -236,6 +237,22 @@ class YahooIE(InfoExtractor):          self._sort_formats(formats) +        closed_captions = self._html_search_regex( +            r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', +            default='[]') + +        cc_json = self._parse_json(closed_captions, video_id, fatal=False) +        subtitles = {} +        if cc_json: +            for closed_caption in cc_json: +                lang = closed_caption['lang'] +                if lang not in subtitles: +                    subtitles[lang] = [] +                subtitles[lang].append({ +                    'url': closed_caption['url'], +                    'ext': mimetype2ext(closed_caption['content_type']), +                }) +          return {              'id': video_id,              'display_id': display_id, @@ -244,6 +261,7 @@ class YahooIE(InfoExtractor):              'description': clean_html(meta['description']),              'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),              'duration': int_or_none(meta.get('duration')), +            'subtitles': subtitles,          } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed9ed9ed6..52d198fa3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1665,6 +1665,7 @@ def mimetype2ext(mt):      return {          'x-ms-wmv': 'wmv',          'x-mp4-fragmented': 'mp4', +        'ttml+xml': 'ttml',      }.get(res, res) @@ -1848,9 +1849,9 @@ def dfxp2srt(dfxp_data):          out = str_or_empty(node.text)          for child in node: -            if child.tag == _x('ttml:br'): +            if child.tag in (_x('ttml:br'), 'br'):                  out += '\n' + str_or_empty(child.tail) -            elif child.tag == _x('ttml:span'): +            elif child.tag in (_x('ttml:span'), 'span'):                  out += str_or_empty(parse_node(child))              else:                  out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1859,7 +1860,10 @@ def dfxp2srt(dfxp_data):      dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))      out = [] -    paras = dfxp.findall(_x('.//ttml:p')) +    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') + +    if not paras: +        raise ValueError('Invalid dfxp/TTML subtitle')      for para, index in zip(paras, itertools.count(1)):          begin_time = parse_dfxp_time_expr(para.attrib['begin']) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 38f00bc9b..b33385153 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.05.15' +__version__ = '2015.05.20' | 
