diff options
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/audimedia.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/bleacherreport.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/douyutv.py | 23 | ||||
| -rw-r--r-- | youtube_dl/extractor/elpais.py | 31 | ||||
| -rw-r--r-- | youtube_dl/extractor/foxnews.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/jeuxvideo.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/kusi.py | 99 | ||||
| -rw-r--r-- | youtube_dl/extractor/leeco.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/revision3.py | 97 | ||||
| -rw-r--r-- | youtube_dl/extractor/twitter.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/vgtv.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/yandexmusic.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/youporn.py | 5 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 1 | 
16 files changed, 288 insertions, 71 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dcc867e45..f91851df9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -567,7 +567,7 @@ class YoutubeDL(object):                  elif template_dict.get('height'):                      template_dict['resolution'] = '%sp' % template_dict['height']                  elif template_dict.get('width'): -                    template_dict['resolution'] = '?x%d' % template_dict['width'] +                    template_dict['resolution'] = '%dx?' % template_dict['width']              sanitize = lambda k, v: sanitize_filename(                  compat_str(v), diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db1827666..899bf8114 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -343,6 +343,7 @@ from .konserthusetplay import KonserthusetPlayIE  from .kontrtube import KontrTubeIE  from .krasview import KrasViewIE  from .ku6 import Ku6IE +from .kusi import KUSIIE  from .kuwo import (      KuwoIE,      KuwoAlbumIE, diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py index 3b2effa15..aa6925623 100644 --- a/youtube_dl/extractor/audimedia.py +++ b/youtube_dl/extractor/audimedia.py @@ -10,9 +10,9 @@ from ..utils import (  class AudiMediaIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)' +    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'      _TEST = { -        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test', +        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',          'md5': '79a8b71c46d49042609795ab59779b66',          'info_dict': {              'id': '1565', @@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):          display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) -        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload') +        raw_payload = self._search_regex([ +            r'class="amtv-embed"[^>]+id="([^"]+)"', +            r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"', +        ], webpage, 'raw payload')          _, stage_mode, video_id, lang = raw_payload.split('-')          # TODO: handle s and e stage_mode (live streams and ended live streams) @@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):                  video_version_url = video_version.get('download_url') or video_version.get('stream_url')                  if not video_version_url:                      continue -                formats.append({ +                f = {                      'url': video_version_url,                      'width': int_or_none(video_version.get('width')),                      'height': int_or_none(video_version.get('height')),                      'abr': int_or_none(video_version.get('audio_bitrate')),                      'vbr': int_or_none(video_version.get('video_bitrate')), -                }) +                } +                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) +                if bitrate: +                    f.update({ +                        'format_id': 'http-%s' % bitrate, +                    }) +                formats.append(f)              self._sort_formats(formats)              return { diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index 38bda3af5..7a8e1f60b 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):          'add_ie': ['Ooyala'],      }, {          'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', -        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50', +        'md5': '6a5cd403418c7b01719248ca97fb0692',          'info_dict': {              'id': '2586817', -            'ext': 'mp4', +            'ext': 'webm',              'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',              'timestamp': 1446839961,              'uploader': 'Sean Fay', @@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):          'md5': '8c2c12e3af7805152675446c905d159b',          'info_dict': {              'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',              'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',          }, +        'params': { +            # m3u8 download +            'skip_download': True, +        },      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 373b3b4b4..bdc768c78 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):              'display_id': 'iseven',              'ext': 'flv',              'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', -            'description': 'md5:c93d6692dde6fe33809a46edcbecca44', +            'description': 'md5:f34981259a03e980a3c6404190a3ed61',              'thumbnail': 're:^https?://.*\.jpg$',              'uploader': '7师傅',              'uploader_id': '431925', @@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):          },          'params': {              'skip_download': True, -        } +        },      }, {          'url': 'http://www.douyutv.com/85982',          'info_dict': { @@ -42,7 +42,24 @@ class DouyuTVIE(InfoExtractor):          },          'params': {              'skip_download': True, -        } +        }, +        'skip': 'Romm not found', +    }, { +        'url': 'http://www.douyutv.com/17732', +        'info_dict': { +            'id': '17732', +            'display_id': '17732', +            'ext': 'flv', +            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +            'description': 'md5:f34981259a03e980a3c6404190a3ed61', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': '7师傅', +            'uploader_id': '431925', +            'is_live': True, +        }, +        'params': { +            'skip_download': True, +        },      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py index 00a69e631..8c725a4e6 100644 --- a/youtube_dl/extractor/elpais.py +++ b/youtube_dl/extractor/elpais.py @@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):      _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'      IE_DESC = 'El País' -    _TEST = { +    _TESTS = [{          'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',          'md5': '98406f301f19562170ec071b83433d55',          'info_dict': { @@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):              'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',              'upload_date': '20140206',          } -    } +    }, { +        'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t', +        'md5': '3bd5b09509f3519d7d9e763179b013de', +        'info_dict': { +            'id': '1456340311_668921', +            'ext': 'mp4', +            'title': 'Cómo hacer el mejor café con cafetera italiana', +            'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.', +            'upload_date': '20160303', +        } +    }]      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          prefix = self._html_search_regex( -            r'var url_cache = "([^"]+)";', webpage, 'URL prefix') +            r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')          video_suffix = self._search_regex( -            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') +            r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')          video_url = prefix + video_suffix          thumbnail_suffix = self._search_regex( -            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', -            fatal=False) +            r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", +            webpage, 'thumbnail URL', fatal=False)          thumbnail = (              None if thumbnail_suffix is None              else prefix + thumbnail_suffix)          title = self._html_search_regex( -            '<h2 class="entry-header entry-title.*?>(.*?)</h2>', +            (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title', +             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),              webpage, 'title') -        date_str = self._search_regex( +        upload_date = unified_strdate(self._search_regex(              r'<p class="date-header date-int updated"\s+title="([^"]+)">', -            webpage, 'upload date', fatal=False) -        upload_date = (None if date_str is None else unified_strdate(date_str)) +            webpage, 'upload date', default=None) or self._html_search_meta( +            'datePublished', webpage, 'timestamp'))          return {              'id': video_id, diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 318ac013d..1dc50318c 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):                  # 'upload_date': '20141204',                  'thumbnail': 're:^https?://.*\.jpg$',              }, +            'params': { +                # m3u8 download +                'skip_download': True, +            },          },          {              'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index eef7daa29..137db873c 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):          webpage = self._download_webpage(url, title)          title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)          config_url = self._html_search_regex( -            r'data-src="(/contenu/medias/video.php.*?)"', +            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',              webpage, 'config URL')          config_url = 'http://www.jeuxvideo.com' + config_url diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py new file mode 100644 index 000000000..931f34c9b --- /dev/null +++ b/youtube_dl/extractor/kusi.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote_plus +from ..utils import ( +    int_or_none, +    float_or_none, +    timeconvert, +    update_url_query, +    xpath_text, +) + + +class KUSIIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' +    _TESTS = [{ +        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold', +        'md5': 'f926e7684294cf8cb7bdf8858e1b3988', +        'info_dict': { +            'id': '12203019', +            'ext': 'mp4', +            'title': 'Turko Files: Case Closed! & Put On Hold!', +            'duration': 231.0, +            'upload_date': '20160210', +            'timestamp': 1455087571, +            'thumbnail': 're:^https?://.*\.jpg$' +        }, +    }, { +        'url': 'http://kusi.com/video?clipId=12203019', +        'info_dict': { +            'id': '12203019', +            'ext': 'mp4', +            'title': 'Turko Files: Case Closed! & Put On Hold!', +            'duration': 231.0, +            'upload_date': '20160210', +            'timestamp': 1455087571, +            'thumbnail': 're:^https?://.*\.jpg$' +        }, +        'params': { +            'skip_download': True,  # Same as previous one +        }, +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        clip_id = mobj.group('clipId') +        video_id = clip_id or mobj.group('path') + +        webpage = self._download_webpage(url, video_id) + +        if clip_id is None: +            video_id = clip_id = self._html_search_regex( +                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id') + +        affiliate_id = self._search_regex( +            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id') + +        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf +        xml_url = update_url_query('http://www.kusi.com/build.asp', { +            'buildtype': 'buildfeaturexmlrequest', +            'featureType': 'Clip', +            'featureid': clip_id, +            'affiliateno': affiliate_id, +            'clientgroupid': '1', +            'rnd': int(round(random.random() * 1000000)), +        }) + +        doc = self._download_xml(xml_url, video_id) + +        video_title = xpath_text(doc, 'HEADLINE', fatal=True) +        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) +        description = xpath_text(doc, 'ABSTRACT') +        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') +        createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) + +        quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') +        formats = [] +        for quality in quality_options: +            formats.append({ +                'url': compat_urllib_parse_unquote_plus(quality.attrib['url']), +                'height': int_or_none(quality.attrib.get('height')), +                'width': int_or_none(quality.attrib.get('width')), +                'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000), +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': video_title, +            'description': description, +            'duration': duration, +            'formats': formats, +            'thumbnail': thumbnail, +            'timestamp': createtion_time, +        } diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index d0cd3f591..df47e88ba 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -217,14 +217,8 @@ class LePlaylistIE(InfoExtractor):          'playlist_mincount': 96      }, {          'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml', -        'info_dict': { -            'id': 'lswjzzjc', -            # The title should be "劲舞青春", but I can't find a simple way to -            # determine the playlist title -            'title': '乐视午间自制剧场', -            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489' -        }, -        'playlist_mincount': 7 +        # This series is moved to http://www.le.com/tv/10005297.html +        'only_matching': True,      }, {          'url': 'http://www.le.com/comic/92063.html',          'only_matching': True, @@ -338,7 +332,7 @@ class LetvCloudIE(InfoExtractor):              formats.append({                  'url': url,                  'ext': determine_ext(decoded_url), -                'format_id': int_or_none(play_url.get('vtype')), +                'format_id': str_or_none(play_url.get('vtype')),                  'format_note': str_or_none(play_url.get('definition')),                  'width': int_or_none(play_url.get('vwidth')),                  'height': int_or_none(play_url.get('vheight')), diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py index b1b8800b9..99979ebe1 100644 --- a/youtube_dl/extractor/revision3.py +++ b/youtube_dl/extractor/revision3.py @@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):          'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',          'md5': 'd94a72d85d0a829766de4deb8daaf7df',          'info_dict': { -            'id': '73034', +            'id': '71089',              'display_id': 'technobuffalo/5-google-predictions-for-2016',              'ext': 'webm',              'title': '5 Google Predictions for 2016', @@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):              'uploader_id': 'technobuffalo',          }      }, { +        # Show          'url': 'http://testtube.com/brainstuff',          'info_dict': {              'id': '251', @@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):      }, {          'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',          'info_dict': { -            'id': '60163', +            'id': '58227',              'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',              'duration': 275,              'ext': 'webm', @@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):              'uploader': 'DNews',              'uploader_id': 'dnews',          }, +    }, { +        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min', +        'info_dict': { +            'id': '71618', +            'ext': 'mp4', +            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min', +            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes', +            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start', +            'uploader': 'Editors\' Picks', +            'uploader_id': 'tt-editors-picks', +            'timestamp': 1453309200, +            'upload_date': '20160120', +        }, +        'add_ie': ['Youtube'], +    }, { +        # Tag +        'url': 'http://testtube.com/tech-news', +        'info_dict': { +            'id': '21018', +            'title': 'tech news', +        }, +        'playlist_mincount': 9,      }]      _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'      _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'      def _real_extract(self, url):          domain, display_id = re.match(self._VALID_URL, url).groups() +        site = domain.split('.')[0]          page_info = self._download_json(              self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id) -        if page_info['data']['type'] == 'episode': -            episode_data = page_info['data'] -            video_id = compat_str(episode_data['video']['data']['id']) +        page_data = page_info['data'] +        page_type = page_data['type'] +        if page_type in ('episode', 'embed'): +            show_data = page_data['show']['data'] +            page_id = compat_str(page_data['id']) +            video_id = compat_str(page_data['video']['data']['id']) + +            preference = qualities(['mini', 'small', 'medium', 'large']) +            thumbnails = [{ +                'url': image_url, +                'id': image_id, +                'preference': preference(image_id) +            } for image_id, image_url in page_data.get('images', {}).items()] + +            info = { +                'id': page_id, +                'display_id': display_id, +                'title': unescapeHTML(page_data['name']), +                'description': unescapeHTML(page_data.get('summary')), +                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '), +                'author': page_data.get('author'), +                'uploader': show_data.get('name'), +                'uploader_id': show_data.get('slug'), +                'thumbnails': thumbnails, +                'extractor_key': site, +            } + +            if page_type == 'embed': +                info.update({ +                    '_type': 'url_transparent', +                    'url': page_data['video']['data']['embed'], +                }) +                return info +              video_data = self._download_json(                  'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),                  video_id)['items'][0] @@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):                          })              self._sort_formats(formats) -            preference = qualities(['mini', 'small', 'medium', 'large']) -            thumbnails = [{ -                'url': image_url, -                'id': image_id, -                'preference': preference(image_id) -            } for image_id, image_url in video_data.get('images', {}).items()] - -            return { -                'id': video_id, -                'display_id': display_id, +            info.update({                  'title': unescapeHTML(video_data['title']),                  'description': unescapeHTML(video_data.get('summary')), -                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '), -                'author': episode_data.get('author'),                  'uploader': video_data.get('show', {}).get('name'),                  'uploader_id': video_data.get('show', {}).get('slug'),                  'duration': int_or_none(video_data.get('duration')), -                'thumbnails': thumbnails,                  'formats': formats, -            } +            }) +            return info          else: -            show_data = page_info['show']['data'] +            list_data = page_info[page_type]['data']              episodes_data = page_info['episodes']['data']              num_episodes = page_info['meta']['totalEpisodes']              processed_episodes = 0              entries = []              page_num = 1              while True: -                entries.extend([self.url_result( -                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data]) +                entries.extend([{ +                    '_type': 'url', +                    'url': 'http://%s%s' % (domain, episode['path']), +                    'id': compat_str(episode['id']), +                    'ie_key': 'Revision3', +                    'extractor_key': site, +                } for episode in episodes_data])                  processed_episodes += len(episodes_data)                  if processed_episodes == num_episodes:                      break @@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):                      display_id)['episodes']['data']              return self.playlist_result( -                entries, compat_str(show_data['id']), -                show_data.get('name'), show_data.get('summary')) +                entries, compat_str(list_data['id']), +                list_data.get('name'), list_data.get('summary')) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 67762a003..e70b2ab3c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -102,6 +102,14 @@ class TwitterCardIE(TwitterBaseIE):              r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),              video_id) +        def _search_dimensions_in_video_url(a_format, video_url): +            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) +            if m: +                a_format.update({ +                    'width': int(m.group('width')), +                    'height': int(m.group('height')), +                }) +          playlist = config.get('playlist')          if playlist:              video_url = playlist[0]['source'] @@ -110,12 +118,8 @@ class TwitterCardIE(TwitterBaseIE):                  'url': video_url,              } -            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) -            if m: -                f.update({ -                    'width': int(m.group('width')), -                    'height': int(m.group('height')), -                }) +            _search_dimensions_in_video_url(f, video_url) +              formats.append(f)          vmap_url = config.get('vmapUrl') or config.get('vmap_url') @@ -148,6 +152,8 @@ class TwitterCardIE(TwitterBaseIE):                      if not a_format['vbr']:                          del a_format['vbr'] +                    _search_dimensions_in_video_url(a_format, media_url) +                      formats.append(a_format)              duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 14e945d49..e148b1ef5 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):          'aftenbladet.no/tv': 'satv',          'fvn.no/fvntv': 'fvntv',          'aftenposten.no/webtv': 'aptv', +        'ap.vgtv.no/webtv': 'aptv',      }      _APP_NAME_TO_VENDOR = { @@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):                      (?P<host>                          %s                      ) -                    / +                    /?                      (?:                          \#!/(?:video|live)/|                          embed?.*id= @@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):              'md5': 'fd828cd29774a729bf4d4425fe192972',              'info_dict': {                  'id': '21039', -                'ext': 'mov', +                'ext': 'mp4',                  'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',                  'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',                  'duration': 66,                  'timestamp': 1417002452,                  'upload_date': '20141126',                  'view_count': int, -            } +            }, +            'params': { +                # m3u8 download +                'skip_download': True, +            },          },          {              'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',              'only_matching': True,          }, +        { +            'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil', +            'only_matching': True, +        },      ]      def _real_extract(self, url): @@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):          if len(video_id) == 5:              if appname == 'bttv':                  info = self._extract_video_info('btno', video_id) -            elif appname == 'aptv': -                info = self._extract_video_info('ap', video_id)          streams = data['streamUrls']          stream_type = data.get('streamType') diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index d3cc1a29f..e699e663f 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -10,13 +10,27 @@ from ..compat import (      compat_urllib_parse,  )  from ..utils import ( +    ExtractorError,      int_or_none,      float_or_none,      sanitized_Request,  ) -class YandexMusicTrackIE(InfoExtractor): +class YandexMusicBaseIE(InfoExtractor): +    @staticmethod +    def _handle_error(response): +        error = response.get('error') +        if error: +            raise ExtractorError(error, expected=True) + +    def _download_json(self, *args, **kwargs): +        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs) +        self._handle_error(response) +        return response + + +class YandexMusicTrackIE(YandexMusicBaseIE):      IE_NAME = 'yandexmusic:track'      IE_DESC = 'Яндекс.Музыка - Трек'      _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)' @@ -73,7 +87,7 @@ class YandexMusicTrackIE(InfoExtractor):          return self._get_track_info(track) -class YandexMusicPlaylistBaseIE(InfoExtractor): +class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):      def _build_playlist(self, tracks):          return [              self.url_result( diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index b29baafc4..1124fe6c2 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -75,7 +75,7 @@ class YouPornIE(InfoExtractor):          links = []          sources = self._search_regex( -            r'sources\s*:\s*({.+?})', webpage, 'sources', default=None) +            r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)          if sources:              for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):                  links.append(link) @@ -101,8 +101,9 @@ class YouPornIE(InfoExtractor):              }              # Video URL's path looks like this:              #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 +            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4              # We will benefit from it by extracting some metadata -            mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url) +            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)              if mobj:                  height = int(mobj.group('height'))                  bitrate = int(mobj.group('bitrate')) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d431aa6b7..22a39a0ab 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1743,6 +1743,7 @@ def update_url_query(url, query):      parsed_url = compat_urlparse.urlparse(url)      qs = compat_parse_qs(parsed_url.query)      qs.update(query) +    qs = encode_dict(qs)      return compat_urlparse.urlunparse(parsed_url._replace(          query=compat_urllib_parse.urlencode(qs, True))) | 
