diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/hostingbulk.py | 84 | ||||
| -rw-r--r-- | youtube_dl/extractor/izlesene.py | 96 | ||||
| -rw-r--r-- | youtube_dl/extractor/khanacademy.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/moevideo.py | 112 | ||||
| -rw-r--r-- | youtube_dl/extractor/noco.py | 27 | ||||
| -rw-r--r-- | youtube_dl/extractor/nosvideo.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/rutv.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/sharesix.py | 91 | ||||
| -rw-r--r-- | youtube_dl/extractor/tvplay.py | 147 | ||||
| -rw-r--r-- | youtube_dl/extractor/unistra.py | 78 | ||||
| -rw-r--r-- | youtube_dl/extractor/vgtv.py | 119 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
14 files changed, 715 insertions, 68 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2ae656a4c..94e370281 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -140,6 +140,7 @@ from .hark import HarkIE  from .helsinki import HelsinkiIE  from .hentaistigma import HentaiStigmaIE  from .hornbunny import HornBunnyIE +from .hostingbulk import HostingBulkIE  from .hotnewhiphop import HotNewHipHopIE  from .howcast import HowcastIE  from .howstuffworks import HowStuffWorksIE @@ -200,6 +201,7 @@ from .mitele import MiTeleIE  from .mixcloud import MixcloudIE  from .mlb import MLBIE  from .mpora import MporaIE +from .moevideo import MoeVideoIE  from .mofosex import MofosexIE  from .mojvideo import MojvideoIE  from .mooshare import MooshareIE @@ -296,6 +298,7 @@ from .scivee import SciVeeIE  from .screencast import ScreencastIE  from .servingsys import ServingSysIE  from .shared import SharedIE +from .sharesix import ShareSixIE  from .sina import SinaIE  from .slideshare import SlideshareIE  from .slutload import SlutloadIE @@ -376,6 +379,7 @@ from .veehd import VeeHDIE  from .veoh import VeohIE  from .vesti import VestiIE  from .vevo import VevoIE +from .vgtv import VGTVIE  from .vh1 import VH1IE  from .viddler import ViddlerIE  from .videobam import VideoBamIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1b7697870..2bfa20606 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -628,7 +628,7 @@ class GenericIE(InfoExtractor):                  embedSWF\(?:\s*              )              (["\']) -                (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ +                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/                  (?:embed|v)/.+?)              \1''', webpage)          if matches: diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py new file mode 100644 index 000000000..8e812b669 --- /dev/null +++ b/youtube_dl/extractor/hostingbulk.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    compat_urllib_request, +    int_or_none, +    urlencode_postdata, +) + + +class HostingBulkIE(InfoExtractor): +    _VALID_URL = r'''(?x) +        https?://(?:www\.)?hostingbulk\.com/ +        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html''' +    _FILE_DELETED_REGEX = r'<b>File Not Found</b>' +    _TEST = { +        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html', +        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f', +        'info_dict': { +            'id': 'n0ulw1hv20fm', +            'ext': 'mp4', +            'title': 'md5:5afeba33f48ec87219c269e054afd622', +            'filesize': 6816081, +            'thumbnail': 're:^http://.*\.jpg$', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        url = 'http://hostingbulk.com/{0:}.html'.format(video_id) + +        # Custom request with cookie to set language to English, so our file +        # deleted regex would work. +        request = compat_urllib_request.Request( +            url, headers={'Cookie': 'lang=english'}) +        webpage = self._download_webpage(request, video_id) + +        if re.search(self._FILE_DELETED_REGEX, webpage) is not None: +            raise ExtractorError('Video %s does not exist' % video_id, +                                 expected=True) + +        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title') +        filesize = int_or_none( +            self._search_regex( +                r'<small>\((\d+)\sbytes?\)</small>', +                webpage, +                'filesize', +                fatal=False +            ) +        ) +        thumbnail = self._search_regex( +            r'<img src="([^"]+)".+?class="pic"', +            webpage, 'thumbnail', fatal=False) + +        fields = dict(re.findall(r'''(?x)<input\s+ +            type="hidden"\s+ +            name="([^"]+)"\s+ +            value="([^"]*)" +            ''', webpage)) + +        request = compat_urllib_request.Request(url, urlencode_postdata(fields)) +        request.add_header('Content-type', 'application/x-www-form-urlencoded') +        response = self._request_webpage(request, video_id, +                                         'Submiting download request') +        video_url = response.geturl() + +        formats = [{ +            'format_id': 'sd', +            'filesize': filesize, +            'url': video_url, +        }] + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index 79e8430b5..a83dd249f 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -9,29 +9,50 @@ from ..utils import (      parse_iso8601,      determine_ext,      int_or_none, +    float_or_none,      str_to_int,  )  class IzleseneIE(InfoExtractor): -    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)' -    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}' -    _TEST = { -        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694', -        'md5': '4384f9f0ea65086734b881085ee05ac2', -        'info_dict': { -            'id': '7599694', -            'ext': 'mp4', -            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi', -            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor', -            'thumbnail': 're:^http://.*\.jpg', -            'uploader_id': 'pelikzzle', -            'timestamp': 1404298698, -            'upload_date': '20140702', -            'duration': 95.395, -            'age_limit': 0, -        } -    } +    _VALID_URL = r'''(?x) +        https?://(?:(?:www|m)\.)?izlesene\.com/ +        (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+) +        ''' +    _TESTS = [ +        { +            'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694', +            'md5': '4384f9f0ea65086734b881085ee05ac2', +            'info_dict': { +                'id': '7599694', +                'ext': 'mp4', +                'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi', +                'description': 'md5:253753e2655dde93f59f74b572454f6d', +                'thumbnail': 're:^http://.*\.jpg', +                'uploader_id': 'pelikzzle', +                'timestamp': 1404298698, +                'upload_date': '20140702', +                'duration': 95.395, +                'age_limit': 0, +            } +        }, +        { +            'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997', +            'md5': '97f09b6872bffa284cb7fa4f6910cb72', +            'info_dict': { +                'id': '17997', +                'ext': 'mp4', +                'title': 'Tarkan Dortmund 2006 Konseri', +                'description': 'Tarkan Dortmund 2006 Konseri', +                'thumbnail': 're:^http://.*\.jpg', +                'uploader_id': 'parlayankiz', +                'timestamp': 1163318593, +                'upload_date': '20061112', +                'duration': 253.666, +                'age_limit': 0, +            } +        }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -45,18 +66,19 @@ class IzleseneIE(InfoExtractor):          thumbnail = self._og_search_thumbnail(webpage)          uploader = self._html_search_regex( -            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='') +            r"adduserUsername\s*=\s*'([^']+)';", +            webpage, 'uploader', fatal=False, default='')          timestamp = parse_iso8601(self._html_search_meta(              'uploadDate', webpage, 'upload date', fatal=False)) -        duration = int_or_none(self._html_search_regex( -            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False)) -        if duration: -            duration /= 1000.0 +        duration = float_or_none(self._html_search_regex( +            r'"videoduration"\s*:\s*"([^"]+)"', +            webpage, 'duration', fatal=False), scale=1000)          view_count = str_to_int(get_element_by_id('videoViewCount', webpage))          comment_count = self._html_search_regex( -            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False) +            r'comment_count\s*=\s*\'([^\']+)\';', +            webpage, 'comment_count', fatal=False)          family_friendly = self._html_search_meta(              'isFamilyFriendly', webpage, 'age limit', fatal=False) @@ -66,20 +88,26 @@ class IzleseneIE(InfoExtractor):          ext = determine_ext(content_url, 'mp4')          # Might be empty for some videos. -        qualities = self._html_search_regex( -            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='') +        streams = self._html_search_regex( +            r'"qualitylevel"\s*:\s*"([^"]+)"', +            webpage, 'streams', fatal=False, default='')          formats = [] -        for quality in qualities.split('|'): -            json = self._download_json( -                self._STREAM_URL.format(id=video_id, format=quality), video_id, -                note='Getting video URL for "%s" quality' % quality, -                errnote='Failed to get video URL for "%s" quality' % quality -            ) +        if streams: +            for stream in streams.split('|'): +                quality, url = re.search(r'\[(\w+)\](.+)', stream).groups() +                formats.append({ +                    'format_id': '%sp' % quality if quality else 'sd', +                    'url': url, +                    'ext': ext, +                }) +        else: +            stream_url = self._search_regex( +                r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')              formats.append({ -                'url': json.get('streamurl'), +                'format_id': 'sd', +                'url': stream_url,                  'ext': ext, -                'format_id': '%sp' % quality if quality else 'sd',              })          return { diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 04bac7517..408d00944 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -9,7 +9,7 @@ from ..utils import (  class KhanAcademyIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])' +    _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'      IE_NAME = 'KhanAcademy'      _TESTS = [{ diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py new file mode 100644 index 000000000..2ff79b9b8 --- /dev/null +++ b/youtube_dl/extractor/moevideo.py @@ -0,0 +1,112 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    compat_urllib_parse, +    compat_urllib_request, +    int_or_none, +) + + +class MoeVideoIE(InfoExtractor): +    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net' +    _VALID_URL = r'''(?x) +        https?://(?P<host>(?:www\.)? +        (?:(?:moevideo|playreplay|videochart)\.net))/ +        (?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)''' +    _API_URL = 'http://api.letitbit.net/' +    _API_KEY = 'tVL0gjqo5' +    _TESTS = [ +        { +            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29', +            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a', +            'info_dict': { +                'id': '00297.0036103fe3d513ef27915216fd29', +                'ext': 'flv', +                'title': 'Sink cut out machine', +                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8', +                'thumbnail': 're:^https?://.*\.jpg$', +                'width': 540, +                'height': 360, +                'duration': 179, +                'filesize': 17822500, +            } +        }, +        { +            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a', +            'md5': '74f0a014d5b661f0f0e2361300d1620e', +            'info_dict': { +                'id': '77107.7f325710a627383d40540d8e991a', +                'ext': 'flv', +                'title': 'Operacion Condor.', +                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3', +                'thumbnail': 're:^https?://.*\.jpg$', +                'width': 480, +                'height': 296, +                'duration': 6027, +                'filesize': 588257923, +            } +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage( +            'http://%s/video/%s' % (mobj.group('host'), video_id), +            video_id, 'Downloading webpage') + +        title = self._og_search_title(webpage) +        thumbnail = self._og_search_thumbnail(webpage) +        description = self._og_search_description(webpage) + +        r = [ +            self._API_KEY, +            [ +                'preview/flv_link', +                { +                    'uid': video_id, +                }, +            ], +        ] +        r_json = json.dumps(r) +        post = compat_urllib_parse.urlencode({'r': r_json}) +        req = compat_urllib_request.Request(self._API_URL, post) +        req.add_header('Content-type', 'application/x-www-form-urlencoded') + +        response = self._download_json(req, video_id) +        if response['status'] != 'OK': +            raise ExtractorError( +                '%s returned error: %s' % (self.IE_NAME, response['data']), +                expected=True +            ) +        item = response['data'][0] +        video_url = item['link'] +        duration = int_or_none(item['length']) +        width = int_or_none(item['width']) +        height = int_or_none(item['height']) +        filesize = int_or_none(item['convert_size']) + +        formats = [{ +            'format_id': 'sd', +            'http_headers': {'Range': 'bytes=0-'},  # Required to download +            'url': video_url, +            'width': width, +            'height': height, +            'filesize': filesize, +        }] + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'description': description, +            'duration': duration, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index da203538d..959fdf590 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -5,7 +5,10 @@ import re  from .common import InfoExtractor  from ..utils import ( +    compat_urllib_request, +    compat_urllib_parse,      ExtractorError, +    clean_html,      unified_strdate,      compat_str,  ) @@ -13,6 +16,8 @@ from ..utils import (  class NocoIE(InfoExtractor):      _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' +    _LOGIN_URL = 'http://noco.tv/do.php' +    _NETRC_MACHINE = 'noco'      _TEST = {          'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', @@ -30,6 +35,28 @@ class NocoIE(InfoExtractor):          'skip': 'Requires noco account',      } +    def _real_initialize(self): +        self._login() + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return + +        login_form = { +            'a': 'login', +            'cookie': '1', +            'username': username, +            'password': password, +        } +        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) +        request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') + +        login = self._download_json(request, None, 'Logging in as %s' % username) + +        if 'erreur' in login: +            raise  ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index 095965add..8c2c428fc 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -5,8 +5,9 @@ import re  from .common import InfoExtractor  from ..utils import ( -    compat_urllib_parse, +    ExtractorError,      compat_urllib_request, +    urlencode_postdata,      xpath_with_ns,  ) @@ -18,11 +19,12 @@ class NosVideoIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \                   '(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'      _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml' +    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'      _TEST = { -        'url': 'http://nosvideo.com/?v=drlp6s40kg54', -        'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c', +        'url': 'http://nosvideo.com/?v=mu8fle7g7rpq', +        'md5': '6124ed47130d8be3eacae635b071e6b6',          'info_dict': { -            'id': 'drlp6s40kg54', +            'id': 'mu8fle7g7rpq',              'ext': 'mp4',              'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',              'thumbnail': 're:^https?://.*\.jpg$', @@ -38,11 +40,14 @@ class NosVideoIE(InfoExtractor):              'op': 'download1',              'method_free': 'Continue to Video',          } -        post = compat_urllib_parse.urlencode(fields) -        req = compat_urllib_request.Request(url, post) +        req = compat_urllib_request.Request(url, urlencode_postdata(fields))          req.add_header('Content-type', 'application/x-www-form-urlencoded')          webpage = self._download_webpage(req, video_id,                                           'Downloading download page') +        if re.search(self._FILE_DELETED_REGEX, webpage) is not None: +            raise ExtractorError('Video %s does not exist' % video_id, +                                 expected=True) +          xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')          playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)          playlist = self._download_xml(playlist_url, video_id) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index 6c5f5a680..f737b4e5f 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor):              return mobj.group('url')          mobj = re.search( -            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', +            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',              webpage)          if mobj:              return mobj.group('url') diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py new file mode 100644 index 000000000..7531e8325 --- /dev/null +++ b/youtube_dl/extractor/sharesix.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse, +    compat_urllib_request, +    parse_duration, +) + + +class ShareSixIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)' +    _TESTS = [ +        { +            'url': 'http://sharesix.com/f/OXjQ7Y6', +            'md5': '9e8e95d8823942815a7d7c773110cc93', +            'info_dict': { +                'id': 'OXjQ7Y6', +                'ext': 'mp4', +                'title': 'big_buck_bunny_480p_surround-fix.avi', +                'duration': 596, +                'width': 854, +                'height': 480, +            }, +        }, +        { +            'url': 'http://sharesix.com/lfrwoxp35zdd', +            'md5': 'dd19f1435b7cec2d7912c64beeee8185', +            'info_dict': { +                'id': 'lfrwoxp35zdd', +                'ext': 'flv', +                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv', +                'duration': 65, +                'width': 1280, +                'height': 720, +            }, +        } +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        fields = { +            'method_free': 'Free' +        } +        post = compat_urllib_parse.urlencode(fields) +        req = compat_urllib_request.Request(url, post) +        req.add_header('Content-type', 'application/x-www-form-urlencoded') + +        webpage = self._download_webpage(req, video_id, +                                         'Downloading video page') + +        video_url = self._search_regex( +            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL') +        title = self._html_search_regex( +            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title') +        duration = parse_duration( +            self._search_regex( +                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>', +                webpage, +                'duration', +                fatal=False +            ) +        ) + +        m = re.search( +            r'''(?xs)<dt>Width\sx\sHeight</dt>.+? +                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''', +            webpage +        ) +        width = height = None +        if m: +            width, height = int(m.group('width')), int(m.group('height')) + +        formats = [{ +            'format_id': 'sd', +            'url': video_url, +            'width': width, +            'height': height, +        }] + +        return { +            'id': video_id, +            'title': title, +            'duration': duration, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index a56a7ab5f..445e0ec41 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -6,13 +6,28 @@ import re  from .common import InfoExtractor  from ..utils import (      ExtractorError, +    compat_str,      parse_iso8601,      qualities,  )  class TVPlayIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)' +    IE_DESC = 'TV3Play and related services' +    _VALID_URL = r'''(?x)http://(?:www\.)? +        (?:tvplay\.lv/parraides| +           tv3play\.lt/programos| +           tv3play\.ee/sisu| +           tv3play\.se/program| +           tv6play\.se/program| +           tv8play\.se/program| +           tv10play\.se/program| +           tv3play\.no/programmer| +           viasat4play\.no/programmer| +           tv6play\.no/programmer| +           tv3play\.dk/programmer| +        )/[^/]+/(?P<id>\d+) +        '''      _TESTS = [          {              'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', @@ -30,6 +45,134 @@ class TVPlayIE(InfoExtractor):                  'skip_download': True,              },          }, +        { +            'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true', +            'info_dict': { +                'id': '409229', +                'ext': 'flv', +                'title': 'Moterys meluoja geriau', +                'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e', +                'duration': 1330, +                'timestamp': 1403769181, +                'upload_date': '20140626', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true', +            'info_dict': { +                'id': '238551', +                'ext': 'flv', +                'title': 'Kodu keset linna 398537', +                'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701', +                'duration': 1257, +                'timestamp': 1292449761, +                'upload_date': '20101215', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true', +            'info_dict': { +                'id': '395385', +                'ext': 'flv', +                'title': 'Husräddarna S02E07', +                'description': 'md5:f210c6c89f42d4fc39faa551be813777', +                'duration': 2574, +                'timestamp': 1400596321, +                'upload_date': '20140520', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true', +            'info_dict': { +                'id': '266636', +                'ext': 'flv', +                'title': 'Den sista dokusåpan S01E08', +                'description': 'md5:295be39c872520221b933830f660b110', +                'duration': 1492, +                'timestamp': 1330522854, +                'upload_date': '20120229', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true', +            'info_dict': { +                'id': '282756', +                'ext': 'flv', +                'title': 'Antikjakten S01E10', +                'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8', +                'duration': 2646, +                'timestamp': 1348575868, +                'upload_date': '20120925', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true', +            'info_dict': { +                'id': '230898', +                'ext': 'flv', +                'title': 'Anna Anka søker assistent - Ep. 8', +                'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474', +                'duration': 2656, +                'timestamp': 1277720005, +                'upload_date': '20100628', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true', +            'info_dict': { +                'id': '21873', +                'ext': 'flv', +                'title': 'Budbringerne program 10', +                'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d', +                'duration': 1297, +                'timestamp': 1254205102, +                'upload_date': '20090929', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true', +            'info_dict': { +                'id': '361883', +                'ext': 'flv', +                'title': 'Hotelinspektør Alex Polizzi - Ep. 10', +                'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81', +                'duration': 2594, +                'timestamp': 1393236292, +                'upload_date': '20140224', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        },      ]      def _real_extract(self, url): @@ -49,7 +192,7 @@ class TVPlayIE(InfoExtractor):          quality = qualities(['hls', 'medium', 'high'])          formats = []          for format_id, video_url in streams['streams'].items(): -            if not video_url: +            if not video_url or not isinstance(video_url, compat_str):                  continue              fmt = {                  'format_id': format_id, diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 474610eec..f70978299 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -1,32 +1,66 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor +from ..utils import qualities +  class UnistraIE(InfoExtractor): -    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)' - -    _TEST = { -        u'url': u'http://utv.unistra.fr/video.php?id_video=154', -        u'file': u'154.mp4', -        u'md5': u'736f605cfdc96724d55bb543ab3ced24', -        u'info_dict': { -            u'title': u'M!ss Yella', -            u'description': u'md5:104892c71bd48e55d70b902736b81bbf', +    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)' + +    _TESTS = [ +        { +            'url': 'http://utv.unistra.fr/video.php?id_video=154', +            'md5': '736f605cfdc96724d55bb543ab3ced24', +            'info_dict': { +                'id': '154', +                'ext': 'mp4', +                'title': 'M!ss Yella', +                'description': 'md5:104892c71bd48e55d70b902736b81bbf', +            },          }, -    } +        { +            'url': 'http://utv.unistra.fr/index.php?id_video=437', +            'md5': '1ddddd6cccaae76f622ce29b8779636d', +            'info_dict': { +                'id': '437', +                'ext': 'mp4', +                'title': 'Prix Louise Weiss 2014', +                'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a', +            }, +        } +    ]      def _real_extract(self, url): -        id = re.match(self._VALID_URL, url).group(1) -        webpage = self._download_webpage(url, id) -        file = re.search(r'file: "(.*?)",', webpage).group(1) -        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title') +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') -        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file +        webpage = self._download_webpage(url, video_id) -        return {'id': id, -                'title': title, -                'ext': 'mp4', -                'url': video_url, -                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL), -                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'), -                } +        files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage)) + +        quality = qualities(['SD', 'HD']) +        formats = [] +        for file_path in files: +            format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD' +            formats.append({ +                'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path, +                'format_id': format_id, +                'quality': quality(format_id) +            }) + +        title = self._html_search_regex( +            r'<title>UTV - (.*?)</', webpage, 'title') +        description = self._html_search_regex( +            r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL) +        thumbnail = self._search_regex( +            r'image: "(.*?)"', webpage, 'thumbnail') + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'formats': formats +        } diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py new file mode 100644 index 000000000..7d27d6c57 --- /dev/null +++ b/youtube_dl/extractor/vgtv.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import float_or_none + + +class VGTVIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)' +    _TESTS = [ +        { +            # streamType: vod +            'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu', +            'md5': 'b8be7a234cebb840c0d512c78013e02f', +            'info_dict': { +                'id': '84196', +                'ext': 'mp4', +                'title': 'Hevnen er søt episode 10: Abu', +                'description': 'md5:e25e4badb5f544b04341e14abdc72234', +                'thumbnail': 're:^https?://.*\.jpg', +                'duration': 648.000, +                'timestamp': 1404626400, +                'upload_date': '20140706', +                'view_count': int, +            }, +        }, +        { +            # streamType: wasLive +            'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen', +            'info_dict': { +                'id': '100764', +                'ext': 'mp4', +                'title': 'OPPTAK: VGTV følger EM-kvalifiseringen', +                'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3', +                'thumbnail': 're:^https?://.*\.jpg', +                'duration': 9056.000, +                'timestamp': 1410113864, +                'upload_date': '20140907', +                'view_count': int, +            }, +            'params': { +                # m3u8 download +                'skip_download': True, +            }, +        }, +        { +            # streamType: live +            'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen', +            'info_dict': { +                'id': '100015', +                'ext': 'mp4', +                'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!', +                'description': 'md5:9a60cc23fa349f761628924e56eeec2d', +                'thumbnail': 're:^https?://.*\.jpg', +                'duration': 0, +                'timestamp': 1407423348, +                'upload_date': '20140807', +                'view_count': int, +            }, +            'params': { +                # m3u8 download +                'skip_download': True, +            }, +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        data = self._download_json( +            'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id, +            video_id, 'Downloading media JSON') + +        streams = data['streamUrls'] + +        formats = [] + +        hls_url = streams.get('hls') +        if hls_url: +            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4')) + +        hds_url = streams.get('hds') +        if hds_url: +            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id)) + +        mp4_url = streams.get('mp4') +        if mp4_url: +            _url = hls_url or hds_url +            MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1]) +            for mp4_format in _url.split(','): +                m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format) +                if not m: +                    continue +                width = int(m.group('width')) +                height = int(m.group('height')) +                vbr = int(m.group('vbr')) +                formats.append({ +                    'url': MP4_URL_TEMPLATE % mp4_format, +                    'format_id': 'mp4-%s' % vbr, +                    'width': width, +                    'height': height, +                    'vbr': vbr, +                    'preference': 1, +                }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': data['title'], +            'description': data['description'], +            'thumbnail': data['images']['main'] + '?t[]=900x506q80', +            'timestamp': data['published'], +            'duration': float_or_none(data['duration'], 1000), +            'view_count': data['displays'], +            'formats': formats, +        }
\ No newline at end of file diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 51e47fee8..680107346 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.04.3' +__version__ = '2014.09.10.1'  | 
