diff options
| -rw-r--r-- | youtube_dl/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/arte.py | 14 | ||||
| -rw-r--r-- | youtube_dl/extractor/firedrive.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/mojvideo.py | 58 | ||||
| -rw-r--r-- | youtube_dl/extractor/nowness.py | 42 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/vube.py | 71 | ||||
| -rw-r--r-- | youtube_dl/extractor/xboxclips.py | 57 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
10 files changed, 196 insertions, 57 deletions
| diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 429630ce5..9b41587e7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -66,6 +66,8 @@ __authors__  = (      'Naglis Jonaitis',      'Charles Chen',      'Hassaan Ali', +    'Dobrosław Żybort', +    'David Fabijan',  )  __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e5ce08bc1..2401940c3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -182,6 +182,7 @@ from .mixcloud import MixcloudIE  from .mlb import MLBIE  from .mpora import MporaIE  from .mofosex import MofosexIE +from .mojvideo import MojvideoIE  from .mooshare import MooshareIE  from .morningstar import MorningstarIE  from .motherless import MotherlessIE @@ -384,6 +385,7 @@ from .wistia import WistiaIE  from .worldstarhiphop import WorldStarHipHopIE  from .wrzuta import WrzutaIE  from .xbef import XBefIE +from .xboxclips import XboxClipsIE  from .xhamster import XHamsterIE  from .xnxx import XNXXIE  from .xvideos import XVideosIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 9591bad8a..d86dbba8e 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):              regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]              return any(re.match(r, f['versionCode']) for r in regexes)          # Some formats may not be in the same language as the url +        # TODO: Might want not to drop videos that does not match requested language +        # but to process those formats with lower precedence          formats = filter(_match_lang, all_formats) -        formats = list(formats) # in python3 filter returns an iterator +        formats = list(formats)  # in python3 filter returns an iterator          if not formats:              # Some videos are only available in the 'Originalversion'              # they aren't tagged as being in French or German -            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): -                formats = all_formats -            else: -                raise ExtractorError(u'The formats list is empty') +            # Sometimes there are neither videos of requested lang code +            # nor original version videos available +            # For such cases we just take all_formats as is +            formats = all_formats +            if not formats: +                raise ExtractorError('The formats list is empty')          if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:              def sort_key(f): diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index 6d73c8a4a..af439ccfe 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):          fields = dict(re.findall(r'''(?x)<input\s+              type="hidden"\s+              name="([^"]+)"\s+ -            (?:id="[^"]+"\s+)?              value="([^"]*)"              ''', webpage)) @@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):          ext = self._search_regex(r'type:\s?\'([^\']+)\',',                                   webpage, 'extension', fatal=False)          video_url = self._search_regex( -            r'file:\s?\'(http[^\']+)\',', webpage, 'file url') +            r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')          formats = [{              'format_id': 'sd', diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dl/extractor/mojvideo.py new file mode 100644 index 000000000..90b460d65 --- /dev/null +++ b/youtube_dl/extractor/mojvideo.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    parse_duration, +) + + +class MojvideoIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)' +    _TEST = { +        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906', +        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7', +        'info_dict': { +            'id': '3d1ed4497707730b2906', +            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic', +            'ext': 'mp4', +            'title': 'V avtu pred mano rdečelaska - Alfi Nipič', +            'thumbnail': 're:^http://.*\.jpg$', +            'duration': 242, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') + +        # XML is malformed +        playerapi = self._download_webpage( +            'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id) + +        if '<error>true</error>' in playerapi: +            error_desc = self._html_search_regex( +                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False) +            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True) + +        title = self._html_search_regex( +            r'<title>([^<]+)</title>', playerapi, 'title') +        video_url = self._html_search_regex( +            r'<file>([^<]+)</file>', playerapi, 'video URL') +        thumbnail = self._html_search_regex( +            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False) +        duration = parse_duration(self._html_search_regex( +            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False)) + +        return { +            'id': video_id, +            'display_id': display_id, +            'url': video_url, +            'title': title, +            'thumbnail': thumbnail, +            'duration': duration, +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index 1c5e9401f..6b2f3f55a 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -1,3 +1,4 @@ +# encoding: utf-8  from __future__ import unicode_literals  import re @@ -8,19 +9,34 @@ from ..utils import ExtractorError  class NownessIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' - -    _TEST = { -        'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', -        'md5': '068bc0202558c2e391924cb8cc470676', -        'info_dict': { -            'id': '2520295746001', -            'ext': 'mp4', -            'description': 'Candor: The Art of Gesticulation', -            'uploader': 'Nowness', -            'title': 'Candor: The Art of Gesticulation', -        } -    } +    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' + +    _TESTS = [ +        { +            'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', +            'md5': '068bc0202558c2e391924cb8cc470676', +            'info_dict': { +                'id': '2520295746001', +                'ext': 'mp4', +                'title': 'Candor: The Art of Gesticulation', +                'description': 'Candor: The Art of Gesticulation', +                'thumbnail': 're:^https?://.*\.jpg', +                'uploader': 'Nowness', +            } +        }, +        { +            'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr', +            'md5': 'e79cf125e387216f86b2e0a5b5c63aa3', +            'info_dict': { +                'id': '3716354522001', +                'ext': 'mp4', +                'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', +                'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', +                'thumbnail': 're:^https?://.*\.jpg', +                'uploader': 'Nowness', +            } +        }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 10844f39e..11c7d7e81 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -276,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):          if video_thumbnail is None:              video_thumbs = config["video"].get("thumbs")              if video_thumbs and isinstance(video_thumbs, dict): -                _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] +                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]          # Extract video description          video_description = None diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index f1b9e9a19..2544c24bd 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -1,10 +1,12 @@  from __future__ import unicode_literals -import json  import re  from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( +    int_or_none, +    compat_str, +)  class VubeIE(InfoExtractor): @@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):                  'like_count': int,                  'dislike_count': int,                  'comment_count': int, +                'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],              }          },          { @@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):                  'like_count': int,                  'dislike_count': int,                  'comment_count': int, +                'categories': ['seraina', 'jessica', 'krewella', 'alive'],              }          }, {              'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', @@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Frozen - Let It Go Cover by Siren Gene',                  'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.', -                'uploader': 'Siren Gene', -                'uploader_id': 'Siren',                  'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', +                'uploader': 'Siren', +                'timestamp': 1395448018, +                'upload_date': '20140322',                  'duration': 221.788,                  'like_count': int,                  'dislike_count': int,                  'comment_count': int, +                'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],              }          }      ] @@ -71,47 +77,40 @@ class VubeIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        webpage = self._download_webpage(url, video_id) -        data_json = self._search_regex( -            r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n', -            webpage, 'video data' -        ) -        data = json.loads(data_json) -        video = ( -            data.get('video') or -            data) -        assert isinstance(video, dict) +        video = self._download_json( +            'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')          public_id = video['public_id'] -        formats = [ -            { -                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id), -                'height': int(fmt['height']), -                'abr': int(fmt['audio_bitrate']), -                'vbr': int(fmt['video_bitrate']), -                'format_id': fmt['media_resolution_id'] -            } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed' -        ] +        formats = [] + +        for media in video['media'].get('video', []) + video['media'].get('audio', []): +            if media['transcoding_status'] != 'processed': +                continue +            fmt = { +                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id), +                'abr': int(media['audio_bitrate']), +                'format_id': compat_str(media['media_resolution_id']), +            } +            vbr = int(media['video_bitrate']) +            if vbr: +                fmt.update({ +                    'vbr': vbr, +                    'height': int(media['height']), +                }) +            formats.append(fmt)          self._sort_formats(formats)          title = video['title']          description = video.get('description') -        thumbnail = self._proto_relative_url( -            video.get('thumbnail') or video.get('thumbnail_src'), -            scheme='http:') -        uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias') -        uploader_id = data.get('user', {}).get('name') +        thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:') +        uploader = video.get('user_alias') or video.get('channel')          timestamp = int_or_none(video.get('upload_time'))          duration = video['duration']          view_count = video.get('raw_view_count') -        like_count = video.get('rlikes') -        if like_count is None: -            like_count = video.get('total_likes') -        dislike_count = video.get('rhates') -        if dislike_count is None: -            dislike_count = video.get('total_hates') +        like_count = video.get('total_likes') +        dislike_count = video.get('total_hates')          comments = video.get('comments')          comment_count = None @@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):          else:              comment_count = len(comments) +        categories = [tag['text'] for tag in video['tags']] +          return {              'id': video_id,              'formats': formats, @@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):              'description': description,              'thumbnail': thumbnail,              'uploader': uploader, -            'uploader_id': uploader_id,              'timestamp': timestamp,              'duration': duration,              'view_count': view_count,              'like_count': like_count,              'dislike_count': dislike_count,              'comment_count': comment_count, +            'categories': categories,          } diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py new file mode 100644 index 000000000..a9aa72e73 --- /dev/null +++ b/youtube_dl/extractor/xboxclips.py @@ -0,0 +1,57 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    parse_iso8601, +    float_or_none, +    int_or_none, +) + + +class XboxClipsIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})' +    _TEST = { +        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', +        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d', +        'info_dict': { +            'id': '074a69a9-5faf-46aa-b93b-9909c1720325', +            'ext': 'mp4', +            'title': 'Iabdulelah playing Upload Studio', +            'filesize_approx': 28101836.8, +            'timestamp': 1407388500, +            'upload_date': '20140807', +            'duration': 56, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) + +        video_url = self._html_search_regex( +            r'>Link: <a href="([^"]+)">', webpage, 'video URL') +        title = self._html_search_regex( +            r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title') +        timestamp = parse_iso8601(self._html_search_regex( +            r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False)) +        filesize = float_or_none(self._html_search_regex( +            r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024) +        duration = int_or_none(self._html_search_regex( +            r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False)) +        view_count = int_or_none(self._html_search_regex( +            r'>Views: (\d+)<', webpage, 'view count', fatal=False)) + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'timestamp': timestamp, +            'filesize_approx': filesize, +            'duration': duration, +            'view_count': view_count, +        } diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 401fa3d10..08b5339f6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.08.02.1' +__version__ = '2014.08.05' | 
