diff options
26 files changed, 591 insertions, 135 deletions
| @@ -11,6 +11,7 @@ try:      setuptools_available = True  except ImportError:      from distutils.core import setup +    setuptools_available = False  try:      # This will create an exe that needs Microsoft Visual C++ 2008 diff --git a/test/helper.py b/test/helper.py index 777119ea5..d7bf7a828 100644 --- a/test/helper.py +++ b/test/helper.py @@ -5,9 +5,11 @@ import json  import os.path  import re  import types +import sys  import youtube_dl.extractor  from youtube_dl import YoutubeDL +from youtube_dl.utils import preferredencoding  def global_setup(): @@ -33,6 +35,21 @@ def try_rm(filename):              raise +def report_warning(message): +    ''' +    Print the message to stderr, it will be prefixed with 'WARNING:' +    If stderr is a tty file the 'WARNING:' will be colored +    ''' +    if sys.stderr.isatty() and os.name != 'nt': +        _msg_header = u'\033[0;33mWARNING:\033[0m' +    else: +        _msg_header = u'WARNING:' +    output = u'%s %s\n' % (_msg_header, message) +    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: +        output = output.encode(preferredencoding()) +    sys.stderr.write(output) + +  class FakeYDL(YoutubeDL):      def __init__(self, override=None):          # Different instances of the downloader can't share the same dictionary diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8cd1bdce..58cf9c313 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase):      def test_format_limit(self):          formats = [ -            {u'format_id': u'meh'}, -            {u'format_id': u'good'}, -            {u'format_id': u'great'}, -            {u'format_id': u'excellent'}, +            {u'format_id': u'meh', u'url': u'http://example.com/meh'}, +            {u'format_id': u'good', u'url': u'http://example.com/good'}, +            {u'format_id': u'great', u'url': u'http://example.com/great'}, +            {u'format_id': u'excellent', u'url': u'http://example.com/exc'},          ]          info_dict = {              u'formats': formats, u'extractor': u'test', 'id': 'testvid'} @@ -128,6 +128,18 @@ class TestFormatSelection(unittest.TestCase):          downloaded = ydl.downloaded_info_dicts[0]          self.assertEqual(downloaded['format_id'], u'35') +    def test_add_extra_info(self): +        test_dict = { +            'extractor': 'Foo', +        } +        extra_info = { +            'extractor': 'Bar', +            'playlist': 'funny videos', +        } +        YDL.add_extra_info(test_dict, extra_info) +        self.assertEqual(test_dict['extractor'], 'Foo') +        self.assertEqual(test_dict['playlist'], 'funny videos') +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index c596415c4..ba3580ea4 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -22,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase):          return info_dict      def getSubtitles(self):          info_dict = self.getInfoDict() -        return info_dict[0]['subtitles'] +        return info_dict['subtitles']      def test_no_writesubtitles(self):          subtitles = self.getSubtitles()          self.assertEqual(subtitles, None) diff --git a/test/test_download.py b/test/test_download.py index b9a9be11d..73379beb1 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -6,7 +6,14 @@ import sys  import unittest  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import get_params, get_testcases, global_setup, try_rm, md5 +from test.helper import ( +    get_params, +    get_testcases, +    global_setup, +    try_rm, +    md5, +    report_warning +)  global_setup() @@ -19,6 +26,7 @@ import youtube_dl.YoutubeDL  from youtube_dl.utils import (      compat_str,      compat_urllib_error, +    compat_HTTPError,      DownloadError,      ExtractorError,      UnavailableVideoError, @@ -60,9 +68,12 @@ def generator(test_case):          if not ie._WORKING:              print_skipping('IE marked as not _WORKING')              return -        if 'playlist' not in test_case and not test_case['file']: -            print_skipping('No output file specified') -            return +        if 'playlist' not in test_case: +            info_dict = test_case.get('info_dict', {}) +            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): +                print_skipping('The output file cannot be know, the "file" ' +                    'key is missing or the info_dict is incomplete') +                return          if 'skip' in test_case:              print_skipping(test_case['skip'])              return @@ -77,35 +88,47 @@ def generator(test_case):                  finished_hook_called.add(status['filename'])          ydl.fd.add_progress_hook(_hook) +        def get_tc_filename(tc): +            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) +          test_cases = test_case.get('playlist', [test_case]) -        for tc in test_cases: -            try_rm(tc['file']) -            try_rm(tc['file'] + '.part') -            try_rm(tc['file'] + '.info.json') +        def try_rm_tcs_files(): +            for tc in test_cases: +                tc_filename = get_tc_filename(tc) +                try_rm(tc_filename) +                try_rm(tc_filename + '.part') +                try_rm(tc_filename + '.info.json') +        try_rm_tcs_files()          try: -            for retry in range(1, RETRIES + 1): +            try_num = 1 +            while True:                  try:                      ydl.download([test_case['url']])                  except (DownloadError, ExtractorError) as err: -                    if retry == RETRIES: raise -                      # Check if the exception is not a network related one -                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): +                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):                          raise -                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) +                    if try_num == RETRIES: +                        report_warning(u'Failed due to network errors, skipping...') +                        return + +                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + +                    try_num += 1                  else:                      break              for tc in test_cases: +                tc_filename = get_tc_filename(tc)                  if not test_case.get('params', {}).get('skip_download', False): -                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) -                    self.assertTrue(tc['file'] in finished_hook_called) -                self.assertTrue(os.path.exists(tc['file'] + '.info.json')) +                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) +                    self.assertTrue(tc_filename in finished_hook_called) +                self.assertTrue(os.path.exists(tc_filename + '.info.json'))                  if 'md5' in tc: -                    md5_for_file = _file_md5(tc['file']) +                    md5_for_file = _file_md5(tc_filename)                      self.assertEqual(md5_for_file, tc['md5']) -                with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: +                with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:                      info_dict = json.load(infof)                  for (info_field, expected) in tc.get('info_dict', {}).items():                      if isinstance(expected, compat_str) and expected.startswith('md5:'): @@ -125,11 +148,11 @@ def generator(test_case):                  # Check for the presence of mandatory fields                  for key in ('id', 'url', 'title', 'ext'):                      self.assertTrue(key in info_dict.keys() and info_dict[key]) +                # Check for mandatory fields that are automatically set by YoutubeDL +                for key in ['webpage_url', 'extractor', 'extractor_key']: +                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)          finally: -            for tc in test_cases: -                try_rm(tc['file']) -                try_rm(tc['file'] + '.part') -                try_rm(tc['file'] + '.info.json') +            try_rm_tcs_files()      return test_template diff --git a/test/test_playlists.py b/test/test_playlists.py index d6a8d56df..de1e8d88e 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -20,6 +20,7 @@ from youtube_dl.extractor import (      SoundcloudUserIE,      LivestreamIE,      NHLVideocenterIE, +    BambuserChannelIE,  ) @@ -85,5 +86,13 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['title'], u'Highlights')          self.assertEqual(len(result['entries']), 12) +    def test_bambuser_channel(self): +        dl = FakeYDL() +        ie = BambuserChannelIE(dl) +        result = ie.extract('http://bambuser.com/channel/pixelversity') +        self.assertIsPlaylist(result) +        self.assertEqual(result['title'], u'pixelversity') +        self.assertTrue(len(result['entries']) >= 66) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 313295839..86a6fd043 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -272,7 +272,7 @@ class YoutubeDL(object):                  autonumber_size = 5              autonumber_templ = u'%0' + str(autonumber_size) + u'd'              template_dict['autonumber'] = autonumber_templ % self._num_downloads -            if template_dict['playlist_index'] is not None: +            if template_dict.get('playlist_index') is not None:                  template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']              sanitize = lambda k, v: sanitize_filename( @@ -318,6 +318,12 @@ class YoutubeDL(object):                      % info_dict)          return None +    @staticmethod +    def add_extra_info(info_dict, extra_info): +        '''Set the keys from extra_info in info dict if they are missing''' +        for key, value in extra_info.items(): +            info_dict.setdefault(key, value) +      def extract_info(self, url, download=True, ie_key=None, extra_info={}):          '''          Returns a list with a dictionary for each video we find. @@ -344,17 +350,17 @@ class YoutubeDL(object):                      break                  if isinstance(ie_result, list):                      # Backwards compatibility: old IE result format -                    for result in ie_result: -                        result.update(extra_info)                      ie_result = {                          '_type': 'compat_list',                          'entries': ie_result,                      } -                else: -                    ie_result.update(extra_info) -                if 'extractor' not in ie_result: -                    ie_result['extractor'] = ie.IE_NAME -                return self.process_ie_result(ie_result, download=download) +                self.add_extra_info(ie_result, +                    { +                        'extractor': ie.IE_NAME, +                        'webpage_url': url, +                        'extractor_key': ie.ie_key(), +                    }) +                return self.process_ie_result(ie_result, download, extra_info)              except ExtractorError as de: # An error we somewhat expected                  self.report_error(compat_str(de), de.format_traceback())                  break @@ -378,7 +384,7 @@ class YoutubeDL(object):          result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system          if result_type == 'video': -            ie_result.update(extra_info) +            self.add_extra_info(ie_result, extra_info)              return self.process_video_result(ie_result)          elif result_type == 'url':              # We have to add extra_info to the results because it may be @@ -388,6 +394,7 @@ class YoutubeDL(object):                                       ie_key=ie_result.get('ie_key'),                                       extra_info=extra_info)          elif result_type == 'playlist': +            self.add_extra_info(ie_result, extra_info)              # We process each entry in the playlist              playlist = ie_result.get('title', None) or ie_result.get('id', None)              self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -413,12 +420,10 @@ class YoutubeDL(object):                  extra = {                      'playlist': playlist,                      'playlist_index': i + playliststart, +                    'extractor': ie_result['extractor'], +                    'webpage_url': ie_result['webpage_url'], +                    'extractor_key': ie_result['extractor_key'],                  } -                if not 'extractor' in entry: -                    # We set the extractor, if it's an url it will be set then to -                    # the new extractor, but if it's already a video we must make -                    # sure it's present: see issue #877 -                    entry['extractor'] = ie_result['extractor']                  entry_result = self.process_ie_result(entry,                                                        download=download,                                                        extra_info=extra) @@ -427,10 +432,15 @@ class YoutubeDL(object):              return ie_result          elif result_type == 'compat_list':              def _fixup(r): -                r.setdefault('extractor', ie_result['extractor']) +                self.add_extra_info(r, +                    { +                        'extractor': ie_result['extractor'], +                        'webpage_url': ie_result['webpage_url'], +                        'extractor_key': ie_result['extractor_key'], +                    })                  return r              ie_result['entries'] = [ -                self.process_ie_result(_fixup(r), download=download) +                self.process_ie_result(_fixup(r), download, extra_info)                  for r in ie_result['entries']              ]              return ie_result @@ -482,7 +492,7 @@ class YoutubeDL(object):                  format['format'] = u'{id} - {res}{note}'.format(                      id=format['format_id'],                      res=self.format_resolution(format), -                    note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', +                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',                  )              # Automatically determine file extension if missing              if 'ext' not in format: @@ -759,6 +769,8 @@ class YoutubeDL(object):      @staticmethod      def format_resolution(format, default='unknown'): +        if format.get('_resolution') is not None: +            return format['_resolution']          if format.get('height') is not None:              if format.get('width') is not None:                  res = u'%sx%s' % (format['width'], format['height']) @@ -769,19 +781,23 @@ class YoutubeDL(object):          return res      def list_formats(self, info_dict): -        formats_s = [] -        for format in info_dict.get('formats', [info_dict]): -            formats_s.append(u'%-15s%-7s     %-15s%s' % ( +        def line(format): +            return (u'%-15s%-10s%-12s%s' % (                  format['format_id'],                  format['ext'], -                format.get('format_note', ''),                  self.format_resolution(format), +                format.get('format_note', ''),                  )              ) -        if len(formats_s) != 1: -            formats_s[0] += ' (worst)' -            formats_s[-1] += ' (best)' -        formats_s = "\n".join(formats_s) -        self.to_screen(u'[info] Available formats for %s:\n' -            u'format code    extension   note           resolution\n%s' % ( -                info_dict['id'], formats_s)) + +        formats = info_dict.get('formats', [info_dict]) +        formats_s = list(map(line, formats)) +        if len(formats) > 1: +            formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' +            formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' + +        header_line = line({ +            'format_id': u'format code', 'ext': u'extension', +            '_resolution': u'resolution', 'format_note': u'note'}) +        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % +                       (info_dict['id'], header_line, u"\n".join(formats_s))) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 045d4447a..33073a512 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -9,6 +9,7 @@ from .arte import (      ArteTVFutureIE,  )  from .auengine import AUEngineIE +from .bambuser import BambuserIE, BambuserChannelIE  from .bandcamp import BandcampIE  from .bliptv import BlipTVIE, BlipTVUserIE  from .bloomberg import BloombergIE @@ -39,6 +40,7 @@ from .ehow import EHowIE  from .eighttracks import EightTracksIE  from .escapist import EscapistIE  from .exfm import ExfmIE +from .extremetube import ExtremeTubeIE  from .facebook import FacebookIE  from .faz import FazIE  from .fktv import ( @@ -84,6 +86,7 @@ from .mixcloud import MixcloudIE  from .mofosex import MofosexIE  from .mtv import MTVIE  from .muzu import MuzuTVIE +from .myspace import MySpaceIE  from .myspass import MySpassIE  from .myvideo import MyVideoIE  from .naver import NaverIE @@ -142,6 +145,7 @@ from .videofyme import VideofyMeIE  from .videopremium import VideoPremiumIE  from .vimeo import VimeoIE, VimeoChannelIE  from .vine import VineIE +from .vk import VKIE  from .wat import WatIE  from .websurg import WeBSurgIE  from .weibo import WeiboIE @@ -150,6 +154,7 @@ from .worldstarhiphop import WorldStarHipHopIE  from .xhamster import XHamsterIE  from .xnxx import XNXXIE  from .xvideos import XVideosIE +from .xtube import XTubeIE  from .yahoo import YahooIE, YahooSearchIE  from .youjizz import YouJizzIE  from .youku import YoukuIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index d39b48951..e10c74c11 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -158,7 +158,9 @@ class ArteTVPlus7IE(InfoExtractor):              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),          } -        formats = player_info['VSR'].values() +        all_formats = player_info['VSR'].values() +        # Some formats use the m3u8 protocol +        all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))          def _match_lang(f):              if f.get('versionCode') is None:                  return True @@ -170,11 +172,16 @@ class ArteTVPlus7IE(InfoExtractor):              regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]              return any(re.match(r, f['versionCode']) for r in regexes)          # Some formats may not be in the same language as the url -        formats = filter(_match_lang, formats) -        # Some formats use the m3u8 protocol -        formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) -        # We order the formats by quality +        formats = filter(_match_lang, all_formats)          formats = list(formats) # in python3 filter returns an iterator +        if not formats: +            # Some videos are only available in the 'Originalversion' +            # they aren't tagged as being in French or German +            if all(f['versionCode'] == 'VO' for f in all_formats): +                formats = all_formats +            else: +                raise ExtractorError(u'The formats list is empty') +        # We order the formats by quality          if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:              sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])          else: diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py new file mode 100644 index 000000000..f3b36f473 --- /dev/null +++ b/youtube_dl/extractor/bambuser.py @@ -0,0 +1,80 @@ +import re +import json +import itertools + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_request, +) + + +class BambuserIE(InfoExtractor): +    IE_NAME = u'bambuser' +    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)' +    _API_KEY = '005f64509e19a868399060af746a00aa' + +    _TEST = { +        u'url': u'http://bambuser.com/v/4050584', +        u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', +        u'info_dict': { +            u'id': u'4050584', +            u'ext': u'flv', +            u'title': u'Education engineering days - lightning talks', +            u'duration': 3741, +            u'uploader': u'pixelversity', +            u'uploader_id': u'344706', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        info_url = ('http://player-c.api.bambuser.com/getVideo.json?' +            '&api_key=%s&vid=%s' % (self._API_KEY, video_id)) +        info_json = self._download_webpage(info_url, video_id) +        info = json.loads(info_json)['result'] + +        return { +            'id': video_id, +            'title': info['title'], +            'url': info['url'], +            'thumbnail': info.get('preview'), +            'duration': int(info['length']), +            'view_count': int(info['views_total']), +            'uploader': info['username'], +            'uploader_id': info['uid'], +        } + + +class BambuserChannelIE(InfoExtractor): +    IE_NAME = u'bambuser:channel' +    _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)' +    # The maximum number we can get with each request +    _STEP = 50 + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        user = mobj.group('user') +        urls = [] +        last_id = '' +        for i in itertools.count(1): +            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}' +                '&sort=created&access_mode=0%2C1%2C2&limit={count}' +                '&method=broadcast&format=json&vid_older_than={last}' +                ).format(user=user, count=self._STEP, last=last_id) +            req = compat_urllib_request.Request(req_url) +            # Without setting this header, we wouldn't get any result +            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) +            info_json = self._download_webpage(req, user, +                u'Downloading page %d' % i) +            results = json.loads(info_json)['result'] +            if len(results) == 0: +                break +            last_id = results[-1]['vid'] +            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) + +        return { +            '_type': 'playlist', +            'title': user, +            'entries': urls, +        } diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 1392f382a..0d9b87a34 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -23,7 +23,7 @@ class BrightcoveIE(InfoExtractor):              # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/              u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',              u'file': u'2371591881001.mp4', -            u'md5': u'9e80619e0a94663f0bdc849b4566af19', +            u'md5': u'8eccab865181d29ec2958f32a6a754f5',              u'note': u'Test Brightcove downloads and detection in GenericIE',              u'info_dict': {                  u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', @@ -122,12 +122,10 @@ class BrightcoveIE(InfoExtractor):              best_format = renditions[-1]              info.update({                  'url': best_format['defaultURL'], -                'ext': 'mp4',              })          elif video_info.get('FLVFullLengthURL') is not None:              info.update({                  'url': video_info['FLVFullLengthURL'], -                'ext': 'flv',              })          else:              raise ExtractorError(u'Unable to extract video url for %s' % info['id']) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ce349fe20..e0ccba533 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -63,7 +63,7 @@ class InfoExtractor(object):                      * ext       Will be calculated from url if missing                      * format    A human-readable description of the format                                  ("mp4 container with h264/opus"). -                                Calculated from the format_id, width, height  +                                Calculated from the format_id, width, height.                                  and format_note fields if missing.                      * format_id A short description of the format                                  ("mp4_h264_opus" or "19") @@ -71,6 +71,9 @@ class InfoExtractor(object):                                  ("3D" or "DASH video")                      * width     Width of the video, if known                      * height    Height of the video, if known +    webpage_url:    The url to the video webpage, if given to youtube-dl it +                    should allow to get the same result again. (It will be set +                    by YoutubeDL if it's missing)      Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4c0488245..355b4ed0a 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):          """Build a request with the family filter disabled"""          request = compat_urllib_request.Request(url)          request.add_header('Cookie', 'family_filter=off') +        request.add_header('Cookie', 'ff=off')          return request  class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): @@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):              },              u'skip': u'VEVO is only available in some countries',          }, +        # age-restricted video +        { +            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', +            u'file': u'xyh2zz.mp4', +            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d', +            u'info_dict': { +                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', +                u'uploader': 'HotWaves1012', +                u'age_limit': 18, +            } + +        }      ]      def _real_extract(self, url): @@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):          video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',                                               # Looking for official user                                               r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], -                                            webpage, 'video uploader') +                                            webpage, 'video uploader', fatal=False) +        age_limit = self._rta_search(webpage)          video_upload_date = None          mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) @@ -132,15 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):              self._list_available_subtitles(video_id)              return -        return [{ +        return {              'id':       video_id,              'formats': formats,              'uploader': video_uploader,              'upload_date':  video_upload_date,              'title':    self._og_search_title(webpage),              'subtitles':    video_subtitles, -            'thumbnail': info['thumbnail_url'] -        }] +            'thumbnail': info['thumbnail_url'], +            'age_limit': age_limit, +        }      def _get_available_subtitles(self, video_id):          try: diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index c74556579..a51d79b08 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -21,6 +21,7 @@ class ExfmIE(InfoExtractor):                  u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',              },              u'note': u'Soundcloud song', +            u'skip': u'The site is down too often',          },          {              u'url': u'http://ex.fm/song/wddt8', @@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):                  u'title': u'Safe and Sound',                  u'uploader': u'Capital Cities',              }, +            u'skip': u'The site is down too often',          },      ] diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py new file mode 100644 index 000000000..0f1eec40f --- /dev/null +++ b/youtube_dl/extractor/extremetube.py @@ -0,0 +1,50 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse_urlparse, +    compat_urllib_request, +    compat_urllib_parse, +) + +class ExtremeTubeIE(InfoExtractor): +    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' +    _TEST = { +        u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', +        u'file': u'652431.mp4', +        u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', +        u'info_dict': { +            u"title": u"Music Video 14 british euro brit european cumshots swallow", +            u"uploader": u"unknown", +            u"age_limit": 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('videoid') +        url = 'http://www.' + mobj.group('url') + +        req = compat_urllib_request.Request(url) +        req.add_header('Cookie', 'age_verified=1') +        webpage = self._download_webpage(req, video_id) + +        video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') +        uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) +        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) +        path = compat_urllib_parse_urlparse( video_url ).path +        extension = os.path.splitext( path )[1][1:] +        format = path.split('/')[5].split('_')[:2] +        format = "-".join( format ) + +        return { +            'id': video_id, +            'title': video_title, +            'uploader': uploader, +            'url': video_url, +            'ext': extension, +            'format': format, +            'format_id': format, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 5e05900da..786924445 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -12,7 +12,7 @@ from ..aes import (  )  class KeezMoviesIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))' +    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'      _TEST = {          u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',          u'file': u'1214711.mp4', diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index d04da98c8..4531fd6ab 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor):          if video_id is None:              # This is an event page: -            player = get_meta_content('twitter:player', webpage) -            if player is None: -                raise ExtractorError('Couldn\'t extract event api url') -            api_url = player.replace('/player', '') -            api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url) -            info = json.loads(self._download_webpage(api_url, event_name, -                                                     u'Downloading event info')) +            config_json = self._search_regex(r'window.config = ({.*?});', +                webpage, u'window config') +            info = json.loads(config_json)['event']              videos = [self._extract_video_info(video_data['data'])                  for video_data in info['feed']['data'] if video_data['type'] == u'video']              return self.playlist_result(videos, info['id'], info['full_name']) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 234b9e80f..91480ba87 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor):      _DISCLAIMER = 'http://www.metacafe.com/family_filter/'      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'      IE_NAME = u'metacafe' -    _TESTS = [{ +    _TESTS = [ +    # Youtube video +    {          u"add_ie": ["Youtube"],          u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",          u"file":  u"_aUehQsCQtM.mp4", @@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):              u"uploader_id": u"PBS"          }      }, +    # Normal metacafe video +    { +        u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/', +        u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad', +        u'info_dict': { +            u'id': u'11121940', +            u'ext': u'mp4', +            u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4', +            u'uploader': u'ign', +            u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.', +        }, +    }, +    # AnyClip video      {          u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",          u"file": u"an-dVVXnuY7Jh77J.mp4",          u"info_dict": {              u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",              u"uploader": u"anyclip", -            u"description": u"md5:38c711dd98f5bb87acf973d573442e67" -        } -    }] +            u"description": u"md5:38c711dd98f5bb87acf973d573442e67", +        }, +    }, +    # age-restricted video +    { +        u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/', +        u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09', +        u'info_dict': { +            u'id': u'5186653', +            u'ext': u'mp4', +            u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.', +            u'uploader': u'Dwayne Pipe', +            u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b', +            u'age_limit': 18, +        }, +    }, +    ]      def report_disclaimer(self): @@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):              'submit': "Continue - I'm over 18",              }          request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) +        request.add_header('Content-Type', 'application/x-www-form-urlencoded')          try:              self.report_age_confirmation()              compat_urllib_request.urlopen(request).read() @@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):          # Retrieve video webpage to extract further information          req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) -        req.headers['Cookie'] = 'flashVersion=0;' + +        # AnyClip videos require the flashversion cookie so that we get the link +        # to the mp4 file +        mobj_an = re.match(r'^an-(.*?)$', video_id) +        if mobj_an: +            req.headers['Cookie'] = 'flashVersion=0;'          webpage = self._download_webpage(req, video_id)          # Extract URL, uploader and title from webpage @@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):                  r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',                  webpage, u'uploader nickname', fatal=False) +        if re.search(r'"contentRating":"restricted"', webpage) is not None: +            age_limit = 18 +        else: +            age_limit = 0 +          return {              '_type':    'video',              'id':       video_id, @@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):              'upload_date':  None,              'title':    video_title,              'ext':      video_ext, +            'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e520e2bb4..e96d3952c 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -80,6 +80,8 @@ class MTVIE(InfoExtractor):          video_id = self._id_from_uri(uri)          self.report_extraction(video_id)          mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url'] +        # Remove the templates, like &device={device} +        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)          if 'acceptMethods' not in mediagen_url:              mediagen_url += '&acceptMethods=fms'          mediagen_page = self._download_webpage(mediagen_url, video_id, diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py new file mode 100644 index 000000000..050f54a5a --- /dev/null +++ b/youtube_dl/extractor/myspace.py @@ -0,0 +1,48 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( +    compat_str, +) + + +class MySpaceIE(InfoExtractor): +    _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)' + +    _TEST = { +        u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689', +        u'info_dict': { +            u'id': u'100008689', +            u'ext': u'flv', +            u'title': u'Viva La Vida', +            u'description': u'The official Viva La Vida video, directed by Hype Williams', +            u'uploader': u'Coldplay', +            u'uploader_id': u'coldplay', +        }, +        u'params': { +            # rtmp download +            u'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) +        context = json.loads(self._search_regex(r'context = ({.*?});', webpage, +            u'context')) +        video = context['video'] +        rtmp_url, play_path = video['streamUrl'].split(';', 1) + +        return { +            'id': compat_str(video['mediaId']), +            'title': video['title'], +            'url': rtmp_url, +            'play_path': play_path, +            'ext': 'flv', +            'description': video['description'], +            'thumbnail': video['imageUrl'], +            'uploader': video['artistName'], +            'uploader_id': video['artistUsername'], +        } diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 1c1cc418d..3f6020f74 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -5,7 +5,7 @@ import datetime  from .common import InfoExtractor  from ..utils import ( -    determine_ext, +    compat_HTTPError,      ExtractorError,  ) @@ -16,26 +16,22 @@ class VevoIE(InfoExtractor):      (currently used by MTVIE)      """      _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' -    _TEST = { +    _TESTS = [{          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',          u'file': u'GB1101300280.mp4', +        u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",          u'info_dict': {              u"upload_date": u"20130624",              u"uploader": u"Hurts",              u"title": u"Somebody to Die For", -            u'duration': 230, +            u"duration": 230, +            u"width": 1920, +            u"height": 1080,          } -    } +    }] +    _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id -        info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - -        self.report_extraction(video_id) -        video_info = json.loads(info_json)['video'] +    def _formats_from_json(self, video_info):          last_version = {'version': -1}          for version in video_info['videoVersions']:              # These are the HTTP downloads, other types are for different manifests @@ -50,17 +46,74 @@ class VevoIE(InfoExtractor):          # Already sorted from worst to best quality          for rend in renditions.findall('rendition'):              attr = rend.attrib -            f_url = attr['url'] +            format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr              formats.append({ -                'url': f_url, -                'ext': determine_ext(f_url), +                'url': attr['url'], +                'format_id': attr['name'], +                'format_note': format_note,                  'height': int(attr['frameheight']),                  'width': int(attr['frameWidth']),              }) +        return formats + +    def _formats_from_smil(self, smil_xml): +        formats = [] +        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) +        els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') +        for el in els: +            src = el.attrib['src'] +            m = re.match(r'''(?xi) +                (?P<ext>[a-z0-9]+): +                (?P<path> +                    [/a-z0-9]+     # The directory and main part of the URL +                    _(?P<cbr>[0-9]+)k +                    _(?P<width>[0-9]+)x(?P<height>[0-9]+) +                    _(?P<vcodec>[a-z0-9]+) +                    _(?P<vbr>[0-9]+) +                    _(?P<acodec>[a-z0-9]+) +                    _(?P<abr>[0-9]+) +                    \.[a-z0-9]+  # File extension +                )''', src) +            if not m: +                continue -        date_epoch = int(self._search_regex( -            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000 -        upload_date = datetime.datetime.fromtimestamp(date_epoch) +            format_url = self._SMIL_BASE_URL + m.group('path') +            format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' % +                           m.groupdict()) +            formats.append({ +                'url': format_url, +                'format_id': u'SMIL_' + m.group('cbr'), +                'format_note': format_note, +                'ext': m.group('ext'), +                'width': int(m.group('width')), +                'height': int(m.group('height')), +            }) +        return formats + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id +        info_json = self._download_webpage(json_url, video_id, u'Downloading json info') +        video_info = json.loads(info_json)['video'] + +        formats = self._formats_from_json(video_info) +        try: +            smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( +                self._SMIL_BASE_URL, video_id, video_id.lower()) +            smil_xml = self._download_webpage(smil_url, video_id, +                                              u'Downloading SMIL info') +            formats.extend(self._formats_from_smil(smil_xml)) +        except ExtractorError as ee: +            if not isinstance(ee.cause, compat_HTTPError): +                raise +            self._downloader.report_warning( +                u'Cannot download SMIL information, falling back to JSON ..') + +        timestamp_ms = int(self._search_regex( +            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) +        upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)          info = {              'id': video_id,              'title': video_info['title'], @@ -71,7 +124,4 @@ class VevoIE(InfoExtractor):              'duration': video_info['duration'],          } -        # TODO: Remove when #980 has been merged -        info.update(formats[-1]) -          return info diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b4dbcd2ee..62273fd33 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,14 +20,14 @@ class VimeoIE(InfoExtractor):      """Information extractor for vimeo.com."""      # _VALID_URL matches Vimeo URLs -    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' +    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'      _NETRC_MACHINE = 'vimeo'      IE_NAME = u'vimeo'      _TESTS = [          {              u'url': u'http://vimeo.com/56015672#at=0',              u'file': u'56015672.mp4', -            u'md5': u'ae7a1d8b183758a0506b0622f37dfa14', +            u'md5': u'8879b6cc097e987f02484baf890129e5',              u'info_dict': {                  u"upload_date": u"20121220",                   u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  @@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          video_id = mobj.group('id') -        if not mobj.group('proto'): -            url = 'https://' + url -        elif mobj.group('pro'): +        if mobj.group('pro') or mobj.group('player'):              url = 'http://player.vimeo.com/video/' + video_id -        elif mobj.group('direct_link'): +        else:              url = 'https://vimeo.com/' + video_id          # Retrieve video webpage to extract further information @@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):          if len(formats) == 0:              raise ExtractorError(u'No known codec found') -        return [{ +        return {              'id':       video_id,              'uploader': video_uploader,              'uploader_id': video_uploader_id, @@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):              'thumbnail':    video_thumbnail,              'description':  video_description,              'formats': formats, -        }] +            'webpage_url': url, +        }  class VimeoChannelIE(InfoExtractor): diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py new file mode 100644 index 000000000..90d8a6d07 --- /dev/null +++ b/youtube_dl/extractor/vk.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +import re +import json + +from .common import InfoExtractor +from ..utils import ( +    compat_str, +    unescapeHTML, +) + + +class VKIE(InfoExtractor): +    IE_NAME = u'vk.com' +    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' + +    _TEST = { +        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', +        u'md5': u'0deae91935c54e00003c2a00646315f0', +        u'info_dict': { +            u'id': u'162222515', +            u'ext': u'flv', +            u'title': u'ProtivoGunz - Хуёвая песня', +            u'uploader': u'Noize MC', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id +        info_page = self._download_webpage(info_url, video_id) +        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) +        if m_yt is not None: +            self.to_screen(u'Youtube video detected') +            return self.url_result(m_yt.group(1), 'Youtube') +        vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars') +        vars = json.loads(vars_json) + +        return { +            'id': compat_str(vars['vid']), +            'url': vars['url240'], +            'title': unescapeHTML(vars['md_title']), +            'thumbnail': vars['jpg'], +            'uploader': vars['md_author'], +        } diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py new file mode 100644 index 000000000..483fb0791 --- /dev/null +++ b/youtube_dl/extractor/xtube.py @@ -0,0 +1,55 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse_urlparse, +    compat_urllib_request, +    compat_urllib_parse, +) + +class XTubeIE(InfoExtractor): +    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' +    _TEST = { +        u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_', +        u'file': u'kVTUy_G222_.mp4', +        u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab', +        u'info_dict': { +            u"title": u"strange erotica", +            u"description": u"surreal gay themed erotica...almost an ET kind of thing", +            u"uploader": u"greenshowers", +            u"age_limit": 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('videoid') +        url = 'http://www.' + mobj.group('url') + +        req = compat_urllib_request.Request(url) +        req.add_header('Cookie', 'age_verified=1') +        webpage = self._download_webpage(req, video_id) + +        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title') +        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False) +        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None) +        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/') +        path = compat_urllib_parse_urlparse( video_url ).path +        extension = os.path.splitext( path )[1][1:] +        format = path.split('/')[5].split('_')[:2] +        format[0] += 'p' +        format[1] += 'k' +        format = "-".join( format ) + +        return { +            'id': video_id, +            'title': video_title, +            'uploader': video_uploader, +            'description': video_description, +            'url': video_url, +            'ext': extension, +            'format': format, +            'format_id': format, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d05d0a8c1..6ddd6ef06 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))              return False -        galx = None -        dsh = None -        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) -        if match: -          galx = match.group(1) -        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) -        if match: -          dsh = match.group(1) +        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', +                                  login_page, u'Login GALX parameter')          # Log in          login_form_strs = { @@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):                  u'checkConnection': u'',                  u'checkedDomains': u'youtube',                  u'dnConn': u'', -                u'dsh': dsh,                  u'pstMsg': u'0',                  u'rmShown': u'1',                  u'secTok': u'', @@ -347,18 +340,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              }          },          { -            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U", -            u"file":  u"1ltcDfZMA3U.mp4", -            u"note": u"Test VEVO video (#897)", -            u"info_dict": { -                u"upload_date": u"20070518", -                u"title": u"Maps - It Will Find You", -                u"description": u"Music video by Maps performing It Will Find You.", -                u"uploader": u"MuteUSA", -                u"uploader_id": u"MuteUSA" -            } -        }, -        {              u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",              u"file":  u"UxxajLWwzqY.mp4",              u"note": u"Test generic use_cipher_signature video (#897)", @@ -1118,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'lang': lang,                  'v': video_id,                  'fmt': self._downloader.params.get('subtitlesformat'), -                'name': l[0], +                'name': l[0].encode('utf-8'),              })              url = u'http://www.youtube.com/api/timedtext?' + params              sub_lang_list[lang] = url @@ -1504,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'subtitles':    video_subtitles,                  'duration':     video_duration,                  'age_limit':    18 if age_gate else 0, -                'annotations':  video_annotations +                'annotations':  video_annotations, +                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,              })          return results diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 048afc8e7..75a46a2d5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.28' +__version__ = '2013.11.02' | 
