diff options
30 files changed, 469 insertions, 344 deletions
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5203a0273..01de10e31 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  from test.helper import FakeYDL  from youtube_dl import YoutubeDL +from youtube_dl.extractor import YoutubeIE  class YDL(FakeYDL): @@ -33,6 +34,8 @@ class TestFormatSelection(unittest.TestCase):              {u'ext': u'mp4',  u'height': 460},          ]          info_dict = {u'formats': formats, u'extractor': u'test'} +        yie = YoutubeIE(ydl) +        yie._sort_formats(info_dict['formats'])          ydl.process_ie_result(info_dict)          downloaded = ydl.downloaded_info_dicts[0]          self.assertEqual(downloaded[u'ext'], u'webm') @@ -45,28 +48,46 @@ class TestFormatSelection(unittest.TestCase):              {u'ext': u'mp4', u'height': 1080},          ]          info_dict[u'formats'] = formats +        yie = YoutubeIE(ydl) +        yie._sort_formats(info_dict['formats'])          ydl.process_ie_result(info_dict)          downloaded = ydl.downloaded_info_dicts[0]          self.assertEqual(downloaded[u'ext'], u'mp4') -        # No prefer_free_formats => keep original formats order +        # No prefer_free_formats => prefer mp4 and flv for greater compatibilty          ydl = YDL()          ydl.params['prefer_free_formats'] = False          formats = [              {u'ext': u'webm', u'height': 720}, +            {u'ext': u'mp4', u'height': 720},              {u'ext': u'flv', u'height': 720},          ]          info_dict[u'formats'] = formats +        yie = YoutubeIE(ydl) +        yie._sort_formats(info_dict['formats']) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded[u'ext'], u'mp4') + +        ydl = YDL() +        ydl.params['prefer_free_formats'] = False +        formats = [ +            {u'ext': u'flv', u'height': 720}, +            {u'ext': u'webm', u'height': 720}, +        ] +        info_dict[u'formats'] = formats +        yie = YoutubeIE(ydl) +        yie._sort_formats(info_dict['formats'])          ydl.process_ie_result(info_dict)          downloaded = ydl.downloaded_info_dicts[0]          self.assertEqual(downloaded[u'ext'], u'flv')      def test_format_limit(self):          formats = [ -            {u'format_id': u'meh', u'url': u'http://example.com/meh'}, -            {u'format_id': u'good', u'url': u'http://example.com/good'}, -            {u'format_id': u'great', u'url': u'http://example.com/great'}, -            {u'format_id': u'excellent', u'url': u'http://example.com/exc'}, +            {u'format_id': u'meh', u'url': u'http://example.com/meh', 'preference': 1}, +            {u'format_id': u'good', u'url': u'http://example.com/good', 'preference': 2}, +            {u'format_id': u'great', u'url': u'http://example.com/great', 'preference': 3}, +            {u'format_id': u'excellent', u'url': u'http://example.com/exc', 'preference': 4},          ]          info_dict = {              u'formats': formats, u'extractor': u'test', 'id': 'testvid'} @@ -97,10 +118,10 @@ class TestFormatSelection(unittest.TestCase):      def test_format_selection(self):          formats = [ -            {u'format_id': u'35', u'ext': u'mp4'}, -            {u'format_id': u'45', u'ext': u'webm'}, -            {u'format_id': u'47', u'ext': u'webm'}, -            {u'format_id': u'2', u'ext': u'flv'}, +            {u'format_id': u'35', u'ext': u'mp4', 'preference': 1}, +            {u'format_id': u'45', u'ext': u'webm', 'preference': 2}, +            {u'format_id': u'47', u'ext': u'webm', 'preference': 3}, +            {u'format_id': u'2', u'ext': u'flv', 'preference': 4},          ]          info_dict = {u'formats': formats, u'extractor': u'test'} @@ -129,6 +150,42 @@ class TestFormatSelection(unittest.TestCase):          downloaded = ydl.downloaded_info_dicts[0]          self.assertEqual(downloaded['format_id'], u'35') +    def test_youtube_format_selection(self): +        order = [ +            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', +            # Apple HTTP Live Streaming +            '96', '95', '94', '93', '92', '132', '151', +            # 3D +            '85', '84', '102', '83', '101', '82', '100', +            # Dash video +            '138', '137', '248', '136', '247', '135', '246', +            '245', '244', '134', '243', '133', '242', '160', +            # Dash audio +            '141', '172', '140', '139', '171', +        ] + +        for f1id, f2id in zip(order, order[1:]): +            f1 = YoutubeIE._formats[f1id].copy() +            f1['format_id'] = f1id +            f2 = YoutubeIE._formats[f2id].copy() +            f2['format_id'] = f2id + +            info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} +            ydl = YDL() +            yie = YoutubeIE(ydl) +            yie._sort_formats(info_dict['formats']) +            ydl.process_ie_result(info_dict) +            downloaded = ydl.downloaded_info_dicts[0] +            self.assertEqual(downloaded['format_id'], f1id) + +            info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} +            ydl = YDL() +            yie = YoutubeIE(ydl) +            yie._sort_formats(info_dict['formats']) +            ydl.process_ie_result(info_dict) +            downloaded = ydl.downloaded_info_dicts[0] +            self.assertEqual(downloaded['format_id'], f1id) +      def test_add_extra_info(self):          test_dict = {              'extractor': 'Foo', diff --git a/test/test_playlists.py b/test/test_playlists.py index 81352de6b..9d522b357 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -29,7 +29,7 @@ from youtube_dl.extractor import (      SmotriCommunityIE,      SmotriUserIE,      IviCompilationIE, -    ImdbListIE +    ImdbListIE,  ) diff --git a/test/test_utils.py b/test/test_utils.py index e5778cd83..bee355ee0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -18,6 +18,7 @@ from youtube_dl.utils import (      find_xpath_attr,      get_meta_content,      orderedSet, +    parse_duration,      sanitize_filename,      shell_quote,      smuggle_url, @@ -192,5 +193,12 @@ class TestUtil(unittest.TestCase):              url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),              u'trailer.mp4') +    def test_parse_duration(self): +        self.assertEqual(parse_duration(None), None) +        self.assertEqual(parse_duration('1'), 1) +        self.assertEqual(parse_duration('1337:12'), 80232) +        self.assertEqual(parse_duration('9:12:43'), 33163) +        self.assertEqual(parse_duration('x:y'), None) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 41a9114ad..0a4dbbbd9 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -164,6 +164,8 @@ class YoutubeDL(object):      def __init__(self, params=None):          """Create a FileDownloader object with the given options.""" +        if params is None: +            params = {}          self._ies = []          self._ies_instances = {}          self._pps = [] @@ -172,7 +174,7 @@ class YoutubeDL(object):          self._num_downloads = 0          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]          self._err_file = sys.stderr -        self.params = {} if params is None else params +        self.params = params          if params.get('bidi_workaround', False):              try: @@ -676,17 +678,8 @@ class YoutubeDL(object):              formats = list(takewhile_inclusive(                  lambda f: f['format_id'] != format_limit, formats              )) -        if self.params.get('prefer_free_formats'): -            def _free_formats_key(f): -                try: -                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext']) -                except ValueError: -                    ext_ord = -1 -                # We only compare the extension if they have the same height and width -                return (f.get('height') if f.get('height') is not None else -1, -                        f.get('width') if f.get('width') is not None else -1, -                        ext_ord) -            formats = sorted(formats, key=_free_formats_key) + +        # TODO Central sorting goes here          if formats[0] is not info_dict:               # only set the 'formats' fields if the original info_dict list them @@ -1007,13 +1000,15 @@ class YoutubeDL(object):      def format_resolution(format, default='unknown'):          if format.get('vcodec') == 'none':              return 'audio only' -        if format.get('_resolution') is not None: -            return format['_resolution'] +        if format.get('resolution') is not None: +            return format['resolution']          if format.get('height') is not None:              if format.get('width') is not None:                  res = u'%sx%s' % (format['width'], format['height'])              else:                  res = u'%sp' % format['height'] +        elif format.get('width') is not None: +            res = u'?x%d' % format['width']          else:              res = default          return res @@ -1021,15 +1016,19 @@ class YoutubeDL(object):      def list_formats(self, info_dict):          def format_note(fdict):              res = u'' +            if fdict.get('ext') in ['f4f', 'f4m']: +                res += u'(unsupported) '              if fdict.get('format_note') is not None:                  res += fdict['format_note'] + u' ' +            if fdict.get('tbr') is not None: +                res += u'%4dk ' % fdict['tbr']              if (fdict.get('vcodec') is not None and                      fdict.get('vcodec') != 'none'): -                res += u'%-5s' % fdict['vcodec'] -            elif fdict.get('vbr') is not None: -                res += u'video' +                res += u'%-5s@' % fdict['vcodec'] +            elif fdict.get('vbr') is not None and fdict.get('abr') is not None: +                res += u'video@'              if fdict.get('vbr') is not None: -                res += u'@%4dk' % fdict['vbr'] +                res += u'%4dk' % fdict['vbr']              if fdict.get('acodec') is not None:                  if res:                      res += u', ' @@ -1064,7 +1063,7 @@ class YoutubeDL(object):          header_line = line({              'format_id': u'format code', 'ext': u'extension', -            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen) +            'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)          self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %                         (info_dict['id'], header_line, u"\n".join(formats_s))) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c37d28c59..657e3fd07 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -44,6 +44,7 @@ __license__ = 'Public Domain'  import codecs  import getpass +import locale  import optparse  import os  import random @@ -473,6 +474,8 @@ def parseOpts(overrideArguments=None):              write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')              write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')              write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') +            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' % +                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))      return parser, opts, args diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 14b88efd3..8407727ba 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -133,7 +133,7 @@ class HttpFD(FileDownloader):                      return False              try:                  stream.write(data_block) -            except (IOError, OSError): +            except (IOError, OSError) as err:                  self.to_stderr(u"\n")                  self.report_error(u'unable to write data: %s' % str(err))                  return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c96212760..5319a9292 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -28,6 +28,7 @@ from .channel9 import Channel9IE  from .cinemassacre import CinemassacreIE  from .clipfish import ClipfishIE  from .clipsyndicate import ClipsyndicateIE +from .cmt import CMTIE  from .cnn import CNNIE  from .collegehumor import CollegeHumorIE  from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index ef5644aa5..e7361ae06 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -110,7 +110,8 @@ class AppleTrailersIE(InfoExtractor):                      'width': format['width'],                      'height': int(format['height']),                  }) -            formats = sorted(formats, key=lambda f: (f['height'], f['width'])) + +            self._sort_formats(formats)              playlist.append({                  '_type': 'video', diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 3a32c14c5..15aee2786 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -10,14 +10,14 @@ from ..utils import (  class BandcampIE(InfoExtractor): -    IE_NAME = u'Bandcamp'      _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'      _TESTS = [{          u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',          u'file': u'1812978515.mp3', -        u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', +        u'md5': u'c557841d5e50261777a6585648adf439',          u'info_dict': { -            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" +            u"title": u"youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", +            u"duration": 10,          },          u'skip': u'There is a limit of 200 free downloads / month for the test song'      }] @@ -30,29 +30,42 @@ class BandcampIE(InfoExtractor):          m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)          if m_download is None:              m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) -        if m_trackinfo: -            json_code = m_trackinfo.group(1) -            data = json.loads(json_code) +            if m_trackinfo: +                json_code = m_trackinfo.group(1) +                data = json.loads(json_code) +                d = data[0] + +                duration = int(round(d['duration'])) +                formats = [] +                for format_id, format_url in d['file'].items(): +                    ext, _, abr_str = format_id.partition('-') + +                    formats.append({ +                        'format_id': format_id, +                        'url': format_url, +                        'ext': format_id.partition('-')[0], +                        'vcodec': 'none', +                        'acodec': format_id.partition('-')[0], +                        'abr': int(format_id.partition('-')[2]), +                    }) + +                self._sort_formats(formats) -            for d in data: -                formats = [{ -                    'format_id': 'format_id', -                    'url': format_url, -                    'ext': format_id.partition('-')[0] -                } for format_id, format_url in sorted(d['file'].items())]                  return {                      'id': compat_str(d['id']),                      'title': d['title'],                      'formats': formats, +                    'duration': duration,                  } -        else: -            raise ExtractorError(u'No free songs found') +            else: +                raise ExtractorError(u'No free songs found')          download_link = m_download.group(1) -        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',  -                       webpage, re.MULTILINE|re.DOTALL).group('id') +        video_id = re.search( +            r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', +            webpage, re.MULTILINE | re.DOTALL).group('id') -        download_webpage = self._download_webpage(download_link, id, +        download_webpage = self._download_webpage(download_link, video_id,                                                    'Downloading free downloads page')          # We get the dictionary of the track from some javascrip code          info = re.search(r'items: (.*?),$', @@ -66,21 +79,21 @@ class BandcampIE(InfoExtractor):          m_url = re.match(re_url, initial_url)          #We build the url we will use to get the final track url          # This url is build in Bandcamp in the script download_bunde_*.js -        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts')) +        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))          final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')          # If we could correctly generate the .rand field the url would be          #in the "download_url" key          final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) -        track_info = {'id':id, -                      'title' : info[u'title'], -                      'ext' :   'mp3', -                      'url' :   final_url, -                      'thumbnail' : info[u'thumb_url'], -                      'uploader' :  info[u'artist'] -                      } - -        return [track_info] +        return { +            'id': video_id, +            'title': info[u'title'], +            'ext': 'mp3', +            'vcodec': 'none', +            'url': final_url, +            'thumbnail': info[u'thumb_url'], +            'uploader': info[u'artist'], +        }  class BandcampAlbumIE(InfoExtractor): @@ -117,7 +130,7 @@ class BandcampAlbumIE(InfoExtractor):          webpage = self._download_webpage(url, title)          tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)          if not tracks_paths: -            raise ExtractorError(u'The page doesn\'t contain any track') +            raise ExtractorError(u'The page doesn\'t contain any tracks')          entries = [              self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())              for t_path in tracks_paths] diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 144ce64cc..0229840a3 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -61,9 +61,10 @@ class BlinkxIE(InfoExtractor):              elif m['type'] in ('flv', 'mp4'):                  vcodec = remove_start(m['vcodec'], 'ff')                  acodec = remove_start(m['acodec'], 'ff') +                tbr = (int(m['vbr']) + int(m['abr'])) // 1000                  format_id = (u'%s-%sk-%s' %                               (vcodec, -                              (int(m['vbr']) + int(m['abr'])) // 1000, +                              tbr,                                m['w']))                  formats.append({                      'format_id': format_id, @@ -72,10 +73,12 @@ class BlinkxIE(InfoExtractor):                      'acodec': acodec,                      'abr': int(m['abr']) // 1000,                      'vbr': int(m['vbr']) // 1000, +                    'tbr': tbr,                      'width': int(m['w']),                      'height': int(m['h']),                  }) -        formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr'])) + +        self._sort_formats(formats)          return {              'id': display_id, diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index ae70ea229..574881b70 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -76,14 +76,18 @@ class Channel9IE(InfoExtractor):              </div>)?                                                # File size part may be missing          '''          # Extract known formats -        formats = [{'url': x.group('url'), -                 'format_id': x.group('quality'), -                 'format_note': x.group('note'), -                 'format': '%s (%s)' % (x.group('quality'), x.group('note')),  -                 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate -                 } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] -        # Sort according to known formats list -        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) +        formats = [{ +            'url': x.group('url'), +            'format_id': x.group('quality'), +            'format_note': x.group('note'), +            'format': u'%s (%s)' % (x.group('quality'), x.group('note')), +            'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate +            'preference': self._known_formats.index(x.group('quality')), +            'vcodec': 'none' if x.group('note') == 'Audio only' else None, +        } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] + +        self._sort_formats(formats) +          return formats      def _extract_title(self, html): diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py new file mode 100644 index 000000000..88e0e9aba --- /dev/null +++ b/youtube_dl/extractor/cmt.py @@ -0,0 +1,19 @@ +from .mtv import MTVIE + +class CMTIE(MTVIE): +    IE_NAME = u'cmt.com' +    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml' +    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + +    _TESTS = [ +        { +            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', +            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', +            u'info_dict': { +                u'id': u'989124', +                u'ext': u'mp4', +                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', +                u'description': u'Blame It All On My Roots', +            }, +        }, +    ] diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index a034bb2fb..ecac5e0e9 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -1,7 +1,10 @@  import re  from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import ( +    int_or_none, +    parse_duration, +)  class CNNIE(InfoExtractor): @@ -15,6 +18,8 @@ class CNNIE(InfoExtractor):          u'info_dict': {              u'title': u'Nadal wins 8th French Open title',              u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', +            u'duration': 135, +            u'upload_date': u'20130609',          },      },      { @@ -35,22 +40,58 @@ class CNNIE(InfoExtractor):          info = self._download_xml(info_url, page_title)          formats = [] +        rex = re.compile(r'''(?x) +            (?P<width>[0-9]+)x(?P<height>[0-9]+) +            (?:_(?P<bitrate>[0-9]+)k)? +        ''')          for f in info.findall('files/file'): -            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate']) -            if mf is not None: -                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text)) -        formats = sorted(formats) -        (_,_,_, video_path) = formats[-1] -        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path +            video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) +            fdct = { +                'format_id': f.attrib['bitrate'], +                'url': video_url, +            } + +            mf = rex.match(f.attrib['bitrate']) +            if mf: +                fdct['width'] = int(mf.group('width')) +                fdct['height'] = int(mf.group('height')) +                fdct['tbr'] = int_or_none(mf.group('bitrate')) +            else: +                mf = rex.search(f.text) +                if mf: +                    fdct['width'] = int(mf.group('width')) +                    fdct['height'] = int(mf.group('height')) +                    fdct['tbr'] = int_or_none(mf.group('bitrate')) +                else: +                    mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) +                    if mi: +                        if mi.group(1) == 'audio': +                            fdct['vcodec'] = 'none' +                            fdct['ext'] = 'm4a' +                        else: +                            fdct['tbr'] = int(mi.group(1)) + +            formats.append(fdct) + +        self._sort_formats(formats)          thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])          thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] -        return {'id': info.attrib['id'], -                'title': info.find('headline').text, -                'url': video_url, -                'ext': determine_ext(video_url), -                'thumbnail': thumbnails[-1][1], -                'thumbnails': thumbs_dict, -                'description': info.find('description').text, -                } +        metas_el = info.find('metas') +        upload_date = ( +            metas_el.attrib.get('version') if metas_el is not None else None) + +        duration_el = info.find('length') +        duration = parse_duration(duration_el.text) + +        return { +            'id': info.attrib['id'], +            'title': info.find('headline').text, +            'formats': formats, +            'thumbnail': thumbnails[-1][1], +            'thumbnails': thumbs_dict, +            'description': info.find('description').text, +            'duration': duration, +            'upload_date': upload_date, +        } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0dd504444..f34d36cb0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -9,6 +9,7 @@ import xml.etree.ElementTree  from ..utils import (      compat_http_client,      compat_urllib_error, +    compat_urllib_parse_urlparse,      compat_str,      clean_html, @@ -37,10 +38,12 @@ class InfoExtractor(object):      id:             Video identifier.      title:          Video title, unescaped. -    Additionally, it must contain either a formats entry or url and ext: +    Additionally, it must contain either a formats entry or a url one: -    formats:        A list of dictionaries for each format available, it must -                    be ordered from worst to best quality. Potential fields: +    formats:        A list of dictionaries for each format available, ordered +                    from worst to best quality. + +                    Potential fields:                      * url        Mandatory. The URL of the video file                      * ext        Will be calculated from url if missing                      * format     A human-readable description of the format @@ -48,17 +51,27 @@ class InfoExtractor(object):                                   Calculated from the format_id, width, height.                                   and format_note fields if missing.                      * format_id  A short description of the format -                                 ("mp4_h264_opus" or "19") +                                 ("mp4_h264_opus" or "19"). +                                Technically optional, but strongly recommended.                      * format_note Additional info about the format                                   ("3D" or "DASH video")                      * width      Width of the video, if known                      * height     Height of the video, if known +                    * resolution Textual description of width and height +                    * tbr        Average bitrate of audio and video in KBit/s                      * abr        Average audio bitrate in KBit/s                      * acodec     Name of the audio codec in use                      * vbr        Average video bitrate in KBit/s                      * vcodec     Name of the video codec in use                      * filesize   The number of bytes, if known in advance                      * player_url SWF Player URL (used for rtmpdump). +                    * protocol   The protocol that will be used for the actual +                                 download, lower-case. +                                 "http", "https", "rtsp", "rtmp" or so. +                    * preference Order number of this format. If this field is +                                 present, the formats get sorted by this field. +                                 -1 for default (order by other properties), +                                 -2 or smaller for less than default.      url:            Final video URL.      ext:            Video filename extension.      format:         The video format, defaults to ext (used for --get-format) @@ -429,6 +442,56 @@ class InfoExtractor(object):          }          return RATING_TABLE.get(rating.lower(), None) +    def _sort_formats(self, formats): +        def _formats_key(f): +            # TODO remove the following workaround +            from ..utils import determine_ext +            if not f.get('ext') and 'url' in f: +                f['ext'] = determine_ext(f['url']) + +            preference = f.get('preference') +            if preference is None: +                proto = f.get('protocol') +                if proto is None: +                    proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme + +                preference = 0 if proto in ['http', 'https'] else -0.1 +                if f.get('ext') in ['f4f', 'f4m']:  # Not yet supported +                    preference -= 0.5 + +            if f.get('vcodec') == 'none':  # audio only +                if self._downloader.params.get('prefer_free_formats'): +                    ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] +                else: +                    ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] +                ext_preference = 0 +                try: +                    audio_ext_preference = ORDER.index(f['ext']) +                except ValueError: +                    audio_ext_preference = -1 +            else: +                if self._downloader.params.get('prefer_free_formats'): +                    ORDER = [u'flv', u'mp4', u'webm'] +                else: +                    ORDER = [u'webm', u'flv', u'mp4'] +                try: +                    ext_preference = ORDER.index(f['ext']) +                except ValueError: +                    ext_preference = -1 +                audio_ext_preference = 0 + +            return ( +                preference, +                f.get('height') if f.get('height') is not None else -1, +                f.get('width') if f.get('width') is not None else -1, +                ext_preference, +                f.get('vbr') if f.get('vbr') is not None else -1, +                f.get('abr') if f.get('abr') is not None else -1, +                audio_ext_preference, +                f.get('filesize') if f.get('filesize') is not None else -1, +                f.get('format_id'), +            ) +        formats.sort(key=_formats_key)  class SearchInfoExtractor(InfoExtractor): diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index cb7226f82..416e25156 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -4,7 +4,6 @@ import re  from .common import InfoExtractor  from ..utils import ( -    determine_ext,      unified_strdate,  ) @@ -52,18 +51,12 @@ class DreiSatIE(InfoExtractor):              'width': int(fe.find('./width').text),              'height': int(fe.find('./height').text),              'url': fe.find('./url').text, -            'ext': determine_ext(fe.find('./url').text),              'filesize': int(fe.find('./filesize').text),              'video_bitrate': int(fe.find('./videoBitrate').text), -            '3sat_qualityname': fe.find('./quality').text,          } for fe in format_els              if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] -        def _sortkey(format): -            qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname']) -            prefer_http = 1 if 'rtmp' in format['url'] else 0 -            return (qidx, prefer_http, format['video_bitrate']) -        formats.sort(key=_sortkey) +        self._sort_formats(formats)          return {              '_type': 'video', diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 16a6f73c8..4ddda2f1b 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -5,7 +5,6 @@ from ..utils import (      compat_urlparse,      compat_urllib_parse,      xpath_with_ns, -    determine_ext,  ) @@ -63,13 +62,17 @@ class InternetVideoArchiveIE(InfoExtractor):          for content in item.findall(_bp('media:group/media:content')):              attr = content.attrib              f_url = attr['url'] +            width = int(attr['width']) +            bitrate = int(attr['bitrate']) +            format_id = '%d-%dk' % (width, bitrate)              formats.append({ +                'format_id': format_id,                  'url': f_url, -                'ext': determine_ext(f_url), -                'width': int(attr['width']), -                'bitrate': int(attr['bitrate']), +                'width': width, +                'tbr': bitrate,              }) -        formats = sorted(formats, key=lambda f: f['bitrate']) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 4bdf55f93..98d1d272a 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -84,14 +84,16 @@ class IviIE(InfoExtractor):          result = video_json[u'result'] -        formats = [{'url': x[u'url'], -                    'format_id': x[u'content_format'] -                    } for x in result[u'files'] if x[u'content_format'] in self._known_formats] -        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) - -        if len(formats) == 0: -            self._downloader.report_warning(u'No media links available for %s' % video_id) -            return +        formats = [{ +            'url': x[u'url'], +            'format_id': x[u'content_format'], +            'preference': self._known_formats.index(x[u'content_format']), +        } for x in result[u'files'] if x[u'content_format'] in self._known_formats] + +        self._sort_formats(formats) + +        if not formats: +            raise ExtractorError(u'No media links available for %s' % video_id)          duration = result[u'duration']          compilation = result[u'compilation'] diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 08ce0647f..7aa0080d7 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -52,10 +52,11 @@ class MDRIE(InfoExtractor):                      'format_id': u'%s-%d' % (media_type, vbr),                  })              formats.append(format) -        formats.sort(key=lambda f: (f.get('vbr'), f['abr']))          if not formats:              raise ExtractorError(u'Could not find any valid formats') +        self._sort_formats(formats) +          return {              'id': video_id,              'title': title, diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 52be9232f..76b717fe5 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -33,8 +33,18 @@ class TechTVMITIE(InfoExtractor):              raw_page, u'base url')          formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,              u'video formats') -        formats = json.loads(formats_json) -        formats = sorted(formats, key=lambda f: f['bitrate']) +        formats_mit = json.loads(formats_json) +        formats = [ +            { +                'format_id': f['label'], +                'url': base_url + f['url'].partition(':')[2], +                'ext': f['url'].partition(':')[0], +                'format': f['label'], +                'width': f['width'], +                'vbr': f['bitrate'], +            } +            for f in formats_mit +        ]          title = get_element_by_id('edit-title', clean_page)          description = clean_html(get_element_by_id('edit-description', clean_page)) @@ -43,8 +53,7 @@ class TechTVMITIE(InfoExtractor):          return {'id': video_id,                  'title': title, -                'url': base_url + formats[-1]['url'].replace('mp4:', ''), -                'ext': 'mp4', +                'formats': formats,                  'description': description,                  'thumbnail': thumbnail,                  } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ed11f521a..f1cf41e2d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('videoid') -        uri = mobj.group('mgid') +        uri = mobj.groupdict().get('mgid')          if uri is None:              webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index beea58d63..99f5b19d2 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -138,7 +138,7 @@ class SmotriIE(InfoExtractor):          # Warning if video is unavailable          warning = self._html_search_regex(              r'<div class="videoUnModer">(.*?)</div>', video_page, -            u'warning messagef', default=None) +            u'warning message', default=None)          if warning is not None:              self._downloader.report_warning(                  u'Video %s may not be available; smotri said: %s ' % diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 695520524..051a34d5b 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -51,9 +51,10 @@ class SpiegelIE(InfoExtractor):              # Blacklist type 6, it's extremely LQ and not available on the same server              if n.tag.startswith('type') and n.tag != 'type6'          ] -        formats.sort(key=lambda f: f['vbr'])          duration = float(idoc[0].findall('./duration')[0].text) +        self._sort_formats(formats) +          info = {              'id': video_id,              'title': video_title, diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index cec65261b..23172143e 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -55,15 +55,21 @@ class ThePlatformIE(InfoExtractor):          formats = []          for f in switch.findall(_x('smil:video')):              attr = f.attrib +            width = int(attr['width']) +            height = int(attr['height']) +            vbr = int(attr['system-bitrate']) // 1000 +            format_id = '%dx%d_%dk' % (width, height, vbr)              formats.append({ +                'format_id': format_id,                  'url': base_url,                  'play_path': 'mp4:' + attr['src'],                  'ext': 'flv', -                'width': int(attr['width']), -                'height': int(attr['height']), -                'vbr': int(attr['system-bitrate']), +                'width': width, +                'height': height, +                'vbr': vbr,              }) -        formats.sort(key=lambda f: (f['height'], f['width'], f['vbr'])) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index e1748c261..584550455 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -45,7 +45,8 @@ class WistiaIE(InfoExtractor):                  'filesize': a['size'],                  'ext': a['ext'],              }) -        formats.sort(key=lambda a: a['filesize']) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 5c9c361b9..e17a39782 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -6,8 +6,8 @@ from .common import InfoExtractor, SearchInfoExtractor  from ..utils import (      compat_urllib_parse,      compat_urlparse, -    determine_ext,      clean_html, +    int_or_none,  ) @@ -68,9 +68,9 @@ class YahooIE(InfoExtractor):          formats = []          for s in info['streams']:              format_info = { -                'width': s.get('width'), -                'height': s.get('height'), -                'bitrate': s.get('bitrate'), +                'width': int_or_none(s.get('width')), +                'height': int_or_none(s.get('height')), +                'tbr': int_or_none(s.get('bitrate')),              }              host = s['host'] @@ -84,10 +84,10 @@ class YahooIE(InfoExtractor):              else:                  format_url = compat_urlparse.urljoin(host, path)                  format_info['url'] = format_url -                format_info['ext'] = determine_ext(format_url)              formats.append(format_info) -        formats = sorted(formats, key=lambda f:(f['height'], f['width'])) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index bd0f2cae0..77ad423c4 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -1,5 +1,4 @@  import json -import os  import re  import sys @@ -16,6 +15,7 @@ from ..aes import (      aes_decrypt_text  ) +  class YouPornIE(InfoExtractor):      _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'      _TEST = { @@ -23,9 +23,9 @@ class YouPornIE(InfoExtractor):          u'file': u'505835.mp4',          u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',          u'info_dict': { -            u"upload_date": u"20101221",  -            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",  -            u"uploader": u"Ask Dan And Jennifer",  +            u"upload_date": u"20101221", +            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", +            u"uploader": u"Ask Dan And Jennifer",              u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",              u"age_limit": 18,          } @@ -71,38 +71,36 @@ class YouPornIE(InfoExtractor):              link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')              links.append(link) -        if not links: -            raise ExtractorError(u'ERROR: no known formats available for video') -          formats = []          for link in links: -              # A link looks like this:              # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0              # A path looks like this:              # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4              video_url = unescapeHTML(link)              path = compat_urllib_parse_urlparse(video_url).path -            extension = os.path.splitext(path)[1][1:] -            format = path.split('/')[4].split('_')[:2] +            format_parts = path.split('/')[4].split('_')[:2] -            # size = format[0] -            # bitrate = format[1] -            format = "-".join(format) -            # title = u'%s-%s-%s' % (video_title, size, bitrate) +            dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0] + +            resolution = format_parts[0] +            height = int(resolution[:-len('p')]) +            bitrate = int(format_parts[1][:-len('k')]) +            format = u'-'.join(format_parts) + u'-' + dn              formats.append({                  'url': video_url, -                'ext': extension,                  'format': format,                  'format_id': format, +                'height': height, +                'tbr': bitrate, +                'resolution': resolution,              }) -        # Sort and remove doubles -        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) -        for i in range(len(formats)-1,0,-1): -            if formats[i]['format_id'] == formats[i-1]['format_id']: -                del formats[i] +        self._sort_formats(formats) + +        if not formats: +            raise ExtractorError(u'ERROR: no known formats available for video')          return {              'id': video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 55c345e8a..b0e29c2a8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -150,151 +150,68 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                       (?(1).+)?                                                # if we found the ID, everything can follow                       $"""      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' -    # Listed in order of quality -    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13', -                          # Apple HTTP Live Streaming -                          '96', '95', '94', '93', '92', '132', '151', -                          # 3D -                          '85', '84', '102', '83', '101', '82', '100', -                          # Dash video -                          '138', '137', '248', '136', '247', '135', '246', -                          '245', '244', '134', '243', '133', '242', '160', -                          # Dash audio -                          '141', '172', '140', '171', '139', -                          ] -    _video_extensions = { -        '13': '3gp', -        '17': '3gp', -        '18': 'mp4', -        '22': 'mp4', -        '36': '3gp', -        '37': 'mp4', -        '38': 'mp4', -        '43': 'webm', -        '44': 'webm', -        '45': 'webm', -        '46': 'webm', +    _formats = { +        '5': {'ext': 'flv', 'width': 400, 'height': 240}, +        '6': {'ext': 'flv', 'width': 450, 'height': 270}, +        '13': {'ext': '3gp'}, +        '17': {'ext': '3gp', 'width': 176, 'height': 144}, +        '18': {'ext': 'mp4', 'width': 640, 'height': 360}, +        '22': {'ext': 'mp4', 'width': 1280, 'height': 720}, +        '34': {'ext': 'flv', 'width': 640, 'height': 360}, +        '35': {'ext': 'flv', 'width': 854, 'height': 480}, +        '36': {'ext': '3gp', 'width': 320, 'height': 240}, +        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080}, +        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072}, +        '43': {'ext': 'webm', 'width': 640, 'height': 360}, +        '44': {'ext': 'webm', 'width': 854, 'height': 480}, +        '45': {'ext': 'webm', 'width': 1280, 'height': 720}, +        '46': {'ext': 'webm', 'width': 1920, 'height': 1080}, +          # 3d videos -        '82': 'mp4', -        '83': 'mp4', -        '84': 'mp4', -        '85': 'mp4', -        '100': 'webm', -        '101': 'webm', -        '102': 'webm', +        '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20}, +        '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20}, +        '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20}, +        '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20}, +        '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20}, +        '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20}, +        '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},          # Apple HTTP Live Streaming -        '92': 'mp4', -        '93': 'mp4', -        '94': 'mp4', -        '95': 'mp4', -        '96': 'mp4', -        '132': 'mp4', -        '151': 'mp4', - -        # Dash mp4 -        '133': 'mp4', -        '134': 'mp4', -        '135': 'mp4', -        '136': 'mp4', -        '137': 'mp4', -        '138': 'mp4', -        '160': 'mp4', +        '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10}, +        '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10}, +        '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10}, +        '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10}, +        '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10}, +        '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10}, +        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10}, + +        # DASH mp4 video +        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40}, +        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40}, +        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40}, +        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40}, +        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, +        '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, +        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},          # Dash mp4 audio -        '139': 'm4a', -        '140': 'm4a', -        '141': 'm4a', +        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, +        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50}, +        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},          # Dash webm -        '171': 'webm', -        '172': 'webm', -        '242': 'webm', -        '243': 'webm', -        '244': 'webm', -        '245': 'webm', -        '246': 'webm', -        '247': 'webm', -        '248': 'webm', -    } -    _video_dimensions = { -        '5': {'width': 400, 'height': 240}, -        '6': {}, -        '13': {}, -        '17': {'width': 176, 'height': 144}, -        '18': {'width': 640, 'height': 360}, -        '22': {'width': 1280, 'height': 720}, -        '34': {'width': 640, 'height': 360}, -        '35': {'width': 854, 'height': 480}, -        '36': {'width': 320, 'height': 240}, -        '37': {'width': 1920, 'height': 1080}, -        '38': {'width': 4096, 'height': 3072}, -        '43': {'width': 640, 'height': 360}, -        '44': {'width': 854, 'height': 480}, -        '45': {'width': 1280, 'height': 720}, -        '46': {'width': 1920, 'height': 1080}, -        '82': {'height': 360, 'display': '360p'}, -        '83': {'height': 480, 'display': '480p'}, -        '84': {'height': 720, 'display': '720p'}, -        '85': {'height': 1080, 'display': '1080p'}, -        '92': {'height': 240, 'display': '240p'}, -        '93': {'height': 360, 'display': '360p'}, -        '94': {'height': 480, 'display': '480p'}, -        '95': {'height': 720, 'display': '720p'}, -        '96': {'height': 1080, 'display': '1080p'}, -        '100': {'height': 360, 'display': '360p'}, -        '101': {'height': 480, 'display': '480p'}, -        '102': {'height': 720, 'display': '720p'}, -        '132': {'height': 240, 'display': '240p'}, -        '151': {'height': 72, 'display': '72p'}, -        '133': {'height': 240, 'display': '240p'}, -        '134': {'height': 360, 'display': '360p'}, -        '135': {'height': 480, 'display': '480p'}, -        '136': {'height': 720, 'display': '720p'}, -        '137': {'height': 1080, 'display': '1080p'}, -        '138': {'height': 1081, 'display': '>1080p'}, -        '139': {'display': '48k'}, -        '140': {'display': '128k'}, -        '141': {'display': '256k'}, -        '160': {'height': 192, 'display': '192p'}, -        '171': {'display': '128k'}, -        '172': {'display': '256k'}, -        '242': {'height': 240, 'display': '240p'}, -        '243': {'height': 360, 'display': '360p'}, -        '244': {'height': 480, 'display': '480p'}, -        '245': {'height': 480, 'display': '480p'}, -        '246': {'height': 480, 'display': '480p'}, -        '247': {'height': 720, 'display': '720p'}, -        '248': {'height': 1080, 'display': '1080p'}, -    } -    _special_itags = { -        '82': '3D', -        '83': '3D', -        '84': '3D', -        '85': '3D', -        '100': '3D', -        '101': '3D', -        '102': '3D', -        '133': 'DASH Video', -        '134': 'DASH Video', -        '135': 'DASH Video', -        '136': 'DASH Video', -        '137': 'DASH Video', -        '138': 'DASH Video', -        '139': 'DASH Audio', -        '140': 'DASH Audio', -        '141': 'DASH Audio', -        '160': 'DASH Video', -        '171': 'DASH Audio', -        '172': 'DASH Audio', -        '242': 'DASH Video', -        '243': 'DASH Video', -        '244': 'DASH Video', -        '245': 'DASH Video', -        '246': 'DASH Video', -        '247': 'DASH Video', -        '248': 'DASH Video', +        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40}, +        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40}, +        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, +        '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, +        '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, +        '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40}, +        '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40}, + +        # Dash webm audio +        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50}, +        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},      }      IE_NAME = u'youtube' @@ -1148,7 +1065,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          Transform a dictionary in the format {itag:url} to a list of (itag, url)          with the requested formats.          """ -        existing_formats = [x for x in self._available_formats if x in url_map] +        existing_formats = [x for x in self._formats if x in url_map]          if len(existing_formats) == 0:              raise ExtractorError(u'no known formats available for video')          video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats @@ -1410,39 +1327,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          formats = []          for itag, video_real_url in video_url_list: -            # Extension -            video_extension = self._video_extensions.get(itag, 'flv') -            resolution = self._video_dimensions.get(itag, {}).get('display') -            width = self._video_dimensions.get(itag, {}).get('width') -            height = self._video_dimensions.get(itag, {}).get('height') -            note = self._special_itags.get(itag) - -            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension, -                                              '%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'), -                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '') - -            formats.append({ -                'url':         video_real_url, -                'ext':         video_extension, -                'format':      video_format, -                'format_id':   itag, -                'player_url':  player_url, -                '_resolution': resolution, -                'width':       width, -                'height':      height, -                'format_note': note, -            }) +            dct = { +                'format_id': itag, +                'url': video_real_url, +                'player_url': player_url, +            } +            dct.update(self._formats[itag]) +            formats.append(dct) -        def _formats_key(f): -            note = f.get('format_note') -            if note is None: -                note = u'' -            is_dash = u'DASH' in note -            return ( -                0 if is_dash else 1, -                f.get('height') if f.get('height') is not None else -1, -                f.get('width') if f.get('width') is not None else -1) -        formats.sort(key=_formats_key) +        self._sort_formats(formats)          return {              'id':           video_id, diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 35ece354a..829f002cf 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -1,10 +1,10 @@  # coding: utf-8 -import operator  import re  from .common import InfoExtractor  from ..utils import ( +    int_or_none,      unified_strdate,  ) @@ -67,29 +67,13 @@ class ZDFIE(InfoExtractor):              ''', format_id)              ext = format_m.group('container') -            is_supported = ext != 'f4f' - -            PROTO_ORDER = ['http', 'rtmp', 'rtsp'] -            try: -                proto_pref = -PROTO_ORDER.index(format_m.group('proto')) -            except ValueError: -                proto_pref = -999 +            proto = format_m.group('proto').lower()              quality = fnode.find('./quality').text -            QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low'] -            try: -                quality_pref = -QUALITY_ORDER.index(quality) -            except ValueError: -                quality_pref = -999 -              abr = int(fnode.find('./audioBitrate').text) // 1000              vbr = int(fnode.find('./videoBitrate').text) // 1000 -            pref = (is_available, is_supported, -                    proto_pref, quality_pref, vbr, abr)              format_note = u'' -            if not is_supported: -                format_note += u'(unsupported)'              if not format_note:                  format_note = None @@ -101,18 +85,20 @@ class ZDFIE(InfoExtractor):                  'vcodec': format_m.group('vcodec'),                  'abr': abr,                  'vbr': vbr, -                'width': int(fnode.find('./width').text), -                'height': int(fnode.find('./height').text), -                'filesize': int(fnode.find('./filesize').text), +                'width': int_or_none(fnode.find('./width').text), +                'height': int_or_none(fnode.find('./height').text), +                'filesize': int_or_none(fnode.find('./filesize').text),                  'format_note': format_note, -                '_pref': pref, +                'protocol': proto,                  '_available': is_available,              }          format_nodes = doc.findall('.//formitaeten/formitaet') -        formats = sorted(filter(lambda f: f['_available'], -                                map(xml_to_format, format_nodes)), -                         key=operator.itemgetter('_pref')) +        formats = list(filter( +            lambda f: f['_available'], +            map(xml_to_format, format_nodes))) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2e48f187e..83a274043 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1098,3 +1098,23 @@ def url_basename(url):  class HEADRequest(compat_urllib_request.Request):      def get_method(self):          return "HEAD" + + +def int_or_none(v): +    return v if v is None else int(v) + + +def parse_duration(s): +    if s is None: +        return None + +    m = re.match( +        r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s) +    if not m: +        return None +    res = int(m.group('secs')) +    if m.group('mins'): +        res += int(m.group('mins')) * 60 +        if m.group('hours'): +            res += int(m.group('hours')) * 60 * 60 +    return res diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b3d015634..332913b31 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.23.4' +__version__ = '2013.12.26'  | 
