diff options
Diffstat (limited to 'youtube_dl')
40 files changed, 931 insertions, 190 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b5ef5e009..ce4b72fd3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1701,10 +1701,10 @@ class YoutubeDL(object):              out = out.decode().strip()              if re.match('[0-9a-f]+', out):                  self._write_string('[debug] Git HEAD: ' + out + '\n') -        except: +        except Exception:              try:                  sys.exc_clear() -            except: +            except Exception:                  pass          self._write_string('[debug] Python version %s - %s\n' % (              platform.python_version(), platform_name())) @@ -1768,6 +1768,14 @@ class YoutubeDL(object):          debuglevel = 1 if self.params.get('debug_printtraffic') else 0          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) +        # The ssl context is only available in python 2.7.9 and 3.x +        if hasattr(https_handler, '_context'): +            ctx = https_handler._context +            # get_ca_certs is unavailable prior to python 3.4 +            if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0: +                self.report_warning( +                    'No ssl certificates were loaded, urls that use https ' +                    'won\'t work')          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)          opener = compat_urllib_request.build_opener(              proxy_handler, https_handler, cookie_processor, ydlh) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 852b2fc3d..1c8b411b7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,10 +189,6 @@ def _real_main(argv=None):      if opts.allsubtitles and not opts.writeautomaticsub:          opts.writesubtitles = True -    if sys.version_info < (3,): -        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) -        if opts.outtmpl is not None: -            opts.outtmpl = opts.outtmpl.decode(preferredencoding())      outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or                 (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or                 (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b2bf149ef..973bcd320 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -389,7 +389,7 @@ else:                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)              out, err = sp.communicate()              lines, columns = map(int, out.split()) -        except: +        except Exception:              pass          return _terminal_size(columns, lines) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8ed5c19a6..a0fc5ead0 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -204,7 +204,7 @@ class FileDownloader(object):              return          try:              os.utime(filename, (time.time(), filetime)) -        except: +        except Exception:              pass          return filetime @@ -318,7 +318,7 @@ class FileDownloader(object):          )          continuedl_and_exists = ( -            self.params.get('continuedl', False) and +            self.params.get('continuedl', True) and              os.path.isfile(encodeFilename(filename)) and              not self.params.get('nopart', False)          ) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 4047d7167..d136bebd1 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -49,7 +49,7 @@ class HttpFD(FileDownloader):          open_mode = 'wb'          if resume_len != 0: -            if self.params.get('continuedl', False): +            if self.params.get('continuedl', True):                  self.report_resuming_byte(resume_len)                  request.add_header('Range', 'bytes=%d-' % resume_len)                  open_mode = 'ab' diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 89e98ae61..ddf5724ae 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):          protocol = info_dict.get('rtmp_protocol', None)          real_time = info_dict.get('rtmp_real_time', False)          no_resume = info_dict.get('no_resume', False) -        continue_dl = info_dict.get('continuedl', False) +        continue_dl = info_dict.get('continuedl', True)          self.report_destination(filename)          tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index df4a7419a..d7e8138be 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -106,6 +106,7 @@ from .dbtv import DBTVIE  from .dctp import DctpTvIE  from .deezer import DeezerPlaylistIE  from .dfb import DFBIE +from .dhm import DHMIE  from .dotsub import DotsubIE  from .douyutv import DouyuTVIE  from .dreisat import DreiSatIE @@ -114,6 +115,7 @@ from .drtuber import DrTuberIE  from .drtv import DRTVIE  from .dvtv import DVTVIE  from .dump import DumpIE +from .dumpert import DumpertIE  from .defense import DefenseGouvFrIE  from .discovery import DiscoveryIE  from .divxstage import DivxStageIE @@ -274,6 +276,7 @@ from .metacritic import MetacriticIE  from .mgoon import MgoonIE  from .minhateca import MinhatecaIE  from .ministrygrid import MinistryGridIE +from .miomio import MioMioIE  from .mit import TechTVMITIE, MITIE, OCWMITIE  from .mitele import MiTeleIE  from .mixcloud import MixcloudIE @@ -309,6 +312,8 @@ from .nba import NBAIE  from .nbc import (      NBCIE,      NBCNewsIE, +    NBCSportsIE, +    NBCSportsVPlayerIE,  )  from .ndr import NDRIE  from .ndtv import NDTVIE @@ -421,6 +426,10 @@ from .rutube import (  )  from .rutv import RUTVIE  from .sandia import SandiaIE +from .safari import ( +    SafariIE, +    SafariCourseIE, +)  from .sapo import SapoIE  from .savefrom import SaveFromIE  from .sbs import SBSIE @@ -553,6 +562,7 @@ from .varzesh3 import Varzesh3IE  from .vbox7 import Vbox7IE  from .veehd import VeeHDIE  from .veoh import VeohIE +from .vessel import VesselIE  from .vesti import VestiIE  from .vevo import VevoIE  from .vgtv import VGTVIE diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 4a88ccd13..0dca29b71 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -6,32 +6,39 @@ from .common import InfoExtractor  class BloombergIE(InfoExtractor): -    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html' +    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'      _TEST = { -        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', +        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',          # The md5 checksum changes          'info_dict': {              'id': 'qurhIVlJSB6hzkVi229d8g',              'ext': 'flv',              'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', -            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', +            'description': 'md5:a8ba0302912d03d246979735c17d2761',          },      }      def _real_extract(self, url):          name = self._match_id(url)          webpage = self._download_webpage(url, name) - -        f4m_url = self._search_regex( -            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage, -            'f4m url') +        video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')          title = re.sub(': Video$', '', self._og_search_title(webpage)) +        embed_info = self._download_json( +            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) +        formats = [] +        for stream in embed_info['streams']: +            if stream["muxing_format"] == "TS": +                formats.extend(self._extract_m3u8_formats(stream['url'], video_id)) +            else: +                formats.extend(self._extract_f4m_formats(stream['url'], video_id)) +        self._sort_formats(formats) +          return { -            'id': name.split('-')[-1], +            'id': video_id,              'title': title, -            'formats': self._extract_f4m_formats(f4m_url, name), +            'formats': formats,              'description': self._og_search_description(webpage),              'thumbnail': self._og_search_thumbnail(webpage),          } diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 90ea07438..0a77e951c 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import (  class CNNIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ -        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))''' +        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))'''      _TESTS = [{          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', @@ -45,6 +45,9 @@ class CNNIE(InfoExtractor):              'description': 'md5:e7223a503315c9f150acac52e76de086',              'upload_date': '20141222',          } +    }, { +        'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4f67c3aac..47d58330b 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):      def _build_request(url):          """Build a request with the family filter disabled"""          request = compat_urllib_request.Request(url) -        request.add_header('Cookie', 'family_filter=off') -        request.add_header('Cookie', 'ff=off') +        request.add_header('Cookie', 'family_filter=off; ff=off')          return request @@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):              video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)          embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id -        embed_page = self._download_webpage(embed_url, video_id, -                                            'Downloading embed page') +        embed_request = self._build_request(embed_url) +        embed_page = self._download_webpage( +            embed_request, video_id, 'Downloading embed page')          info = self._search_regex(r'var info = ({.*?}),$', embed_page,                                    'video info', flags=re.MULTILINE)          info = json.loads(info) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py new file mode 100644 index 000000000..3ed1f1663 --- /dev/null +++ b/youtube_dl/extractor/dhm.py @@ -0,0 +1,73 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    xpath_text, +    parse_duration, +) + + +class DHMIE(InfoExtractor): +    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' +    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)' + +    _TESTS = [{ +        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', +        'md5': '11c475f670209bf6acca0b2b7ef51827', +        'info_dict': { +            'id': 'the-marshallplan-at-work-in-west-germany', +            'ext': 'flv', +            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', +            'description': 'md5:1fabd480c153f97b07add61c44407c82', +            'duration': 660, +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +    }, { +        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/', +        'md5': '09890226332476a3e3f6f2cb74734aa5', +        'info_dict': { +            'id': 'rolle-1', +            'ext': 'flv', +            'title': 'ROLLE 1', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        playlist_url = self._search_regex( +            r"file\s*:\s*'([^']+)'", webpage, 'playlist url') + +        playlist = self._download_xml(playlist_url, video_id) + +        track = playlist.find( +            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track') + +        video_url = xpath_text( +            track, './{http://xspf.org/ns/0/}location', +            'video url', fatal=True) +        thumbnail = xpath_text( +            track, './{http://xspf.org/ns/0/}image', +            'thumbnail') + +        title = self._search_regex( +            [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'], +            webpage, 'title').strip() +        description = self._html_search_regex( +            r'<p><strong>Description:</strong>(.+?)</p>', +            webpage, 'description', default=None) +        duration = parse_duration(self._search_regex( +            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)', +            webpage, 'duration', default=None)) + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'description': description, +            'duration': duration, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index d7956e6e4..479430c51 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,19 +1,23 @@  # coding: utf-8  from __future__ import unicode_literals +import hashlib +import time  from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import (ExtractorError, unescapeHTML) +from ..compat import (compat_str, compat_basestring)  class DouyuTVIE(InfoExtractor):      _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' -    _TEST = { +    _TESTS = [{          'url': 'http://www.douyutv.com/iseven',          'info_dict': { -            'id': 'iseven', +            'id': '17732', +            'display_id': 'iseven',              'ext': 'flv',              'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', -            'description': 'md5:9e525642c25a0a24302869937cf69d17', +            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',              'thumbnail': 're:^https?://.*\.jpg$',              'uploader': '7师傅',              'uploader_id': '431925', @@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor):          'params': {              'skip_download': True,          } -    } +    }, { +        'url': 'http://www.douyutv.com/85982', +        'info_dict': { +            'id': '85982', +            'display_id': '85982', +            'ext': 'flv', +            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +            'description': 'md5:746a2f7a253966a06755a912f0acc0d2', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'douyu小漠', +            'uploader_id': '3769985', +            'is_live': True, +        }, +        'params': { +            'skip_download': True, +        } +    }]      def _real_extract(self, url):          video_id = self._match_id(url) +        if video_id.isdigit(): +            room_id = video_id +        else: +            page = self._download_webpage(url, video_id) +            room_id = self._html_search_regex( +                r'"room_id"\s*:\s*(\d+),', page, 'room id') + +        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( +            room_id, int(time.time())) + +        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()          config = self._download_json( -            'http://www.douyutv.com/api/client/room/%s' % video_id, video_id) +            'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), +            video_id)          data = config['data']          error_code = config.get('error', 0) -        show_status = data.get('show_status')          if error_code is not 0: -            raise ExtractorError( -                'Server reported error %i' % error_code, expected=True) +            error_desc = 'Server reported error %i' % error_code +            if isinstance(data, (compat_str, compat_basestring)): +                error_desc += ': ' + data +            raise ExtractorError(error_desc, expected=True) +        show_status = data.get('show_status')          # 1 = live, 2 = offline          if show_status == '2':              raise ExtractorError( @@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor):          base_url = data['rtmp_url']          live_path = data['rtmp_live'] -        title = self._live_title(data['room_name']) +        title = self._live_title(unescapeHTML(data['room_name']))          description = data.get('show_details')          thumbnail = data.get('room_src') @@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor):          self._sort_formats(formats)          return { -            'id': video_id, +            'id': room_id, +            'display_id': video_id,              'title': title,              'description': description,              'thumbnail': thumbnail, diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py new file mode 100644 index 000000000..e43bc81b2 --- /dev/null +++ b/youtube_dl/extractor/dumpert.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor +from ..utils import qualities + + +class DumpertIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)' +    _TEST = { +        'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', +        'md5': '1b9318d7d5054e7dcb9dc7654f21d643', +        'info_dict': { +            'id': '6646981/951bc60f', +            'ext': 'mp4', +            'title': 'Ik heb nieuws voor je', +            'description': 'Niet schrikken hoor', +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        files_base64 = self._search_regex( +            r'data-files="([^"]+)"', webpage, 'data files') + +        files = self._parse_json( +            base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), +            video_id) + +        quality = qualities(['flv', 'mobile', 'tablet', '720p']) + +        formats = [{ +            'url': video_url, +            'format_id': format_id, +            'quality': quality(format_id), +        } for format_id, video_url in files.items() if format_id != 'still'] +        self._sort_formats(formats) + +        title = self._html_search_meta( +            'title', webpage) or self._og_search_title(webpage) +        description = self._html_search_meta( +            'description', webpage) or self._og_search_description(webpage) +        thumbnail = files.get('still') or self._og_search_thumbnail(webpage) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'formats': formats +        } diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 79e2fbd39..0cbca90b0 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -1,11 +1,17 @@  from __future__ import unicode_literals +import re +  from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ExtractorError  class EroProfileIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)' -    _TEST = { +    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?' +    _NETRC_MACHINE = 'eroprofile' +    _TESTS = [{          'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',          'md5': 'c26f351332edf23e1ea28ce9ec9de32f',          'info_dict': { @@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):              'thumbnail': 're:https?://.*\.jpg',              'age_limit': 18,          } -    } +    }, { +        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', +        'md5': '1baa9602ede46ce904c431f5418d8916', +        'info_dict': { +            'id': '1133519', +            'ext': 'm4v', +            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file', +            'thumbnail': 're:https?://.*\.jpg', +            'age_limit': 18, +        }, +        'skip': 'Requires login', +    }] + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return + +        query = compat_urllib_parse.urlencode({ +            'username': username, +            'password': password, +            'url': 'http://www.eroprofile.com/', +        }) +        login_url = self._LOGIN_URL + query +        login_page = self._download_webpage(login_url, None, False) + +        m = re.search(r'Your username or password was incorrect\.', login_page) +        if m: +            raise ExtractorError( +                'Wrong username and/or password.', expected=True) + +        self.report_login() +        redirect_url = self._search_regex( +            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url') +        self._download_webpage(redirect_url, None, False) + +    def _real_initialize(self): +        self._login()      def _real_extract(self, url):          display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) +        m = re.search(r'You must be logged in to view this video\.', webpage) +        if m: +            raise ExtractorError( +                'This video requires login. Please specify a username and password and try again.', expected=True) +          video_id = self._search_regex(              [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],              webpage, 'video id', default=None) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8a49b0b54..2ff002643 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -29,6 +29,7 @@ from ..utils import (      xpath_text,  )  from .brightcove import BrightcoveIE +from .nbc import NBCSportsVPlayerIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE  from .smotri import SmotriIE @@ -620,6 +621,16 @@ class GenericIE(InfoExtractor):                  'age_limit': 0,              },          }, +        # 5min embed +        { +            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/', +            'md5': '4c6f127a30736b59b3e2c19234ee2bf7', +            'info_dict': { +                'id': '518726732', +                'ext': 'mp4', +                'title': 'Facebook Creates "On This Day" | Crunch Report', +            }, +        },          # RSS feed with enclosure          {              'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', @@ -629,6 +640,16 @@ class GenericIE(InfoExtractor):                  'upload_date': '20150228',                  'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',              } +        }, +        # NBC Sports vplayer embed +        { +            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a', +            'info_dict': { +                'id': 'ln7x1qSThw4k', +                'ext': 'flv', +                'title': "PFT Live: New leader in the 'new-look' defense", +                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', +            },          }      ] @@ -1236,6 +1257,17 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'Pladform') +        # Look for 5min embeds +        mobj = re.search( +            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage) +        if mobj is not None: +            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') + +        # Look for NBC Sports VPlayer embeds +        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) +        if nbc_sports_url: +            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') +          def check_video(vurl):              if YoutubeIE.suitable(vurl):                  return True diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py new file mode 100644 index 000000000..cc3f27194 --- /dev/null +++ b/youtube_dl/extractor/miomio.py @@ -0,0 +1,93 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random + +from .common import InfoExtractor +from ..utils import ( +    xpath_text, +    int_or_none, +) + + +class MioMioIE(InfoExtractor): +    IE_NAME = 'miomio.tv' +    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)' +    _TESTS = [{ +        'url': 'http://www.miomio.tv/watch/cc179734/', +        'md5': '48de02137d0739c15b440a224ad364b9', +        'info_dict': { +            'id': '179734', +            'ext': 'flv', +            'title': '手绘动漫鬼泣但丁全程画法', +            'duration': 354, +        }, +    }, { +        'url': 'http://www.miomio.tv/watch/cc184024/', +        'info_dict': { +            'id': '43729', +            'title': '《动漫同人插画绘制》', +        }, +        'playlist_mincount': 86, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_meta( +            'description', webpage, 'title', fatal=True) + +        mioplayer_path = self._search_regex( +            r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') + +        xml_config = self._search_regex( +            r'flashvars="type=sina&(.+?)&', +            webpage, 'xml config') + +        # skipping the following page causes lags and eventually connection drop-outs +        self._request_webpage( +            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), +            video_id) + +        # the following xml contains the actual configuration information on the video file(s) +        vid_config = self._download_xml( +            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), +            video_id) + +        http_headers = { +            'Referer': 'http://www.miomio.tv%s' % mioplayer_path, +        } + +        entries = [] +        for f in vid_config.findall('./durl'): +            segment_url = xpath_text(f, 'url', 'video url') +            if not segment_url: +                continue +            order = xpath_text(f, 'order', 'order') +            segment_id = video_id +            segment_title = title +            if order: +                segment_id += '-%s' % order +                segment_title += ' part %s' % order +            entries.append({ +                'id': segment_id, +                'url': segment_url, +                'title': segment_title, +                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000), +                'http_headers': http_headers, +            }) + +        if len(entries) == 1: +            segment = entries[0] +            segment['id'] = video_id +            segment['title'] = title +            return segment + +        return { +            '_type': 'multi_video', +            'id': video_id, +            'entries': entries, +            'title': title, +            'http_headers': http_headers, +        } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 21aea0c55..84f291558 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):              r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)          description = self._og_search_description(webpage)          like_count = str_to_int(self._search_regex( -            r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"', +            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',              webpage, 'like count', fatal=False))          view_count = str_to_int(self._search_regex(              [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3645d3033..ecd0ac8b1 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -14,7 +14,7 @@ from ..utils import (  class NBCIE(InfoExtractor): -    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' +    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'      _TESTS = [          { @@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):          return self.url_result(theplatform_url) +class NBCSportsVPlayerIE(InfoExtractor): +    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' + +    _TESTS = [{ +        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', +        'info_dict': { +            'id': '9CsDKds0kvHI', +            'ext': 'flv', +            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', +            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', +        } +    }, { +        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', +        'only_matching': True, +    }] + +    @staticmethod +    def _extract_url(webpage): +        iframe_m = re.search( +            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) +        if iframe_m: +            return iframe_m.group('url') + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        theplatform_url = self._og_search_video_url(webpage) +        return self.url_result(theplatform_url, 'ThePlatform') + + +class NBCSportsIE(InfoExtractor): +    # Does not include https becuase its certificate is invalid +    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + +    _TEST = { +        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', +        'info_dict': { +            'id': 'PHJSaFWbrTY9', +            'ext': 'flv', +            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke', +            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        return self.url_result( +            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') + +  class NBCNewsIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/          (?:video/.+?/(?P<id>\d+)| diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py index a20672c0c..46cebc0d7 100644 --- a/youtube_dl/extractor/phoenix.py +++ b/youtube_dl/extractor/phoenix.py @@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url  class PhoenixIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)' -    _TEST = { -        'url': 'http://www.phoenix.de/content/884301', -        'md5': 'ed249f045256150c92e72dbb70eadec6', -        'info_dict': { -            'id': '884301', -            'ext': 'mp4', -            'title': 'Michael Krons mit Hans-Werner Sinn', -            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', -            'upload_date': '20141025', -            'uploader': 'Im Dialog', -        } -    } +    _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ +        (?: +            phoenix/die_sendungen/(?:[^/]+/)? +        )? +        (?P<id>[0-9]+)''' +    _TESTS = [ +        { +            'url': 'http://www.phoenix.de/content/884301', +            'md5': 'ed249f045256150c92e72dbb70eadec6', +            'info_dict': { +                'id': '884301', +                'ext': 'mp4', +                'title': 'Michael Krons mit Hans-Werner Sinn', +                'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', +                'upload_date': '20141025', +                'uploader': 'Im Dialog', +            } +        }, +        { +            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815', +            'only_matching': True, +        }, +        { +            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234', +            'only_matching': True, +        }, +    ]      def _real_extract(self, url):          video_id = self._match_id(url) diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index 9576aed0e..e766ccca3 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -4,85 +4,72 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..compat import ( -    compat_urllib_parse, -    compat_urllib_request, -) +from ..compat import compat_str  from ..utils import (      ExtractorError, -    float_or_none,      int_or_none, -    str_to_int, +    parse_iso8601,  )  class PlayFMIE(InfoExtractor):      IE_NAME = 'play.fm' -    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])' +    _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'      _TEST = { -        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220', +        'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',          'md5': 'c505f8307825a245d0c7ad1850001f22',          'info_dict': { -            'id': '137220', +            'id': '71276',              'ext': 'mp3', -            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', -            'uploader': 'Sven Tasnadi', -            'uploader_id': 'sventasnadi', -            'duration': 5627.428, -            'upload_date': '20140712', +            'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', +            'description': '', +            'duration': 5627, +            'timestamp': 1406033781, +            'upload_date': '20140722', +            'uploader': 'Dan Drastic', +            'uploader_id': '71170',              'view_count': int,              'comment_count': int, -            'thumbnail': 're:^https?://.*\.jpg$',          },      }      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        upload_date = mobj.group('upload_date') - -        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id}) -        req = compat_urllib_request.Request( -            'http://www.play.fm/flexRead/recording', data=rec_data) -        req.add_header('Content-Type', 'application/x-www-form-urlencoded') -        rec_doc = self._download_xml(req, video_id) +        slug = mobj.group('slug') -        error_node = rec_doc.find('./error') -        if error_node is not None: -            raise ExtractorError('An error occured: %s (code %s)' % ( -                error_node.text, rec_doc.find('./status').text)) +        recordings = self._download_json( +            'http://v2api.play.fm/recordings/slug/%s' % slug, video_id) -        recording = rec_doc.find('./recording') -        title = recording.find('./title').text -        view_count = str_to_int(recording.find('./stats/playcount').text) -        comment_count = str_to_int(recording.find('./stats/comments').text) -        duration = float_or_none(recording.find('./duration').text, scale=1000) -        thumbnail = recording.find('./image').text +        error = recordings.get('error') +        if isinstance(error, dict): +            raise ExtractorError( +                '%s returned error: %s' % (self.IE_NAME, error.get('message')), +                expected=True) -        artist = recording.find('./artists/artist') -        uploader = artist.find('./name').text -        uploader_id = artist.find('./slug').text - -        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % ( -            'http:', recording.find('./url').text, -            recording.find('./_class').text, recording.find('./file_id').text, -            rec_doc.find('./uuid').text, video_id, -            rec_doc.find('./jingle/file_id').text, -            'http%3A%2F%2Fwww.play.fm%2Fplayer', -        ) +        audio_url = recordings['audio'] +        video_id = compat_str(recordings.get('id') or video_id) +        title = recordings['title'] +        description = recordings.get('description') +        duration = int_or_none(recordings.get('recordingDuration')) +        timestamp = parse_iso8601(recordings.get('created_at')) +        uploader = recordings.get('page', {}).get('title') +        uploader_id = compat_str(recordings.get('page', {}).get('id')) +        view_count = int_or_none(recordings.get('playCount')) +        comment_count = int_or_none(recordings.get('commentCount')) +        categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]          return {              'id': video_id, -            'url': video_url, -            'ext': 'mp3', -            'filesize': int_or_none(recording.find('./size').text), +            'url': audio_url,              'title': title, -            'upload_date': upload_date, -            'view_count': view_count, -            'comment_count': comment_count, +            'description': description,              'duration': duration, -            'thumbnail': thumbnail, +            'timestamp': timestamp,              'uploader': uploader,              'uploader_id': uploader_id, +            'view_count': view_count, +            'comment_count': comment_count, +            'categories': categories,          } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3a27e3789..0c8b731cf 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):      }      def _extract_count(self, pattern, webpage, name): -        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False) -        if count: -            count = str_to_int(count) -        return count +        return str_to_int(self._search_regex( +            pattern, webpage, '%s count' % name, fatal=False))      def _real_extract(self, url):          video_id = self._match_id(url) @@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):          if thumbnail:              thumbnail = compat_urllib_parse.unquote(thumbnail) -        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') -        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') -        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') +        view_count = self._extract_count( +            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') +        like_count = self._extract_count( +            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') +        dislike_count = self._extract_count( +            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')          comment_count = self._extract_count( -            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment') +            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')          video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))          if webpage.find('"encrypted":true') != -1: diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 385681d06..7cc799664 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -10,6 +10,7 @@ from ..compat import (  )  from ..utils import (      unified_strdate, +    int_or_none,  ) @@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):              'info_dict': {                  'id': '2104602',                  'ext': 'mp4', -                'title': 'Staffel 2, Episode 18 - Jahresrückblick', +                'title': 'Episode 18 - Staffel 2',                  'description': 'md5:8733c81b702ea472e069bc48bb658fc1',                  'upload_date': '20131231',                  'duration': 5845.04, @@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):              urls_sources = urls_sources.values()          def fix_bitrate(bitrate): +            bitrate = int_or_none(bitrate) +            if not bitrate: +                return None              return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate          for source in urls_sources: diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py new file mode 100644 index 000000000..10251f29e --- /dev/null +++ b/youtube_dl/extractor/safari.py @@ -0,0 +1,157 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveIE + +from ..compat import ( +    compat_urllib_parse, +    compat_urllib_request, +) +from ..utils import ( +    ExtractorError, +    smuggle_url, +    std_headers, +) + + +class SafariBaseIE(InfoExtractor): +    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' +    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>' +    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com' +    _NETRC_MACHINE = 'safari' + +    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' +    _API_FORMAT = 'json' + +    LOGGED_IN = False + +    def _real_initialize(self): +        # We only need to log in once for courses or individual videos +        if not self.LOGGED_IN: +            self._login() +            SafariBaseIE.LOGGED_IN = True + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            raise ExtractorError( +                self._ACCOUNT_CREDENTIALS_HINT, +                expected=True) + +        headers = std_headers +        if 'Referer' not in headers: +            headers['Referer'] = self._LOGIN_URL + +        login_page = self._download_webpage( +            self._LOGIN_URL, None, +            'Downloading login form') + +        csrf = self._html_search_regex( +            r"name='csrfmiddlewaretoken'\s+value='([^']+)'", +            login_page, 'csrf token') + +        login_form = { +            'csrfmiddlewaretoken': csrf, +            'email': username, +            'password1': password, +            'login': 'Sign In', +            'next': '', +        } + +        request = compat_urllib_request.Request( +            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers) +        login_page = self._download_webpage( +            request, None, 'Logging in as %s' % username) + +        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: +            raise ExtractorError( +                'Login failed; make sure your credentials are correct and try again.', +                expected=True) + +        self.to_screen('Login successful') + + +class SafariIE(SafariBaseIE): +    IE_NAME = 'safari' +    IE_DESC = 'safaribooksonline.com online video' +    _VALID_URL = r'''(?x)https?:// +                            (?:www\.)?safaribooksonline\.com/ +                                (?: +                                    library/view/[^/]+| +                                    api/v1/book +                                )/ +                                (?P<course_id>\d+)/ +                                    (?:chapter(?:-content)?/)? +                                (?P<part>part\d+)\.html +    ''' + +    _TESTS = [{ +        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', +        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', +        'info_dict': { +            'id': '2842601850001', +            'ext': 'mp4', +            'title': 'Introduction', +        }, +        'skip': 'Requires safaribooksonline account credentials', +    }, { +        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        course_id = mobj.group('course_id') +        part = mobj.group('part') + +        webpage = self._download_webpage( +            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part), +            part) + +        bc_url = BrightcoveIE._extract_brightcove_url(webpage) +        if not bc_url: +            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) + +        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove') + + +class SafariCourseIE(SafariBaseIE): +    IE_NAME = 'safari:course' +    IE_DESC = 'safaribooksonline.com online courses' + +    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)' + +    _TESTS = [{ +        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', +        'info_dict': { +            'id': '9780133392838', +            'title': 'Hadoop Fundamentals LiveLessons', +        }, +        'playlist_count': 22, +        'skip': 'Requires safaribooksonline account credentials', +    }, { +        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        course_id = self._match_id(url) + +        course_json = self._download_json( +            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), +            course_id, 'Downloading course JSON') + +        if 'chapters' not in course_json: +            raise ExtractorError( +                'No chapters found for course %s' % course_id, expected=True) + +        entries = [ +            self.url_result(chapter, 'Safari') +            for chapter in course_json['chapters']] + +        course_title = course_json['title'] + +        return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 9f79ff5c1..0b717a1e4 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):          page_title = mobj.group('title')          webpage = self._download_webpage(url, page_title)          slideshare_obj = self._search_regex( -            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=', +            r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',              webpage, 'slideshare object')          info = json.loads(slideshare_obj)          if info['slideshow']['type'] != 'video': diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 9d4505972..316b2c90f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -242,7 +242,7 @@ class SoundcloudIE(InfoExtractor):  class SoundcloudSetIE(SoundcloudIE): -    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' +    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'      IE_NAME = 'soundcloud:set'      _TESTS = [{          'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', @@ -287,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE):  class SoundcloudUserIE(SoundcloudIE): -    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$' +    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'      IE_NAME = 'soundcloud:user'      _TESTS = [{          'url': 'https://soundcloud.com/the-concept-band', diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 7cb06f351..a46a7ecba 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -54,7 +54,7 @@ class TeamcocoIE(InfoExtractor):              embed_url, video_id, 'Downloading embed page')          player_data = self._parse_json(self._search_regex( -            r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id) +            r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id)          data = self._parse_json(              base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index feac666f7..0e3e627f4 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -92,7 +92,7 @@ class ThePlatformIE(InfoExtractor):              error_msg = next(                  n.attrib['abstract']                  for n in meta.findall(_x('.//smil:ref')) -                if n.attrib.get('title') == 'Geographic Restriction') +                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')          except StopIteration:              pass          else: diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 06554a1be..96c809eaf 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -42,7 +42,6 @@ class UltimediaIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) -          webpage = self._download_webpage(url, video_id)          deliver_url = self._search_regex( @@ -81,8 +80,8 @@ class UltimediaIE(InfoExtractor):          title = clean_html((              self._html_search_regex(                  r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>', -                webpage, 'title', default=None) -            or self._search_regex( +                webpage, 'title', default=None) or +            self._search_regex(                  r"var\s+nameVideo\s*=\s*'([^']+)'",                  deliver_page, 'title'))) diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py index eb49586cc..9369abaf8 100644 --- a/youtube_dl/extractor/varzesh3.py +++ b/youtube_dl/extractor/varzesh3.py @@ -1,48 +1,45 @@  # coding: utf-8  from __future__ import unicode_literals +  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -) -import re  class Varzesh3IE(InfoExtractor): -    _VALID_URL = r'(?P<url>(https?://(?:www\.)?video\.varzesh3\.com)/(?P<id>.+))' -    _TEST ={ +    _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?' +    _TEST = {          'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',          'md5': '2a933874cb7dce4366075281eb49e855',          'info_dict': { -            'url': 'http://dl1.video.varzesh3.com/video/clip94/1/video/namayeshi/saves_week26.mp4',              'id': '76337',              'ext': 'mp4',              'title': '۵ واکنش برتر دروازهبانان؛هفته ۲۶ بوندسلیگا', -            'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg',              'description': 'فصل ۲۰۱۵-۲۰۱۴', +            'thumbnail': 're:^https?://.*\.jpg$',          }      }      def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        video_url = self._search_regex( +            r'<source[^>]+src="([^"]+)"', webpage, 'video url') -        if not 'shortlink' in webpage: -            raise ExtractorError('URL has no videos or there is a problem.') +        title = self._og_search_title(webpage) +        description = self._html_search_regex( +            r'(?s)<div class="matn">(.+?)</div>', +            webpage, 'description', fatal=False) +        thumbnail = self._og_search_thumbnail(webpage) -        title = self._html_search_regex(r'meta[^>]+property="og:title"[^>]+content="([^"]+)"', webpage, 'title') -        video_link = self._html_search_regex(r'source[^>]+src="([^"]+)"', webpage, 'video_link') -        vid_id = self._html_search_regex(r"link[^>]+rel='canonical'[^>]+href='\/\?p=([^']+)'\/>", webpage, 'vid_id') -        try: -            description = self._html_search_regex(r'<div class="matn">(.*?)</div>', webpage, 'description', flags=re.DOTALL) -        except: -            description = title -        thumbnail = self._html_search_regex(r'link[^>]+rel="image_src"[^>]+href="([^"]+)"', webpage, 'thumbnail') +        video_id = self._search_regex( +            r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'", +            webpage, display_id, default=display_id)          return { -            'url': video_link, -            'id': vid_id, +            'url': video_url, +            'id': video_id,              'title': title, -            'ext': video_link.split(".")[-1],              'description': description,              'thumbnail': thumbnail,          } diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py new file mode 100644 index 000000000..6215f0642 --- /dev/null +++ b/youtube_dl/extractor/vessel.py @@ -0,0 +1,127 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..compat import compat_urllib_request +from ..utils import ( +    ExtractorError, +    parse_iso8601, +) + + +class VesselIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)' +    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' +    _LOGIN_URL = 'https://www.vessel.com/api/account/login' +    _NETRC_MACHINE = 'vessel' +    _TEST = { +        'url': 'https://www.vessel.com/videos/HDN7G5UMs', +        'md5': '455cdf8beb71c6dd797fd2f3818d05c4', +        'info_dict': { +            'id': 'HDN7G5UMs', +            'ext': 'mp4', +            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', +            'thumbnail': 're:^https?://.*\.jpg$', +            'upload_date': '20150317', +            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', +            'timestamp': int, +        }, +    } + +    @staticmethod +    def make_json_request(url, data): +        payload = json.dumps(data).encode('utf-8') +        req = compat_urllib_request.Request(url, payload) +        req.add_header('Content-Type', 'application/json; charset=utf-8') +        return req + +    @staticmethod +    def find_assets(data, asset_type): +        for asset in data.get('assets', []): +            if asset.get('type') == asset_type: +                yield asset + +    def _check_access_rights(self, data): +        access_info = data.get('__view', {}) +        if not access_info.get('allow_access', True): +            err_code = access_info.get('error_code') or '' +            if err_code == 'ITEM_PAID_ONLY': +                raise ExtractorError( +                    'This video requires subscription.', expected=True) +            else: +                raise ExtractorError( +                    'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True) + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return +        self.report_login() +        data = { +            'client_id': 'web', +            'type': 'password', +            'user_key': username, +            'password': password, +        } +        login_request = VesselIE.make_json_request(self._LOGIN_URL, data) +        self._download_webpage(login_request, None, False, 'Wrong login info') + +    def _real_initialize(self): +        self._login() + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) +        data = self._parse_json(self._search_regex( +            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id) +        asset_id = data['model']['data']['id'] + +        req = VesselIE.make_json_request( +            self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) +        data = self._download_json(req, video_id) + +        self._check_access_rights(data) + +        try: +            video_asset = next(VesselIE.find_assets(data, 'video')) +        except StopIteration: +            raise ExtractorError('No video assets found') + +        formats = [] +        for f in video_asset.get('sources', []): +            if f['name'] == 'hls-index': +                formats.extend(self._extract_m3u8_formats( +                    f['location'], video_id, ext='mp4', m3u8_id='m3u8')) +            else: +                formats.append({ +                    'format_id': f['name'], +                    'tbr': f.get('bitrate'), +                    'height': f.get('height'), +                    'width': f.get('width'), +                    'url': f['location'], +                }) +        self._sort_formats(formats) + +        thumbnails = [] +        for im_asset in VesselIE.find_assets(data, 'image'): +            thumbnails.append({ +                'url': im_asset['location'], +                'width': im_asset.get('width', 0), +                'height': im_asset.get('height', 0), +            }) + +        return { +            'id': video_id, +            'title': data['title'], +            'formats': formats, +            'thumbnails': thumbnails, +            'description': data.get('short_description'), +            'duration': data.get('duration'), +            'comment_count': data.get('comment_count'), +            'like_count': data.get('like_count'), +            'view_count': data.get('view_count'), +            'timestamp': parse_iso8601(data.get('released_at')), +        } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index bd09652cd..28bcc89cd 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -244,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor):          # and latter we extract those that are Vimeo specific.          self.report_extraction(video_id) +        vimeo_config = self._search_regex( +            r'vimeo\.config\s*=\s*({.+?});', webpage, +            'vimeo config', default=None) +        if vimeo_config: +            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) +            if seed_status.get('state') == 'failed': +                raise ExtractorError( +                    '%s returned error: %s' % (self.IE_NAME, seed_status['title']), +                    expected=True) +          # Extract the config JSON          try:              try: diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 4971965f9..81d885fdc 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):          'only_matching': True,      }] +    @staticmethod +    def base64_decode_utf8(data): +        return base64.b64decode(data.encode('utf-8')).decode('utf-8') + +    @staticmethod +    def base64_encode_utf8(data): +        return base64.b64encode(data.encode('utf-8')).decode('utf-8') +      def _extract_flv_config(self, media_id): -        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8') +        base64_media_id = self.base64_encode_utf8(media_id)          flv_config = self._download_xml(              'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,              'flv config')          prop_dict = {}          for prop in flv_config.findall('./property'): -            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8') +            prop_id = self.base64_decode_utf8(prop.attrib['id'])              # CDATA may be empty in flv config              if not prop.text:                  continue -            encoded_content = base64.b64decode(prop.text).decode('utf-8') +            encoded_content = self.base64_decode_utf8(prop.text)              prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)          return prop_dict diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 97dbac4cc..b777159c5 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,6 +17,8 @@ from ..utils import (      int_or_none,  ) +from .nbc import NBCSportsVPlayerIE +  class YahooIE(InfoExtractor):      IE_DESC = 'Yahoo screen and movies' @@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):          }, {              'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',              'only_matching': True, +        }, { +            'note': 'NBC Sports embeds', +            'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', +            'info_dict': { +                'id': '9CsDKds0kvHI', +                'ext': 'flv', +                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', +                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', +            }          }      ] @@ -151,6 +162,10 @@ class YahooIE(InfoExtractor):                  items = json.loads(items_json)                  video_id = items[0]['id']                  return self._get_info(video_id, display_id, webpage) +        # Look for NBCSports iframes +        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) +        if nbc_sports_url: +            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')          items_json = self._search_regex(              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index e4c855ee0..6abe72f73 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -52,7 +52,7 @@ class YouPornIE(InfoExtractor):              webpage, 'JSON parameters')          try:              params = json.loads(json_params) -        except: +        except ValueError:              raise ExtractorError('Invalid JSON')          self.report_extraction(video_id) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 27c8c4453..5488101e1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1263,27 +1263,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          return self.playlist_result(url_results, playlist_id, title) -    def _real_extract(self, url): -        # Extract playlist id -        mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError('Invalid URL: %s' % url) -        playlist_id = mobj.group(1) or mobj.group(2) - -        # Check if it's a video-specific URL -        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) -        if 'v' in query_dict: -            video_id = query_dict['v'][0] -            if self._downloader.params.get('noplaylist'): -                self.to_screen('Downloading just video %s because of --no-playlist' % video_id) -                return self.url_result(video_id, 'Youtube', video_id=video_id) -            else: -                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) - -        if playlist_id.startswith('RD') or playlist_id.startswith('UL'): -            # Mixes require a custom extraction process -            return self._extract_mix(playlist_id) - +    def _extract_playlist(self, playlist_id):          url = self._TEMPLATE_URL % playlist_id          page = self._download_webpage(url, playlist_id)          more_widget_html = content_html = page @@ -1327,6 +1307,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          url_results = self._ids_to_results(ids)          return self.playlist_result(url_results, playlist_id, playlist_title) +    def _real_extract(self, url): +        # Extract playlist id +        mobj = re.match(self._VALID_URL, url) +        if mobj is None: +            raise ExtractorError('Invalid URL: %s' % url) +        playlist_id = mobj.group(1) or mobj.group(2) + +        # Check if it's a video-specific URL +        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +        if 'v' in query_dict: +            video_id = query_dict['v'][0] +            if self._downloader.params.get('noplaylist'): +                self.to_screen('Downloading just video %s because of --no-playlist' % video_id) +                return self.url_result(video_id, 'Youtube', video_id=video_id) +            else: +                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + +        if playlist_id.startswith('RD') or playlist_id.startswith('UL'): +            # Mixes require a custom extraction process +            return self._extract_mix(playlist_id) + +        return self._extract_playlist(playlist_id) +  class YoutubeChannelIE(InfoExtractor):      IE_DESC = 'YouTube.com channels' @@ -1643,21 +1646,26 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):  class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): +    IE_NAME = 'youtube:recommended'      IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'      _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'      _FEED_NAME = 'recommended'      _PLAYLIST_TITLE = 'Youtube Recommended videos' -class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): +class YoutubeWatchLaterIE(YoutubePlaylistIE): +    IE_NAME = 'youtube:watchlater'      IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' -    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' -    _FEED_NAME = 'watch_later' -    _PLAYLIST_TITLE = 'Youtube Watch Later' -    _PERSONAL_FEED = True +    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' + +    _TESTS = []  # override PlaylistIE tests + +    def _real_extract(self, url): +        return self._extract_playlist('WL')  class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): +    IE_NAME = 'youtube:history'      IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'      _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'      _FEED_NAME = 'history' diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 35c7e5fb3..8e80e3759 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -13,6 +13,7 @@ from .compat import (      compat_kwargs,  )  from .utils import ( +    preferredencoding,      write_string,  )  from .version import __version__ @@ -797,7 +798,7 @@ def parseOpts(overrideArguments=None):          # Workaround for Python 2.x, where argv is a byte list          if sys.version_info < (3,):              command_line_conf = [ -                a.decode('utf-8', 'replace') for a in command_line_conf] +                a.decode(preferredencoding(), 'replace') for a in command_line_conf]          if '--ignore-config' in command_line_conf:              system_conf = [] diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index b6f51cfd5..0b60ac7e7 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals  import io  import os  import subprocess -import sys  import time @@ -118,6 +117,10 @@ class FFmpegPostProcessor(PostProcessor):          return self._paths[self.basename]      @property +    def probe_available(self): +        return self.probe_basename is not None + +    @property      def probe_executable(self):          return self._paths[self.probe_basename] @@ -169,7 +172,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):      def get_audio_codec(self, path): -        if not self.probe_executable: +        if not self.probe_available:              raise PostProcessingError('ffprobe or avprobe not found. Please install one.')          try:              cmd = [ @@ -269,19 +272,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):              else:                  self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)                  self.run_ffmpeg(path, new_path, acodec, more_opts) -        except: -            etype, e, tb = sys.exc_info() -            if isinstance(e, AudioConversionError): -                msg = 'audio conversion failed: ' + e.msg -            else: -                msg = 'error running ' + self.basename -            raise PostProcessingError(msg) +        except AudioConversionError as e: +            raise PostProcessingError( +                'audio conversion failed: ' + e.msg) +        except Exception: +            raise PostProcessingError('error running ' + self.basename)          # Try to update the date time for extracted audio file.          if information.get('filetime') is not None:              try:                  os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) -            except: +            except Exception:                  self._downloader.report_warning('Cannot update utime of audio file')          information['filepath'] = new_path diff --git a/youtube_dl/update.py b/youtube_dl/update.py index d8be4049f..de3169eef 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -65,7 +65,7 @@ def update_self(to_screen, verbose):      # Check if there is a new version      try:          newversion = opener.open(VERSION_URL).read().decode('utf-8').strip() -    except: +    except Exception:          if verbose:              to_screen(compat_str(traceback.format_exc()))          to_screen('ERROR: can\'t find the current version. Please try again later.') @@ -78,7 +78,7 @@ def update_self(to_screen, verbose):      try:          versions_info = opener.open(JSON_URL).read().decode('utf-8')          versions_info = json.loads(versions_info) -    except: +    except Exception:          if verbose:              to_screen(compat_str(traceback.format_exc()))          to_screen('ERROR: can\'t obtain versions info. Please try again later.') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 472d4df41..90e0ed9ab 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -75,7 +75,7 @@ def preferredencoding():      try:          pref = locale.getpreferredencoding()          'TEST'.encode(pref) -    except: +    except Exception:          pref = 'UTF-8'      return pref @@ -127,7 +127,7 @@ def write_json_file(obj, fn):              except OSError:                  pass          os.rename(tf.name, fn) -    except: +    except Exception:          try:              os.remove(tf.name)          except OSError: @@ -348,7 +348,7 @@ def _htmlentity_transform(entity):      if entity in compat_html_entities.name2codepoint:          return compat_chr(compat_html_entities.name2codepoint[entity]) -    mobj = re.match(r'#(x?[0-9]+)', entity) +    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)      if mobj is not None:          numstr = mobj.group(1)          if numstr.startswith('x'): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 039ceadf2..e1c385bec 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.03.24' +__version__ = '2015.04.03'  | 
