diff options
Diffstat (limited to 'youtube_dl')
75 files changed, 2188 insertions, 839 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9a8c7da05..50425b8d7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -47,7 +47,9 @@ from .utils import (      DEFAULT_OUTTMPL,      determine_ext,      DownloadError, +    encode_compat_str,      encodeFilename, +    error_to_compat_str,      ExtractorError,      format_bytes,      formatSeconds, @@ -495,7 +497,7 @@ class YoutubeDL(object):                      tb = ''                      if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:                          tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) -                    tb += compat_str(traceback.format_exc()) +                    tb += encode_compat_str(traceback.format_exc())                  else:                      tb_data = traceback.format_list(traceback.extract_stack())                      tb = ''.join(tb_data) @@ -674,14 +676,14 @@ class YoutubeDL(object):                      return self.process_ie_result(ie_result, download, extra_info)                  else:                      return ie_result -            except ExtractorError as de:  # An error we somewhat expected -                self.report_error(compat_str(de), de.format_traceback()) +            except ExtractorError as e:  # An error we somewhat expected +                self.report_error(compat_str(e), e.format_traceback())                  break              except MaxDownloadsReached:                  raise              except Exception as e:                  if self.params.get('ignoreerrors', False): -                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) +                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))                      break                  else:                      raise @@ -1110,6 +1112,12 @@ class YoutubeDL(object):                                            'contain the video, try using '                                            '"-f %s+%s"' % (format_2, format_1))                          return +                    # Formats must be opposite (video+audio) +                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none': +                        self.report_error( +                            'Both formats %s and %s are video-only, you must specify "-f video+audio"' +                            % (format_1, format_2)) +                        return                      output_ext = (                          formats_info[0]['ext']                          if self.params.get('merge_output_format') is None @@ -1453,7 +1461,7 @@ class YoutubeDL(object):              if dn and not os.path.exists(dn):                  os.makedirs(dn)          except (OSError, IOError) as err: -            self.report_error('unable to create directory ' + compat_str(err)) +            self.report_error('unable to create directory ' + error_to_compat_str(err))              return          if self.params.get('writedescription', False): @@ -1504,7 +1512,7 @@ class YoutubeDL(object):                              sub_info['url'], info_dict['id'], note=False)                      except ExtractorError as err:                          self.report_warning('Unable to download subtitle for "%s": %s' % -                                            (sub_lang, compat_str(err.cause))) +                                            (sub_lang, error_to_compat_str(err.cause)))                          continue                  try:                      sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -2033,4 +2041,4 @@ class YoutubeDL(object):                                     (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:                      self.report_warning('Unable to download thumbnail "%s": %s' % -                                        (t['url'], compat_str(err))) +                                        (t['url'], error_to_compat_str(err))) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index b8bf8daf8..beae8c4d0 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -5,9 +5,9 @@ import re  import sys  import time -from ..compat import compat_str  from ..utils import (      encodeFilename, +    error_to_compat_str,      decodeArgument,      format_bytes,      timeconvert, @@ -186,7 +186,7 @@ class FileDownloader(object):                  return              os.rename(encodeFilename(old_filename), encodeFilename(new_filename))          except (IOError, OSError) as err: -            self.report_error('unable to rename file: %s' % compat_str(err)) +            self.report_error('unable to rename file: %s' % error_to_compat_str(err))      def try_utime(self, filename, last_modified_hdr):          """Try to set the last-modified time of the given file.""" diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 6170cc155..aaf0c49c8 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -15,6 +15,7 @@ from ..compat import (  )  from ..utils import (      encodeFilename, +    fix_xml_ampersands,      sanitize_open,      struct_pack,      struct_unpack, @@ -288,7 +289,10 @@ class F4mFD(FragmentFD):          self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)          urlh = self.ydl.urlopen(man_url)          man_url = urlh.geturl() -        manifest = urlh.read() +        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests +        # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 +        # and https://github.com/rg3/youtube-dl/issues/7823) +        manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()          doc = compat_etree_fromstring(manifest)          formats = [(int(f.attrib.get('bitrate', -1)), f) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5b842204f..add1df023 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,9 +3,15 @@ from __future__ import unicode_literals  from .abc import ABCIE  from .abc7news import Abc7NewsIE  from .academicearth import AcademicEarthCourseIE +from .acast import ( +    ACastIE, +    ACastChannelIE, +)  from .addanime import AddAnimeIE  from .adobetv import (      AdobeTVIE, +    AdobeTVShowIE, +    AdobeTVChannelIE,      AdobeTVVideoIE,  )  from .adultswim import AdultSwimIE @@ -71,7 +77,6 @@ from .camdemy import (      CamdemyIE,      CamdemyFolderIE  ) -from .canal13cl import Canal13clIE  from .canalplus import CanalplusIE  from .canalc2 import Canalc2IE  from .cbs import CBSIE @@ -199,7 +204,9 @@ from .francetv import (  from .freesound import FreesoundIE  from .freespeech import FreespeechIE  from .freevideo import FreeVideoIE +from .funimation import FunimationIE  from .funnyordie import FunnyOrDieIE +from .gameinformer import GameInformerIE  from .gamekings import GamekingsIE  from .gameone import (      GameOneIE, @@ -223,9 +230,11 @@ from .globo import (  from .godtube import GodTubeIE  from .goldenmoustache import GoldenMoustacheIE  from .golem import GolemIE +from .googledrive import GoogleDriveIE  from .googleplus import GooglePlusIE  from .googlesearch import GoogleSearchIE  from .goshgay import GoshgayIE +from .gputechconf import GPUTechConfIE  from .groupon import GrouponIE  from .hark import HarkIE  from .hearthisat import HearThisAtIE @@ -351,7 +360,6 @@ from .motherless import MotherlessIE  from .motorsport import MotorsportIE  from .movieclips import MovieClipsIE  from .moviezine import MoviezineIE -from .movshare import MovShareIE  from .mtv import (      MTVIE,      MTVServicesEmbeddedIE, @@ -417,7 +425,13 @@ from .noco import NocoIE  from .normalboots import NormalbootsIE  from .nosvideo import NosVideoIE  from .nova import NovaIE -from .novamov import NovaMovIE +from .novamov import ( +    NovaMovIE, +    WholeCloudIE, +    NowVideoIE, +    VideoWeedIE, +    CloudTimeIE, +)  from .nowness import (      NownessIE,      NownessPlaylistIE, @@ -427,7 +441,6 @@ from .nowtv import (      NowTVIE,      NowTVListIE,  ) -from .nowvideo import NowVideoIE  from .npo import (      NPOIE,      NPOLiveIE, @@ -636,6 +649,7 @@ from .teachingchannel import TeachingChannelIE  from .teamcoco import TeamcocoIE  from .techtalks import TechTalksIE  from .ted import TEDIE +from .tele13 import Tele13IE  from .telebruxelles import TeleBruxellesIE  from .telecinco import TelecincoIE  from .telegraaf import TelegraafIE @@ -664,6 +678,7 @@ from .tnaflix import (      EMPFlixIE,      MovieFapIE,  ) +from .toggle import ToggleIE  from .thvideo import (      THVideoIE,      THVideoPlaylistIE @@ -738,7 +753,6 @@ from .videofyme import VideofyMeIE  from .videomega import VideoMegaIE  from .videopremium import VideoPremiumIE  from .videott import VideoTtIE -from .videoweed import VideoWeedIE  from .vidme import VidmeIE  from .vidzi import VidziIE  from .vier import VierIE, VierVideosIE @@ -840,7 +854,7 @@ from .youtube import (      YoutubeTruncatedIDIE,      YoutubeTruncatedURLIE,      YoutubeUserIE, -    YoutubeUserPlaylistsIE, +    YoutubePlaylistsIE,      YoutubeWatchLaterIE,  )  from .zapiks import ZapiksIE diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py new file mode 100644 index 000000000..be7913bc7 --- /dev/null +++ b/youtube_dl/extractor/acast.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class ACastBaseIE(InfoExtractor): +    _API_BASE_URL = 'https://www.acast.com/api/' + + +class ACastIE(ACastBaseIE): +    IE_NAME = 'acast' +    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)' +    _TEST = { +        'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan', +        'md5': 'ada3de5a1e3a2a381327d749854788bb', +        'info_dict': { +            'id': '57de3baa-4bb0-487e-9418-2692c1277a34', +            'ext': 'mp3', +            'title': '"Where Are You?": Taipei 101, Taiwan', +            'timestamp': 1196172000000, +            'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e', +            'duration': 211, +        } +    } + +    def _real_extract(self, url): +        channel, display_id = re.match(self._VALID_URL, url).groups() +        cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id) + +        return { +            'id': compat_str(cast_data['id']), +            'display_id': display_id, +            'url': cast_data['blings'][0]['audio'], +            'title': cast_data['name'], +            'description': cast_data.get('description'), +            'thumbnail': cast_data.get('image'), +            'timestamp': int_or_none(cast_data.get('publishingDate')), +            'duration': int_or_none(cast_data.get('duration')), +        } + + +class ACastChannelIE(ACastBaseIE): +    IE_NAME = 'acast:channel' +    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)' +    _TEST = { +        'url': 'https://www.acast.com/condenasttraveler', +        'info_dict': { +            'id': '50544219-29bb-499e-a083-6087f4cb7797', +            'title': 'Condé Nast Traveler Podcast', +            'description': 'md5:98646dee22a5b386626ae31866638fbd', +        }, +        'playlist_mincount': 20, +    } + +    @classmethod +    def suitable(cls, url): +        return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id) +        casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id) +        entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts] + +        return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description')) diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 5e43adc51..8753ee2cf 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -1,23 +1,32 @@  from __future__ import unicode_literals +import re +  from .common import InfoExtractor +from ..compat import compat_str  from ..utils import (      parse_duration,      unified_strdate,      str_to_int, +    int_or_none,      float_or_none,      ISO639Utils, +    determine_ext,  ) -class AdobeTVIE(InfoExtractor): -    _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)' +class AdobeTVBaseIE(InfoExtractor): +    _API_BASE_URL = 'http://tv.adobe.com/api/v4/' + + +class AdobeTVIE(AdobeTVBaseIE): +    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'      _TEST = {          'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',          'md5': '9bc5727bcdd55251f35ad311ca74fa1e',          'info_dict': { -            'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop', +            'id': '10981',              'ext': 'mp4',              'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',              'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', @@ -29,50 +38,106 @@ class AdobeTVIE(InfoExtractor):      }      def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - -        player = self._parse_json( -            self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'), -            video_id) +        language, show_urlname, urlname = re.match(self._VALID_URL, url).groups() +        if not language: +            language = 'en' -        title = player.get('title') or self._search_regex( -            r'data-title="([^"]+)"', webpage, 'title') -        description = self._og_search_description(webpage) -        thumbnail = self._og_search_thumbnail(webpage) - -        upload_date = unified_strdate( -            self._html_search_meta('datepublished', webpage, 'upload date')) - -        duration = parse_duration( -            self._html_search_meta('duration', webpage, 'duration') or -            self._search_regex( -                r'Runtime:\s*(\d{2}:\d{2}:\d{2})', -                webpage, 'duration', fatal=False)) - -        view_count = str_to_int(self._search_regex( -            r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', -            webpage, 'view count')) +        video_data = self._download_json( +            self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname), +            urlname)['data'][0]          formats = [{ -            'url': source['src'], -            'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None, -            'tbr': source.get('bitrate'), -        } for source in player['sources']] +            'url': source['url'], +            'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None, +            'width': int_or_none(source.get('width')), +            'height': int_or_none(source.get('height')), +            'tbr': int_or_none(source.get('video_data_rate')), +        } for source in video_data['videos']]          self._sort_formats(formats)          return { -            'id': video_id, -            'title': title, -            'description': description, -            'thumbnail': thumbnail, -            'upload_date': upload_date, -            'duration': duration, -            'view_count': view_count, +            'id': compat_str(video_data['id']), +            'title': video_data['title'], +            'description': video_data.get('description'), +            'thumbnail': video_data.get('thumbnail'), +            'upload_date': unified_strdate(video_data.get('start_date')), +            'duration': parse_duration(video_data.get('duration')), +            'view_count': str_to_int(video_data.get('playcount')),              'formats': formats,          } +class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): +    def _parse_page_data(self, page_data): +        return [self.url_result(self._get_element_url(element_data)) for element_data in page_data] + +    def _extract_playlist_entries(self, url, display_id): +        page = self._download_json(url, display_id) +        entries = self._parse_page_data(page['data']) +        for page_num in range(2, page['paging']['pages'] + 1): +            entries.extend(self._parse_page_data( +                self._download_json(url + '&page=%d' % page_num, display_id)['data'])) +        return entries + + +class AdobeTVShowIE(AdobeTVPlaylistBaseIE): +    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)' + +    _TEST = { +        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', +        'info_dict': { +            'id': '36', +            'title': 'The Complete Picture with Julieanne Kost', +            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', +        }, +        'playlist_mincount': 136, +    } + +    def _get_element_url(self, element_data): +        return element_data['urls'][0] + +    def _real_extract(self, url): +        language, show_urlname = re.match(self._VALID_URL, url).groups() +        if not language: +            language = 'en' +        query = 'language=%s&show_urlname=%s' % (language, show_urlname) + +        show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0] + +        return self.playlist_result( +            self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname), +            compat_str(show_data['id']), +            show_data['show_name'], +            show_data['show_description']) + + +class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): +    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?' + +    _TEST = { +        'url': 'http://tv.adobe.com/channel/development', +        'info_dict': { +            'id': 'development', +        }, +        'playlist_mincount': 96, +    } + +    def _get_element_url(self, element_data): +        return element_data['url'] + +    def _real_extract(self, url): +        language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() +        if not language: +            language = 'en' +        query = 'language=%s&channel_urlname=%s' % (language, channel_urlname) +        if category_urlname: +            query += '&category_urlname=%s' % category_urlname + +        return self.playlist_result( +            self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname), +            channel_urlname) + +  class AdobeTVVideoIE(InfoExtractor):      _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)' @@ -91,28 +156,25 @@ class AdobeTVVideoIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) - -        webpage = self._download_webpage(url, video_id) - -        player_params = self._parse_json(self._search_regex( -            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'), -            video_id) +        video_data = self._download_json(url + '?format=json', video_id)          formats = [{ +            'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),              'url': source['src'], -            'width': source.get('width'), -            'height': source.get('height'), -            'tbr': source.get('bitrate'), -        } for source in player_params['sources']] +            'width': int_or_none(source.get('width')), +            'height': int_or_none(source.get('height')), +            'tbr': int_or_none(source.get('bitrate')), +        } for source in video_data['sources']] +        self._sort_formats(formats)          # For both metadata and downloaded files the duration varies among          # formats. I just pick the max one          duration = max(filter(None, [              float_or_none(source.get('duration'), scale=1000) -            for source in player_params['sources']])) +            for source in video_data['sources']]))          subtitles = {} -        for translation in player_params.get('translations', []): +        for translation in video_data.get('translations', []):              lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])              if lang_id not in subtitles:                  subtitles[lang_id] = [] @@ -124,8 +186,9 @@ class AdobeTVVideoIE(InfoExtractor):          return {              'id': video_id,              'formats': formats, -            'title': player_params['title'], -            'description': self._og_search_description(webpage), +            'title': video_data['title'], +            'description': video_data.get('description'), +            'thumbnail': video_data['video'].get('poster'),              'duration': duration,              'subtitles': subtitles,          } diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py index cad503510..b0b089dee 100644 --- a/youtube_dl/extractor/audimedia.py +++ b/youtube_dl/extractor/audimedia.py @@ -31,8 +31,8 @@ class AudiMediaIE(InfoExtractor):      def _real_extract(self, url):          display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) -         -        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload'); + +        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')          _, stage_mode, video_id, lang = raw_payload.split('-')          # TODO: handle s and e stage_mode (live streams and ended live streams) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 7fb80aa38..691aecc0d 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -47,9 +47,8 @@ class BBCCoUkIE(InfoExtractor):              'info_dict': {                  'id': 'b039d07m',                  'ext': 'flv', -                'title': 'Kaleidoscope, Leonard Cohen', +                'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',                  'description': 'The Canadian poet and songwriter reflects on his musical career.', -                'duration': 1740,              },              'params': {                  # rtmp download @@ -112,7 +111,8 @@ class BBCCoUkIE(InfoExtractor):              'params': {                  # rtmp download                  'skip_download': True, -            } +            }, +            'skip': 'Episode is no longer available on BBC iPlayer Radio',          }, {              'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',              'note': 'Audio', @@ -454,6 +454,7 @@ class BBCCoUkIE(InfoExtractor):          webpage = self._download_webpage(url, group_id, 'Downloading video page')          programme_id = None +        duration = None          tviplayer = self._search_regex(              r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById', @@ -473,7 +474,9 @@ class BBCCoUkIE(InfoExtractor):              title = self._og_search_title(webpage)              description = self._search_regex(                  r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', -                webpage, 'description', fatal=False) +                webpage, 'description', default=None) +            if not description: +                description = self._html_search_meta('description', webpage)          else:              programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) @@ -587,6 +590,7 @@ class BBCIE(BBCCoUkIE):              'ext': 'mp4',              'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',              'duration': 56, +            'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',          },          'params': {              'skip_download': True, @@ -729,6 +733,7 @@ class BBCIE(BBCCoUkIE):          # article with multiple videos embedded with playlist.sxml (e.g.          # http://www.bbc.com/sport/0/football/34475836)          playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage) +        playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))          if playlists:              entries = [                  self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index e63c2ac00..c8d921daf 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -34,15 +34,29 @@ class BeegIE(InfoExtractor):          video_id = self._match_id(url)          video = self._download_json( -            'http://beeg.com/api/v3/video/%s' % video_id, video_id) +            'http://beeg.com/api/v5/video/%s' % video_id, video_id) + +        def split(o, e): +            def cut(s, x): +                n.append(s[:x]) +                return s[x:] +            n = [] +            r = len(o) % e +            if r > 0: +                o = cut(o, r) +            while len(o) > e: +                o = cut(o, e) +            n.append(o) +            return n          def decrypt_key(key): -            # Reverse engineered from http://static.beeg.com/cpl/1067.js -            a = '8RPUUCS35ZWp3ADnKcSmpH71ZusrROo' +            # Reverse engineered from http://static.beeg.com/cpl/1105.js +            a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'              e = compat_urllib_parse_unquote(key) -            return ''.join([ -                compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 25) +            o = ''.join([ +                compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)                  for n in range(len(e))]) +            return ''.join(split(o, 3)[::-1])          def decrypt_url(encrypted_url):              encrypted_url = self._proto_relative_url( diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 1c3644587..59beb11bc 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -2,16 +2,14 @@  from __future__ import unicode_literals  import re -import json  from .common import InfoExtractor -from ..compat import ( -    compat_etree_fromstring, -) +from ..compat import compat_str  from ..utils import (      int_or_none,      unescapeHTML,      ExtractorError, +    xpath_text,  ) @@ -58,27 +56,22 @@ class BiliBiliIE(InfoExtractor):          cid = view_data['cid']          title = unescapeHTML(view_data['title']) -        page = self._download_webpage( +        doc = self._download_xml(              'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,              cid,              'Downloading page %s/%s' % (page_num, view_data['pages'])          ) -        try: -            err_info = json.loads(page) -            raise ExtractorError( -                'BiliBili said: ' + err_info['error_text'], expected=True) -        except ValueError: -            pass -        doc = compat_etree_fromstring(page) +        if xpath_text(doc, './result') == 'error': +            raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)          entries = []          for durl in doc.findall('./durl'): -            size = durl.find('./filesize|./size') +            size = xpath_text(durl, ['./filesize', './size'])              formats = [{                  'url': durl.find('./url').text, -                'filesize': int_or_none(size.text) if size else None, +                'filesize': int_or_none(size),                  'ext': 'flv',              }]              backup_urls = durl.find('./backup_url') @@ -88,21 +81,21 @@ class BiliBiliIE(InfoExtractor):              formats.reverse()              entries.append({ -                'id': '%s_part%s' % (cid, durl.find('./order').text), +                'id': '%s_part%s' % (cid, xpath_text(durl, './order')),                  'title': title, -                'duration': int_or_none(durl.find('./length').text) // 1000, +                'duration': int_or_none(xpath_text(durl, './length'), 1000),                  'formats': formats,              })          info = { -            'id': str(cid), +            'id': compat_str(cid),              'title': title,              'description': view_data.get('description'),              'thumbnail': view_data.get('pic'),              'uploader': view_data.get('author'),              'timestamp': int_or_none(view_data.get('created')), -            'view_count': view_data.get('play'), -            'duration': int_or_none(doc.find('./timelength').text), +            'view_count': int_or_none(view_data.get('play')), +            'duration': int_or_none(xpath_text(doc, './timelength')),          }          if len(entries) == 1: diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f5ebae1e6..03a4f446e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -355,7 +355,7 @@ class BrightcoveLegacyIE(InfoExtractor):  class BrightcoveNewIE(InfoExtractor):      IE_NAME = 'brightcove:new' -    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)' +    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'      _TESTS = [{          'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',          'md5': 'c8100925723840d4b0d243f7025703be', @@ -387,14 +387,24 @@ class BrightcoveNewIE(InfoExtractor):          'params': {              'skip_download': True,          } +    }, { +        # ref: prefixed video id +        'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', +        'only_matching': True,      }]      @staticmethod +    def _extract_url(webpage): +        urls = BrightcoveNewIE._extract_urls(webpage) +        return urls[0] if urls else None + +    @staticmethod      def _extract_urls(webpage):          # Reference:          # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe -        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript) +        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript          # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html +        # 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player          entries = [] @@ -407,9 +417,10 @@ class BrightcoveNewIE(InfoExtractor):          for video_id, account_id, player_id, embed in re.findall(                  # According to examples from [3] it's unclear whether video id                  # may be optional and what to do when it is +                # According to [4] data-video-id may be prefixed with ref:                  r'''(?sx)                      <video[^>]+ -                        data-video-id=["\'](\d+)["\'][^>]*>.*? +                        data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?                      </video>.*?                      <script[^>]+                          src=["\'](?:https?:)?//players\.brightcove\.net/ diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3b2de517e..dda98059e 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -14,9 +14,10 @@ class BYUtvIE(InfoExtractor):          'info_dict': {              'id': 'studio-c-season-5-episode-5',              'ext': 'mp4', -            'description': 'md5:5438d33774b6bdc662f9485a340401cc', +            'description': 'md5:e07269172baff037f8e8bf9956bc9747',              'title': 'Season 5 Episode 5', -            'thumbnail': 're:^https?://.*\.jpg$' +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 1486.486,          },          'params': {              'skip_download': True, diff --git a/youtube_dl/extractor/canal13cl.py b/youtube_dl/extractor/canal13cl.py deleted file mode 100644 index 93241fefe..000000000 --- a/youtube_dl/extractor/canal13cl.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class Canal13clIE(InfoExtractor): -    _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)' -    _TEST = { -        'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', -        'md5': '4cb1fa38adcad8fea88487a078831755', -        'info_dict': { -            'id': '1403022125', -            'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', -            'ext': 'mp4', -            'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda', -            'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.', -        } -    } - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        display_id = mobj.group('id') - -        webpage = self._download_webpage(url, display_id) - -        title = self._html_search_meta( -            'twitter:title', webpage, 'title', fatal=True) -        description = self._html_search_meta( -            'twitter:description', webpage, 'description') -        url = self._html_search_regex( -            r'articuloVideo = \"(.*?)\"', webpage, 'url') -        real_id = self._search_regex( -            r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id) -        thumbnail = self._html_search_regex( -            r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail') - -        return { -            'id': real_id, -            'display_id': display_id, -            'url': url, -            'title': title, -            'description': description, -            'ext': 'mp4', -            'thumbnail': thumbnail, -        } diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 7af903571..3a47f6fa4 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,14 +1,9 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import ( -    determine_ext,      int_or_none, -    js_to_json, -    parse_iso8601, -    remove_end, +    unified_strdate,  ) @@ -21,48 +16,47 @@ class ClipfishIE(InfoExtractor):              'id': '3966754',              'ext': 'mp4',              'title': 'FIFA 14 - E3 2013 Trailer', -            'timestamp': 1370938118, +            'description': 'Video zu FIFA 14: E3 2013 Trailer',              'upload_date': '20130611',              'duration': 82, +            'view_count': int,          }      }      def _real_extract(self, url):          video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - -        video_info = self._parse_json( -            js_to_json(self._html_search_regex( -                '(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')), -            video_id) +        video_info = self._download_json( +            'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id, +            video_id)['items'][0]          formats = [] -        for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage): -            ext = determine_ext(video_url) -            if ext == 'm3u8': -                formats.append({ -                    'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), -                    'ext': 'mp4', -                    'format_id': 'hls', -                }) -            else: -                formats.append({ -                    'url': video_url, -                    'format_id': ext, -                }) -        self._sort_formats(formats) -        title = remove_end(self._og_search_title(webpage), ' - Video') -        thumbnail = self._og_search_thumbnail(webpage) -        duration = int_or_none(video_info.get('length')) -        timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date')) +        m3u8_url = video_info.get('media_videourl_hls') +        if m3u8_url: +            formats.append({ +                'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), +                'ext': 'mp4', +                'format_id': 'hls', +            }) + +        mp4_url = video_info.get('media_videourl') +        if mp4_url: +            formats.append({ +                'url': mp4_url, +                'format_id': 'mp4', +                'width': int_or_none(video_info.get('width')), +                'height': int_or_none(video_info.get('height')), +                'tbr': int_or_none(video_info.get('bitrate')), +            })          return {              'id': video_id, -            'title': title, +            'title': video_info['title'], +            'description': video_info.get('descr'),              'formats': formats, -            'thumbnail': thumbnail, -            'duration': duration, -            'timestamp': timestamp, +            'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'), +            'duration': int_or_none(video_info.get('media_length')), +            'upload_date': unified_strdate(video_info.get('pubDate')), +            'view_count': int_or_none(video_info.get('media_views'))          } diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index d46592cc5..2996b6b09 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -1,7 +1,7 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import int_or_none  _translation_table = { @@ -42,31 +42,26 @@ class CliphunterIE(InfoExtractor):          video_title = self._search_regex(              r'mediaTitle = "([^"]+)"', webpage, 'title') -        fmts = {} -        for fmt in ('mp4', 'flv'): -            fmt_list = self._parse_json(self._search_regex( -                r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id) -            for f in fmt_list: -                fmts[f['fname']] = _decode(f['sUrl']) - -        qualities = self._parse_json(self._search_regex( -            r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id) +        gexo_files = self._parse_json( +            self._search_regex( +                r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'), +            video_id)          formats = [] -        for fname, url in fmts.items(): -            f = { -                'url': url, -            } -            if fname in qualities: -                qual = qualities[fname] -                f.update({ -                    'format_id': '%s_%sp' % (determine_ext(url), qual['h']), -                    'width': qual['w'], -                    'height': qual['h'], -                    'tbr': qual['br'], -                }) -            formats.append(f) - +        for format_id, f in gexo_files.items(): +            video_url = f.get('url') +            if not video_url: +                continue +            fmt = f.get('fmt') +            height = f.get('h') +            format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id +            formats.append({ +                'url': _decode(video_url), +                'format_id': format_id, +                'width': int_or_none(f.get('w')), +                'height': int_or_none(height), +                'tbr': int_or_none(f.get('br')), +            })          self._sort_formats(formats)          thumbnail = self._search_regex( diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py index 5dd69bff7..5c3908f72 100644 --- a/youtube_dl/extractor/cnet.py +++ b/youtube_dl/extractor/cnet.py @@ -1,15 +1,11 @@  # coding: utf-8  from __future__ import unicode_literals -import json +from .theplatform import ThePlatformIE +from ..utils import int_or_none -from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -) - -class CNETIE(InfoExtractor): +class CNETIE(ThePlatformIE):      _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'      _TESTS = [{          'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', @@ -18,25 +14,20 @@ class CNETIE(InfoExtractor):              'ext': 'flv',              'title': 'Hands-on with Microsoft Windows 8.1 Update',              'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', -            'thumbnail': 're:^http://.*/flmswindows8.jpg$',              'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',              'uploader': 'Sarah Mitroff', +            'duration': 70,          }, -        'params': { -            'skip_download': 'requires rtmpdump', -        }      }, {          'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',          'info_dict': {              'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',              'ext': 'flv', +            'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',              'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',              'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',              'uploader': 'Ashley Esqueda', -            'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', -        }, -        'params': { -            'skip_download': True,  # requires rtmpdump +            'duration': 1482,          },      }] @@ -45,26 +36,13 @@ class CNETIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          data_json = self._html_search_regex( -            r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'", +            r"data-cnet-video(?:-uvp)?-options='([^']+)'",              webpage, 'data json') -        data = json.loads(data_json) -        vdata = data['video'] -        if not vdata: -            vdata = data['videos'][0] -        if not vdata: -            raise ExtractorError('Cannot find video data') - -        mpx_account = data['config']['players']['default']['mpx_account'] -        vid = vdata['files'].get('rtmp', vdata['files']['hds']) -        tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid) +        data = self._parse_json(data_json, display_id) +        vdata = data.get('video') or data['videos'][0]          video_id = vdata['id'] -        title = vdata.get('headline') -        if title is None: -            title = vdata.get('title') -        if title is None: -            raise ExtractorError('Cannot find title!') -        thumbnail = vdata.get('image', {}).get('path') +        title = vdata['title']          author = vdata.get('author')          if author:              uploader = '%s %s' % (author['firstName'], author['lastName']) @@ -73,13 +51,34 @@ class CNETIE(InfoExtractor):              uploader = None              uploader_id = None +        mpx_account = data['config']['uvpConfig']['default']['mpx_account'] + +        metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id) +        description = vdata.get('description') or metadata.get('description') +        duration = int_or_none(vdata.get('duration')) or metadata.get('duration') + +        formats = [] +        subtitles = {} +        for (fkey, vid) in vdata['files'].items(): +            if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']: +                continue +            release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid) +            if fkey == 'hds': +                release_url += '&manifest=f4m' +            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey) +            formats.extend(tp_formats) +            subtitles = self._merge_subtitles(subtitles, tp_subtitles) +        self._sort_formats(formats) +          return { -            '_type': 'url_transparent', -            'url': tp_link,              'id': video_id,              'display_id': display_id,              'title': title, +            'description': description, +            'thumbnail': metadata.get('thumbnail'), +            'duration': duration,              'uploader': uploader,              'uploader_id': uploader_id, -            'thumbnail': thumbnail, +            'subtitles': subtitles, +            'formats': formats,          } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6ab2d68d6..828f58f12 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -30,6 +30,7 @@ from ..utils import (      clean_html,      compiled_regex_type,      determine_ext, +    error_to_compat_str,      ExtractorError,      fix_xml_ampersands,      float_or_none, @@ -332,7 +333,8 @@ class InfoExtractor(object):                  return False              if errnote is None:                  errnote = 'Unable to download webpage' -            errmsg = '%s: %s' % (errnote, compat_str(err)) + +            errmsg = '%s: %s' % (errnote, error_to_compat_str(err))              if fatal:                  raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)              else: @@ -622,7 +624,7 @@ class InfoExtractor(object):                  else:                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)              except (IOError, netrc.NetrcParseError) as err: -                self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) +                self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))          return (username, password) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index ab7f3aec4..0c5b6617f 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -7,10 +7,10 @@ import itertools  from .common import InfoExtractor -from ..compat import compat_str  from ..utils import ( -    ExtractorError,      determine_ext, +    error_to_compat_str, +    ExtractorError,      int_or_none,      parse_iso8601,      sanitized_Request, @@ -99,6 +99,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):          {              'url': 'http://www.dailymotion.com/video/xhza0o',              'only_matching': True, +        }, +        # with subtitles +        { +            'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news', +            'only_matching': True,          }      ] @@ -122,7 +127,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):              webpage, 'comment count', fatal=False))          player_v5 = self._search_regex( -            [r'buildPlayer\(({.+?})\);', r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);'], +            [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826 +             r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', +             r'buildPlayer\(({.+?})\);'],              webpage, 'player v5', default=None)          if player_v5:              player = self._parse_json(player_v5, video_id) @@ -172,11 +179,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):              uploader_id = metadata.get('owner', {}).get('id')              subtitles = {} -            for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items(): -                subtitles[subtitle_lang] = [{ -                    'ext': determine_ext(subtitle_url), -                    'url': subtitle_url, -                } for subtitle_url in subtitle.get('urls', [])] +            subtitles_data = metadata.get('subtitles', {}).get('data', {}) +            if subtitles_data and isinstance(subtitles_data, dict): +                for subtitle_lang, subtitle in subtitles_data.items(): +                    subtitles[subtitle_lang] = [{ +                        'ext': determine_ext(subtitle_url), +                        'url': subtitle_url, +                    } for subtitle_url in subtitle.get('urls', [])]              return {                  'id': video_id, @@ -269,7 +278,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):                  'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,                  video_id, note=False)          except ExtractorError as err: -            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) +            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))              return {}          info = json.loads(sub_list)          if (info['total'] > 0): diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 321eec59e..39c481068 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -7,11 +7,11 @@ import socket  from .common import InfoExtractor  from ..compat import (      compat_http_client, -    compat_str,      compat_urllib_error,      compat_urllib_parse_unquote,  )  from ..utils import ( +    error_to_compat_str,      ExtractorError,      limit_length,      sanitized_Request, @@ -116,7 +116,7 @@ class FacebookIE(InfoExtractor):              if re.search(r'id="checkpointSubmitButton"', check_response) is not None:                  self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -            self._downloader.report_warning('unable to log in: %s' % compat_str(err)) +            self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))              return      def _real_initialize(self): diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index cebdd0193..6f9b003c2 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -2,6 +2,11 @@  from __future__ import unicode_literals  from .common import InfoExtractor +from ..utils import ( +    xpath_element, +    xpath_text, +    int_or_none, +)  class FazIE(InfoExtractor): @@ -37,31 +42,32 @@ class FazIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +        description = self._og_search_description(webpage)          config_xml_url = self._search_regex( -            r'writeFLV\(\'(.+?)\',', webpage, 'config xml url') +            r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')          config = self._download_xml(              config_xml_url, video_id, 'Downloading config xml') -        encodings = config.find('ENCODINGS') +        encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)          formats = []          for pref, code in enumerate(['LOW', 'HIGH', 'HQ']): -            encoding = encodings.find(code) -            if encoding is None: -                continue -            encoding_url = encoding.find('FILENAME').text -            formats.append({ -                'url': encoding_url, -                'format_id': code.lower(), -                'quality': pref, -            }) +            encoding = xpath_element(encodings, code) +            if encoding: +                encoding_url = xpath_text(encoding, 'FILENAME') +                if encoding_url: +                    formats.append({ +                        'url': encoding_url, +                        'format_id': code.lower(), +                        'quality': pref, +                        'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')), +                    })          self._sort_formats(formats) -        descr = self._html_search_regex( -            r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)          return {              'id': video_id,              'title': self._og_search_title(webpage),              'formats': formats, -            'description': descr, -            'thumbnail': config.find('STILL/STILL_BIG').text, +            'description': description.strip() if description else None, +            'thumbnail': xpath_text(config, 'STILL/STILL_BIG'), +            'duration': int_or_none(xpath_text(config, 'DURATION')),          } diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 92e8c571f..4c81271d3 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -37,8 +37,8 @@ class FC2IE(InfoExtractor):          'params': {              'username': 'ytdl@yt-dl.org',              'password': '(snip)', -            'skip': 'requires actual password' -        } +        }, +        'skip': 'requires actual password',      }, {          'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',          'only_matching': True, diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 40ea27895..5f6e65dae 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -1,12 +1,10 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      clean_html,      determine_ext, -    ExtractorError, +    js_to_json,  ) @@ -32,24 +30,22 @@ class FKTVIE(InfoExtractor):              'http://fernsehkritik.tv/folge-%s/play' % episode, episode)          title = clean_html(self._html_search_regex(              '<h3>([^<]+)</h3>', webpage, 'title')) -        matches = re.search( -            r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>', -            webpage) -        if matches is None: -            raise ExtractorError('Unable to extract the video') - -        poster, sources = matches.groups() -        if poster is None: -            self.report_warning('unable to extract thumbnail') - -        urls = re.findall(r'<source[^>]+src="([^"]+)"', sources) -        formats = [{ -            'url': furl, -            'format_id': determine_ext(furl), -        } for furl in urls] +        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False) +        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json) + +        formats = [] +        for source in sources: +            furl = source.get('src') +            if furl: +                formats.append({ +                    'url': furl, +                    'format_id': determine_ext(furl), +                }) +        self._sort_formats(formats) +          return {              'id': episode,              'title': title,              'formats': formats, -            'thumbnail': poster, +            'thumbnail': thumbnail,          } diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 91cd46e76..452b27b26 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -1,67 +1,87 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor +from ..compat import compat_urllib_parse  from ..utils import (      ExtractorError, -    find_xpath_attr, -    sanitized_Request, +    int_or_none, +    qualities,  )  class FlickrIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' +    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'      _TEST = {          'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', -        'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', +        'md5': '164fe3fa6c22e18d448d4d5af2330f31',          'info_dict': {              'id': '5645318632', -            'ext': 'mp4', -            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", -            "uploader_id": "forestwander-nature-pictures", -            "title": "Dark Hollow Waterfalls" +            'ext': 'mpg', +            'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.', +            'title': 'Dark Hollow Waterfalls', +            'duration': 19, +            'timestamp': 1303528740, +            'upload_date': '20110423', +            'uploader_id': '10922353@N03', +            'uploader': 'Forest Wander', +            'comment_count': int, +            'view_count': int, +            'tags': list,          }      } -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) +    _API_BASE_URL = 'https://api.flickr.com/services/rest?' -        video_id = mobj.group('id') -        video_uploader_id = mobj.group('uploader_id') -        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id -        req = sanitized_Request(webpage_url) -        req.add_header( -            'User-Agent', -            # it needs a more recent version -            'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)') -        webpage = self._download_webpage(req, video_id) +    def _call_api(self, method, video_id, api_key, note, secret=None): +        query = { +            'photo_id': video_id, +            'method': 'flickr.%s' % method, +            'api_key': api_key, +            'format': 'json', +            'nojsoncallback': 1, +        } +        if secret: +            query['secret'] = secret +        data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note) +        if data['stat'] != 'ok': +            raise ExtractorError(data['message']) +        return data -        secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret') +    def _real_extract(self, url): +        video_id = self._match_id(url) -        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' -        first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage') +        api_key = self._download_json('https://www.flickr.com/hermes_error_beacon.gne', video_id, 'Downloading api key',)['site_key'] -        node_id = find_xpath_attr( -            first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id', -            'id').text +        video_info = self._call_api('photos.getInfo', video_id, api_key, 'Downloading video info')['photo'] +        if video_info['media'] == 'video': +            streams = self._call_api('video.getStreamInfo', video_id, api_key, 'Downloading streams info', video_info['secret'])['streams'] -        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' -        second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage') +            preference = qualities(['iphone_wifi', '700', 'appletv', 'orig']) -        self.report_extraction(video_id) +            formats = [] +            for stream in streams['stream']: +                stream_type = str(stream.get('type')) +                formats.append({ +                    'format_id': stream_type, +                    'url': stream['_content'], +                    'preference': preference(stream_type), +                }) +            self._sort_formats(formats) -        stream = second_xml.find('.//STREAM') -        if stream is None: -            raise ExtractorError('Unable to extract video url') -        video_url = stream.attrib['APP'] + stream.attrib['FULLPATH'] +            owner = video_info.get('owner', {}) -        return { -            'id': video_id, -            'url': video_url, -            'ext': 'mp4', -            'title': self._og_search_title(webpage), -            'description': self._og_search_description(webpage), -            'thumbnail': self._og_search_thumbnail(webpage), -            'uploader_id': video_uploader_id, -        } +            return { +                'id': video_id, +                'title': video_info['title']['_content'], +                'description': video_info.get('description', {}).get('_content'), +                'formats': formats, +                'timestamp': int_or_none(video_info.get('dateuploaded')), +                'duration': int_or_none(video_info.get('video', {}).get('duration')), +                'uploader_id': owner.get('nsid'), +                'uploader': owner.get('realname'), +                'comment_count': int_or_none(video_info.get('comments', {}).get('_content')), +                'view_count': int_or_none(video_info.get('views')), +                'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])] +            } +        else: +            raise ExtractorError('not a video', expected=True) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index 4c7dbca40..370fd006f 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -13,6 +13,7 @@ class FootyRoomIE(InfoExtractor):              'title': 'Schalke 04 0 – 2 Real Madrid',          },          'playlist_count': 3, +        'skip': 'Video for this match is not available',      }, {          'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',          'info_dict': { diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py new file mode 100644 index 000000000..d1a95d87f --- /dev/null +++ b/youtube_dl/extractor/funimation.py @@ -0,0 +1,193 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    clean_html, +    determine_ext, +    encode_dict, +    int_or_none, +    sanitized_Request, +    ExtractorError, +    urlencode_postdata +) + + +class FunimationIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)' + +    _NETRC_MACHINE = 'funimation' + +    _TESTS = [{ +        'url': 'http://www.funimation.com/shows/air/videos/official/breeze', +        'info_dict': { +            'id': '658', +            'display_id': 'breeze', +            'ext': 'mp4', +            'title': 'Air - 1 - Breeze', +            'description': 'md5:1769f43cd5fc130ace8fd87232207892', +            'thumbnail': 're:https?://.*\.jpg', +        }, +    }, { +        'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play', +        'info_dict': { +            'id': '31128', +            'display_id': 'role-play', +            'ext': 'mp4', +            'title': '.hack//SIGN - 1 - Role Play', +            'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', +            'thumbnail': 're:https?://.*\.jpg', +        }, +    }, { +        'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview', +        'info_dict': { +            'id': '9635', +            'display_id': 'broadcast-dub-preview', +            'ext': 'mp4', +            'title': 'Attack on Titan: Junior High - Broadcast Dub Preview', +            'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803', +            'thumbnail': 're:https?://.*\.(?:jpg|png)', +        }, +    }] + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return +        data = urlencode_postdata(encode_dict({ +            'email_field': username, +            'password_field': password, +        })) +        login_request = sanitized_Request('http://www.funimation.com/login', data, headers={ +            'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0', +            'Content-Type': 'application/x-www-form-urlencoded' +        }) +        login_page = self._download_webpage( +            login_request, None, 'Logging in as %s' % username) +        if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')): +            return +        error = self._html_search_regex( +            r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>', +            login_page, 'error messages', default=None) +        if error: +            raise ExtractorError('Unable to login: %s' % error, expected=True) +        raise ExtractorError('Unable to log in') + +    def _real_initialize(self): +        self._login() + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        errors = [] +        formats = [] + +        ERRORS_MAP = { +            'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn', +            'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut', +            'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut', +            'ERROR_VIDEO_EXPIRED': 'videoExpired', +            'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable', +            'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription', +            'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription', +            'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding', +            'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN', +            'ERROR_STREAM_NOT_FOUND': 'streamNotFound', +        } + +        USER_AGENTS = ( +            # PC UA is served with m3u8 that provides some bonus lower quality formats +            ('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'), +            # Mobile UA allows to extract direct links and also does not fail when +            # PC UA fails with hulu error (e.g. +            # http://www.funimation.com/shows/hacksign/videos/official/role-play) +            ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'), +        ) + +        for kind, user_agent in USER_AGENTS: +            request = sanitized_Request(url) +            request.add_header('User-Agent', user_agent) +            webpage = self._download_webpage( +                request, display_id, 'Downloading %s webpage' % kind) + +            playlist = self._parse_json( +                self._search_regex( +                    r'var\s+playersData\s*=\s*(\[.+?\]);\n', +                    webpage, 'players data'), +                display_id)[0]['playlist'] + +            items = next(item['items'] for item in playlist if item.get('items')) +            item = next(item for item in items if item.get('itemAK') == display_id) + +            error_messages = {} +            video_error_messages = self._search_regex( +                r'var\s+videoErrorMessages\s*=\s*({.+?});\n', +                webpage, 'error messages', default=None) +            if video_error_messages: +                error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False) +                if error_messages_json: +                    for _, error in error_messages_json.items(): +                        type_ = error.get('type') +                        description = error.get('description') +                        content = error.get('content') +                        if type_ == 'text' and description and content: +                            error_message = ERRORS_MAP.get(description) +                            if error_message: +                                error_messages[error_message] = content + +            for video in item.get('videoSet', []): +                auth_token = video.get('authToken') +                if not auth_token: +                    continue +                funimation_id = video.get('FUNImationID') or video.get('videoId') +                preference = 1 if video.get('languageMode') == 'dub' else 0 +                if not auth_token.startswith('?'): +                    auth_token = '?%s' % auth_token +                for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)): +                    format_url = video.get('%sUrl' % quality) +                    if not format_url: +                        continue +                    if not format_url.startswith(('http', '//')): +                        errors.append(format_url) +                        continue +                    if determine_ext(format_url) == 'm3u8': +                        m3u8_formats = self._extract_m3u8_formats( +                            format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native', +                            preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False) +                        if m3u8_formats: +                            formats.extend(m3u8_formats) +                    else: +                        tbr = int_or_none(self._search_regex( +                            r'-(\d+)[Kk]', format_url, 'tbr', default=None)) +                        formats.append({ +                            'url': format_url + auth_token, +                            'format_id': '%s-http-%dp' % (funimation_id, height), +                            'height': height, +                            'tbr': tbr, +                            'preference': preference, +                        }) + +        if not formats and errors: +            raise ExtractorError( +                '%s returned error: %s' +                % (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))), +                expected=True) + +        self._sort_formats(formats) + +        title = item['title'] +        artist = item.get('artist') +        if artist: +            title = '%s - %s' % (artist, title) +        description = self._og_search_description(webpage) or item.get('description') +        thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl') +        video_id = item.get('itemId') or display_id + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py new file mode 100644 index 000000000..25870c131 --- /dev/null +++ b/youtube_dl/extractor/gameinformer.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class GameInformerIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx' +    _TEST = { +        'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', +        'info_dict': { +            'id': '4515472681001', +            'ext': 'm3u8', +            'title': 'Replay - Animal Crossing', +            'description': 'md5:2e211891b215c85d061adc7a4dd2d930', +            'timestamp': 1443457610706, +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) + +        bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url') +        json_data = self._download_json( +            bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions', +            display_id) + +        return { +            'id': compat_str(json_data['id']), +            'display_id': display_id, +            'url': json_data['IOSRenditions'][0]['url'], +            'title': json_data['name'], +            'description': json_data.get('shortDescription'), +            'timestamp': int_or_none(json_data.get('publishedDate')), +            'duration': int_or_none(json_data.get('length')), +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f5dd88f54..3c3066e38 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -53,6 +53,8 @@ from .onionstudios import OnionStudiosIE  from .snagfilms import SnagFilmsEmbedIE  from .screenwavemedia import ScreenwaveMediaIE  from .mtv import MTVServicesEmbeddedIE +from .pladform import PladformIE +from .googledrive import GoogleDriveIE  from .jwplatform import JWPlatformIE @@ -339,6 +341,7 @@ class GenericIE(InfoExtractor):                  'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',                  'ext': 'mp4',                  'title': '2cc213299525360.mov',  # that's what we get +                'duration': 238.231,              },              'add_ie': ['Ooyala'],          }, @@ -350,6 +353,7 @@ class GenericIE(InfoExtractor):                  'ext': 'mp4',                  'title': '"Steve Jobs: Man in the Machine" trailer',                  'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', +                'duration': 135.427,              },              'params': {                  'skip_download': True, @@ -960,8 +964,9 @@ class GenericIE(InfoExtractor):              'info_dict': {                  'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',                  'ext': 'mp4', -                'description': 'VIDEO: Index/Match versus VLOOKUP.', +                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',                  'title': 'This is what separates the Excel masters from the wannabes', +                'duration': 191.933,              },              'params': {                  # m3u8 downloads @@ -1496,7 +1501,7 @@ class GenericIE(InfoExtractor):                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))          if mobj is not None: -            return OoyalaIE._build_url_result(mobj.group('ec')) +            return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))          # Look for multiple Ooyala embeds on SBN network websites          mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) @@ -1504,7 +1509,7 @@ class GenericIE(InfoExtractor):              embeds = self._parse_json(mobj.group(1), video_id, fatal=False)              if embeds:                  return _playlist_from_matches( -                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') +                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')          # Look for Aparat videos          mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) @@ -1733,10 +1738,9 @@ class GenericIE(InfoExtractor):              return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')          # Look for Pladform embeds -        mobj = re.search( -            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage) -        if mobj is not None: -            return self.url_result(mobj.group('url'), 'Pladform') +        pladform_url = PladformIE._extract_url(webpage) +        if pladform_url: +            return self.url_result(pladform_url)          # Look for Playwire embeds          mobj = re.search( @@ -1761,6 +1765,11 @@ class GenericIE(InfoExtractor):          if nbc_sports_url:              return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') +        # Look for Google Drive embeds +        google_drive_url = GoogleDriveIE._extract_url(webpage) +        if google_drive_url: +            return self.url_result(google_drive_url, 'GoogleDrive') +          # Look for UDN embeds          mobj = re.search(              r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py new file mode 100644 index 000000000..f354c9c7a --- /dev/null +++ b/youtube_dl/extractor/googledrive.py @@ -0,0 +1,88 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    int_or_none, +) + + +class GoogleDriveIE(InfoExtractor): +    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})' +    _TEST = { +        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', +        'md5': '881f7700aec4f538571fa1e0eed4a7b6', +        'info_dict': { +            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', +            'ext': 'mp4', +            'title': 'Big Buck Bunny.mp4', +            'duration': 46, +        } +    } +    _FORMATS_EXT = { +        '5': 'flv', +        '6': 'flv', +        '13': '3gp', +        '17': '3gp', +        '18': 'mp4', +        '22': 'mp4', +        '34': 'flv', +        '35': 'flv', +        '36': '3gp', +        '37': 'mp4', +        '38': 'mp4', +        '43': 'webm', +        '44': 'webm', +        '45': 'webm', +        '46': 'webm', +        '59': 'mp4', +    } + +    @staticmethod +    def _extract_url(webpage): +        mobj = re.search( +            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})', +            webpage) +        if mobj: +            return 'https://drive.google.com/file/d/%s' % mobj.group('id') + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage( +            'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape') + +        reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) +        if reason: +            raise ExtractorError(reason) + +        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') +        duration = int_or_none(self._search_regex( +            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) +        fmt_stream_map = self._search_regex( +            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') +        fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') + +        formats = [] +        for fmt, fmt_stream in zip(fmt_list, fmt_stream_map): +            fmt_id, fmt_url = fmt_stream.split('|') +            resolution = fmt.split('/')[1] +            width, height = resolution.split('x') +            formats.append({ +                'url': fmt_url, +                'format_id': fmt_id, +                'resolution': resolution, +                'width': int_or_none(width), +                'height': int_or_none(height), +                'ext': self._FORMATS_EXT[fmt_id], +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': self._og_search_thumbnail(webpage), +            'duration': duration, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/gputechconf.py b/youtube_dl/extractor/gputechconf.py new file mode 100644 index 000000000..145b55bf3 --- /dev/null +++ b/youtube_dl/extractor/gputechconf.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    xpath_element, +    xpath_text, +    int_or_none, +    parse_duration, +) + + +class GPUTechConfIE(InfoExtractor): +    _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html' +    _TEST = { +        'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html', +        'md5': 'a8862a00a0fd65b8b43acc5b8e33f798', +        'info_dict': { +            'id': '5156', +            'ext': 'mp4', +            'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis', +            'duration': 1219, +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/') +        xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id') + +        doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id) + +        metadata = xpath_element(doc, 'metadata') +        http_host = xpath_text(metadata, 'httpHost', 'http host', True) +        mbr_videos = xpath_element(metadata, 'MBRVideos') + +        formats = [] +        for mbr_video in mbr_videos.findall('MBRVideo'): +            stream_name = xpath_text(mbr_video, 'streamName') +            if stream_name: +                formats.append({ +                    'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')), +                    'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')), +                }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': xpath_text(metadata, 'title'), +            'duration': parse_duration(xpath_text(metadata, 'endTime')), +            'creator': xpath_text(metadata, 'speaker'), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 8b9e0e2f8..63c05b6a6 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -18,6 +18,8 @@ class GrouponIE(InfoExtractor):                  'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',                  'ext': 'mp4',                  'title': 'Bikram Yoga Huntington Beach | Orange County', +                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', +                'duration': 44.961,              },          }],          'params': { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 16677f179..e8f51e545 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -16,6 +16,7 @@ class HowcastIE(InfoExtractor):              'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',              'timestamp': 1276081287,              'upload_date': '20100609', +            'duration': 56.823,          },          'params': {              # m3u8 download diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index cca3dd498..b3706fe6d 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -28,15 +28,12 @@ class HypemIE(InfoExtractor):          track_id = self._match_id(url)          data = {'ax': 1, 'ts': time.time()} -        data_encoded = compat_urllib_parse.urlencode(data) -        complete_url = url + "?" + data_encoded -        request = sanitized_Request(complete_url) +        request = sanitized_Request(url + '?' + compat_urllib_parse.urlencode(data))          response, urlh = self._download_webpage_handle(              request, track_id, 'Downloading webpage with the url') -        cookie = urlh.headers.get('Set-Cookie', '')          html_tracks = self._html_search_regex( -            r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>', +            r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',              response, 'tracks')          try:              track_list = json.loads(html_tracks) @@ -46,15 +43,14 @@ class HypemIE(InfoExtractor):          key = track['key']          track_id = track['id'] -        artist = track['artist']          title = track['song'] -        serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)          request = sanitized_Request( -            serve_url, '', {'Content-Type': 'application/json'}) -        request.add_header('cookie', cookie) +            'http://hypem.com/serve/source/%s/%s' % (track_id, key), +            '', {'Content-Type': 'application/json'})          song_data = self._download_json(request, track_id, 'Downloading metadata') -        final_url = song_data["url"] +        final_url = song_data['url'] +        artist = track.get('artist')          return {              'id': track_id, diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index 71cfd12c5..016af2084 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,3 +1,5 @@ +# coding: utf-8 +  from __future__ import unicode_literals  import base64 @@ -5,8 +7,9 @@ import base64  from .common import InfoExtractor  from ..compat import (      compat_urllib_parse_unquote, -    compat_urlparse, +    compat_parse_qs,  ) +from ..utils import determine_ext  class InfoQIE(InfoExtractor): @@ -16,7 +19,7 @@ class InfoQIE(InfoExtractor):          'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',          'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',          'info_dict': { -            'id': '12-jan-pythonthings', +            'id': 'A-Few-of-My-Favorite-Python-Things',              'ext': 'mp4',              'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',              'title': 'A Few of My Favorite [Python] Things', @@ -24,40 +27,84 @@ class InfoQIE(InfoExtractor):      }, {          'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',          'only_matching': True, +    }, { +        'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery', +        'md5': '4918d0cca1497f2244572caf626687ef', +        'info_dict': { +            'id': 'openstack-continued-delivery', +            'title': 'OpenStack持续交付之路', +            'ext': 'flv', +            'description': 'md5:308d981fb28fa42f49f9568322c683ff', +        },      }] -    def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) +    def _extract_bokecc_videos(self, webpage, video_id): +        # TODO: bokecc.com is a Chinese video cloud platform +        # It should have an independent extractor but I don't have other +        # examples using bokecc +        player_params_str = self._html_search_regex( +            r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)', +            webpage, 'player params', default=None) -        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') -        video_description = self._html_search_meta('description', webpage, 'description') +        player_params = compat_parse_qs(player_params_str) + +        info_xml = self._download_xml( +            'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( +                player_params['siteid'][0], player_params['vid'][0]), video_id) + +        return [{ +            'format_id': 'bokecc', +            'url': quality.find('./copy').attrib['playurl'], +            'preference': int(quality.attrib['value']), +        } for quality in info_xml.findall('./video/quality')] +    def _extract_rtmp_videos(self, webpage):          # The server URL is hardcoded          video_url = 'rtmpe://video.infoq.com/cfx/st/'          # Extract video URL          encoded_id = self._search_regex( -            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id') +            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None) +          real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))          playpath = 'mp4:' + real_id -        video_filename = playpath.split('/')[-1] -        video_id, extension = video_filename.split('.') - -        http_base = self._search_regex( -            r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage, -            'HTTP base URL') - -        formats = [{ +        return [{              'format_id': 'rtmp',              'url': video_url, -            'ext': extension, +            'ext': determine_ext(playpath),              'play_path': playpath, -        }, { +        }] + +    def _extract_http_videos(self, webpage): +        http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL') + +        policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') +        signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') +        key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + +        return [{              'format_id': 'http', -            'url': compat_urlparse.urljoin(url, http_base) + real_id, +            'url': http_video_url, +            'http_headers': { +                'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( +                    policy, signature, key_pair_id), +            },          }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') +        video_description = self._html_search_meta('description', webpage, 'description') + +        if '/cn/' in url: +            # for China videos, HTTP video URL exists but always fails with 403 +            formats = self._extract_bokecc_videos(webpage, video_id) +        else: +            formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage) +          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 2df1da3f0..c3731a110 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -205,9 +205,8 @@ class IqiyiIE(InfoExtractor):      def get_enc_key(self, swf_url, video_id):          # TODO: automatic key extraction -        # last update at 2015-10-22 for Zombie::bite -        # '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2] -        enc_key = '2c76de15dcb44bd28ff0927d50d31620' +        # last update at 2015-12-18 for Zombie::bite +        enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1]          return enc_key      def _real_extract(self, url): diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index d79261bb5..126ca13df 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -1,23 +1,25 @@  from __future__ import unicode_literals -import os  import re  from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse -from ..utils import sanitized_Request +from ..utils import ( +    sanitized_Request, +    url_basename, +)  class KeezMoviesIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'      _TEST = {          'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', -        'md5': '6e297b7e789329923fcf83abb67c9289', +        'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',          'info_dict': {              'id': '1214711',              'ext': 'mp4',              'title': 'Petite Asian Lady Mai Playing In Bathtub',              'age_limit': 18, +            'thumbnail': 're:^https?://.*\.jpg$',          }      } @@ -36,21 +38,29 @@ class KeezMoviesIE(InfoExtractor):          video_title = self._html_search_regex(              r'<h1 [^>]*>([^<]+)', webpage, 'title') -        video_url = self._html_search_regex( -            r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL') -        path = compat_urllib_parse_urlparse(video_url).path -        extension = os.path.splitext(path)[1][1:] -        format = path.split('/')[4].split('_')[:2] -        format = "-".join(format) +        flashvars = self._parse_json(self._search_regex( +            r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id) + +        formats = [] +        for height in (180, 240, 480): +            if flashvars.get('quality_%dp' % height): +                video_url = flashvars['quality_%dp' % height] +                a_format = { +                    'url': video_url, +                    'height': height, +                    'format_id': '%dp' % height, +                } +                filename_parts = url_basename(video_url).split('_') +                if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]): +                    a_format['tbr'] = int(filename_parts[1][:-1]) +                formats.append(a_format)          age_limit = self._rta_search(webpage)          return {              'id': video_id,              'title': video_title, -            'url': video_url, -            'ext': extension, -            'format': format, -            'format_id': format, +            'formats': formats,              'age_limit': age_limit, +            'thumbnail': flashvars.get('image_url')          } diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 3c786a36d..67d6271e1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -154,10 +154,10 @@ class MetacafeIE(InfoExtractor):          # Extract URL, uploader and title from webpage          self.report_extraction(video_id)          video_url = None -        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) +        mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage)          if mobj is not None:              mediaURL = compat_urllib_parse_unquote(mobj.group(1)) -            video_ext = mediaURL[-3:] +            video_ext = determine_ext(mediaURL)              # Extract gdaKey if available              mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) @@ -229,7 +229,7 @@ class MetacafeIE(InfoExtractor):          age_limit = (              18 -            if re.search(r'"contentRating":"restricted"', webpage) +            if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)              else 0)          if isinstance(video_url, list): diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index d47aeceda..c2b7ed9ab 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -64,7 +64,8 @@ class MixcloudIE(InfoExtractor):          preview_url = self._search_regex(              r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') -        song_url = preview_url.replace('/previews/', '/c/originals/') +        song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url) +        song_url = song_url.replace('/previews/', '/c/originals/')          if not self._check_url(song_url, track_id, 'mp3'):              song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')              if not self._check_url(song_url, track_id, 'm4a'): diff --git a/youtube_dl/extractor/movshare.py b/youtube_dl/extractor/movshare.py deleted file mode 100644 index 6101063f2..000000000 --- a/youtube_dl/extractor/movshare.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import unicode_literals - -from .novamov import NovaMovIE - - -class MovShareIE(NovaMovIE): -    IE_NAME = 'movshare' -    IE_DESC = 'MovShare' - -    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'} - -    _HOST = 'www.movshare.net' - -    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' -    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>' -    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>' - -    _TEST = { -        'url': 'http://www.movshare.net/video/559e28be54d96', -        'md5': 'abd31a2132947262c50429e1d16c1bfd', -        'info_dict': { -            'id': '559e28be54d96', -            'ext': 'flv', -            'title': 'dissapeared image', -            'description': 'optical illusion  dissapeared image  magic illusion', -        } -    } diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 944096e1c..7c6b7841d 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -1,63 +1,102 @@  from __future__ import unicode_literals +import re +  from .common import InfoExtractor  from ..utils import ( -    remove_end,      parse_duration, +    int_or_none, +    xpath_text, +    xpath_attr,  )  class NBAIE(InfoExtractor): -    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$' +    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'      _TESTS = [{          'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', -        'md5': 'c0edcfc37607344e2ff8f13c378c88a4', +        'md5': '9e7729d3010a9c71506fd1248f74e4f4',          'info_dict': { -            'id': '0021200253-okc-bkn-recap.nba', -            'ext': 'mp4', +            'id': '0021200253-okc-bkn-recap', +            'ext': 'flv',              'title': 'Thunder vs. Nets',              'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',              'duration': 181, +            'timestamp': 1354638466, +            'upload_date': '20121204',          },      }, {          'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',          'only_matching': True,      }, { -        'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', +        'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', +        'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',          'info_dict': { -            'id': '0041400301-cle-atl-recap.nba', +            'id': '0041400301-cle-atl-recap',              'ext': 'mp4', -            'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1', +            'title': 'Hawks vs. Cavaliers Game 1',              'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',              'duration': 228, -        }, -        'params': { -            'skip_download': True, +            'timestamp': 1432134543, +            'upload_date': '20150520',          }      }]      def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - -        video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' +        path, video_id = re.match(self._VALID_URL, url).groups() +        if path.startswith('nba/'): +            path = path[3:] +        video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id) +        video_id = xpath_text(video_info, 'slug') +        title = xpath_text(video_info, 'headline') +        description = xpath_text(video_info, 'description') +        duration = parse_duration(xpath_text(video_info, 'length')) +        timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts')) -        shortened_video_id = video_id.rpartition('/')[2] -        title = remove_end( -            self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com') +        thumbnails = [] +        for image in video_info.find('images'): +            thumbnails.append({ +                'id': image.attrib.get('cut'), +                'url': image.text, +                'width': int_or_none(image.attrib.get('width')), +                'height': int_or_none(image.attrib.get('height')), +            }) -        description = self._og_search_description(webpage) -        duration_str = self._html_search_meta( -            'duration', webpage, 'duration', default=None) -        if not duration_str: -            duration_str = self._html_search_regex( -                r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False) -        duration = parse_duration(duration_str) +        formats = [] +        for video_file in video_info.findall('.//file'): +            video_url = video_file.text +            if video_url.startswith('/'): +                continue +            if video_url.endswith('.m3u8'): +                m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False) +                if m3u8_formats: +                    formats.extend(m3u8_formats) +            elif video_url.endswith('.f4m'): +                f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False) +                if f4m_formats: +                    formats.extend(f4m_formats) +            else: +                key = video_file.attrib.get('bitrate') +                format_info = { +                    'format_id': key, +                    'url': video_url, +                } +                mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key) +                if mobj: +                    format_info.update({ +                        'width': int(mobj.group(1)), +                        'height': int(mobj.group(2)), +                        'tbr': int_or_none(mobj.group(3)), +                    }) +                formats.append(format_info) +        self._sort_formats(formats)          return { -            'id': shortened_video_id, -            'url': video_url, +            'id': video_id,              'title': title,              'description': description,              'duration': duration, +            'timestamp': timestamp, +            'thumbnails': thumbnails, +            'formats': formats,          } diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index e683d24c4..340c922bd 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -3,14 +3,12 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..compat import ( -    compat_str, -    compat_HTTPError, -) +from ..compat import compat_HTTPError  from ..utils import (      ExtractorError,      find_xpath_attr,      lowercase_escape, +    smuggle_url,      unescapeHTML,  ) @@ -62,12 +60,13 @@ class NBCIE(InfoExtractor):          theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(              [                  r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', +                r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',                  r'"embedURL"\s*:\s*"([^"]+)"'              ],              webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))          if theplatform_url.startswith('//'):              theplatform_url = 'http:' + theplatform_url -        return self.url_result(theplatform_url) +        return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))  class NBCSportsVPlayerIE(InfoExtractor): @@ -187,7 +186,7 @@ class NBCNewsIE(InfoExtractor):                  'title': info.find('headline').text,                  'ext': 'flv',                  'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, -                'description': compat_str(info.find('caption').text), +                'description': info.find('caption').text,                  'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,              }          else: diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 16213eed9..894c51399 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -88,10 +88,10 @@ class NDRIE(NDRBaseIE):              'embedURL', webpage, 'embed URL', fatal=True)          description = self._search_regex(              r'<p[^>]+itemprop="description">([^<]+)</p>', -            webpage, 'description', fatal=False) +            webpage, 'description', default=None) or self._og_search_description(webpage)          timestamp = parse_iso8601(              self._search_regex( -                r'<span itemprop="datePublished" content="([^"]+)">', +                r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',                  webpage, 'upload date', fatal=False))          return {              '_type': 'url_transparent', diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 76bd21e6d..d440313d5 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -9,6 +9,7 @@ from .common import InfoExtractor  from ..compat import (      compat_str,      compat_urllib_parse, +    compat_urlparse,  )  from ..utils import (      clean_html, @@ -82,14 +83,21 @@ class NocoIE(InfoExtractor):          if 'erreur' in login:              raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) +    @staticmethod +    def _ts(): +        return int(time.time() * 1000) +      def _call_api(self, path, video_id, note, sub_lang=None): -        ts = compat_str(int(time.time() * 1000)) +        ts = compat_str(self._ts() + self._ts_offset)          tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()          url = self._API_URL_TEMPLATE % (path, ts, tk)          if sub_lang:              url += self._SUB_LANG_TEMPLATE % sub_lang -        resp = self._download_json(url, video_id, note) +        request = sanitized_Request(url) +        request.add_header('Referer', self._referer) + +        resp = self._download_json(request, video_id, note)          if isinstance(resp, dict) and resp.get('error'):              self._raise_error(resp['error'], resp['description']) @@ -102,8 +110,22 @@ class NocoIE(InfoExtractor):              expected=True)      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url) + +        # Timestamp adjustment offset between server time and local time +        # must be calculated in order to use timestamps closest to server's +        # in all API requests (see https://github.com/rg3/youtube-dl/issues/7864) +        webpage = self._download_webpage(url, video_id) + +        player_url = self._search_regex( +            r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1', +            webpage, 'noco player', group='player', +            default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf') + +        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query) +        ts = int_or_none(qs.get('ts', [None])[0]) +        self._ts_offset = ts - self._ts() if ts else 0 +        self._referer = player_url          medias = self._call_api(              'shows/%s/medias' % video_id, @@ -155,8 +177,8 @@ class NocoIE(InfoExtractor):                          'format_id': format_id_extended,                          'width': int_or_none(fmt.get('res_width')),                          'height': int_or_none(fmt.get('res_lines')), -                        'abr': int_or_none(fmt.get('audiobitrate')), -                        'vbr': int_or_none(fmt.get('videobitrate')), +                        'abr': int_or_none(fmt.get('audiobitrate'), 1000), +                        'vbr': int_or_none(fmt.get('videobitrate'), 1000),                          'filesize': int_or_none(fmt.get('filesize')),                          'format_note': qualities[format_id].get('quality_name'),                          'quality': qualities[format_id].get('priority'), diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 6163e8855..d68c1ad79 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -17,15 +17,16 @@ class NovaMovIE(InfoExtractor):      IE_NAME = 'novamov'      IE_DESC = 'NovaMov' -    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})' +    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'      _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}      _HOST = 'www.novamov.com'      _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>' -    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";' +    _FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'      _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'      _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>' +    _URL_TEMPLATE = 'http://%s/video/%s'      _TEST = {          'url': 'http://www.novamov.com/video/4rurhn9x446jj', @@ -39,20 +40,28 @@ class NovaMovIE(InfoExtractor):          'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'      } +    def _check_existence(self, webpage, video_id): +        if re.search(self._FILE_DELETED_REGEX, webpage) is not None: +            raise ExtractorError('Video %s does not exist' % video_id, expected=True) +      def _real_extract(self, url):          video_id = self._match_id(url) -        url = 'http://%s/video/%s' % (self._HOST, video_id) +        url = self._URL_TEMPLATE % (self._HOST, video_id)          webpage = self._download_webpage(              url, video_id, 'Downloading video page') -        if re.search(self._FILE_DELETED_REGEX, webpage) is not None: -            raise ExtractorError('Video %s does not exist' % video_id, expected=True) +        self._check_existence(webpage, video_id)          def extract_filekey(default=NO_DEFAULT): -            return self._search_regex( +            filekey = self._search_regex(                  self._FILEKEY_REGEX, webpage, 'filekey', default=default) +            if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'): +                return self._search_regex( +                    r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default) +            else: +                return filekey          filekey = extract_filekey(default=None) @@ -69,6 +78,7 @@ class NovaMovIE(InfoExtractor):              request.add_header('Referer', post_url)              webpage = self._download_webpage(                  request, video_id, 'Downloading continue to the video page') +            self._check_existence(webpage, video_id)          filekey = extract_filekey() @@ -92,3 +102,89 @@ class NovaMovIE(InfoExtractor):              'title': title,              'description': description          } + + +class WholeCloudIE(NovaMovIE): +    IE_NAME = 'wholecloud' +    IE_DESC = 'WholeCloud' + +    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': '(?:wholecloud\.net|movshare\.(?:net|sx|ag))'} + +    _HOST = 'www.wholecloud.net' + +    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' +    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>' +    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>' + +    _TEST = { +        'url': 'http://www.wholecloud.net/video/559e28be54d96', +        'md5': 'abd31a2132947262c50429e1d16c1bfd', +        'info_dict': { +            'id': '559e28be54d96', +            'ext': 'flv', +            'title': 'dissapeared image', +            'description': 'optical illusion  dissapeared image  magic illusion', +        } +    } + + +class NowVideoIE(NovaMovIE): +    IE_NAME = 'nowvideo' +    IE_DESC = 'NowVideo' + +    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'} + +    _HOST = 'www.nowvideo.to' + +    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' +    _TITLE_REGEX = r'<h4>([^<]+)</h4>' +    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>' + +    _TEST = { +        'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b', +        'md5': '12c82cad4f2084881d8bc60ee29df092', +        'info_dict': { +            'id': 'f1d6fce9a968b', +            'ext': 'flv', +            'title': 'youtubedl test video BaWjenozKc', +            'description': 'Description', +        }, +    } + + +class VideoWeedIE(NovaMovIE): +    IE_NAME = 'videoweed' +    IE_DESC = 'VideoWeed' + +    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'} + +    _HOST = 'www.videoweed.es' + +    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' +    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>' +    _URL_TEMPLATE = 'http://%s/file/%s' + +    _TEST = { +        'url': 'http://www.videoweed.es/file/b42178afbea14', +        'md5': 'abd31a2132947262c50429e1d16c1bfd', +        'info_dict': { +            'id': 'b42178afbea14', +            'ext': 'flv', +            'title': 'optical illusion  dissapeared image magic illusion', +            'description': '' +        }, +    } + + +class CloudTimeIE(NovaMovIE): +    IE_NAME = 'cloudtime' +    IE_DESC = 'CloudTime' + +    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'cloudtime\.to'} + +    _HOST = 'www.cloudtime.to' + +    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' +    _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>' + +    _TEST = None diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index d480fb58c..446f5901c 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -1,7 +1,10 @@  # encoding: utf-8  from __future__ import unicode_literals -from .brightcove import BrightcoveLegacyIE +from .brightcove import ( +    BrightcoveLegacyIE, +    BrightcoveNewIE, +)  from .common import InfoExtractor  from ..compat import compat_str  from ..utils import ( @@ -23,9 +26,12 @@ class NownessBaseIE(InfoExtractor):                              note='Downloading player JavaScript',                              errnote='Unable to download player JavaScript')                          bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code) -                        if bc_url is None: -                            raise ExtractorError('Could not find player definition') -                        return self.url_result(bc_url, 'BrightcoveLegacy') +                        if bc_url: +                            return self.url_result(bc_url, BrightcoveLegacyIE.ie_key()) +                        bc_url = BrightcoveNewIE._extract_url(player_code) +                        if bc_url: +                            return self.url_result(bc_url, BrightcoveNewIE.ie_key()) +                        raise ExtractorError('Could not find player definition')                      elif source == 'vimeo':                          return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')                      elif source == 'youtube': diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 67e34b294..fd107aca2 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -71,7 +71,7 @@ class NowTVBaseIE(InfoExtractor):  class NowTVIE(NowTVBaseIE): -    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)' +    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'      _TESTS = [{          # rtl @@ -190,6 +190,9 @@ class NowTVIE(NowTVBaseIE):      }, {          'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',          'only_matching': True, +    }, { +        'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player', +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py deleted file mode 100644 index 57ee3d366..000000000 --- a/youtube_dl/extractor/nowvideo.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import unicode_literals - -from .novamov import NovaMovIE - - -class NowVideoIE(NovaMovIE): -    IE_NAME = 'nowvideo' -    IE_DESC = 'NowVideo' - -    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'} - -    _HOST = 'www.nowvideo.to' - -    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' -    _FILEKEY_REGEX = r'var fkzd="([^"]+)";' -    _TITLE_REGEX = r'<h4>([^<]+)</h4>' -    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>' - -    _TEST = { -        'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa', -        'md5': 'f8fbbc8add72bd95b7850c6a02fc8817', -        'info_dict': { -            'id': '0mw0yow7b6dxa', -            'ext': 'flv', -            'title': 'youtubedl test video _BaW_jenozKc.mp4', -            'description': 'Description', -        } -    } diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index a262a9f6d..8603fd692 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,108 +1,78 @@  from __future__ import unicode_literals  import re -import json  import base64  from .common import InfoExtractor  from ..utils import ( -    unescapeHTML, -    ExtractorError, -    determine_ext,      int_or_none, +    float_or_none, +    ExtractorError, +    unsmuggle_url,  ) +from ..compat import compat_urllib_parse  class OoyalaBaseIE(InfoExtractor): -    def _extract_result(self, info, more_info): -        embedCode = info['embedCode'] -        video_url = info.get('ipad_url') or info['url'] - -        if determine_ext(video_url) == 'm3u8': -            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') -        else: -            formats = [{ -                'url': video_url, -                'ext': 'mp4', -            }] - -        return { -            'id': embedCode, -            'title': unescapeHTML(info['title']), -            'formats': formats, -            'description': unescapeHTML(more_info['description']), -            'thumbnail': more_info['promo'], +    def _extract(self, content_tree_url, video_id, domain='example.org'): +        content_tree = self._download_json(content_tree_url, video_id)['content_tree'] +        metadata = content_tree[list(content_tree)[0]] +        embed_code = metadata['embed_code'] +        pcode = metadata.get('asset_pcode') or embed_code +        video_info = { +            'id': embed_code, +            'title': metadata['title'], +            'description': metadata.get('description'), +            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), +            'duration': float_or_none(metadata.get('duration'), 1000),          } -    def _extract(self, player_url, video_id): -        player = self._download_webpage(player_url, video_id) -        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', -                                        player, 'mobile player url') -        # Looks like some videos are only available for particular devices -        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 -        # is only available for ipad) -        # Working around with fetching URLs for all the devices found starting with 'unknown' -        # until we succeed or eventually fail for each device. -        devices = re.findall(r'device\s*=\s*"([^"]+)";', player) -        devices.remove('unknown') -        devices.insert(0, 'unknown') -        for device in devices: -            mobile_player = self._download_webpage( -                '%s&device=%s' % (mobile_url, device), video_id, -                'Downloading mobile player JS for %s device' % device) -            videos_info = self._search_regex( -                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', -                mobile_player, 'info', fatal=False, default=None) -            if videos_info: -                break - -        if not videos_info: -            formats = [] +        urls = [] +        formats = [] +        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):              auth_data = self._download_json( -                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), -                video_id) - -            cur_auth_data = auth_data['authorization_data'][video_id] - -            for stream in cur_auth_data['streams']: -                formats.append({ -                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), -                    'ext': stream.get('delivery_type'), -                    'format': stream.get('video_codec'), -                    'format_id': stream.get('profile'), -                    'width': int_or_none(stream.get('width')), -                    'height': int_or_none(stream.get('height')), -                    'abr': int_or_none(stream.get('audio_bitrate')), -                    'vbr': int_or_none(stream.get('video_bitrate')), -                }) -            if formats: -                return { -                    'id': video_id, -                    'formats': formats, -                    'title': 'Ooyala video', -                } - -            if not cur_auth_data['authorized']: -                raise ExtractorError(cur_auth_data['message'], expected=True) - -        if not videos_info: -            raise ExtractorError('Unable to extract info') -        videos_info = videos_info.replace('\\"', '"') -        videos_more_info = self._search_regex( -            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') -        videos_info = json.loads(videos_info) -        videos_more_info = json.loads(videos_more_info) - -        if videos_more_info.get('lineup'): -            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] -            return { -                '_type': 'playlist', -                'id': video_id, -                'title': unescapeHTML(videos_more_info['title']), -                'entries': videos, -            } -        else: -            return self._extract_result(videos_info[0], videos_more_info) +                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}), +                video_id, 'Downloading %s JSON' % supported_format) + +            cur_auth_data = auth_data['authorization_data'][embed_code] + +            if cur_auth_data['authorized']: +                for stream in cur_auth_data['streams']: +                    url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') +                    if url in urls: +                        continue +                    urls.append(url) +                    delivery_type = stream['delivery_type'] +                    if delivery_type == 'hls' or '.m3u8' in url: +                        m3u8_formats = self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) +                        if m3u8_formats: +                            formats.extend(m3u8_formats) +                    elif delivery_type == 'hds' or '.f4m' in url: +                        f4m_formats = self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False) +                        if f4m_formats: +                            formats.extend(f4m_formats) +                    elif '.smil' in url: +                        smil_formats = self._extract_smil_formats(url, embed_code, fatal=False) +                        if smil_formats: +                            formats.extend(smil_formats) +                    else: +                        formats.append({ +                            'url': url, +                            'ext': stream.get('delivery_type'), +                            'vcodec': stream.get('video_codec'), +                            'format_id': delivery_type, +                            'width': int_or_none(stream.get('width')), +                            'height': int_or_none(stream.get('height')), +                            'abr': int_or_none(stream.get('audio_bitrate')), +                            'vbr': int_or_none(stream.get('video_bitrate')), +                            'fps': float_or_none(stream.get('framerate')), +                        }) +            else: +                raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True) +        self._sort_formats(formats) + +        video_info['formats'] = formats +        return video_info  class OoyalaIE(OoyalaBaseIE): @@ -117,6 +87,7 @@ class OoyalaIE(OoyalaBaseIE):                  'ext': 'mp4',                  'title': 'Explaining Data Recovery from Hard Drives and SSDs',                  'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', +                'duration': 853.386,              },          }, {              # Only available for ipad @@ -125,7 +96,7 @@ class OoyalaIE(OoyalaBaseIE):                  'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',                  'ext': 'mp4',                  'title': 'Simulation Overview - Levels of Simulation', -                'description': '', +                'duration': 194.948,              },          },          { @@ -136,7 +107,8 @@ class OoyalaIE(OoyalaBaseIE):              'info_dict': {                  'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',                  'ext': 'mp4', -                'title': 'Ooyala video', +                'title': 'Divide Tool Path.mp4', +                'duration': 204.405,              }          }      ] @@ -151,9 +123,11 @@ class OoyalaIE(OoyalaBaseIE):                                ie=cls.ie_key())      def _real_extract(self, url): +        url, smuggled_data = unsmuggle_url(url, {})          embed_code = self._match_id(url) -        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code -        return self._extract(player_url, embed_code) +        domain = smuggled_data.get('domain') +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) +        return self._extract(content_tree_url, embed_code, domain)  class OoyalaExternalIE(OoyalaBaseIE): @@ -170,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):                          .*?&pcode=                      )                      (?P<pcode>.+?) -                    (&|$) +                    (?:&|$)                      '''      _TEST = { @@ -179,7 +153,7 @@ class OoyalaExternalIE(OoyalaBaseIE):              'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',              'ext': 'mp4',              'title': 'dm_140128_30for30Shorts___JudgingJewellv2', -            'description': '', +            'duration': 1302000,          },          'params': {              # m3u8 download @@ -188,9 +162,6 @@ class OoyalaExternalIE(OoyalaBaseIE):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        partner_id = mobj.group('partner_id') -        video_id = mobj.group('id') -        pcode = mobj.group('pcode') -        player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode) -        return self._extract(player_url, video_id) +        partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) +        return self._extract(content_tree_url, video_id) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index b787e2a73..744e4a09a 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -15,16 +15,181 @@ from ..utils import (  class PBSIE(InfoExtractor): +    _STATIONS = ( +        (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'),  # http://www.pbs.org/ +        (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'),  # http://aptv.org/ +        (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'),  # http://www.gpb.org/ +        (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'),  # http://www.mpbonline.org +        (r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'),  # http://www.wnpt.org +        (r'video\.wfsu\.org', 'WFSU-TV (WFSU)'),  # http://wfsu.org/ +        (r'video\.wsre\.org', 'WSRE (WSRE)'),  # http://www.wsre.org +        (r'video\.wtcitv\.org', 'WTCI (WTCI)'),  # http://www.wtcitv.org +        (r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'),  # http://pba.org/ +        (r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'),  # http://alaskapublic.org/kakm +        # (r'kuac\.org', 'KUAC (KUAC)'),  # http://kuac.org/kuac-tv/ +        # (r'ktoo\.org', '360 North (KTOO)'),  # http://www.ktoo.org/ +        # (r'azpm\.org', 'KUAT 6 (KUAT)'),  # http://www.azpm.org/ +        (r'video\.azpbs\.org', 'Arizona PBS (KAET)'),  # http://www.azpbs.org +        (r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'),  # http://www.newmexicopbs.org/ +        (r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'),  # http://vegaspbs.org/ +        (r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'),  # http://www.aetn.org/ +        (r'video\.ket\.org', 'KET (WKLE)'),  # http://www.ket.org/ +        (r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'),  # http://www.wkno.org/ +        (r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'),  # http://www.lpb.org/ +        (r'videos\.oeta\.tv', 'OETA (KETA)'),  # http://www.oeta.tv +        (r'video\.optv\.org', 'Ozarks Public Television (KOZK)'),  # http://www.optv.org/ +        (r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'),  # http://www.wsiu.org/ +        (r'video\.keet\.org', 'KEET TV (KEET)'),  # http://www.keet.org +        (r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'),  # http://kixe.org/ +        (r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'),  # http://www.kpbs.org/ +        (r'video\.kqed\.org', 'KQED (KQED)'),  # http://www.kqed.org +        (r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'),  # http://www.kvie.org +        (r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'),  # http://www.pbssocal.org/ +        (r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'),  # http://www.valleypbs.org/ +        (r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'),  # http://cptv.org +        (r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'),  # http://www.knpb.org/ +        (r'video\.soptv\.org', 'SOPTV (KSYS)'),  # http://www.soptv.org +        # (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'),  # http://www.klcs.org +        # (r'krcb\.org', 'KRCB Television & Radio (KRCB)'),  # http://www.krcb.org +        # (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'),  # http://kvcr.org +        (r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'),  # http://www.rmpbs.org +        (r'video\.kenw\.org', 'KENW-TV3 (KENW)'),  # http://www.kenw.org +        (r'video\.kued\.org', 'KUED Channel 7 (KUED)'),  # http://www.kued.org +        (r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'),  # http://www.wyomingpbs.org +        (r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'),  # http://www.cpt12.org/ +        (r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'),  # http://www.kbyutv.org/ +        (r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'),  # http://www.thirteen.org +        (r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'),  # http://wgbh.org +        (r'video\.wgby\.org', 'WGBY (WGBY)'),  # http://www.wgby.org +        (r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'),  # http://www.njtvonline.org/ +        # (r'ripbs\.org', 'Rhode Island PBS (WSBE)'),  # http://www.ripbs.org/home/ +        (r'watch\.wliw\.org', 'WLIW21 (WLIW)'),  # http://www.wliw.org/ +        (r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'),  # http://www.mpt.org +        (r'watch\.weta\.org', 'WETA Television and Radio (WETA)'),  # http://www.weta.org +        (r'video\.whyy\.org', 'WHYY (WHYY)'),  # http://www.whyy.org +        (r'video\.wlvt\.org', 'PBS 39 (WLVT)'),  # http://www.wlvt.org/ +        (r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'),  # http://www.wvpt.net +        (r'video\.whut\.org', 'Howard University Television (WHUT)'),  # http://www.whut.org +        (r'video\.wedu\.org', 'WEDU PBS (WEDU)'),  # http://www.wedu.org +        (r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'),  # http://www.wgcu.org/ +        # (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'),  # http://www.wjct.org +        (r'video\.wpbt2\.org', 'WPBT2 (WPBT)'),  # http://www.wpbt2.org +        (r'video\.wucftv\.org', 'WUCF TV (WUCF)'),  # http://wucftv.org +        (r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'),  # http://www.wuft.org +        (r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'),  # http://www.wxel.org/home/ +        (r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'),  # http://www.wlrn.org/ +        (r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'),  # http://wusf.org/ +        (r'video\.scetv\.org', 'ETV (WRLK)'),  # http://www.scetv.org +        (r'video\.unctv\.org', 'UNC-TV (WUNC)'),  # http://www.unctv.org/ +        # (r'pbsguam\.org', 'PBS Guam (KGTF)'),  # http://www.pbsguam.org/ +        (r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'),  # http://www.pbshawaii.org/ +        (r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'),  # http://idahoptv.org +        (r'video\.ksps\.org', 'KSPS (KSPS)'),  # http://www.ksps.org/home/ +        (r'watch\.opb\.org', 'OPB (KOPB)'),  # http://www.opb.org +        (r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'),  # http://www.kwsu.org +        (r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'),  # http://will.illinois.edu/ +        (r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'),  # http://www.wsec.tv +        (r'video\.wttw\.com', 'WTTW11 (WTTW)'),  # http://www.wttw.com/ +        # (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'),  # http://www.wtvp.org/ +        (r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'),  # http://www.iptv.org/ +        (r'video\.ninenet\.org', 'Nine Network (KETC)'),  # http://www.ninenet.org +        (r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'),  # http://wfwa.org/ +        (r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'),  # http://www.wfyi.org +        (r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'),  # http://www.mptv.org +        (r'video\.wnin\.org', 'WNIN (WNIN)'),  # http://www.wnin.org/ +        (r'video\.wnit\.org', 'WNIT Public Television (WNIT)'),  # http://www.wnit.org/ +        (r'video\.wpt\.org', 'WPT (WPNE)'),  # http://www.wpt.org/ +        (r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'),  # http://wvut.org/ +        (r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'),  # http://www.weiu.net +        (r'video\.wqpt\.org', 'WQPT-TV (WQPT)'),  # http://www.wqpt.org +        (r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'),  # http://www.wycc.org +        # (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'),  # http://lakeshorepublicmedia.org/ +        (r'video\.wipb\.org', 'WIPB-TV (WIPB)'),  # http://wipb.org +        (r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'),  # http://indianapublicmedia.org/tv/ +        (r'watch\.cetconnect\.org', 'CET  (WCET)'),  # http://www.cetconnect.org +        (r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'),  # http://www.thinktv.org +        (r'video\.wbgu\.org', 'WBGU-TV (WBGU)'),  # http://wbgu.org +        (r'video\.wgvu\.org', 'WGVU TV (WGVU)'),  # http://www.wgvu.org/ +        (r'video\.netnebraska\.org', 'NET1 (KUON)'),  # http://netnebraska.org +        (r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'),  # http://www.pioneer.org +        (r'watch\.sdpb\.org', 'SDPB Television (KUSD)'),  # http://www.sdpb.org +        (r'video\.tpt\.org', 'TPT (KTCA)'),  # http://www.tpt.org +        (r'watch\.ksmq\.org', 'KSMQ (KSMQ)'),  # http://www.ksmq.org/ +        (r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'),  # http://www.kpts.org/ +        (r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'),  # http://ktwu.org +        # (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'),  # http://www.shptv.org +        # (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'),  # http://kcpt.org/ +        # (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'),  # http://www.blueridgepbs.org/ +        (r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'),  # http://easttennesseepbs.org +        (r'video\.wcte\.tv', 'WCTE-TV (WCTE)'),  # http://www.wcte.org +        (r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'),  # http://wljt.org/ +        (r'video\.wosu\.org', 'WOSU TV (WOSU)'),  # http://wosu.org/ +        (r'video\.woub\.org', 'WOUB/WOUC (WOUB)'),  # http://woub.org/tv/index.php?section=5 +        (r'video\.wvpublic\.org', 'WVPB (WVPB)'),  # http://wvpublic.org/ +        (r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'),  # http://www.wkyupbs.org +        # (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'),  # http://www.wyes.org +        (r'video\.kera\.org', 'KERA 13 (KERA)'),  # http://www.kera.org/ +        (r'video\.mpbn\.net', 'MPBN (WCBB)'),  # http://www.mpbn.net/ +        (r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'),  # http://www.mountainlake.org/ +        (r'video\.nhptv\.org', 'NHPTV (WENH)'),  # http://nhptv.org/ +        (r'video\.vpt\.org', 'Vermont PBS (WETK)'),  # http://www.vpt.org +        (r'video\.witf\.org', 'witf (WITF)'),  # http://www.witf.org +        (r'watch\.wqed\.org', 'WQED Multimedia (WQED)'),  # http://www.wqed.org/ +        (r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'),  # http://www.wmht.org/home/ +        (r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'),  # http://www.deltabroadcasting.org +        (r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'),  # http://www.dptv.org/ +        (r'video\.wcmu\.org', 'CMU Public Television (WCMU)'),  # http://www.wcmu.org +        (r'video\.wkar\.org', 'WKAR-TV (WKAR)'),  # http://wkar.org/ +        (r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'),  # http://wnmutv.nmu.edu +        (r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'),  # http://www.wdse.org/ +        (r'video\.wgte\.org', 'WGTE TV (WGTE)'),  # http://www.wgte.org +        (r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'),  # http://www.lakelandptv.org +        # (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'),  # http://www.prairiepublic.org/ +        (r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'),  # http://www.kmos.org/ +        (r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'),  # http://montanapbs.org +        (r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'),  # http://www.krwg.org +        (r'video\.kacvtv\.org', 'KACV (KACV)'),  # http://www.panhandlepbs.org/home/ +        (r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'),  # www.kcostv.org +        (r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'),  # http://www.wcny.org +        (r'video\.wned\.org', 'WNED (WNED)'),  # http://www.wned.org/ +        (r'watch\.wpbstv\.org', 'WPBS (WPBS)'),  # http://www.wpbstv.org +        (r'video\.wskg\.org', 'WSKG Public TV (WSKG)'),  # http://wskg.org +        (r'video\.wxxi\.org', 'WXXI (WXXI)'),  # http://wxxi.org +        (r'video\.wpsu\.org', 'WPSU (WPSU)'),  # http://www.wpsu.org +        # (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'),  # http://www.wqln.org +        (r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'),  # http://www.wvia.org/ +        (r'video\.wtvi\.org', 'WTVI (WTVI)'),  # http://www.wtvi.org/ +        # (r'whro\.org', 'WHRO (WHRO)'),  # http://whro.org +        (r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'),  # http://www.WesternReservePublicMedia.org/ +        (r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'),  # http://www.wviz.org/ +        (r'video\.kcts9\.org', 'KCTS 9 (KCTS)'),  # http://kcts9.org/ +        (r'video\.basinpbs\.org', 'Basin PBS (KPBT)'),  # http://www.basinpbs.org +        (r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'),  # http://www.houstonpublicmedia.org/ +        # (r'tamu\.edu', 'KAMU - TV (KAMU)'),  # http://KAMU.tamu.edu +        # (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'),  # http://www.kedt.org +        (r'video\.klrn\.org', 'KLRN (KLRN)'),  # http://www.klrn.org +        (r'video\.klru\.tv', 'KLRU (KLRU)'),  # http://www.klru.org +        # (r'kmbh\.org', 'KMBH-TV (KMBH)'),  # http://www.kmbh.org +        # (r'knct\.org', 'KNCT (KNCT)'),  # http://www.knct.org +        # (r'ktxt\.org', 'KTTZ-TV (KTXT)'),  # http://www.ktxt.org +        (r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'),  # http://www.wtjx.org/ +        (r'video\.ideastations\.org', 'WCVE PBS (WCVE)'),  # http://ideastations.org/ +        (r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'),  # http://kbtc.org +    ) + +    IE_NAME = 'pbs' +    IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1]) +      _VALID_URL = r'''(?x)https?://          (?:             # Direct video URL -           video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? | +           (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |             # Article with embedded player (or direct video)             (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |             # Player             (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/          ) -    ''' +    ''' % '|'.join(list(zip(*_STATIONS))[0])      _TESTS = [          { @@ -174,6 +339,10 @@ class PBSIE(InfoExtractor):          {              'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',              'only_matching': True, +        }, +        { +            'url': 'http://watch.knpb.org/video/2365616055/', +            'only_matching': True,          }      ]      _ERRORS = { @@ -204,6 +373,7 @@ class PBSIE(InfoExtractor):              MEDIA_ID_REGEXES = [                  r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed                  r'class="coveplayerid">([^<]+)<',                       # coveplayer +                r'<section[^>]+data-coveid="(\d+)"',                    # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/                  r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>',  # jwplayer              ] diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py index 551c8c9f0..bc559d1df 100644 --- a/youtube_dl/extractor/pladform.py +++ b/youtube_dl/extractor/pladform.py @@ -1,6 +1,8 @@  # coding: utf-8  from __future__ import unicode_literals +import re +  from .common import InfoExtractor  from ..utils import (      ExtractorError, @@ -44,6 +46,13 @@ class PladformIE(InfoExtractor):          'only_matching': True,      }] +    @staticmethod +    def _extract_url(webpage): +        mobj = re.search( +            r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage) +        if mobj: +            return mobj.group('url') +      def _real_extract(self, url):          video_id = self._match_id(url) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index aa7dbcb63..55c11b3bf 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -1,5 +1,6 @@  from __future__ import unicode_literals +import re  import json  import random  import collections @@ -14,6 +15,7 @@ from ..utils import (      ExtractorError,      int_or_none,      parse_duration, +    qualities,      sanitized_Request,  ) @@ -140,15 +142,28 @@ class PluralsightIE(PluralsightBaseIE):              'low': {'width': 640, 'height': 480},              'medium': {'width': 848, 'height': 640},              'high': {'width': 1024, 'height': 768}, +            'high-widescreen': {'width': 1280, 'height': 720},          } +        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',) +        quality_key = qualities(QUALITIES_PREFERENCE) +          AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])          ALLOWED_QUALITIES = ( -            AllowedQuality('webm', ('high',)), -            AllowedQuality('mp4', ('low', 'medium', 'high',)), +            AllowedQuality('webm', ['high', ]), +            AllowedQuality('mp4', ['low', 'medium', 'high', ]),          ) +        # Some courses also offer widescreen resolution for high quality (see +        # https://github.com/rg3/youtube-dl/issues/7766) +        widescreen = True if re.search( +            r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage) else False +        best_quality = 'high-widescreen' if widescreen else 'high' +        if widescreen: +            for allowed_quality in ALLOWED_QUALITIES: +                allowed_quality.qualities.append(best_quality) +          # In order to minimize the number of calls to ViewClip API and reduce          # the probability of being throttled or banned by Pluralsight we will request          # only single format until formats listing was explicitly requested. @@ -157,19 +172,19 @@ class PluralsightIE(PluralsightBaseIE):          else:              def guess_allowed_qualities():                  req_format = self._downloader.params.get('format') or 'best' -                req_format_split = req_format.split('-') +                req_format_split = req_format.split('-', 1)                  if len(req_format_split) > 1:                      req_ext, req_quality = req_format_split                      for allowed_quality in ALLOWED_QUALITIES:                          if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:                              return (AllowedQuality(req_ext, (req_quality, )), )                  req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4' -                return (AllowedQuality(req_ext, ('high', )), ) +                return (AllowedQuality(req_ext, (best_quality, )), )              allowed_qualities = guess_allowed_qualities()          formats = [] -        for ext, qualities in allowed_qualities: -            for quality in qualities: +        for ext, qualities_ in allowed_qualities: +            for quality in qualities_:                  f = QUALITIES[quality].copy()                  clip_post = {                      'a': author, @@ -205,6 +220,7 @@ class PluralsightIE(PluralsightBaseIE):                      'url': clip_url,                      'ext': ext,                      'format_id': format_id, +                    'quality': quality_key(quality),                  })                  formats.append(f)          self._sort_formats(formats) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 6b09550b0..9db62adb1 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -17,9 +17,9 @@ from ..utils import (  class RutubeIE(InfoExtractor):      IE_NAME = 'rutube'      IE_DESC = 'Rutube videos' -    _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})' +    _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})' -    _TEST = { +    _TESTS = [{          'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',          'info_dict': {              'id': '3eac3b4561676c17df9132a9a1e62e3e', @@ -36,7 +36,10 @@ class RutubeIE(InfoExtractor):              # It requires ffmpeg (m3u8 download)              'skip_download': True,          }, -    } +    }, { +        'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 919704261..7de7b7273 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -6,12 +6,12 @@ import re  from .common import InfoExtractor  from .brightcove import BrightcoveLegacyIE -from ..compat import compat_urllib_parse  from ..utils import (      ExtractorError,      sanitized_Request,      smuggle_url,      std_headers, +    urlencode_postdata,  ) @@ -57,7 +57,7 @@ class SafariBaseIE(InfoExtractor):          }          request = sanitized_Request( -            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers) +            self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)          login_page = self._download_webpage(              request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dl/extractor/skynewsarabia.py index f09fee102..05e1b02ad 100644 --- a/youtube_dl/extractor/skynewsarabia.py +++ b/youtube_dl/extractor/skynewsarabia.py @@ -9,7 +9,7 @@ from ..utils import (  ) -class SkyNewArabiaBaseIE(InfoExtractor): +class SkyNewsArabiaBaseIE(InfoExtractor):      _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images'      def _call_api(self, path, value): @@ -40,7 +40,7 @@ class SkyNewArabiaBaseIE(InfoExtractor):          } -class SkyNewsArabiaIE(SkyNewArabiaBaseIE): +class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):      IE_NAME = 'skynewsarabia:video'      _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)'      _TEST = { @@ -66,7 +66,7 @@ class SkyNewsArabiaIE(SkyNewArabiaBaseIE):          return self._extract_video_info(video_data) -class SkyNewsArabiaArticleIE(SkyNewArabiaBaseIE): +class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):      IE_NAME = 'skynewsarabia:video'      _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'      _TESTS = [{ diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index daf6ad555..ea8fc258d 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -158,6 +158,7 @@ class SohuIE(InfoExtractor):                          'file': clips_url[i],                          'new': su[i],                          'prod': 'flash', +                        'rb': 1,                      }                      if cdnId is not None: diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index 77eec0bc7..16e1bf2d6 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -11,7 +11,7 @@ from ..utils import (  class SrfIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})' +    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/(?:tv|radio)/[^/]+/(?P<media_type>video|audio)/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'      _TESTS = [{          'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',          'md5': '4cd93523723beff51bb4bee974ee238d', @@ -36,6 +36,20 @@ class SrfIE(InfoExtractor):              'timestamp': 1373493600,          },      }, { +        'url': 'http://www.srf.ch/play/radio/hoerspielarchiv-srf-musikwelle/audio/saegel-ohni-wind-von-jakob-stebler?id=415bf3d3-6429-4de7-968d-95866e37cfbc', +        'md5': '', +        'info_dict': { +            'id': '415bf3d3-6429-4de7-968d-95866e37cfbc', +            'display_id': 'saegel-ohni-wind-von-jakob-stebler', +            'ext': 'mp3', +            'upload_date': '20080518', +            'title': '«Sägel ohni Wind» von Jakob Stebler', +            'timestamp': 1211112000, +        }, +        'params': { +            'skip_download': True,  # requires rtmpdump +        }, +    }, {          'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',          'only_matching': True,      }, { @@ -44,11 +58,13 @@ class SrfIE(InfoExtractor):      }]      def _real_extract(self, url): -        video_id = self._match_id(url) -        display_id = re.match(self._VALID_URL, url).group('display_id') or video_id +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        media_type = mobj.group('media_type') +        display_id = mobj.group('display_id') or video_id          video_data = self._download_xml( -            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id, +            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/%s/play/%s.xml' % (media_type, video_id),              display_id)          title = xpath_text( @@ -64,7 +80,7 @@ class SrfIE(InfoExtractor):              for url_node in item.findall('url'):                  quality = url_node.attrib['quality']                  full_url = url_node.text -                original_ext = determine_ext(full_url) +                original_ext = determine_ext(full_url).lower()                  format_id = '%s-%s' % (quality, item.attrib['protocol'])                  if original_ext == 'f4m':                      formats.extend(self._extract_f4m_formats( diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index 117afa9bf..e0477382c 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -16,6 +16,7 @@ class TeachingChannelIE(InfoExtractor):              'ext': 'mp4',              'title': 'A History of Teaming',              'description': 'md5:2a9033db8da81f2edffa4c99888140b3', +            'duration': 422.255,          },          'params': {              # m3u8 download diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py new file mode 100644 index 000000000..a363b4d40 --- /dev/null +++ b/youtube_dl/extractor/tele13.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( +    js_to_json, +    qualities, +    determine_ext, +) + + +class Tele13IE(InfoExtractor): +    _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)' +    _TESTS = [ +        { +            'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', +            'md5': '4cb1fa38adcad8fea88487a078831755', +            'info_dict': { +                'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', +                'ext': 'mp4', +                'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda', +            }, +            'params': { +                # HTTP Error 404: Not Found +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok', +            'md5': '867adf6a3b3fef932c68a71d70b70946', +            'info_dict': { +                'id': 'rOoKv2OMpOw', +                'ext': 'mp4', +                'title': 'Shooting star seen on 7-Sep-2015', +                'description': 'md5:7292ff2a34b2f673da77da222ae77e1e', +                'uploader': 'Porjai Jaturongkhakun', +                'upload_date': '20150906', +                'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw', +            }, +            'add_ie': ['Youtube'], +        } +    ] + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) + +        setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code') +        sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json) + +        preference = qualities(['Móvil', 'SD', 'HD']) +        formats = [] +        urls = [] +        for f in sources: +            format_url = f['file'] +            if format_url and format_url not in urls: +                ext = determine_ext(format_url) +                if ext == 'm3u8': +                    m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) +                    if m3u8_formats: +                        formats.extend(m3u8_formats) +                elif YoutubeIE.suitable(format_url): +                    return self.url_result(format_url, 'Youtube') +                else: +                    formats.append({ +                        'url': format_url, +                        'format_id': f.get('label'), +                        'preference': preference(f.get('label')), +                        'ext': ext, +                    }) +                urls.append(format_url) +        self._sort_formats(formats) + +        return { +            'id': display_id, +            'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'), +            'description': self._html_search_meta('description', webpage, 'description'), +            'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 3a68eaa80..6890021cf 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor  class TF1IE(InfoExtractor):      """TF1 uses the wat.tv player.""" -    _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' +    _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'      _TESTS = [{          'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',          'info_dict': { @@ -22,7 +22,7 @@ class TF1IE(InfoExtractor):      }, {          'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',          'info_dict': { -            'id': '12043945', +            'id': 'le-grand-mysterioso-chuggington-7085291-739',              'ext': 'mp4',              'title': 'Le grand Mystérioso - Chuggington',              'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.', @@ -32,22 +32,24 @@ class TF1IE(InfoExtractor):              # Sometimes wat serves the whole file with the --test option              'skip_download': True,          }, +        'skip': 'HTTP Error 410: Gone',      }, {          'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',          'only_matching': True,      }, {          'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',          'only_matching': True, +    }, { +        'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html', +        'only_matching': True,      }]      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        embed_url = self._html_search_regex( -            r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url') -        embed_page = self._download_webpage(embed_url, video_id, -                                            'Downloading embed player page') -        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id') +        wat_id = self._html_search_regex( +            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1', +            webpage, 'wat id', group='id')          wat_info = self._download_json(              'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)          return self.url_result(wat_info['media']['url'], 'Wat') diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 1555aa77c..0bf6726b5 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -16,11 +16,12 @@ from ..compat import (  from ..utils import (      determine_ext,      ExtractorError, -    xpath_with_ns, -    unsmuggle_url, +    float_or_none,      int_or_none, +    sanitized_Request, +    unsmuggle_url,      url_basename, -    float_or_none, +    xpath_with_ns,  )  default_ns = 'http://www.w3.org/2005/SMIL21/Language' @@ -204,7 +205,12 @@ class ThePlatformIE(ThePlatformBaseIE):              smil_url = url          # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)          elif '/guid/' in url: -            webpage = self._download_webpage(url, video_id) +            headers = {} +            source_url = smuggled_data.get('source_url') +            if source_url: +                headers['Referer'] = source_url +            request = sanitized_Request(url, headers=headers) +            webpage = self._download_webpage(request, video_id)              smil_url = self._search_regex(                  r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',                  webpage, 'smil url', group='url') diff --git a/youtube_dl/extractor/toggle.py b/youtube_dl/extractor/toggle.py new file mode 100644 index 000000000..a47239952 --- /dev/null +++ b/youtube_dl/extractor/toggle.py @@ -0,0 +1,194 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    ExtractorError, +    float_or_none, +    int_or_none, +    parse_iso8601, +    sanitized_Request, +) + + +class ToggleIE(InfoExtractor): +    IE_NAME = 'toggle' +    _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)' +    _TESTS = [{ +        'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115', +        'info_dict': { +            'id': '343115', +            'ext': 'mp4', +            'title': 'Lion Moms Premiere', +            'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b', +            'upload_date': '20150910', +            'timestamp': 1441858274, +        }, +        'params': { +            'skip_download': 'm3u8 download', +        } +    }, { +        'note': 'DRM-protected video', +        'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413', +        'info_dict': { +            'id': '341413', +            'ext': 'wvm', +            'title': 'Dug\'s Special Mission', +            'description': 'md5:e86c6f4458214905c1772398fabc93e0', +            'upload_date': '20150827', +            'timestamp': 1440644006, +        }, +        'params': { +            'skip_download': 'DRM-protected wvm download', +        } +    }, { +        # this also tests correct video id extraction +        'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay', +        'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861', +        'info_dict': { +            'id': '332861', +            'ext': 'mp4', +            'title': '28th SEA Games (5 Show) -  Episode  11', +            'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa', +            'upload_date': '20150605', +            'timestamp': 1433480166, +        }, +        'params': { +            'skip_download': 'DRM-protected wvm download', +        }, +        'skip': 'm3u8 links are geo-restricted' +    }, { +        'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331', +        'only_matching': True, +    }, { +        'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367', +        'only_matching': True, +    }, { +        'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302', +        'only_matching': True, +    }, { +        'url': 'http://video.toggle.sg/en/movies/seven-days/321936', +        'only_matching': True, +    }] + +    _FORMAT_PREFERENCES = { +        'wvm-STBMain': -10, +        'wvm-iPadMain': -20, +        'wvm-iPhoneMain': -30, +        'wvm-Android': -40, +    } +    _API_USER = 'tvpapi_147' +    _API_PASS = '11111' + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage( +            url, video_id, note='Downloading video page') + +        api_user = self._search_regex( +            r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser', +            default=self._API_USER, group='user') +        api_pass = self._search_regex( +            r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass', +            default=self._API_PASS, group='pass') + +        params = { +            'initObj': { +                'Locale': { +                    'LocaleLanguage': '', +                    'LocaleCountry': '', +                    'LocaleDevice': '', +                    'LocaleUserState': 0 +                }, +                'Platform': 0, +                'SiteGuid': 0, +                'DomainID': '0', +                'UDID': '', +                'ApiUser': api_user, +                'ApiPass': api_pass +            }, +            'MediaID': video_id, +            'mediaType': 0, +        } + +        req = sanitized_Request( +            'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo', +            json.dumps(params).encode('utf-8')) +        info = self._download_json(req, video_id, 'Downloading video info json') + +        title = info['MediaName'] + +        formats = [] +        for video_file in info.get('Files', []): +            video_url, vid_format = video_file.get('URL'), video_file.get('Format') +            if not video_url or not vid_format: +                continue +            ext = determine_ext(video_url) +            vid_format = vid_format.replace(' ', '') +            # if geo-restricted, m3u8 is inaccessible, but mp4 is okay +            if ext == 'm3u8': +                m3u8_formats = self._extract_m3u8_formats( +                    video_url, video_id, ext='mp4', m3u8_id=vid_format, +                    note='Downloading %s m3u8 information' % vid_format, +                    errnote='Failed to download %s m3u8 information' % vid_format, +                    fatal=False) +                if m3u8_formats: +                    formats.extend(m3u8_formats) +            elif ext in ('mp4', 'wvm'): +                # wvm are drm-protected files +                formats.append({ +                    'ext': ext, +                    'url': video_url, +                    'format_id': vid_format, +                    'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1, +                    'format_note': 'DRM-protected video' if ext == 'wvm' else None +                }) +        if not formats: +            # Most likely because geo-blocked +            raise ExtractorError('No downloadable videos found', expected=True) +        self._sort_formats(formats) + +        duration = int_or_none(info.get('Duration')) +        description = info.get('Description') +        created_at = parse_iso8601(info.get('CreationDate') or None) + +        average_rating = float_or_none(info.get('Rating')) +        view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter')) +        like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter')) + +        thumbnails = [] +        for picture in info.get('Pictures', []): +            if not isinstance(picture, dict): +                continue +            pic_url = picture.get('URL') +            if not pic_url: +                continue +            thumbnail = { +                'url': pic_url, +            } +            pic_size = picture.get('PicSize', '') +            m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size) +            if m: +                thumbnail.update({ +                    'width': int(m.group('width')), +                    'height': int(m.group('height')), +                }) +            thumbnails.append(thumbnail) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'duration': duration, +            'timestamp': created_at, +            'average_rating': average_rating, +            'view_count': view_count, +            'like_count': like_count, +            'thumbnails': thumbnails, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index 185accc4b..a800449e9 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -1,80 +1,103 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( +    ExtractorError, +    int_or_none, +    parse_iso8601, +)  class TriluliluIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)' -    _TEST = { -        'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1', -        'md5': 'c1450a00da251e2769b74b9005601cac', +    _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)' +    _TESTS = [{ +        'url': 'http://www.trilulilu.ro/big-buck-bunny-1', +        'md5': '68da087b676a6196a413549212f60cc6',          'info_dict': {              'id': 'ae2899e124140b',              'ext': 'mp4',              'title': 'Big Buck Bunny',              'description': ':) pentru copilul din noi', +            'uploader_id': 'chipy', +            'upload_date': '20120304', +            'timestamp': 1330830647, +            'uploader': 'chipy', +            'view_count': int, +            'like_count': int, +            'comment_count': int,          }, -    } +    }, { +        'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta', +        'md5': '929dfb8729dc71750463af88bbbbf4a4', +        'info_dict': { +            'id': 'f299710e3c91c5', +            'ext': 'mp4', +            'title': 'Adena ft. Morreti - Inocenta', +            'description': 'pop music', +            'uploader_id': 'VEVOmixt', +            'upload_date': '20151204', +            'uploader': 'VEVOmixt', +            'timestamp': 1449187937, +            'view_count': int, +            'like_count': int, +            'comment_count': int, +        }, +    }]      def _real_extract(self, url):          display_id = self._match_id(url) -        webpage = self._download_webpage(url, display_id) +        media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id) -        if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage): -            raise ExtractorError( -                'This video is not available in your country.', expected=True) -        elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage): +        age_limit = 0 +        errors = media_info.get('errors', {}) +        if errors.get('friends'):              raise ExtractorError('This video is private.', expected=True) +        elif errors.get('geoblock'): +            raise ExtractorError('This video is not available in your country.', expected=True) +        elif errors.get('xxx_unlogged'): +            age_limit = 18 -        flashvars_str = self._search_regex( -            r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None) +        media_class = media_info.get('class') +        if media_class not in ('video', 'audio'): +            raise ExtractorError('not a video or an audio') -        if flashvars_str: -            flashvars = self._parse_json(flashvars_str, display_id) -        else: -            raise ExtractorError( -                'This page does not contain videos', expected=True) +        user = media_info.get('user', {}) -        if flashvars['isMP3'] == 'true': -            raise ExtractorError( -                'Audio downloads are currently not supported', expected=True) +        thumbnail = media_info.get('cover_url') +        if thumbnail: +            thumbnail.format(width='1600', height='1200') -        video_id = flashvars['hash'] -        title = self._og_search_title(webpage) -        thumbnail = self._og_search_thumbnail(webpage) -        description = self._og_search_description(webpage, default=None) - -        format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/' -                      'video-formats2' % flashvars) -        format_doc = self._download_xml( -            format_url, video_id, -            note='Downloading formats', -            errnote='Error while downloading formats') - -        video_url_template = ( -            'http://fs%(server)s.trilulilu.ro/stream.php?type=video' -            '&source=site&hash=%(hash)s&username=%(userid)s&' -            'key=ministhebest&format=%%s&sig=&exp=' % -            flashvars) -        formats = [ -            { -                'format_id': fnode.text.partition('-')[2], -                'url': video_url_template % fnode.text, -                'ext': fnode.text.partition('-')[0] -            } - -            for fnode in format_doc.findall('./formats/format') -        ] +        # TODO: get correct ext for audio files +        stream_type = media_info.get('stream_type') +        formats = [{ +            'url': media_info['href'], +            'ext': stream_type, +        }] +        if media_info.get('is_hd'): +            formats.append({ +                'format_id': 'hd', +                'url': media_info['hrefhd'], +                'ext': stream_type, +            }) +        if media_class == 'audio': +            formats[0]['vcodec'] = 'none' +        else: +            formats[0]['format_id'] = 'sd'          return { -            'id': video_id, +            'id': media_info['identifier'].split('|')[1],              'display_id': display_id,              'formats': formats, -            'title': title, -            'description': description, +            'title': media_info['title'], +            'description': media_info.get('description'),              'thumbnail': thumbnail, +            'uploader_id': user.get('username'), +            'uploader': user.get('fullname'), +            'timestamp': parse_iso8601(media_info.get('published'), ' '), +            'duration': int_or_none(media_info.get('duration')), +            'view_count': int_or_none(media_info.get('count_views')), +            'like_count': int_or_none(media_info.get('count_likes')), +            'comment_count': int_or_none(media_info.get('count_comments')), +            'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 571289421..02dfd36f4 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..compat import compat_etree_fromstring +from ..compat import ( +    compat_etree_fromstring, +    compat_urlparse, +)  from ..utils import (      ExtractorError,      int_or_none, @@ -67,6 +70,17 @@ class VevoIE(InfoExtractor):          'params': {              'skip_download': 'true',          } +    }, { +        'note': 'No video_info', +        'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', +        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', +        'info_dict': { +            'id': 'USUV71503000', +            'ext': 'mp4', +            'title': 'Till I Die - K Camp ft. T.I.', +            'duration': 193, +        }, +        'expected_warnings': ['Unable to download SMIL file'],      }]      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' @@ -81,11 +95,17 @@ class VevoIE(InfoExtractor):          if webpage is False:              self._oauth_token = None          else: +            if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: +                raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True) +              self._oauth_token = self._search_regex(                  r'access_token":\s*"([^"]+)"',                  webpage, 'access token', fatal=False)      def _formats_from_json(self, video_info): +        if not video_info: +            return [] +          last_version = {'version': -1}          for version in video_info['videoVersions']:              # These are the HTTP downloads, other types are for different manifests @@ -110,9 +130,8 @@ class VevoIE(InfoExtractor):              })          return formats -    def _formats_from_smil(self, smil_xml): +    def _formats_from_smil(self, smil_doc):          formats = [] -        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))          els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')          for el in els:              src = el.attrib['src'] @@ -145,14 +164,14 @@ class VevoIE(InfoExtractor):              })          return formats -    def _download_api_formats(self, video_id): +    def _download_api_formats(self, video_id, video_url):          if not self._oauth_token:              self._downloader.report_warning(                  'No oauth token available, skipping API HLS download')              return [] -        api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( -            video_id, self._oauth_token) +        api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( +            video_id, self._oauth_token))          api_data = self._download_json(              api_url, video_id,              note='Downloading HLS formats', @@ -166,18 +185,26 @@ class VevoIE(InfoExtractor):              preference=0)      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url) + +        webpage = None          json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id          response = self._download_json(json_url, video_id) -        video_info = response['video'] +        video_info = response['video'] or {} -        if not video_info: +        if not video_info and response.get('statusCode') != 909:              if 'statusMessage' in response:                  raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)              raise ExtractorError('Unable to extract videos') +        if not video_info: +            if url.startswith('vevo:'): +                raise ExtractorError('Please specify full Vevo URL for downloading', expected=True) +            webpage = self._download_webpage(url, video_id) + +        title = video_info.get('title') or self._og_search_title(webpage) +          formats = self._formats_from_json(video_info)          is_explicit = video_info.get('isExplicit') @@ -189,11 +216,11 @@ class VevoIE(InfoExtractor):              age_limit = None          # Download via HLS API -        formats.extend(self._download_api_formats(video_id)) +        formats.extend(self._download_api_formats(video_id, url))          # Download SMIL          smil_blocks = sorted(( -            f for f in video_info['videoVersions'] +            f for f in video_info.get('videoVersions', [])              if f['sourceType'] == 13),              key=lambda f: f['version'])          smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( @@ -205,23 +232,26 @@ class VevoIE(InfoExtractor):              if smil_url_m is not None:                  smil_url = smil_url_m          if smil_url: -            smil_xml = self._download_webpage( -                smil_url, video_id, 'Downloading SMIL info', fatal=False) -            if smil_xml: -                formats.extend(self._formats_from_smil(smil_xml)) +            smil_doc = self._download_smil(smil_url, video_id, fatal=False) +            if smil_doc: +                formats.extend(self._formats_from_smil(smil_doc))          self._sort_formats(formats) -        timestamp_ms = int_or_none(self._search_regex( +        timestamp = int_or_none(self._search_regex(              r'/Date\((\d+)\)/', -            video_info['launchDate'], 'launch date', fatal=False)) +            video_info['launchDate'], 'launch date', fatal=False), +            scale=1000) if video_info else None + +        duration = video_info.get('duration') or int_or_none( +            self._html_search_meta('video:duration', webpage))          return {              'id': video_id, -            'title': video_info['title'], +            'title': title,              'formats': formats, -            'thumbnail': video_info['imageUrl'], -            'timestamp': timestamp_ms // 1000, -            'uploader': video_info['mainArtists'][0]['artistName'], -            'duration': video_info['duration'], +            'thumbnail': video_info.get('imageUrl'), +            'timestamp': timestamp, +            'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None, +            'duration': duration,              'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 01af7a995..3db6286e4 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -15,6 +15,7 @@ class ViceIE(InfoExtractor):                  'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',                  'ext': 'mp4',                  'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', +                'duration': 725.983,              },              'params': {                  # Requires ffmpeg (m3u8 manifest) diff --git a/youtube_dl/extractor/videoweed.py b/youtube_dl/extractor/videoweed.py deleted file mode 100644 index ca2e50935..000000000 --- a/youtube_dl/extractor/videoweed.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import unicode_literals - -from .novamov import NovaMovIE - - -class VideoWeedIE(NovaMovIE): -    IE_NAME = 'videoweed' -    IE_DESC = 'VideoWeed' - -    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'} - -    _HOST = 'www.videoweed.es' - -    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' -    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>' - -    _TEST = { -        'url': 'http://www.videoweed.es/file/b42178afbea14', -        'md5': 'abd31a2132947262c50429e1d16c1bfd', -        'info_dict': { -            'id': 'b42178afbea14', -            'ext': 'flv', -            'title': 'optical illusion  dissapeared image magic illusion', -            'description': '' -        }, -    } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index f392ccf1c..ce08e6955 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -23,6 +23,7 @@ from ..utils import (      unsmuggle_url,      urlencode_postdata,      unescapeHTML, +    parse_filesize,  ) @@ -185,6 +186,20 @@ class VimeoIE(VimeoBaseInfoExtractor):              },          },          { +            # contains original format +            'url': 'https://vimeo.com/33951933', +            'md5': '53c688fa95a55bf4b7293d37a89c5c53', +            'info_dict': { +                'id': '33951933', +                'ext': 'mp4', +                'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute', +                'uploader': 'The DMCI', +                'uploader_id': 'dmci', +                'upload_date': '20111220', +                'description': 'md5:ae23671e82d05415868f7ad1aec21147', +            }, +        }, +        {              'url': 'https://vimeo.com/109815029',              'note': 'Video not completely processed, "failed" seed status',              'only_matching': True, @@ -392,6 +407,21 @@ class VimeoIE(VimeoBaseInfoExtractor):              comment_count = None          formats = [] +        download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={ +            'X-Requested-With': 'XMLHttpRequest'}) +        download_data = self._download_json(download_request, video_id, fatal=False) +        if download_data: +            source_file = download_data.get('source_file') +            if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'): +                formats.append({ +                    'url': source_file['download_url'], +                    'ext': source_file['extension'].lower(), +                    'width': int_or_none(source_file.get('width')), +                    'height': int_or_none(source_file.get('height')), +                    'filesize': parse_filesize(source_file.get('size')), +                    'format_id': source_file.get('public_name', 'Original'), +                    'preference': 1, +                })          config_files = config['video'].get('files') or config['request'].get('files', {})          for f in config_files.get('progressive', []):              video_url = f.get('url') @@ -408,12 +438,12 @@ class VimeoIE(VimeoBaseInfoExtractor):          m3u8_url = config_files.get('hls', {}).get('url')          if m3u8_url:              m3u8_formats = self._extract_m3u8_formats( -                m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False) +                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)              if m3u8_formats:                  formats.extend(m3u8_formats)          # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps          # at the same time without actual units specified. This lead to wrong sorting. -        self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id')) +        self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))          subtitles = {}          text_tracks = config['request'].get('text_tracks') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d99a42a9f..90557fa61 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -18,6 +18,7 @@ from ..utils import (      unified_strdate,  )  from .vimeo import VimeoIE +from .pladform import PladformIE  class VKIE(InfoExtractor): @@ -164,6 +165,11 @@ class VKIE(InfoExtractor):              # vk wrapper              'url': 'http://www.biqle.ru/watch/847655_160197695',              'only_matching': True, +        }, +        { +            # pladform embed +            'url': 'https://vk.com/video-76116461_171554880', +            'only_matching': True,          }      ] @@ -254,10 +260,13 @@ class VKIE(InfoExtractor):          if vimeo_url is not None:              return self.url_result(vimeo_url) +        pladform_url = PladformIE._extract_url(info_page) +        if pladform_url: +            return self.url_result(pladform_url) +          m_rutube = re.search(              r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)          if m_rutube is not None: -            self.to_screen('rutube video detected')              rutube_url = self._proto_relative_url(                  m_rutube.group(1).replace('\\', ''))              return self.url_result(rutube_url) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index b46802306..ef096cbd2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,8 +10,8 @@ from ..compat import (      compat_urlparse,  )  from ..utils import ( -    determine_ext,      unified_strdate, +    qualities,  ) @@ -33,6 +33,7 @@ class WDRIE(InfoExtractor):              'params': {                  'skip_download': True,              }, +            'skip': 'Page Not Found',          },          {              'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', @@ -47,6 +48,7 @@ class WDRIE(InfoExtractor):              'params': {                  'skip_download': True,              }, +            'skip': 'Page Not Found',          },          {              'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', @@ -71,6 +73,7 @@ class WDRIE(InfoExtractor):                  'upload_date': '20140717',                  'is_live': False              }, +            'skip': 'Page Not Found',          },          {              'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', @@ -83,10 +86,10 @@ class WDRIE(InfoExtractor):              'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',              'info_dict': {                  'id': 'mdb-103364', -                'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +                'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',                  'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',                  'ext': 'flv', -                'upload_date': '20150212', +                'upload_date': '20150101',                  'is_live': True              },              'params': { @@ -150,25 +153,52 @@ class WDRIE(InfoExtractor):          if upload_date:              upload_date = unified_strdate(upload_date) +        formats = [] +        preference = qualities(['S', 'M', 'L', 'XL']) +          if video_url.endswith('.f4m'): -            video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' -            ext = 'flv' +            f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id, f4m_id='hds', fatal=False) +            if f4m_formats: +                formats.extend(f4m_formats)          elif video_url.endswith('.smil'): -            fmt = self._extract_smil_formats(video_url, page_id)[0] -            video_url = fmt['url'] -            sep = '&' if '?' in video_url else '?' -            video_url += sep -            video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43' -            ext = fmt['ext'] +            smil_formats = self._extract_smil_formats(video_url, page_id, False, { +                'hdcore': '3.3.0', +                'plugin': 'aasp-3.3.0.99.43', +            }) +            if smil_formats: +                formats.extend(smil_formats)          else: -            ext = determine_ext(video_url) +            formats.append({ +                'url': video_url, +                'http_headers': { +                    'User-Agent': 'mobile', +                }, +            }) + +        m3u8_url = self._search_regex(r'rel="adaptiv"[^>]+href="([^"]+)"', webpage, 'm3u8 url', default=None) +        if m3u8_url: +            m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) +            if m3u8_formats: +                formats.extend(m3u8_formats) + +        direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) +        if direct_urls: +            for quality, video_url in direct_urls: +                formats.append({ +                    'url': video_url, +                    'preference': preference(quality), +                    'http_headers': { +                        'User-Agent': 'mobile', +                    }, +                }) + +        self._sort_formats(formats)          description = self._html_search_meta('Description', webpage, 'description')          return {              'id': page_id, -            'url': video_url, -            'ext': ext, +            'formats': formats,              'title': title,              'description': description,              'thumbnail': thumbnail, diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index e4f50e64c..041ff6c55 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -5,7 +5,7 @@ from .youtube import YoutubeIE  class WimpIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)/' +    _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)'      _TESTS = [{          'url': 'http://www.wimp.com/maruexhausted/',          'md5': 'ee21217ffd66d058e8b16be340b74883', @@ -28,18 +28,23 @@ class WimpIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) +          webpage = self._download_webpage(url, video_id) -        video_url = self._search_regex( -            [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"], -            webpage, 'video URL') -        if YoutubeIE.suitable(video_url): -            self.to_screen('Found YouTube video') + +        youtube_id = self._search_regex( +            r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']", +            webpage, 'video URL', default=None) +        if youtube_id:              return {                  '_type': 'url', -                'url': video_url, +                'url': youtube_id,                  'ie_key': YoutubeIE.ie_key(),              } +        video_url = self._search_regex( +            r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1', +            webpage, 'video URL', group='url') +          return {              'id': video_id,              'url': video_url, diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 69ecc837a..3a3432be8 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -25,8 +25,8 @@ class YoukuIE(InfoExtractor):      '''      _TESTS = [{ +        # MD5 is unstable          'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', -        'md5': '5f3af4192eabacc4501508d54a8cabd7',          'info_dict': {              'id': 'XMTc1ODE5Njcy_part1',              'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', @@ -42,6 +42,7 @@ class YoukuIE(InfoExtractor):              'title': '武媚娘传奇 85',          },          'playlist_count': 11, +        'skip': 'Available in China only',      }, {          'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',          'info_dict': { @@ -49,7 +50,6 @@ class YoukuIE(InfoExtractor):              'title': '花千骨 04',          },          'playlist_count': 13, -        'skip': 'Available in China only',      }, {          'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',          'note': 'Video protected with password', @@ -63,7 +63,7 @@ class YoukuIE(InfoExtractor):          },      }] -    def construct_video_urls(self, data1, data2): +    def construct_video_urls(self, data):          # get sid, token          def yk_t(s1, s2):              ls = list(range(256)) @@ -81,34 +81,24 @@ class YoukuIE(InfoExtractor):              return bytes(s)          sid, token = yk_t( -            b'becaf9be', base64.b64decode(data2['ep'].encode('ascii')) +            b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))          ).decode('ascii').split('_')          # get oip -        oip = data2['ip'] - -        # get fileid -        string_ls = list( -            'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890') -        shuffled_string_ls = [] -        seed = data1['seed'] -        N = len(string_ls) -        for ii in range(N): -            seed = (seed * 0xd3 + 0x754f) % 0x10000 -            idx = seed * len(string_ls) // 0x10000 -            shuffled_string_ls.append(string_ls[idx]) -            del string_ls[idx] +        oip = data['security']['ip']          fileid_dict = {} -        for format in data1['streamtypes']: -            streamfileid = [ -                int(i) for i in data1['streamfileids'][format].strip('*').split('*')] -            fileid = ''.join( -                [shuffled_string_ls[i] for i in streamfileid]) -            fileid_dict[format] = fileid[:8] + '%s' + fileid[10:] +        for stream in data['stream']: +            format = stream.get('stream_type') +            fileid = stream['stream_fileid'] +            fileid_dict[format] = fileid          def get_fileid(format, n): -            fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2) +            number = hex(int(str(n), 10))[2:].upper() +            if len(number) == 1: +                number = '0' + number +            streamfileids = fileid_dict[format] +            fileid = streamfileids[0:8] + number + streamfileids[10:]              return fileid          # get ep @@ -123,15 +113,15 @@ class YoukuIE(InfoExtractor):          # generate video_urls          video_urls_dict = {} -        for format in data1['streamtypes']: +        for stream in data['stream']: +            format = stream.get('stream_type')              video_urls = [] -            for dt in data1['segs'][format]: -                n = str(int(dt['no'])) +            for dt in stream['segs']: +                n = str(stream['segs'].index(dt))                  param = { -                    'K': dt['k'], +                    'K': dt['key'],                      'hd': self.get_hd(format),                      'myp': 0, -                    'ts': dt['seconds'],                      'ypp': 0,                      'ctype': 12,                      'ev': 1, @@ -142,7 +132,7 @@ class YoukuIE(InfoExtractor):                  video_url = \                      'http://k.youku.com/player/getFlvPath/' + \                      'sid/' + sid + \ -                    '_' + str(int(n) + 1).zfill(2) + \ +                    '_00' + \                      '/st/' + self.parse_ext_l(format) + \                      '/fileid/' + get_fileid(format, n) + '?' + \                      compat_urllib_parse.urlencode(param) @@ -153,23 +143,31 @@ class YoukuIE(InfoExtractor):      def get_hd(self, fm):          hd_id_dict = { +            '3gp': '0', +            '3gphd': '1',              'flv': '0', +            'flvhd': '0',              'mp4': '1', +            'mp4hd': '1', +            'mp4hd2': '1', +            'mp4hd3': '1',              'hd2': '2',              'hd3': '3', -            '3gp': '0', -            '3gphd': '1'          }          return hd_id_dict[fm]      def parse_ext_l(self, fm):          ext_dict = { +            '3gp': 'flv', +            '3gphd': 'mp4',              'flv': 'flv', +            'flvhd': 'flv',              'mp4': 'mp4', +            'mp4hd': 'mp4', +            'mp4hd2': 'flv', +            'mp4hd3': 'flv',              'hd2': 'flv',              'hd3': 'flv', -            '3gp': 'flv', -            '3gphd': 'mp4'          }          return ext_dict[fm] @@ -178,9 +176,13 @@ class YoukuIE(InfoExtractor):              '3gp': 'h6',              '3gphd': 'h5',              'flv': 'h4', +            'flvhd': 'h4',              'mp4': 'h3', +            'mp4hd': 'h3', +            'mp4hd2': 'h4', +            'mp4hd3': 'h4',              'hd2': 'h2', -            'hd3': 'h1' +            'hd3': 'h1',          }          return _dict[fm] @@ -188,45 +190,46 @@ class YoukuIE(InfoExtractor):          video_id = self._match_id(url)          def retrieve_data(req_url, note): -            req = sanitized_Request(req_url) +            headers = { +                'Referer': req_url, +            } +            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com') +            req = sanitized_Request(req_url, headers=headers)              cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')              if cn_verification_proxy:                  req.add_header('Ytdl-request-proxy', cn_verification_proxy)              raw_data = self._download_json(req, video_id, note=note) -            return raw_data['data'][0] + +            return raw_data['data']          video_password = self._downloader.params.get('videopassword', None)          # request basic data -        basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id +        basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id          if video_password: -            basic_data_url += '?password=%s' % video_password - -        data1 = retrieve_data( -            basic_data_url, -            'Downloading JSON metadata 1') -        data2 = retrieve_data( -            'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id, -            'Downloading JSON metadata 2') - -        error_code = data1.get('error_code') -        if error_code: -            error = data1.get('error') -            if error is not None and '因版权原因无法观看此视频' in error: +            basic_data_url += '&pwd=%s' % video_password + +        data = retrieve_data(basic_data_url, 'Downloading JSON metadata') + +        error = data.get('error') +        if error: +            error_note = error.get('note') +            if error_note is not None and '因版权原因无法观看此视频' in error_note:                  raise ExtractorError(                      'Youku said: Sorry, this video is available in China only', expected=True)              else: -                msg = 'Youku server reported error %i' % error_code -                if error is not None: -                    msg += ': ' + error +                msg = 'Youku server reported error %i' % error.get('code') +                if error_note is not None: +                    msg += ': ' + error_note                  raise ExtractorError(msg) -        title = data1['title'] +        # get video title +        title = data['video']['title']          # generate video_urls_dict -        video_urls_dict = self.construct_video_urls(data1, data2) +        video_urls_dict = self.construct_video_urls(data)          # construct info          entries = [{ @@ -235,10 +238,11 @@ class YoukuIE(InfoExtractor):              'formats': [],              # some formats are not available for all parts, we have to detect              # which one has all -        } for i in range(max(len(v) for v in data1['segs'].values()))] -        for fm in data1['streamtypes']: +        } for i in range(max(len(v.get('segs')) for v in data['stream']))] +        for stream in data['stream']: +            fm = stream.get('stream_type')              video_urls = video_urls_dict[fm] -            for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries): +            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):                  entry['formats'].append({                      'url': video_url,                      'format_id': self.get_format_name(fm), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9b39505ba..4aac2cc03 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -26,6 +26,7 @@ from ..compat import (  from ..utils import (      clean_html,      encode_dict, +    error_to_compat_str,      ExtractorError,      float_or_none,      get_element_by_attribute, @@ -33,6 +34,7 @@ from ..utils import (      int_or_none,      orderedSet,      parse_duration, +    remove_quotes,      remove_start,      sanitized_Request,      smuggle_url, @@ -395,12 +397,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20120506',                  'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', +                'alt_title': 'I Love It (feat. Charli XCX)',                  'description': 'md5:782e8651347686cba06e58f71ab51773',                  'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',                           'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',                           'iconic ep', 'iconic', 'love', 'it'],                  'uploader': 'Icona Pop',                  'uploader_id': 'IconaPop', +                'creator': 'Icona Pop',              }          },          { @@ -411,9 +415,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20130703',                  'title': 'Justin Timberlake - Tunnel Vision (Explicit)', +                'alt_title': 'Tunnel Vision',                  'description': 'md5:64249768eec3bc4276236606ea996373',                  'uploader': 'justintimberlakeVEVO',                  'uploader_id': 'justintimberlakeVEVO', +                'creator': 'Justin Timberlake',                  'age_limit': 18,              }          }, @@ -492,10 +498,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'id': 'nfWlot6h_JM',                  'ext': 'm4a',                  'title': 'Taylor Swift - Shake It Off', +                'alt_title': 'Shake It Off',                  'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',                  'uploader': 'TaylorSwiftVEVO',                  'uploader_id': 'TaylorSwiftVEVO',                  'upload_date': '20140818', +                'creator': 'Taylor Swift',              },              'params': {                  'youtube_include_dash_manifest': True, @@ -551,9 +559,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20100430',                  'uploader_id': 'deadmau5', +                'creator': 'deadmau5',                  'description': 'md5:12c56784b8032162bb936a5f76d55360',                  'uploader': 'deadmau5',                  'title': 'Deadmau5 - Some Chords (HD)', +                'alt_title': 'Some Chords',              },              'expected_warnings': [                  'DASH manifest missing', @@ -701,10 +711,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'id': 'lsguqyKfVQg',                  'ext': 'mp4',                  'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', +                'alt_title': 'Dark Walk',                  'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',                  'upload_date': '20151119',                  'uploader_id': 'IronSoulElf',                  'uploader': 'IronSoulElf', +                'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',              },              'params': {                  'skip_download': True, @@ -892,7 +904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,                  video_id, note=False)          except ExtractorError as err: -            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) +            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))              return {}          sub_lang_list = {} @@ -1308,6 +1320,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())          upload_date = unified_strdate(upload_date) +        m_music = re.search( +            r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li', +            video_webpage) +        if m_music: +            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title'))) +            video_creator = clean_html(m_music.group('creator')) +        else: +            video_alt_title = video_creator = None +          m_cat_container = self._search_regex(              r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',              video_webpage, 'categories', default=None) @@ -1537,7 +1558,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              'uploader': video_uploader,              'uploader_id': video_uploader_id,              'upload_date': upload_date, +            'creator': video_creator,              'title': video_title, +            'alt_title': video_alt_title,              'thumbnail': video_thumbnail,              'description': video_description,              'categories': video_categories, @@ -1752,6 +1775,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):          },      }] +    @classmethod +    def suitable(cls, url): +        return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url) +      def _real_extract(self, url):          channel_id = self._match_id(url) @@ -1825,10 +1852,10 @@ class YoutubeUserIE(YoutubeChannelIE):              return super(YoutubeUserIE, cls).suitable(url) -class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor): -    IE_DESC = 'YouTube.com user playlists' -    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists' -    IE_NAME = 'youtube:user:playlists' +class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): +    IE_DESC = 'YouTube.com user/channel playlists' +    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists' +    IE_NAME = 'youtube:playlists'      _TESTS = [{          'url': 'http://www.youtube.com/user/ThirstForScience/playlists', @@ -1845,6 +1872,13 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):              'id': 'igorkle1',              'title': 'Игорь Клейнер',          }, +    }, { +        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists', +        'playlist_mincount': 17, +        'info_dict': { +            'id': 'UCiU1dHvZObB2iP6xkJ__Icw', +            'title': 'Chem Player', +        },      }] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 2191e8b89..a7440c582 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -232,7 +232,7 @@ class JSInterpreter(object):      def extract_function(self, funcname):          func_m = re.search(              r'''(?x) -                (?:function\s+%s|[{;]%s\s*=\s*function|var\s+%s\s*=\s*function)\s* +                (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*                  \((?P<args>[^)]*)\)\s*                  \{(?P<code>[^}]+)\}''' % (                  re.escape(funcname), re.escape(funcname), re.escape(funcname)), diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 5ed723bc6..daca5d814 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -52,7 +52,7 @@ class FFmpegPostProcessor(PostProcessor):      def _determine_executables(self):          programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] -        prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False) +        prefer_ffmpeg = False          self.basename = None          self.probe_basename = None @@ -60,6 +60,7 @@ class FFmpegPostProcessor(PostProcessor):          self._paths = None          self._versions = None          if self._downloader: +            prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)              location = self._downloader.params.get('ffmpeg_location')              if location is not None:                  if not os.path.exists(location): diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 074eb64a7..995b8ed96 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -9,7 +9,7 @@ import subprocess  import sys  from zipimport import zipimporter -from .compat import compat_str +from .utils import encode_compat_str  from .version import __version__ @@ -61,7 +61,7 @@ def update_self(to_screen, verbose, opener):          newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()      except Exception:          if verbose: -            to_screen(compat_str(traceback.format_exc())) +            to_screen(encode_compat_str(traceback.format_exc()))          to_screen('ERROR: can\'t find the current version. Please try again later.')          return      if newversion == __version__: @@ -74,7 +74,7 @@ def update_self(to_screen, verbose, opener):          versions_info = json.loads(versions_info)      except Exception:          if verbose: -            to_screen(compat_str(traceback.format_exc())) +            to_screen(encode_compat_str(traceback.format_exc()))          to_screen('ERROR: can\'t obtain versions info. Please try again later.')          return      if 'signature' not in versions_info: @@ -123,7 +123,7 @@ def update_self(to_screen, verbose, opener):              urlh.close()          except (IOError, OSError):              if verbose: -                to_screen(compat_str(traceback.format_exc())) +                to_screen(encode_compat_str(traceback.format_exc()))              to_screen('ERROR: unable to download latest version')              return @@ -137,7 +137,7 @@ def update_self(to_screen, verbose, opener):                  outf.write(newcontent)          except (IOError, OSError):              if verbose: -                to_screen(compat_str(traceback.format_exc())) +                to_screen(encode_compat_str(traceback.format_exc()))              to_screen('ERROR: unable to write the new version')              return @@ -157,7 +157,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"              return  # Do not show premature success messages          except (IOError, OSError):              if verbose: -                to_screen(compat_str(traceback.format_exc())) +                to_screen(encode_compat_str(traceback.format_exc()))              to_screen('ERROR: unable to overwrite current version')              return @@ -169,7 +169,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"              urlh.close()          except (IOError, OSError):              if verbose: -                to_screen(compat_str(traceback.format_exc())) +                to_screen(encode_compat_str(traceback.format_exc()))              to_screen('ERROR: unable to download latest version')              return @@ -183,7 +183,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"                  outf.write(newcontent)          except (IOError, OSError):              if verbose: -                to_screen(compat_str(traceback.format_exc())) +                to_screen(encode_compat_str(traceback.format_exc()))              to_screen('ERROR: unable to overwrite current version')              return diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d0606b4bc..1737ac5f6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1406,6 +1406,15 @@ def remove_end(s, end):      return s +def remove_quotes(s): +    if s is None or len(s) < 2: +        return s +    for quote in ('"', "'", ): +        if s[0] == quote and s[-1] == quote: +            return s[1:-1] +    return s + +  def url_basename(url):      path = compat_urlparse.urlparse(url).path      return path.strip('/').split('/')[-1] @@ -1703,6 +1712,10 @@ def encode_dict(d, encoding='utf-8'):      return dict((encode(k), encode(v)) for k, v in d.items()) +def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): +    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) + +  US_RATINGS = {      'G': 0,      'PG': 10, @@ -1797,6 +1810,15 @@ def args_to_str(args):      return ' '.join(shlex_quote(a) for a in args) +def error_to_compat_str(err): +    err_str = str(err) +    # On python 2 error byte string must be decoded with proper +    # encoding rather than ascii +    if sys.version_info[0] < 3: +        err_str = err_str.decode(preferredencoding()) +    return err_str + +  def mimetype2ext(mt):      _, _, res = mt.rpartition('/') @@ -1967,15 +1989,15 @@ def match_filter_func(filter_str):  def parse_dfxp_time_expr(time_expr):      if not time_expr: -        return 0.0 +        return      mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)      if mobj:          return float(mobj.group('time_offset')) -    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr) +    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)      if mobj: -        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) +        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))  def srt_subtitles_timecode(seconds): @@ -2011,10 +2033,15 @@ def dfxp2srt(dfxp_data):          raise ValueError('Invalid dfxp/TTML subtitle')      for para, index in zip(paras, itertools.count(1)): -        begin_time = parse_dfxp_time_expr(para.attrib['begin']) +        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))          end_time = parse_dfxp_time_expr(para.attrib.get('end')) +        dur = parse_dfxp_time_expr(para.attrib.get('dur')) +        if begin_time is None: +            continue          if not end_time: -            end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur']) +            if not dur: +                continue +            end_time = begin_time + dur          out.append('%d\n%s --> %s\n%s\n\n' % (              index,              srt_subtitles_timecode(begin_time), diff --git a/youtube_dl/version.py b/youtube_dl/version.py index bd0de9f53..01607693e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.11.27.1' +__version__ = '2015.12.18'  | 
