diff options
57 files changed, 1327 insertions, 616 deletions
| diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a2fe59f80..c03092442 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@  --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1**  ### Before submitting an *issue* make sure you have:  - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>  [debug] User config: []  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.28 +[debug] youtube-dl version 2016.09.04.1  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4  [debug] Proxy map: {} @@ -182,3 +182,4 @@ Rob van Bekkum  Petr Zvoníček  Pratyush Singh  Aleksander Nitecki +Sebastian Blunt @@ -1,6 +1,85 @@  version <unreleased>  Extractors ++ [tvnoe] New extractor (#10524) + + +version 2016.09.04.1 + +Core +* In DASH downloader if the first segment fails, abort the whole download +  process to prevent throttling (#10497) ++ Add support for --skip-unavailable-fragments and --fragment retries in +  hlsnative downloader (#10165, #10448). ++ Add support for --skip-unavailable-fragments in DASH downloader ++ Introduce --skip-unavailable-fragments option for fragment based downloaders +  that allows to skip fragments unavailable due to a HTTP error +* Fix extraction of video/audio entries with src attribute in +  _parse_html5_media_entries (#10540) + +Extractors +* [theplatform] Relax URL regular expression (#10546) +* [youtube:playlist] Extend URL regular expression +* [rottentomatoes] Delegate extraction to internetvideoarchive extractor +* [internetvideoarchive] Extract all formats +* [pornvoisines] Fix extraction (#10469) +* [rottentomatoes] Fix extraction (#10467) +* [espn] Extend URL regular expression (#10549) +* [vimple] Extend URL regular expression (#10547) +* [youtube:watchlater] Fix extraction (#10544) +* [youjizz] Fix extraction (#10437) ++ [foxnews] Add support for FoxNews Insider (#10445) ++ [fc2] Recognize Flash player URLs (#10512) + + +version 2016.09.03 + +Core +* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in +  _extract_m3u8_formats (#10522) +* Handle semicolon in mimetype2ext + +Extractors ++ [youtube] Add support for rental videos' previews (#10532) +* [youtube:playlist] Fallback to video extraction for video/playlist URLs when +  no playlist is actually served (#10537) ++ [drtv] Add support for dr.dk/nyheder (#10536) ++ [facebook:plugins:video] Add extractor (#10530) ++ [go] Add extractor for *.go.com sites +* [adobepass] Check for authz_token expiration (#10527) +* [nytimes] improve extraction +* [thestar] Fix extraction (#10465) +* [glide] Fix extraction (#10478) +- [exfm] Remove extractor (#10482) +* [youporn] Fix categories and tags extraction (#10521) ++ [curiositystream] Add extractor for app.curiositystream.com +- [thvideo] Remove extractor (#10464) +* [movingimage] Fix for the new site name (#10466) ++ [cbs] Add support for once formats (#10515) +* [limelight] Skip ism snd duplicate manifests ++ [porncom] Extract categories and tags (#10510) ++ [facebook] Extract timestamp (#10508) ++ [yahoo] Extract more formats + + +version 2016.08.31 + +Extractors +* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) +* [bandcamp:album] Fix title extraction (#10455) +* [pyvideo] Fix extraction (#10468) ++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016) +* [9c9media] Extract more metadata +* [9c9media] Fix multiple stacks extraction (#10016) +* [adultswim] Improve video info extraction (#10492) +* [vodplatform] Improve embed regular expression +- [played] Remove extractor (#10470) ++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222) ++ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110) +* [adultswim] Rework in terms of turner extractor +* [cnn] Rework in terms of turner extractor +* [nba] Rework in terms of turner extractor ++ [turner] Add base extractor for Turner Broadcasting System based sites  * [bilibili] Fix extraction (#10375)  * [openload] Fix extraction (#10408) @@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.      --mark-watched                   Mark videos watched (YouTube only)      --no-mark-watched                Do not mark videos watched (YouTube only)      --no-color                       Do not emit color codes in output +    --abort-on-unavailable-fragment  Abort downloading when some fragment is not +                                     available  ## Network Options:      --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy. @@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.      -R, --retries RETRIES            Number of retries (default is 10), or                                       "infinite".      --fragment-retries RETRIES       Number of retries for a fragment (default -                                     is 10), or "infinite" (DASH only) +                                     is 10), or "infinite" (DASH and hlsnative +                                     only) +    --skip-unavailable-fragments     Skip unavailable fragments (DASH and +                                     hlsnative only)      --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)                                       (default is 1024)      --no-resize-buffer               Do not automatically adjust the buffer diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bf08697be..9e21016f7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -13,6 +13,8 @@   - **5min**   - **8tracks**   - **91porn** + - **9c9media** + - **9c9media:stack**   - **9gag**   - **9now.com.au**   - **abc.net.au** @@ -89,7 +91,7 @@   - **Bet**   - **Bigflix**   - **Bild**: Bild.de - - **BiliBili** (Currently broken) + - **BiliBili**   - **BioBioChileTV**   - **BIQLE**   - **BleacherReport** @@ -115,6 +117,7 @@   - **Canvas**   - **CarambaTV**   - **CarambaTVPage** + - **CartoonNetwork**   - **cbc.ca**   - **cbc.ca:player**   - **cbc.ca:watch** @@ -168,6 +171,8 @@   - **CTVNews**   - **culturebox.francetvinfo.fr**   - **CultureUnplugged** + - **curiositystream** + - **curiositystream:collection**   - **CWTV**   - **DailyMail**   - **dailymotion** @@ -220,13 +225,14 @@   - **EsriVideo**   - **Europa**   - **EveryonesMixtape** - - **exfm**: ex.fm   - **ExpoTV**   - **ExtremeTube**   - **EyedoTV**   - **facebook** + - **FacebookPluginsVideo**   - **faz.net**   - **fc2** + - **fc2:embed**   - **Fczenit**   - **features.aol.com**   - **fernsehkritik.tv** @@ -240,6 +246,7 @@   - **FOX**   - **Foxgay**   - **FoxNews**: Fox News and Fox Business Video + - **foxnews:insider**   - **FoxSports**   - **france2.fr:generation-quoi**   - **FranceCulture** @@ -268,6 +275,7 @@   - **Glide**: Glide mobile video messages (glide.me)   - **Globo**   - **GloboArticle** + - **Go**   - **GodTube**   - **GodTV**   - **Golem** @@ -403,6 +411,7 @@   - **MovieClips**   - **MovieFap**   - **Moviezine** + - **MovingImage**   - **MPORA**   - **MSN**   - **mtg**: MTG services @@ -459,7 +468,6 @@   - **nick.de**   - **niconico**: ニコニコ動画   - **NiconicoPlaylist** - - **NineCNineMedia**   - **Nintendo**   - **njoy**: N-JOY   - **njoy:embed** @@ -517,7 +525,6 @@   - **Pinkbike**   - **Pladform**   - **play.fm** - - **played.to**   - **PlaysTV**   - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz   - **Playvid** @@ -658,7 +665,6 @@   - **sr:mediathek**: Saarländischer Rundfunk   - **SRGSSR**   - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - - **SSA**   - **stanfordoc**: Stanford Open ClassRoom   - **Steam**   - **Stitcher** @@ -675,6 +681,7 @@   - **Tagesschau**   - **tagesschau:player**   - **Tass** + - **TBS**   - **TDSLifeway**   - **teachertube**: teachertube.com videos   - **teachertube:user:collection**: teachertube.com user and collection videos @@ -700,8 +707,6 @@   - **TheStar**   - **ThisAmericanLife**   - **ThisAV** - - **THVideo** - - **THVideoPlaylist**   - **tinypic**: tinypic.com videos   - **tlc.de**   - **TMZ** diff --git a/test/test_utils.py b/test/test_utils.py index d16ea7f77..405c5d351 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -39,6 +39,7 @@ from youtube_dl.utils import (      is_html,      js_to_json,      limit_length, +    mimetype2ext,      ohdave_rsa_encrypt,      OnDemandPagedList,      orderedSet, @@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase):              limit_length('foo bar baz asd', 12).startswith('foo bar'))          self.assertTrue('...' in limit_length('foo bar baz asd', 12)) +    def test_mimetype2ext(self): +        self.assertEqual(mimetype2ext(None), None) +        self.assertEqual(mimetype2ext('video/x-flv'), 'flv') +        self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') +        self.assertEqual(mimetype2ext('text/vtt'), 'vtt') +        self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') +        self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') +      def test_parse_codecs(self):          self.assertEqual(parse_codecs(''), {})          self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a9730292c..42128272a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -318,6 +318,7 @@ def _real_main(argv=None):          'nooverwrites': opts.nooverwrites,          'retries': opts.retries,          'fragment_retries': opts.fragment_retries, +        'skip_unavailable_fragments': opts.skip_unavailable_fragments,          'buffersize': opts.buffersize,          'noresizebuffer': opts.noresizebuffer,          'continuedl': opts.continue_dl, diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8bbab9dbc..41fc9cfc2 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -38,8 +38,10 @@ class DashSegmentsFD(FragmentFD):          segments_filenames = []          fragment_retries = self.params.get('fragment_retries', 0) +        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) -        def append_url_to_file(target_url, tmp_filename, segment_name): +        def process_segment(segment, tmp_filename, fatal): +            target_url, segment_name = segment              target_filename = '%s-%s' % (tmp_filename, segment_name)              count = 0              while count <= fragment_retries: @@ -52,26 +54,35 @@ class DashSegmentsFD(FragmentFD):                      down.close()                      segments_filenames.append(target_sanitized)                      break -                except (compat_urllib_error.HTTPError, ) as err: +                except compat_urllib_error.HTTPError as err:                      # YouTube may often return 404 HTTP error for a fragment causing the                      # whole download to fail. However if the same fragment is immediately                      # retried with the same request data this usually succeeds (1-2 attemps                      # is usually enough) thus allowing to download the whole file successfully. -                    # So, we will retry all fragments that fail with 404 HTTP error for now. -                    if err.code != 404: -                        raise -                    # Retry fragment +                    # To be future-proof we will retry all fragments that fail with any +                    # HTTP error.                      count += 1                      if count <= fragment_retries: -                        self.report_retry_fragment(segment_name, count, fragment_retries) +                        self.report_retry_fragment(err, segment_name, count, fragment_retries)              if count > fragment_retries: +                if not fatal: +                    self.report_skip_fragment(segment_name) +                    return True                  self.report_error('giving up after %s fragment retries' % fragment_retries)                  return False - -        if initialization_url: -            append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init') -        for i, segment_url in enumerate(segment_urls): -            append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i) +            return True + +        segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] +        segments_to_download.extend([ +            (segment_url, 'Seg%d' % i) +            for i, segment_url in enumerate(segment_urls)]) + +        for i, segment in enumerate(segments_to_download): +            # In DASH, the first segment contains necessary headers to +            # generate a valid MP4 file, so always abort for the first segment +            fatal = i == 0 or not skip_unavailable_fragments +            if not process_segment(segment, ctx['tmpfilename'], fatal): +                return False          self._finish_frag_download(ctx) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 17f12e970..0aeae3b8f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -223,7 +223,8 @@ class FFmpegFD(ExternalFD):              if proxy.startswith('socks'):                  self.report_warning( -                    '%s does not support SOCKS proxies. Downloading may fail.' % self.get_basename()) +                    '%s does not support SOCKS proxies. Downloading is likely to fail. ' +                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())              # Since December 2015 ffmpeg supports -http_proxy option (see              # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index ba903ae10..84aacf7db 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -6,6 +6,7 @@ import time  from .common import FileDownloader  from .http import HttpFD  from ..utils import ( +    error_to_compat_str,      encodeFilename,      sanitize_open,  ) @@ -22,13 +23,19 @@ class FragmentFD(FileDownloader):      Available options: -    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH only) +    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH +                        and hlsnative only) +    skip_unavailable_fragments: +                        Skip unavailable fragments (DASH and hlsnative only)      """ -    def report_retry_fragment(self, fragment_name, count, retries): +    def report_retry_fragment(self, err, fragment_name, count, retries):          self.to_screen( -            '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...' -            % (fragment_name, count, self.format_retries(retries))) +            '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' +            % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) + +    def report_skip_fragment(self, fragment_name): +        self.to_screen('[download] Skipping fragment %s...' % fragment_name)      def _prepare_and_start_frag_download(self, ctx):          self._prepare_frag_download(ctx) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index baaff44d5..5d70abf62 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,6 +13,7 @@ from .fragment import FragmentFD  from .external import FFmpegFD  from ..compat import ( +    compat_urllib_error,      compat_urlparse,      compat_struct_pack,  ) @@ -83,6 +84,10 @@ class HlsFD(FragmentFD):          self._prepare_and_start_frag_download(ctx) +        fragment_retries = self.params.get('fragment_retries', 0) +        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) +        test = self.params.get('test', False) +          extra_query = None          extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')          if extra_param_to_segment_url: @@ -99,15 +104,37 @@ class HlsFD(FragmentFD):                          line                          if re.match(r'^https?://', line)                          else compat_urlparse.urljoin(man_url, line)) -                    frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) +                    frag_name = 'Frag%d' % i +                    frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)                      if extra_query:                          frag_url = update_url_query(frag_url, extra_query) -                    success = ctx['dl'].download(frag_filename, {'url': frag_url}) -                    if not success: +                    count = 0 +                    while count <= fragment_retries: +                        try: +                            success = ctx['dl'].download(frag_filename, {'url': frag_url}) +                            if not success: +                                return False +                            down, frag_sanitized = sanitize_open(frag_filename, 'rb') +                            frag_content = down.read() +                            down.close() +                            break +                        except compat_urllib_error.HTTPError as err: +                            # Unavailable (possibly temporary) fragments may be served. +                            # First we try to retry then either skip or abort. +                            # See https://github.com/rg3/youtube-dl/issues/10165, +                            # https://github.com/rg3/youtube-dl/issues/10448). +                            count += 1 +                            if count <= fragment_retries: +                                self.report_retry_fragment(err, frag_name, count, fragment_retries) +                    if count > fragment_retries: +                        if skip_unavailable_fragments: +                            i += 1 +                            media_sequence += 1 +                            self.report_skip_fragment(frag_name) +                            continue +                        self.report_error( +                            'giving up after %s fragment retries' % fragment_retries)                          return False -                    down, frag_sanitized = sanitize_open(frag_filename, 'rb') -                    frag_content = down.read() -                    down.close()                      if decrypt_info['METHOD'] == 'AES-128':                          iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)                          frag_content = AES.new( @@ -115,7 +142,7 @@ class HlsFD(FragmentFD):                      ctx['dest_stream'].write(frag_content)                      frags_filenames.append(frag_sanitized)                      # We only download the first fragment during the test -                    if self.params.get('test', False): +                    if test:                          break                      i += 1                      media_sequence += 1 diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9e3a3e362..68ec37e00 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor):              return self._search_regex(                  '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag) +        def is_expired(token, date_ele): +            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) +            return token_expires and token_expires <= int(time.time()) +          mvpd_headers = {              'ap_42': 'anonymous',              'ap_11': 'Linux i686', @@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor):          guid = xml_text(resource, 'guid')          requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}          authn_token = requestor_info.get('authn_token') -        if authn_token: -            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) -            if token_expires and token_expires <= int(time.time()): -                authn_token = None -                requestor_info = {} +        if authn_token and is_expired(authn_token, 'simpleTokenExpires'): +            authn_token = None          if not authn_token:              # TODO add support for other TV Providers              mso_id = 'DTV' @@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor):              self._downloader.cache.store('mvpd', requestor_id, requestor_info)          authz_token = requestor_info.get(guid) +        if authz_token and is_expired(authz_token, 'simpleTokenTTL'): +            authz_token = None          if not authz_token:              authorize = self._download_webpage(                  self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index ef3cc2a61..5d0bf5a68 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals  import re  from .turner import TurnerBaseIE -from ..utils import ExtractorError +from ..utils import ( +    ExtractorError, +    int_or_none, +)  class AdultSwimIE(TurnerBaseIE): @@ -144,7 +147,10 @@ class AdultSwimIE(TurnerBaseIE):                  if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:                      video_info = bootstrapped_data['slugged_video']              if not video_info: -                video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video') +                video_info = bootstrapped_data.get( +                    'heroMetadata', {}).get('trailer', {}).get('video') +            if not video_info: +                video_info = bootstrapped_data.get('onlineOriginals', [None])[0]              if not video_info:                  raise ExtractorError('Unable to find video info') @@ -167,8 +173,9 @@ class AdultSwimIE(TurnerBaseIE):          episode_id = video_info['id']          episode_title = video_info['title'] -        episode_description = video_info['description'] -        episode_duration = video_info.get('duration') +        episode_description = video_info.get('description') +        episode_duration = int_or_none(video_info.get('duration')) +        view_count = int_or_none(video_info.get('views'))          entries = []          for part_num, segment_id in enumerate(segment_ids): @@ -197,5 +204,6 @@ class AdultSwimIE(TurnerBaseIE):              'entries': entries,              'title': '%s - %s' % (show_title, episode_title),              'description': episode_description, -            'duration': episode_duration +            'duration': episode_duration, +            'view_count': view_count,          } diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 991ab0676..249c3d956 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):              'uploader_id': 'dotscale',          },          'playlist_mincount': 7, +    }, { +        # with escaped quote in title +        'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', +        'info_dict': { +            'title': '"Entropy" EP', +            'uploader_id': 'jstrecords', +            'id': 'entropy-ep', +        }, +        'playlist_mincount': 3,      }]      def _real_extract(self, url): @@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):          entries = [              self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())              for t_path in tracks_paths] -        title = self._search_regex( -            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) +        title = self._html_search_regex( +            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', +            webpage, 'title', fatal=False) +        if title: +            title = title.replace(r'\"', '"')          return {              '_type': 'playlist',              'uploader_id': uploader_id, diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index c72ed2dbb..3f4dea40c 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE):          path = 'dJ5BDC/media/guid/2198311517/' + guid          smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path          formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) -        for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): +        for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):              try:                  tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0])                  formats.extend(tp_formats) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 1bf87f6ea..5fc311f53 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -15,7 +15,7 @@ class CNNIE(TurnerBaseIE):          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',          'md5': '3e6121ea48df7e2259fe73a0628605c4',          'info_dict': { -            'id': 'nadal-1-on-1', +            'id': 'sports/2013/06/09/nadal-1-on-1.cnn',              'ext': 'mp4',              'title': 'Nadal wins 8th French Open title',              'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', @@ -27,7 +27,7 @@ class CNNIE(TurnerBaseIE):          'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',          'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',          'info_dict': { -            'id': 'sot-student-gives-epic-speech', +            'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',              'ext': 'mp4',              'title': "Student's epic speech stuns new freshmen",              'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", @@ -38,7 +38,7 @@ class CNNIE(TurnerBaseIE):          'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',          'md5': 'f14d02ebd264df951feb2400e2c25a1b',          'info_dict': { -            'id': 'growing-america-nashville-salemtown-board-episode-1', +            'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',              'ext': 'mp4',              'title': 'Nashville Ep. 1: Hand crafted skateboards',              'description': 'md5:e7223a503315c9f150acac52e76de086', @@ -49,7 +49,7 @@ class CNNIE(TurnerBaseIE):          'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',          'md5': '52a515dc1b0f001cd82e4ceda32be9d1',          'info_dict': { -            'id': 'netflix-stunning-stats', +            'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',              'ext': 'mp4',              'title': '5 stunning stats about Netflix',              'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', @@ -83,6 +83,10 @@ class CNNIE(TurnerBaseIE):          },      } +    def _extract_timestamp(self, video_data): +        # TODO: fix timestamp extraction +        return None +      def _real_extract(self, url):          sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()          if sub_domain not in ('money', 'edition'): @@ -108,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):              'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',              'upload_date': '20140209',          }, +        'expected_warnings': ['Failed to download m3u8 information'],          'add_ie': ['CNN'],      } @@ -130,9 +135,10 @@ class CNNArticleIE(InfoExtractor):              'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',              'ext': 'mp4',              'title': 'Obama: Cyberattack not an act of war', -            'description': 'md5:51ce6750450603795cad0cdfbd7d05c5', +            'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',              'upload_date': '20141221',          }, +        'expected_warnings': ['Failed to download m3u8 information'],          'add_ie': ['CNN'],      } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da0af29ec..6edd5a769 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1163,13 +1163,6 @@ class InfoExtractor(object):                                m3u8_id=None, note=None, errnote=None,                                fatal=True, live=False): -        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] - -        format_url = lambda u: ( -            u -            if re.match(r'^https?://', u) -            else compat_urlparse.urljoin(m3u8_url, u)) -          res = self._download_webpage_handle(              m3u8_url, video_id,              note=note or 'Downloading m3u8 information', @@ -1180,6 +1173,13 @@ class InfoExtractor(object):          m3u8_doc, urlh = res          m3u8_url = urlh.geturl() +        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] + +        format_url = lambda u: ( +            u +            if re.match(r'^https?://', u) +            else compat_urlparse.urljoin(m3u8_url, u)) +          # We should try extracting formats only from master playlists [1], i.e.          # playlists that describe available qualities. On the other hand media          # playlists [2] should be returned as is since they contain just the media @@ -1201,7 +1201,8 @@ class InfoExtractor(object):                  'protocol': entry_protocol,                  'preference': preference,              }] -        last_info = None +        last_info = {} +        last_media = {}          for line in m3u8_doc.splitlines():              if line.startswith('#EXT-X-STREAM-INF:'):                  last_info = parse_m3u8_attributes(line) @@ -1224,23 +1225,24 @@ class InfoExtractor(object):                              'protocol': entry_protocol,                              'preference': preference,                          }) +                    else: +                        # When there is no URI in EXT-X-MEDIA let this tag's +                        # data be used by regular URI lines below +                        last_media = media              elif line.startswith('#') or not line.strip():                  continue              else: -                if last_info is None: -                    formats.append({'url': format_url(line)}) -                    continue                  tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)                  format_id = []                  if m3u8_id:                      format_id.append(m3u8_id) +                # Despite specification does not mention NAME attribute for +                # EXT-X-STREAM-INF it still sometimes may be present +                stream_name = last_info.get('NAME') or last_media.get('NAME')                  # Bandwidth of live streams may differ over time thus making                  # format_id unpredictable. So it's better to keep provided                  # format_id intact.                  if not live: -                    # Despite specification does not mention NAME attribute for -                    # EXT-X-STREAM-INF it still sometimes may be present -                    stream_name = last_info.get('NAME')                      format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))                  f = {                      'format_id': '-'.join(format_id), @@ -1269,6 +1271,7 @@ class InfoExtractor(object):                  f.update(parse_codecs(last_info.get('CODECS')))                  formats.append(f)                  last_info = {} +                last_media = {}          return formats      @staticmethod @@ -1746,7 +1749,7 @@ class InfoExtractor(object):              media_attributes = extract_attributes(media_tag)              src = media_attributes.get('src')              if src: -                _, formats = _media_formats(src) +                _, formats = _media_formats(src, media_type)                  media_info['formats'].extend(formats)              media_info['thumbnail'] = media_attributes.get('poster')              if media_content: diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py index 5807fbac9..a1fe86316 100644 --- a/youtube_dl/extractor/ctv.py +++ b/youtube_dl/extractor/ctv.py @@ -1,11 +1,13 @@  # coding: utf-8  from __future__ import unicode_literals +import re +  from .common import InfoExtractor  class CTVIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)' +    _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)'      _TESTS = [{          'url': 'http://www.ctv.ca/video/player?vid=706966',          'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -18,13 +20,27 @@ class CTVIE(InfoExtractor):              'timestamp': 1442624700,          },          'expected_warnings': ['HTTP Error 404'], +    }, { +        'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', +        'only_matching': True, +    }, { +        'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', +        'only_matching': True, +    }, { +        'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', +        'only_matching': True, +    }, { +        'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', +        'only_matching': True,      }]      def _real_extract(self, url): -        video_id = self._match_id(url) +        domain, video_id = re.match(self._VALID_URL, url).groups() +        if domain == 'thecomedynetwork': +            domain = 'comedy'          return {              '_type': 'url_transparent',              'id': video_id, -            'url': '9c9media:ctv_web:%s' % video_id, +            'url': '9c9media:%s_web:%s' % (domain, video_id),              'ie_key': 'NineCNineMedia',          } diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py new file mode 100644 index 000000000..e3c99468c --- /dev/null +++ b/youtube_dl/extractor/curiositystream.py @@ -0,0 +1,120 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    urlencode_postdata, +    compat_str, +    ExtractorError, +) + + +class CuriosityStreamBaseIE(InfoExtractor): +    _NETRC_MACHINE = 'curiositystream' +    _auth_token = None +    _API_BASE_URL = 'https://api.curiositystream.com/v1/' + +    def _handle_errors(self, result): +        error = result.get('error', {}).get('message') +        if error: +            if isinstance(error, dict): +                error = ', '.join(error.values()) +            raise ExtractorError( +                '%s said: %s' % (self.IE_NAME, error), expected=True) + +    def _call_api(self, path, video_id): +        headers = {} +        if self._auth_token: +            headers['X-Auth-Token'] = self._auth_token +        result = self._download_json( +            self._API_BASE_URL + path, video_id, headers=headers) +        self._handle_errors(result) +        return result['data'] + +    def _real_initialize(self): +        (email, password) = self._get_login_info() +        if email is None: +            return +        result = self._download_json( +            self._API_BASE_URL + 'login', None, data=urlencode_postdata({ +                'email': email, +                'password': password, +            })) +        self._handle_errors(result) +        self._auth_token = result['message']['auth_token'] + +    def _extract_media_info(self, media): +        video_id = compat_str(media['id']) +        limelight_media_id = media['limelight_media_id'] +        title = media['title'] + +        subtitles = {} +        for closed_caption in media.get('closed_captions', []): +            sub_url = closed_caption.get('file') +            if not sub_url: +                continue +            lang = closed_caption.get('code') or closed_caption.get('language') or 'en' +            subtitles.setdefault(lang, []).append({ +                'url': sub_url, +            }) + +        return { +            '_type': 'url_transparent', +            'id': video_id, +            'url': 'limelight:media:' + limelight_media_id, +            'title': title, +            'description': media.get('description'), +            'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'), +            'duration': int_or_none(media.get('duration')), +            'tags': media.get('tags'), +            'subtitles': subtitles, +            'ie_key': 'LimelightMedia', +        } + + +class CuriosityStreamIE(CuriosityStreamBaseIE): +    IE_NAME = 'curiositystream' +    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' +    _TEST = { +        'url': 'https://app.curiositystream.com/video/2', +        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a', +        'info_dict': { +            'id': '2', +            'ext': 'mp4', +            'title': 'How Did You Develop The Internet?', +            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', +            'timestamp': 1448388615, +            'upload_date': '20151124', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        media = self._call_api('media/' + video_id, video_id) +        return self._extract_media_info(media) + + +class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): +    IE_NAME = 'curiositystream:collection' +    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)' +    _TEST = { +        'url': 'https://app.curiositystream.com/collection/2', +        'info_dict': { +            'id': '2', +            'title': 'Curious Minds: The Internet', +            'description': 'How is the internet shaping our lives in the 21st Century?', +        }, +        'playlist_mincount': 17, +    } + +    def _real_extract(self, url): +        collection_id = self._match_id(url) +        collection = self._call_api( +            'collections/' + collection_id, collection_id) +        entries = [] +        for media in collection.get('media', []): +            entries.append(self._extract_media_info(media)) +        return self.playlist_result( +            entries, collection_id, +            collection.get('title'), collection.get('description')) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 2d74ff855..88d096b30 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -4,26 +4,45 @@ from __future__ import unicode_literals  from .common import InfoExtractor  from ..utils import (      ExtractorError, +    int_or_none, +    float_or_none, +    mimetype2ext,      parse_iso8601, +    remove_end,  )  class DRTVIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' +    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' -    _TEST = { -        'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', -        'md5': 'dc515a9ab50577fa14cc4e4b0265168f', +    _TESTS = [{ +        'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', +        'md5': '25e659cccc9a2ed956110a299fdf5983',          'info_dict': { -            'id': 'panisk-paske-5', +            'id': 'klassen-darlig-taber-10',              'ext': 'mp4', -            'title': 'Panisk Påske (5)', -            'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', -            'timestamp': 1426984612, -            'upload_date': '20150322', -            'duration': 1455, +            'title': 'Klassen - Dårlig taber (10)', +            'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', +            'timestamp': 1471991907, +            'upload_date': '20160823', +            'duration': 606.84,          }, -    } +        'params': { +            'skip_download': True, +        }, +    }, { +        'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', +        'md5': '2c37175c718155930f939ef59952474a', +        'info_dict': { +            'id': 'christiania-pusher-street-ryddes-drdkrjpo', +            'ext': 'mp4', +            'title': 'LIVE Christianias rydning af Pusher Street er i gang', +            'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', +            'timestamp': 1472800279, +            'upload_date': '20160902', +            'duration': 131.4, +        }, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) @@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):                  'Video %s is not available' % video_id, expected=True)          video_id = self._search_regex( -            r'data-(?:material-identifier|episode-slug)="([^"]+)"', +            (r'data-(?:material-identifier|episode-slug)="([^"]+)"', +                r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),              webpage, 'video id')          programcard = self._download_json( @@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):              video_id, 'Downloading video JSON')          data = programcard['Data'][0] -        title = data['Title'] -        description = data['Description'] -        timestamp = parse_iso8601(data['CreatedTime']) +        title = remove_end(self._og_search_title( +            webpage, default=None), ' | TV | DR') or data['Title'] +        description = self._og_search_description( +            webpage, default=None) or data.get('Description') + +        timestamp = parse_iso8601(data.get('CreatedTime'))          thumbnail = None          duration = None @@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):          subtitles = {}          for asset in data['Assets']: -            if asset['Kind'] == 'Image': -                thumbnail = asset['Uri'] -            elif asset['Kind'] == 'VideoResource': -                duration = asset['DurationInMilliseconds'] / 1000.0 -                restricted_to_denmark = asset['RestrictedToDenmark'] -                spoken_subtitles = asset['Target'] == 'SpokenSubtitles' -                for link in asset['Links']: -                    uri = link['Uri'] -                    target = link['Target'] -                    format_id = target +            if asset.get('Kind') == 'Image': +                thumbnail = asset.get('Uri') +            elif asset.get('Kind') == 'VideoResource': +                duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) +                restricted_to_denmark = asset.get('RestrictedToDenmark') +                spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' +                for link in asset.get('Links', []): +                    uri = link.get('Uri') +                    if not uri: +                        continue +                    target = link.get('Target') +                    format_id = target or ''                      preference = None                      if spoken_subtitles:                          preference = -1 @@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):                              video_id, preference, f4m_id=format_id))                      elif target == 'HLS':                          formats.extend(self._extract_m3u8_formats( -                            uri, video_id, 'mp4', preference=preference, -                            m3u8_id=format_id)) +                            uri, video_id, 'mp4', entry_protocol='m3u8_native', +                            preference=preference, m3u8_id=format_id))                      else:                          bitrate = link.get('Bitrate')                          if bitrate: @@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):                          formats.append({                              'url': uri,                              'format_id': format_id, -                            'tbr': bitrate, +                            'tbr': int_or_none(bitrate),                              'ext': link.get('FileFormat'),                          })                  subtitles_list = asset.get('SubtitlesList') @@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):                          'Danish': 'da',                      }                      for subs in subtitles_list: -                        lang = subs['Language'] -                        subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] +                        if not subs.get('Uri'): +                            continue +                        lang = subs.get('Language') or 'da' +                        subtitles.setdefault(LANGS.get(lang, lang), []).append({ +                            'url': subs['Uri'], +                            'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' +                        })          if not formats and restricted_to_denmark: -            raise ExtractorError( -                'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) +            self.raise_geo_restricted( +                'Unfortunately, DR is not allowed to show this program outside Denmark.', +                expected=True)          self._sort_formats(formats) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 66c08bec4..6d10f8e68 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -5,7 +5,7 @@ from ..utils import remove_end  class ESPNIE(InfoExtractor): -    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' +    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'      _TESTS = [{          'url': 'http://espn.go.com/video/clip?id=10365079',          'md5': '60e5d097a523e767d06479335d1bdc58', @@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor):      }, {          'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',          'only_matching': True, +    }, { +        'url': 'http://www.espn.com/video/clip?id=10365079', +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py deleted file mode 100644 index 09ed4f2b5..000000000 --- a/youtube_dl/extractor/exfm.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class ExfmIE(InfoExtractor): -    IE_NAME = 'exfm' -    IE_DESC = 'ex.fm' -    _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' -    _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' -    _TESTS = [ -        { -            'url': 'http://ex.fm/song/eh359', -            'md5': 'e45513df5631e6d760970b14cc0c11e7', -            'info_dict': { -                'id': '44216187', -                'ext': 'mp3', -                'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', -                'uploader': 'deadjournalist', -                'upload_date': '20120424', -                'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', -            }, -            'note': 'Soundcloud song', -            'skip': 'The site is down too often', -        }, -        { -            'url': 'http://ex.fm/song/wddt8', -            'md5': '966bd70741ac5b8570d8e45bfaed3643', -            'info_dict': { -                'id': 'wddt8', -                'ext': 'mp3', -                'title': 'Safe and Sound', -                'uploader': 'Capital Cities', -            }, -            'skip': 'The site is down too often', -        }, -    ] - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        song_id = mobj.group('id') -        info_url = 'http://ex.fm/api/v3/song/%s' % song_id -        info = self._download_json(info_url, song_id)['song'] -        song_url = info['url'] -        if re.match(self._SOUNDCLOUD_URL, song_url) is not None: -            self.to_screen('Soundcloud song detected') -            return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') -        return { -            'id': song_id, -            'url': song_url, -            'ext': 'mp3', -            'title': info['title'], -            'thumbnail': info['image']['large'], -            'uploader': info['artist'], -            'view_count': info['loved_count'], -        } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 20e85703f..e47adc26c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,6 +194,10 @@ from .ctsnews import CtsNewsIE  from .ctv import CTVIE  from .ctvnews import CTVNewsIE  from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( +    CuriosityStreamIE, +    CuriosityStreamCollectionIE, +)  from .cwtv import CWTVIE  from .dailymail import DailyMailIE  from .dailymotion import ( @@ -257,13 +261,18 @@ from .espn import ESPNIE  from .esri import EsriVideoIE  from .europa import EuropaIE  from .everyonesmixtape import EveryonesMixtapeIE -from .exfm import ExfmIE  from .expotv import ExpoTVIE  from .extremetube import ExtremeTubeIE  from .eyedotv import EyedoTVIE -from .facebook import FacebookIE +from .facebook import ( +    FacebookIE, +    FacebookPluginsVideoIE, +)  from .faz import FazIE -from .fc2 import FC2IE +from .fc2 import ( +    FC2IE, +    FC2EmbedIE, +)  from .fczenit import FczenitIE  from .firstpost import FirstpostIE  from .firsttv import FirstTVIE @@ -278,7 +287,10 @@ from .formula1 import Formula1IE  from .fourtube import FourTubeIE  from .fox import FOXIE  from .foxgay import FoxgayIE -from .foxnews import FoxNewsIE +from .foxnews import ( +    FoxNewsIE, +    FoxNewsInsiderIE, +)  from .foxsports import FoxSportsIE  from .franceculture import FranceCultureIE  from .franceinter import FranceInterIE @@ -315,6 +327,7 @@ from .globo import (      GloboIE,      GloboArticleIE,  ) +from .go import GoIE  from .godtube import GodTubeIE  from .godtv import GodTVIE  from .golem import GolemIE @@ -486,6 +499,7 @@ from .motherless import MotherlessIE  from .motorsport import MotorsportIE  from .movieclips import MovieClipsIE  from .moviezine import MoviezineIE +from .movingimage import MovingImageIE  from .msn import MSNIE  from .mtv import (      MTVIE, @@ -554,7 +568,10 @@ from .nick import (      NickDeIE,  )  from .niconico import NiconicoIE, NiconicoPlaylistIE -from .ninecninemedia import NineCNineMediaIE +from .ninecninemedia import ( +    NineCNineMediaStackIE, +    NineCNineMediaIE, +)  from .ninegag import NineGagIE  from .ninenow import NineNowIE  from .nintendo import NintendoIE @@ -803,7 +820,6 @@ from .srgssr import (      SRGSSRPlayIE,  )  from .srmediathek import SRMediathekIE -from .ssa import SSAIE  from .stanfordoc import StanfordOpenClassroomIE  from .steam import SteamIE  from .streamable import StreamableIE @@ -866,10 +882,6 @@ from .tnaflix import (      MovieFapIE,  )  from .toggle import ToggleIE -from .thvideo import ( -    THVideoIE, -    THVideoPlaylistIE -)  from .toutv import TouTvIE  from .toypics import ToypicsUserIE, ToypicsIE  from .traileraddict import TrailerAddictIE @@ -904,6 +916,7 @@ from .tvc import (  )  from .tvigle import TvigleIE  from .tvland import TVLandIE +from .tvnoe import TVNoeIE  from .tvp import (      TVPEmbedIE,      TVPIE, diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 0fb781a73..3a220e995 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -15,6 +15,7 @@ from ..compat import (  from ..utils import (      error_to_compat_str,      ExtractorError, +    int_or_none,      limit_length,      sanitized_Request,      urlencode_postdata, @@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor):              'ext': 'mp4',              'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',              'uploader': 'Tennis on Facebook', +            'upload_date': '20140908', +            'timestamp': 1410199200,          }      }, {          'note': 'Video without discernible title', @@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor):              'ext': 'mp4',              'title': 'Facebook video #274175099429670',              'uploader': 'Asif Nawab Butt', +            'upload_date': '20140506', +            'timestamp': 1399398998,          },          'expected_warnings': [              'title' @@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor):      }, {          'note': 'Video with DASH manifest',          'url': 'https://www.facebook.com/video.php?v=957955867617029', -        'md5': '54706e4db4f5ad58fbad82dde1f1213f', +        'md5': 'b2c28d528273b323abe5c6ab59f0f030',          'info_dict': {              'id': '957955867617029',              'ext': 'mp4',              'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',              'uploader': 'Demy de Zeeuw', +            'upload_date': '20160110', +            'timestamp': 1452431627,          },      }, {          'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', @@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor):          if not video_title:              video_title = 'Facebook video #%s' % video_id          uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) +        timestamp = int_or_none(self._search_regex( +            r'<abbr[^>]+data-utime=["\'](\d+)', webpage, +            'timestamp', default=None))          info_dict = {              'id': video_id,              'title': video_title,              'formats': formats,              'uploader': uploader, +            'timestamp': timestamp,          }          return webpage, info_dict @@ -340,3 +351,32 @@ class FacebookIE(InfoExtractor):                  self._VIDEO_PAGE_TEMPLATE % video_id,                  video_id, fatal_if_no_video=True)              return info_dict + + +class FacebookPluginsVideoIE(InfoExtractor): +    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)' + +    _TESTS = [{ +        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', +        'md5': '5954e92cdfe51fe5782ae9bda7058a07', +        'info_dict': { +            'id': '10154383743583686', +            'ext': 'mp4', +            'title': 'What to do during the haze?', +            'uploader': 'Gov.sg', +            'upload_date': '20160826', +            'timestamp': 1472184808, +        }, +        'add_ie': [FacebookIE.ie_key()], +    }, { +        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', +        'only_matching': True, +    }, { +        'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        return self.url_result( +            compat_urllib_parse_unquote(self._match_id(url)), +            FacebookIE.ie_key()) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c7d69ff1f..c032d4d02 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,10 +1,12 @@ -#! -*- coding: utf-8 -*- +# coding: utf-8  from __future__ import unicode_literals  import hashlib +import re  from .common import InfoExtractor  from ..compat import ( +    compat_parse_qs,      compat_urllib_request,      compat_urlparse,  ) @@ -16,7 +18,7 @@ from ..utils import (  class FC2IE(InfoExtractor): -    _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' +    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'      IE_NAME = 'fc2'      _NETRC_MACHINE = 'fc2'      _TESTS = [{ @@ -75,12 +77,17 @@ class FC2IE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url)          self._login() -        webpage = self._download_webpage(url, video_id) -        self._downloader.cookiejar.clear_session_cookies()  # must clear -        self._login() - -        title = self._og_search_title(webpage) -        thumbnail = self._og_search_thumbnail(webpage) +        webpage = None +        if not url.startswith('fc2:'): +            webpage = self._download_webpage(url, video_id) +            self._downloader.cookiejar.clear_session_cookies()  # must clear +            self._login() + +        title = 'FC2 video %s' % video_id +        thumbnail = None +        if webpage is not None: +            title = self._og_search_title(webpage) +            thumbnail = self._og_search_thumbnail(webpage)          refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url          mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() @@ -113,3 +120,41 @@ class FC2IE(InfoExtractor):              'ext': 'flv',              'thumbnail': thumbnail,          } + + +class FC2EmbedIE(InfoExtractor): +    _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)' +    IE_NAME = 'fc2:embed' + +    _TEST = { +        'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】', +        'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a', +        'info_dict': { +            'id': '201403223kCqB3Ez', +            'ext': 'flv', +            'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        query = compat_parse_qs(mobj.group('query')) + +        video_id = query['i'][-1] +        title = query.get('tl', ['FC2 video %s' % video_id])[0] + +        sj = query.get('sj', [None])[0] +        thumbnail = None +        if sj: +            # See thumbnailImagePath() in ServerConst.as of flv2.swf +            thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( +                sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) + +        return { +            '_type': 'url_transparent', +            'ie_key': FC2IE.ie_key(), +            'url': 'fc2:%s' % video_id, +            'title': title, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index b04da2415..5c7acd795 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -3,11 +3,12 @@ from __future__ import unicode_literals  import re  from .amp import AMPIE +from .common import InfoExtractor  class FoxNewsIE(AMPIE):      IE_DESC = 'Fox News and Fox Business Video' -    _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' +    _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'      _TESTS = [          {              'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', @@ -49,6 +50,11 @@ class FoxNewsIE(AMPIE):              'url': 'http://video.foxbusiness.com/v/4442309889001',              'only_matching': True,          }, +        { +            # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words +            'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', +            'only_matching': True, +        },      ]      def _real_extract(self, url): @@ -58,3 +64,43 @@ class FoxNewsIE(AMPIE):              'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))          info['id'] = video_id          return info + + +class FoxNewsInsiderIE(InfoExtractor): +    _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)' +    IE_NAME = 'foxnews:insider' + +    _TEST = { +        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', +        'md5': 'a10c755e582d28120c62749b4feb4c0c', +        'info_dict': { +            'id': '5099377331001', +            'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words', +            'ext': 'mp4', +            'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive', +            'description': 'Is campus censorship getting out of control?', +            'timestamp': 1472168725, +            'upload_date': '20160825', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +        'add_ie': [FoxNewsIE.ie_key()], +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL') + +        title = self._og_search_title(webpage) +        description = self._og_search_description(webpage) + +        return { +            '_type': 'url_transparent', +            'ie_key': FoxNewsIE.ie_key(), +            'url': embed_url, +            'display_id': display_id, +            'title': title, +            'description': description, +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c6e655c84..24b217715 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2243,11 +2243,11 @@ class GenericIE(InfoExtractor):          # Look for VODPlatform embeds          mobj = re.search( -            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)', +            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',              webpage)          if mobj is not None:              return self.url_result( -                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform') +                self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')          # Look for Instagram embeds          instagram_embed_url = InstagramIE._extract_embed_url(webpage) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 62ff84835..f0d951396 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -2,7 +2,6 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import unified_strdate  class GlideIE(InfoExtractor): @@ -14,10 +13,8 @@ class GlideIE(InfoExtractor):          'info_dict': {              'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',              'ext': 'mp4', -            'title': 'Damon Timm\'s Glide message', +            'title': "Damon's Glide message",              'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', -            'uploader': 'Damon Timm', -            'upload_date': '20140919',          }      } @@ -27,7 +24,8 @@ class GlideIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          title = self._html_search_regex( -            r'<title>(.+?)</title>', webpage, 'title') +            r'<title>(.+?)</title>', webpage, +            'title', default=None) or self._og_search_title(webpage)          video_url = self._proto_relative_url(self._search_regex(              r'<source[^>]+src=(["\'])(?P<url>.+?)\1',              webpage, 'video URL', default=None, @@ -36,18 +34,10 @@ class GlideIE(InfoExtractor):              r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',              webpage, 'thumbnail url', default=None,              group='url')) or self._og_search_thumbnail(webpage) -        uploader = self._search_regex( -            r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)', -            webpage, 'uploader', fatal=False) -        upload_date = unified_strdate(self._search_regex( -            r'<div[^>]+class="info-date"[^>]*>([^<]+)', -            webpage, 'upload date', fatal=False))          return {              'id': video_id,              'title': title,              'url': video_url,              'thumbnail': thumbnail, -            'uploader': uploader, -            'upload_date': upload_date,          } diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py new file mode 100644 index 000000000..6a437c54d --- /dev/null +++ b/youtube_dl/extractor/go.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    determine_ext, +    parse_age_limit, +) + + +class GoIE(InfoExtractor): +    _BRANDS = { +        'abc': '001', +        'freeform': '002', +        'watchdisneychannel': '004', +        'watchdisneyjunior': '008', +        'watchdisneyxd': '009', +    } +    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys()) +    _TESTS = [{ +        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', +        'info_dict': { +            'id': '0_g86w5onx', +            'ext': 'mp4', +            'title': 'Sneak Peek: Language Arts', +            'description': 'md5:7dcdab3b2d17e5217c953256af964e9c', +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    }, { +        'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        sub_domain, video_id = re.match(self._VALID_URL, url).groups() +        video_data = self._download_json( +            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), +            video_id)['video'][0] +        title = video_data['title'] + +        formats = [] +        for asset in video_data.get('assets', {}).get('asset', []): +            asset_url = asset.get('value') +            if not asset_url: +                continue +            format_id = asset.get('format') +            ext = determine_ext(asset_url) +            if ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) +            else: +                formats.append({ +                    'format_id': format_id, +                    'url': asset_url, +                    'ext': ext, +                }) +        self._sort_formats(formats) + +        subtitles = {} +        for cc in video_data.get('closedcaption', {}).get('src', []): +            cc_url = cc.get('value') +            if not cc_url: +                continue +            ext = determine_ext(cc_url) +            if ext == 'xml': +                ext = 'ttml' +            subtitles.setdefault(cc.get('lang'), []).append({ +                'url': cc_url, +                'ext': ext, +            }) + +        thumbnails = [] +        for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []): +            thumbnail_url = thumbnail.get('value') +            if not thumbnail_url: +                continue +            thumbnails.append({ +                'url': thumbnail_url, +                'width': int_or_none(thumbnail.get('width')), +                'height': int_or_none(thumbnail.get('height')), +            }) + +        return { +            'id': video_id, +            'title': title, +            'description': video_data.get('longdescription') or video_data.get('description'), +            'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000), +            'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')), +            'episode_number': int_or_none(video_data.get('episodenumber')), +            'series': video_data.get('show', {}).get('title'), +            'season_number': int_or_none(video_data.get('season', {}).get('num')), +            'thumbnails': thumbnails, +            'formats': formats, +            'subtitles': subtitles, +        } diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 45add007f..76cc5ec3e 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor):              # There are multiple videos in the playlist whlie only the first one              # matches the video played in browsers              video_info = configuration['playlist'][0] +            title = video_info['title']              formats = []              for source in video_info['sources']:                  file_url = source['file']                  if determine_ext(file_url) == 'm3u8': -                    formats.extend(self._extract_m3u8_formats( -                        file_url, video_id, ext='mp4', m3u8_id='hls')) +                    m3u8_formats = self._extract_m3u8_formats( +                        file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) +                    if m3u8_formats: +                        formats.extend(m3u8_formats) +                        file_url = m3u8_formats[0]['url'] +                        formats.extend(self._extract_f4m_formats( +                            file_url.replace('.m3u8', '.f4m'), +                            video_id, f4m_id='hds', fatal=False)) +                        formats.extend(self._extract_mpd_formats( +                            file_url.replace('.m3u8', '.mpd'), +                            video_id, mpd_id='dash', fatal=False))                  else:                      a_format = {                          'url': file_url, @@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor):              self._sort_formats(formats) -            title = video_info['title']              description = video_info.get('description')              thumbnail = video_info.get('image')          else: diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py index 12cc56e44..2e66e8cf9 100644 --- a/youtube_dl/extractor/kusi.py +++ b/youtube_dl/extractor/kusi.py @@ -18,31 +18,20 @@ from ..utils import (  class KUSIIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'      _TESTS = [{ -        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold', -        'md5': 'f926e7684294cf8cb7bdf8858e1b3988', +        'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right', +        'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',          'info_dict': { -            'id': '12203019', +            'id': '12689020',              'ext': 'mp4', -            'title': 'Turko Files: Case Closed! & Put On Hold!', -            'duration': 231.0, -            'upload_date': '20160210', -            'timestamp': 1455087571, +            'title': "Turko Files: Refused to Help, It Ain't Right!", +            'duration': 223.586, +            'upload_date': '20160826', +            'timestamp': 1472233118,              'thumbnail': 're:^https?://.*\.jpg$'          },      }, {          'url': 'http://kusi.com/video?clipId=12203019', -        'info_dict': { -            'id': '12203019', -            'ext': 'mp4', -            'title': 'Turko Files: Case Closed! & Put On Hold!', -            'duration': 231.0, -            'upload_date': '20160210', -            'timestamp': 1455087571, -            'thumbnail': 're:^https?://.*\.jpg$' -        }, -        'params': { -            'skip_download': True,  # Same as previous one -        }, +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a425bafe3..6752ffee2 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor):      def _extract_info(self, streams, mobile_urls, properties):          video_id = properties['media_id']          formats = [] - +        urls = []          for stream in streams:              stream_url = stream.get('url') -            if not stream_url or stream.get('drmProtected'): +            if not stream_url or stream.get('drmProtected') or stream_url in urls:                  continue +            urls.append(stream_url)              ext = determine_ext(stream_url)              if ext == 'f4m':                  formats.extend(self._extract_f4m_formats( @@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor):                      format_id = 'rtmp'                      if stream.get('videoBitRate'):                          format_id += '-%d' % int_or_none(stream['videoBitRate']) +                    http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) +                    urls.append(http_url)                      http_fmt = fmt.copy()                      http_fmt.update({ -                        'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]), +                        'url': http_url,                          'format_id': format_id.replace('rtmp', 'http'),                      })                      formats.append(http_fmt) @@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor):          for mobile_url in mobile_urls:              media_url = mobile_url.get('mobileUrl')              format_id = mobile_url.get('targetMediaPlatform') -            if not media_url or format_id == 'Widevine': +            if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:                  continue +            urls.append(media_url)              ext = determine_ext(media_url)              if ext == 'm3u8':                  formats.extend(self._extract_m3u8_formats( diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/movingimage.py index 54d1843f2..bb789c32e 100644 --- a/youtube_dl/extractor/ssa.py +++ b/youtube_dl/extractor/movingimage.py @@ -7,22 +7,19 @@ from ..utils import (  ) -class SSAIE(InfoExtractor): -    _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)' +class MovingImageIE(InfoExtractor): +    _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'      _TEST = { -        'url': 'http://ssa.nls.uk/film/3561', +        'url': 'http://movingimage.nls.uk/film/3561', +        'md5': '4caa05c2b38453e6f862197571a7be2f',          'info_dict': {              'id': '3561', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'SHETLAND WOOL',              'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',              'duration': 900,              'thumbnail': 're:^https?://.*\.jpg$',          }, -        'params': { -            # rtmp download -            'skip_download': True, -        },      }      def _real_extract(self, url): @@ -30,10 +27,9 @@ class SSAIE(InfoExtractor):          webpage = self._download_webpage(url, video_id) -        streamer = self._search_regex( -            r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer') -        play_path = self._search_regex( -            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0] +        formats = self._extract_m3u8_formats( +            self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'), +            video_id, ext='mp4', entry_protocol='m3u8_native')          def search_field(field_name, fatal=False):              return self._search_regex( @@ -44,13 +40,11 @@ class SSAIE(InfoExtractor):          description = unescapeHTML(search_field('Description'))          duration = parse_duration(search_field('Running time'))          thumbnail = self._search_regex( -            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False) +            r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)          return {              'id': video_id, -            'url': streamer, -            'play_path': play_path, -            'ext': 'flv', +            'formats': formats,              'title': title,              'description': description,              'duration': duration, diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py index 731c24542..2117d302d 100644 --- a/youtube_dl/extractor/myvidster.py +++ b/youtube_dl/extractor/myvidster.py @@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor):              'id': '3685814',              'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',              'upload_date': '20141027', -            'uploader_id': 'utkualp', +            'uploader': 'utkualp',              'ext': 'mp4',              'age_limit': 18,          }, diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index aabd5b670..53561961c 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -39,18 +39,19 @@ class NBAIE(TurnerBaseIE):          'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',          'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',          'info_dict': { -            'id': '0041400301-cle-atl-recap', +            'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',              'ext': 'mp4',              'title': 'Hawks vs. Cavaliers Game 1',              'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',              'duration': 228,              'timestamp': 1432134543,              'upload_date': '20150520', -        } +        }, +        'expected_warnings': ['Unable to download f4m manifest'],      }, {          'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',          'info_dict': { -            'id': '1455672027478-Doc_Feb16_720', +            'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',              'ext': 'mp4',              'title': 'Practice: Doc Rivers - 2/16/16',              'description': 'Head Coach Doc Rivers addresses the media following practice.', @@ -61,6 +62,7 @@ class NBAIE(TurnerBaseIE):              # m3u8 download              'skip_download': True,          }, +        'expected_warnings': ['Unable to download f4m manifest'],      }, {          'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',          'info_dict': { @@ -75,7 +77,7 @@ class NBAIE(TurnerBaseIE):      }, {          'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',          'info_dict': { -            'id': 'Wigginsmp4-3462601', +            'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',              'ext': 'mp4',              'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',              'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.', @@ -87,6 +89,7 @@ class NBAIE(TurnerBaseIE):              # m3u8 download              'skip_download': True,          }, +        'expected_warnings': ['Unable to download f4m manifest'],      }]      _PAGE_SIZE = 30 diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py index d889245ad..ec4d675e2 100644 --- a/youtube_dl/extractor/ninecninemedia.py +++ b/youtube_dl/extractor/ninecninemedia.py @@ -4,40 +4,36 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_str  from ..utils import (      parse_iso8601, -    parse_duration, -    ExtractorError +    float_or_none, +    ExtractorError, +    int_or_none,  ) -class NineCNineMediaIE(InfoExtractor): -    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' +class NineCNineMediaBaseIE(InfoExtractor): +    _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' + + +class NineCNineMediaStackIE(NineCNineMediaBaseIE): +    IE_NAME = '9c9media:stack' +    _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'      def _real_extract(self, url): -        destination_code, video_id = re.match(self._VALID_URL, url).groups() -        api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id) -        content = self._download_json(api_base_url, video_id, query={ -            '$include': '[contentpackages]', -        }) -        title = content['Name'] -        if len(content['ContentPackages']) > 1: -            raise ExtractorError('multiple content packages') -        content_package = content['ContentPackages'][0] -        stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id'] -        stacks = self._download_json(stacks_base_url, video_id)['Items'] -        if len(stacks) > 1: -            raise ExtractorError('multiple stacks') -        stack = stacks[0] -        stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id']) +        destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups() +        stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.' +        stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id) +          formats = []          formats.extend(self._extract_m3u8_formats( -            stack_base_url + 'm3u8', video_id, 'mp4', +            stack_base_url + 'm3u8', stack_id, 'mp4',              'm3u8_native', m3u8_id='hls', fatal=False))          formats.extend(self._extract_f4m_formats( -            stack_base_url + 'f4m', video_id, +            stack_base_url + 'f4m', stack_id,              f4m_id='hds', fatal=False)) -        mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False) +        mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)          if mp4_url:              formats.append({                  'url': mp4_url, @@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor):          self._sort_formats(formats)          return { -            'id': video_id, -            'title': title, -            'description': content.get('Desc') or content.get('ShortDesc'), -            'timestamp': parse_iso8601(content.get('BroadcastDateTime')), -            'duration': parse_duration(content.get('BroadcastTime')), +            'id': stack_id,              'formats': formats,          } + + +class NineCNineMediaIE(NineCNineMediaBaseIE): +    IE_NAME = '9c9media' +    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' + +    def _real_extract(self, url): +        destination_code, content_id = re.match(self._VALID_URL, url).groups() +        api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id) +        content = self._download_json(api_base_url, content_id, query={ +            '$include': '[Media,Season,ContentPackages]', +        }) +        title = content['Name'] +        if len(content['ContentPackages']) > 1: +            raise ExtractorError('multiple content packages') +        content_package = content['ContentPackages'][0] +        package_id = content_package['Id'] +        content_package_url = api_base_url + 'contentpackages/%s/' % package_id +        content_package = self._download_json(content_package_url, content_id) + +        if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm': +            raise ExtractorError('This video is DRM protected.', expected=True) + +        stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items'] +        multistacks = len(stacks) > 1 + +        thumbnails = [] +        for image in content.get('Images', []): +            image_url = image.get('Url') +            if not image_url: +                continue +            thumbnails.append({ +                'url': image_url, +                'width': int_or_none(image.get('Width')), +                'height': int_or_none(image.get('Height')), +            }) + +        tags, categories = [], [] +        for source_name, container in (('Tags', tags), ('Genres', categories)): +            for e in content.get(source_name, []): +                e_name = e.get('Name') +                if not e_name: +                    continue +                container.append(e_name) + +        description = content.get('Desc') or content.get('ShortDesc') +        season = content.get('Season', {}) +        base_info = { +            'description': description, +            'timestamp': parse_iso8601(content.get('BroadcastDateTime')), +            'episode_number': int_or_none(content.get('Episode')), +            'season': season.get('Name'), +            'season_number': season.get('Number'), +            'season_id': season.get('Id'), +            'series': content.get('Media', {}).get('Name'), +            'tags': tags, +            'categories': categories, +        } + +        entries = [] +        for stack in stacks: +            stack_id = compat_str(stack['Id']) +            entry = { +                '_type': 'url_transparent', +                'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id), +                'id': stack_id, +                'title': '%s_part%s' % (title, stack['Name']) if multistacks else title, +                'duration': float_or_none(stack.get('Duration')), +                'ie_key': 'NineCNineMediaStack', +            } +            entry.update(base_info) +            entries.append(entry) + +        return { +            '_type': 'multi_video', +            'id': content_id, +            'title': title, +            'description': description, +            'entries': entries, +        } diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 681683e86..142c34256 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -1,26 +1,37 @@  from __future__ import unicode_literals +import hmac +import hashlib +import base64 +  from .common import InfoExtractor  from ..utils import (      float_or_none,      int_or_none,      parse_iso8601, +    mimetype2ext, +    determine_ext,  )  class NYTimesBaseIE(InfoExtractor): +    _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v' +      def _extract_video_from_id(self, video_id): -        video_data = self._download_json( -            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, -            video_id, 'Downloading video JSON') +        # Authorization generation algorithm is reverse engineered from `signer` in +        # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js +        path = '/svc/video/api/v3/video/' + video_id +        hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest() +        video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={ +            'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(), +            'X-NYTV': 'vhs', +        }, fatal=False) +        if not video_data: +            video_data = self._download_json( +                'http://www.nytimes.com/svc/video/api/v2/video/' + video_id, +                video_id, 'Downloading video JSON')          title = video_data['headline'] -        description = video_data.get('summary') -        duration = float_or_none(video_data.get('duration'), 1000) - -        uploader = video_data.get('byline') -        publication_date = video_data.get('publication_date') -        timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None          def get_file_size(file_size):              if isinstance(file_size, int): @@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor):              elif isinstance(file_size, dict):                  return int(file_size.get('value', 0))              else: -                return 0 - -        formats = [ -            { -                'url': video['url'], -                'format_id': video.get('type'), -                'vcodec': video.get('video_codec'), -                'width': int_or_none(video.get('width')), -                'height': int_or_none(video.get('height')), -                'filesize': get_file_size(video.get('fileSize')), -            } for video in video_data['renditions'] if video.get('url') -        ] +                return None + +        urls = [] +        formats = [] +        for video in video_data.get('renditions', []): +            video_url = video.get('url') +            format_id = video.get('type') +            if not video_url or format_id == 'thumbs' or video_url in urls: +                continue +            urls.append(video_url) +            ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url) +            if ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    video_url, video_id, 'mp4', 'm3u8_native', +                    m3u8_id=format_id or 'hls', fatal=False)) +            elif ext == 'mpd': +                continue +            #     formats.extend(self._extract_mpd_formats( +            #         video_url, video_id, format_id or 'dash', fatal=False)) +            else: +                formats.append({ +                    'url': video_url, +                    'format_id': format_id, +                    'vcodec': video.get('videoencoding') or video.get('video_codec'), +                    'width': int_or_none(video.get('width')), +                    'height': int_or_none(video.get('height')), +                    'filesize': get_file_size(video.get('file_size') or video.get('fileSize')), +                    'tbr': int_or_none(video.get('bitrate'), 1000), +                    'ext': ext, +                })          self._sort_formats(formats) -        thumbnails = [ -            { -                'url': 'http://www.nytimes.com/%s' % image['url'], +        thumbnails = [] +        for image in video_data.get('images', []): +            image_url = image.get('url') +            if not image_url: +                continue +            thumbnails.append({ +                'url': 'http://www.nytimes.com/' + image_url,                  'width': int_or_none(image.get('width')),                  'height': int_or_none(image.get('height')), -            } for image in video_data.get('images', []) if image.get('url') -        ] +            }) + +        publication_date = video_data.get('publication_date') +        timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None          return {              'id': video_id,              'title': title, -            'description': description, +            'description': video_data.get('summary'),              'timestamp': timestamp, -            'uploader': uploader, -            'duration': duration, +            'uploader': video_data.get('byline'), +            'duration': float_or_none(video_data.get('duration'), 1000),              'formats': formats,              'thumbnails': thumbnails,          } @@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE):      _TESTS = [{          'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', -        'md5': '18a525a510f942ada2720db5f31644c0', +        'md5': 'd665342765db043f7e225cff19df0f2d',          'info_dict': {              'id': '100000002847155',              'ext': 'mov', diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py index 4baf79688..d85e0294d 100644 --- a/youtube_dl/extractor/porncom.py +++ b/youtube_dl/extractor/porncom.py @@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):              'duration': 551,              'view_count': int,              'age_limit': 18, +            'categories': list, +            'tags': list,          },      }, {          'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', @@ -75,7 +77,14 @@ class PornComIE(InfoExtractor):          self._sort_formats(formats)          view_count = str_to_int(self._search_regex( -            r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count')) +            r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, +            'view count', fatal=False)) + +        def extract_list(kind): +            s = self._search_regex( +                r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(), +                webpage, kind, fatal=False) +            return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')          return {              'id': video_id, @@ -86,4 +95,6 @@ class PornComIE(InfoExtractor):              'view_count': view_count,              'formats': formats,              'age_limit': 18, +            'categories': extract_list('categories'), +            'tags': extract_list('tags'),          } diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 6b51e5c54..58f557e39 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -2,7 +2,6 @@  from __future__ import unicode_literals  import re -import random  from .common import InfoExtractor  from ..utils import ( @@ -13,61 +12,69 @@ from ..utils import (  class PornoVoisinesIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' - -    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ -        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' - -    _SERVER_NUMBERS = (1, 2) +    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'      _TEST = { -        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', -        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', +        'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', +        'md5': '6f8aca6a058592ab49fe701c8ba8317b',          'info_dict': { -            'id': '1285', +            'id': '919',              'display_id': 'recherche-appartement',              'ext': 'mp4',              'title': 'Recherche appartement', -            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', +            'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',              'thumbnail': 're:^https?://.*\.jpg$',              'upload_date': '20140925',              'duration': 120,              'view_count': int,              'average_rating': float, -            'categories': ['Débutantes', 'Scénario', 'Sodomie'], +            'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],              'age_limit': 18, +            'subtitles': { +                'fr': [{ +                    'ext': 'vtt', +                }] +            },          }      } -    @classmethod -    def build_video_url(cls, num): -        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id')          display_id = mobj.group('display_id') -        webpage = self._download_webpage(url, video_id) +        settings_url = self._download_json( +            'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, +            video_id, note='Getting settings URL')['video_settings_url'] +        settings = self._download_json(settings_url, video_id)['data'] + +        formats = [] +        for kind, data in settings['variants'].items(): +            if kind == 'HLS': +                formats.extend(self._extract_m3u8_formats( +                    data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) +            elif kind == 'MP4': +                for item in data: +                    formats.append({ +                        'url': item['url'], +                        'height': item.get('height'), +                        'bitrate': item.get('bitrate'), +                    }) +        self._sort_formats(formats) -        video_url = self.build_video_url(video_id) +        webpage = self._download_webpage(url, video_id) -        title = self._html_search_regex( -            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL) -        description = self._html_search_regex( -            r'<article id="descriptif">(.+?)</article>', -            webpage, 'description', fatal=False, flags=re.DOTALL) +        title = self._og_search_title(webpage) +        description = self._og_search_description(webpage) -        thumbnail = self._search_regex( -            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id, -            webpage, 'thumbnail', fatal=False) -        if thumbnail: -            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail +        # The webpage has a bug - there's no space between "thumb" and src= +        thumbnail = self._html_search_regex( +            r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2', +            webpage, 'thumbnail', fatal=False, group='url')          upload_date = unified_strdate(self._search_regex( -            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) -        duration = int_or_none(self._search_regex( -            'Durée (\d+)', webpage, 'duration', fatal=False)) +            r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False)) +        duration = settings.get('main', {}).get('duration')          view_count = int_or_none(self._search_regex(              r'(\d+) vues', webpage, 'view count', fatal=False))          average_rating = self._search_regex( @@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor):          if average_rating:              average_rating = float_or_none(average_rating.replace(',', '.')) -        categories = self._html_search_meta( -            'keywords', webpage, 'categories', fatal=False) +        categories = self._html_search_regex( +            r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)          if categories:              categories = [category.strip() for category in categories.split(',')] +        subtitles = {'fr': [{ +            'url': subtitle, +        } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} +          return {              'id': video_id,              'display_id': display_id, -            'url': video_url, +            'formats': formats,              'title': title,              'description': description,              'thumbnail': thumbnail, @@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor):              'average_rating': average_rating,              'categories': categories,              'age_limit': 18, +            'subtitles': subtitles,          } diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index cc0416cb8..b8ac93a62 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -1,59 +1,72 @@  from __future__ import unicode_literals  import re -import os  from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none  class PyvideoIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' - -    _TESTS = [ -        { -            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', -            'md5': '520915673e53a5c5d487c36e0c4d85b5', -            'info_dict': { -                'id': '24_4WWkSmNo', -                'ext': 'webm', -                'title': 'Become a logging expert in 30 minutes', -                'description': 'md5:9665350d466c67fb5b1598de379021f7', -                'upload_date': '20130320', -                'uploader': 'Next Day Video', -                'uploader_id': 'NextDayVideo', -            }, -            'add_ie': ['Youtube'], +    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' + +    _TESTS = [{ +        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', +        'info_dict': { +            'id': 'become-a-logging-expert-in-30-minutes',          }, -        { -            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', -            'md5': '5fe1c7e0a8aa5570330784c847ff6d12', -            'info_dict': { -                'id': '2542', -                'ext': 'm4v', -                'title': 'Gloriajw-SpotifyWithErikBernhardsson182', -            }, +        'playlist_count': 2, +    }, { +        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', +        'md5': '5fe1c7e0a8aa5570330784c847ff6d12', +        'info_dict': { +            'id': '2542', +            'ext': 'm4v', +            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',          }, -    ] +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) +        category = mobj.group('category')          video_id = mobj.group('id') -        webpage = self._download_webpage(url, video_id) +        entries = [] -        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) -        if m_youtube is not None: -            return self.url_result(m_youtube.group(1), 'Youtube') +        data = self._download_json( +            'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' +            % (category, video_id), video_id, fatal=False) -        title = self._html_search_regex( -            r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>', -            webpage, 'title', flags=re.DOTALL) -        video_url = self._search_regex( -            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], -            webpage, 'video url', flags=re.DOTALL) +        if data: +            for video in data['videos']: +                video_url = video.get('url') +                if video_url: +                    if video.get('type') == 'youtube': +                        entries.append(self.url_result(video_url, 'Youtube')) +                    else: +                        entries.append({ +                            'id': compat_str(data.get('id') or video_id), +                            'url': video_url, +                            'title': data['title'], +                            'description': data.get('description') or data.get('summary'), +                            'thumbnail': data.get('thumbnail_url'), +                            'duration': int_or_none(data.get('duration')), +                        }) +        else: +            webpage = self._download_webpage(url, video_id) +            title = self._og_search_title(webpage) +            media_urls = self._search_regex( +                r'(?s)Media URL:(.+?)</li>', webpage, 'media urls') +            for m in re.finditer( +                    r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls): +                media_url = m.group('url') +                if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url): +                    entries.append(self.url_result(media_url, 'Youtube')) +                else: +                    entries.append({ +                        'id': video_id, +                        'url': media_url, +                        'title': title, +                    }) -        return { -            'id': video_id, -            'title': os.path.splitext(title)[0], -            'url': video_url, -        } +        return self.playlist_result(entries, video_id) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index f9cd48790..23abf7a27 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,7 +1,6 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..compat import compat_urlparse  from .internetvideoarchive import InternetVideoArchiveIE @@ -11,21 +10,23 @@ class RottenTomatoesIE(InfoExtractor):      _TEST = {          'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',          'info_dict': { -            'id': '613340', +            'id': '11028566',              'ext': 'mp4',              'title': 'Toy Story 3', +            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', +            'thumbnail': 're:^https?://.*\.jpg$',          },      }      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        og_video = self._og_search_video_url(webpage) -        query = compat_urlparse.urlparse(og_video).query +        iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')          return {              '_type': 'url_transparent', -            'url': InternetVideoArchiveIE._build_xml_url(query), +            'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,              'ie_key': InternetVideoArchiveIE.ie_key(), +            'id': video_id,              'title': self._og_search_title(webpage),          } diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index aeae931a2..9635c2b49 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):      _VALID_URL = r'''(?x)^(?:https?://)?                      (?:(?:(?:www\.|m\.)?soundcloud\.com/                              (?P<uploader>[\w\d-]+)/ -                            (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) +                            (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))                              (?P<title>[\w\d-]+)/?                              (?P<token>[^?]+?)?(?:[?].*)?$)                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE):              'title': 'The Royal Concept EP',          },          'playlist_mincount': 6, +    }, { +        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index a147f7db1..e2a9e45ac 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE):              'description': 'Cartman Consigue Una Sonda Anal',          },          'playlist_count': 4, +        'skip': 'Geo-restricted',      }] diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 23067e8c6..6febf805b 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE):  class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):      _VALID_URL = r'''(?x)          (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ -           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? +           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?           |theplatform:)(?P<id>[^/\?&]+)'''      _TESTS = [{ @@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):              # rtmp download              'skip_download': True,          }, +        'skip': '404 Not Found',      }, {          # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/          'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py index ba1380abc..c3f118894 100644 --- a/youtube_dl/extractor/thestar.py +++ b/youtube_dl/extractor/thestar.py @@ -2,8 +2,6 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from .brightcove import BrightcoveLegacyIE -from ..compat import compat_parse_qs  class TheStarIE(InfoExtractor): @@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor):      def _real_extract(self, url):          display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) -        brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) -        brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0] -        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) +        brightcove_id = self._search_regex( +            r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)', +            webpage, 'brightcove id') +        return self.url_result( +            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, +            'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py deleted file mode 100644 index 406f4a826..000000000 --- a/youtube_dl/extractor/thvideo.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( -    unified_strdate -) - - -class THVideoIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' -    _TEST = { -        'url': 'http://thvideo.tv/v/th1987/', -        'md5': 'fa107b1f73817e325e9433505a70db50', -        'info_dict': { -            'id': '1987', -            'ext': 'mp4', -            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', -            'display_id': 'th1987', -            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', -            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...', -            'upload_date': '20140722' -        } -    } - -    def _real_extract(self, url): -        video_id = self._match_id(url) - -        # extract download link from mobile player page -        webpage_player = self._download_webpage( -            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id), -            video_id, note='Downloading video source page') -        video_url = self._html_search_regex( -            r'<source src="(.*?)" type', webpage_player, 'video url') - -        # extract video info from main page -        webpage = self._download_webpage( -            'http://thvideo.tv/v/th%s' % (video_id), video_id) -        title = self._og_search_title(webpage) -        display_id = 'th%s' % video_id -        thumbnail = self._og_search_thumbnail(webpage) -        description = self._og_search_description(webpage) -        upload_date = unified_strdate(self._html_search_regex( -            r'span itemprop="datePublished" content="(.*?)">', webpage, -            'upload date', fatal=False)) - -        return { -            'id': video_id, -            'ext': 'mp4', -            'url': video_url, -            'title': title, -            'display_id': display_id, -            'thumbnail': thumbnail, -            'description': description, -            'upload_date': upload_date -        } - - -class THVideoPlaylistIE(InfoExtractor): -    _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)' -    _TEST = { -        'url': 'http://thvideo.tv/mylist2', -        'info_dict': { -            'id': '2', -            'title': '幻想万華鏡', -        }, -        'playlist_mincount': 23, -    } - -    def _real_extract(self, url): -        playlist_id = self._match_id(url) - -        webpage = self._download_webpage(url, playlist_id) -        list_title = self._html_search_regex( -            r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title', -            fatal=False) - -        entries = [ -            self.url_result('http://thvideo.tv/v/th' + id, 'THVideo') -            for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)] - -        return self.playlist_result(entries, playlist_id, list_title) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 108caa9d8..b59dafda6 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_str  from ..utils import (      xpath_text,      int_or_none, @@ -16,9 +17,12 @@ from ..utils import (  class TurnerBaseIE(InfoExtractor): +    def _extract_timestamp(self, video_data): +        return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) +      def _extract_cvp_info(self, data_src, video_id, path_data={}):          video_data = self._download_xml(data_src, video_id) -        video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] +        video_id = video_data.attrib['id']          title = xpath_text(video_data, 'headline', fatal=True)          # rtmp_src = xpath_text(video_data, 'akamai/src')          # if rtmp_src: @@ -30,11 +34,11 @@ class TurnerBaseIE(InfoExtractor):          tokens = {}          urls = []          formats = [] -        rex = re.compile(r'''(?x) -            (?P<width>[0-9]+)x(?P<height>[0-9]+) -            (?:_(?P<bitrate>[0-9]+))? -        ''') -        for video_file in video_data.findall('files/file'): +        rex = re.compile( +            r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?') +        # Possible formats locations: files/file, files/groupFiles/files +        # and maybe others +        for video_file in video_data.findall('.//file'):              video_url = video_file.text.strip()              if not video_url:                  continue @@ -84,12 +88,14 @@ class TurnerBaseIE(InfoExtractor):              if video_url in urls:                  continue              urls.append(video_url) -            format_id = video_file.attrib['bitrate'] +            format_id = video_file.get('bitrate')              if ext == 'smil': -                formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) +                formats.extend(self._extract_smil_formats( +                    video_url, video_id, fatal=False))              elif ext == 'm3u8':                  m3u8_formats = self._extract_m3u8_formats( -                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) +                    video_url, video_id, 'mp4', m3u8_id=format_id or 'hls', +                    fatal=False)                  if m3u8_formats:                      # Sometimes final URLs inside m3u8 are unsigned, let's fix this                      # ourselves @@ -103,7 +109,7 @@ class TurnerBaseIE(InfoExtractor):              elif ext == 'f4m':                  formats.extend(self._extract_f4m_formats(                      update_url_query(video_url, {'hdcore': '3.7.0'}), -                    video_id, f4m_id=format_id, fatal=False)) +                    video_id, f4m_id=format_id or 'hds', fatal=False))              else:                  f = {                      'format_id': format_id, @@ -117,29 +123,31 @@ class TurnerBaseIE(InfoExtractor):                          'height': int(mobj.group('height')),                          'tbr': int_or_none(mobj.group('bitrate')),                      }) -                elif format_id.isdigit(): -                    f['tbr'] = int(format_id) -                else: -                    mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) -                    if mobj: -                        if mobj.group(1) == 'audio': -                            f.update({ -                                'vcodec': 'none', -                                'ext': 'm4a', -                            }) -                        else: -                            f['tbr'] = int(mobj.group(1)) +                elif isinstance(format_id, compat_str): +                    if format_id.isdigit(): +                        f['tbr'] = int(format_id) +                    else: +                        mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) +                        if mobj: +                            if mobj.group(1) == 'audio': +                                f.update({ +                                    'vcodec': 'none', +                                    'ext': 'm4a', +                                }) +                            else: +                                f['tbr'] = int(mobj.group(1))                  formats.append(f)          self._sort_formats(formats)          subtitles = {}          for source in video_data.findall('closedCaptions/source'):              for track in source.findall('track'): -                source_url = source.get('url') -                if not source_url: +                track_url = track.get('url') +                if not isinstance(track_url, compat_str) or track_url.endswith('/big'):                      continue -                subtitles.set_default(source.get('lang') or source.get('label') or 'en', []).append({ -                    'url': source_url, +                lang = track.get('lang') or track.get('label') or 'en' +                subtitles.setdefault(lang, []).append({ +                    'url': track_url,                      'ext': {                          'scc': 'scc',                          'webvtt': 'vtt', @@ -154,10 +162,6 @@ class TurnerBaseIE(InfoExtractor):              'height': int_or_none(image.get('height')),          } for image in video_data.findall('images/image')] -        timestamp = None -        if 'cnn.com' not in data_src: -            timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) -          return {              'id': video_id,              'title': title, @@ -166,7 +170,7 @@ class TurnerBaseIE(InfoExtractor):              'thumbnails': thumbnails,              'description': xpath_text(video_data, 'description'),              'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')), -            'timestamp': timestamp, +            'timestamp': self._extract_timestamp(video_data),              'upload_date': xpath_attr(video_data, 'metas', 'version'),              'series': xpath_text(video_data, 'showTitle'),              'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py new file mode 100644 index 000000000..1cd3e6a58 --- /dev/null +++ b/youtube_dl/extractor/tvnoe.py @@ -0,0 +1,49 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .jwplatform import JWPlatformBaseIE +from ..utils import ( +    clean_html, +    get_element_by_class, +    js_to_json, +) + + +class TVNoeIE(JWPlatformBaseIE): +    _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://www.tvnoe.cz/video/10362', +        'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', +        'info_dict': { +            'id': '10362', +            'ext': 'mp4', +            'series': 'Noční univerzita', +            'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', +            'description': 'md5:f337bae384e1a531a52c55ebc50fff41', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        iframe_url = self._search_regex( +            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL') + +        ifs_page = self._download_webpage(iframe_url, video_id) +        jwplayer_data = self._parse_json( +            self._find_jwplayer_data(ifs_page), +            video_id, transform_source=js_to_json) +        info_dict = self._parse_jwplayer_data( +            jwplayer_data, video_id, require_title=False, base_url=iframe_url) + +        info_dict.update({ +            'id': video_id, +            'title': clean_html(get_element_by_class( +                'field-name-field-podnazev', webpage)), +            'description': clean_html(get_element_by_class( +                'field-name-body', webpage)), +            'series': clean_html(get_element_by_class('title', webpage)) +        }) + +        return info_dict diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 92321d66e..7fd9b777b 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -28,23 +28,24 @@ class SprutoBaseIE(InfoExtractor):  class VimpleIE(SprutoBaseIE):      IE_DESC = 'Vimple - one-click video hosting' -    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})' -    _TESTS = [ -        { -            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', -            'md5': '2e750a330ed211d3fd41821c6ad9a279', -            'info_dict': { -                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', -                'ext': 'mp4', -                'title': 'Sunset', -                'duration': 20, -                'thumbnail': 're:https?://.*?\.jpg', -            }, -        }, { -            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', -            'only_matching': True, -        } -    ] +    _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P<id>[\da-f-]{32,36})' +    _TESTS = [{ +        'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', +        'md5': '2e750a330ed211d3fd41821c6ad9a279', +        'info_dict': { +            'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', +            'ext': 'mp4', +            'title': 'Sunset', +            'duration': 20, +            'thumbnail': 're:https?://.*?\.jpg', +        }, +    }, { +        'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', +        'only_matching': True, +    }, { +        'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index b49542b16..7bdd8b1dc 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -6,7 +6,7 @@ from ..utils import unescapeHTML  class VODPlatformIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)' +    _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'      _TEST = {          # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar          'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b0679dfb7..d7a81ab8c 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -8,7 +8,6 @@ import re  from .common import InfoExtractor, SearchInfoExtractor  from ..compat import (      compat_urllib_parse, -    compat_urllib_parse_urlencode,      compat_urlparse,  )  from ..utils import ( @@ -17,6 +16,7 @@ from ..utils import (      ExtractorError,      int_or_none,      mimetype2ext, +    determine_ext,  )  from .brightcove import BrightcoveNewIE @@ -39,7 +39,7 @@ class YahooIE(InfoExtractor):          },          {              'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', -            'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23', +            'md5': '251af144a19ebc4a033e8ba91ac726bb',              'info_dict': {                  'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',                  'ext': 'mp4', @@ -50,7 +50,7 @@ class YahooIE(InfoExtractor):          },          {              'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', -            'md5': '75ffabdb87c16d4ffe8c036dc4d1c136', +            'md5': '7993e572fac98e044588d0b5260f4352',              'info_dict': {                  'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',                  'ext': 'mp4', @@ -61,7 +61,7 @@ class YahooIE(InfoExtractor):          },          {              'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html', -            'md5': '9035d38f88b1782682a3e89f985be5bb', +            'md5': '45c024bad51e63e9b6f6fad7a43a8c23',              'info_dict': {                  'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',                  'ext': 'mp4', @@ -72,10 +72,10 @@ class YahooIE(InfoExtractor):          },          {              'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', -            'md5': '0b51660361f0e27c9789e7037ef76f4b', +            'md5': '71298482f7c64cbb7fa064e4553ff1c1',              'info_dict': {                  'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', -                'ext': 'mp4', +                'ext': 'webm',                  'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',                  'description': 'md5:f66c890e1490f4910a9953c941dee944',                  'duration': 97, @@ -98,7 +98,7 @@ class YahooIE(InfoExtractor):                  'id': '154609075',              },              'playlist': [{ -                'md5': 'f8e336c6b66f503282e5f719641d6565', +                'md5': '000887d0dc609bc3a47c974151a40fb8',                  'info_dict': {                      'id': 'e624c4bc-3389-34de-9dfc-025f74943409',                      'ext': 'mp4', @@ -107,7 +107,7 @@ class YahooIE(InfoExtractor):                      'duration': 30,                  },              }, { -                'md5': '958bcb90b4d6df71c56312137ee1cd5a', +                'md5': '81bc74faf10750fe36e4542f9a184c66',                  'info_dict': {                      'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',                      'ext': 'mp4', @@ -139,7 +139,7 @@ class YahooIE(InfoExtractor):              'skip': 'Domain name in.lifestyle.yahoo.com gone',          }, {              'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', -            'md5': 'b17ac378b1134fa44370fb27db09a744', +            'md5': '2a9752f74cb898af5d1083ea9f661b58',              'info_dict': {                  'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',                  'ext': 'mp4', @@ -168,7 +168,7 @@ class YahooIE(InfoExtractor):          }, {              # Query result is embedded in webpage, but explicit request to video API fails with geo restriction              'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', -            'md5': '1ddbf7c850777548438e5c4f147c7b8c', +            'md5': '4fbafb9c9b6f07aa8f870629f6671b35',              'info_dict': {                  'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',                  'ext': 'mp4', @@ -196,6 +196,7 @@ class YahooIE(InfoExtractor):                  'description': 'Galactic',                  'title': 'Dolla Diva (feat. Maggie Koerner)',              }, +            'skip': 'redirect to https://www.yahoo.com/music',          },      ] @@ -213,15 +214,7 @@ class YahooIE(InfoExtractor):          entries = []          iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)          for idx, iframe_url in enumerate(iframe_urls): -            iframepage = self._download_webpage( -                host + iframe_url, display_id, -                note='Downloading iframe webpage for video #%d' % idx) -            items_json = self._search_regex( -                r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None) -            if items_json: -                items = json.loads(items_json) -                video_id = items[0]['id'] -                entries.append(self._get_info(video_id, display_id, webpage)) +            entries.append(self.url_result(host + iframe_url, 'Yahoo'))          if entries:              return self.playlist_result(entries, page_id) @@ -246,7 +239,9 @@ class YahooIE(InfoExtractor):              if config:                  sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')                  if sapi and 'query' in sapi: -                    return self._extract_info(display_id, sapi, webpage) +                    info = self._extract_info(display_id, sapi, webpage) +                    self._sort_formats(info['formats']) +                    return info          items_json = self._search_regex(              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, @@ -292,15 +287,17 @@ class YahooIE(InfoExtractor):          formats = []          for s in info['streams']: +            tbr = int_or_none(s.get('bitrate'))              format_info = {                  'width': int_or_none(s.get('width')),                  'height': int_or_none(s.get('height')), -                'tbr': int_or_none(s.get('bitrate')), +                'tbr': tbr,              }              host = s['host']              path = s['path']              if host.startswith('rtmp'): +                fmt = 'rtmp'                  format_info.update({                      'url': host,                      'play_path': path, @@ -308,14 +305,18 @@ class YahooIE(InfoExtractor):                  })              else:                  if s.get('format') == 'm3u8_playlist': -                    format_info['protocol'] = 'm3u8_native' -                    format_info['ext'] = 'mp4' +                    fmt = 'hls' +                    format_info.update({ +                        'protocol': 'm3u8_native', +                        'ext': 'mp4', +                    }) +                else: +                    fmt = format_info['ext'] = determine_ext(path)                  format_url = compat_urlparse.urljoin(host, path)                  format_info['url'] = format_url +            format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')              formats.append(format_info) -        self._sort_formats(formats) -          closed_captions = self._html_search_regex(              r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',              default='[]') @@ -346,17 +347,25 @@ class YahooIE(InfoExtractor):      def _get_info(self, video_id, display_id, webpage):          region = self._search_regex(              r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', -            webpage, 'region', fatal=False, default='US') -        data = compat_urllib_parse_urlencode({ -            'protocol': 'http', -            'region': region.upper(), -        }) -        query_url = ( -            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' -            '{id}?{data}'.format(id=video_id, data=data)) -        query_result = self._download_json( -            query_url, display_id, 'Downloading video info') -        return self._extract_info(display_id, query_result, webpage) +            webpage, 'region', fatal=False, default='US').upper() +        formats = [] +        info = {} +        for fmt in ('webm', 'mp4'): +            query_result = self._download_json( +                'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id, +                display_id, 'Downloading %s video info' % fmt, query={ +                    'protocol': 'http', +                    'region': region, +                    'format': fmt, +                }) +            info = self._extract_info(display_id, query_result, webpage) +            formats.extend(info['formats']) +        formats.extend(self._extract_m3u8_formats( +            'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region), +            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) +        self._sort_formats(formats) +        info['formats'] = formats +        return info  class YahooSearchIE(SearchInfoExtractor): diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 31e2f9263..b50f34e9b 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,21 +1,16 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -)  class YouJizzIE(InfoExtractor):      _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'      _TESTS = [{          'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', -        'md5': '07e15fa469ba384c7693fd246905547c', +        'md5': '78fc1901148284c69af12640e01c6310',          'info_dict': {              'id': '2189178', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Zeichentrick 1',              'age_limit': 18,          } @@ -27,38 +22,18 @@ class YouJizzIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +        # YouJizz's HTML5 player has invalid HTML +        webpage = webpage.replace('"controls', '" controls')          age_limit = self._rta_search(webpage)          video_title = self._html_search_regex(              r'<title>\s*(.*)\s*</title>', webpage, 'title') -        embed_page_url = self._search_regex( -            r'(https?://www.youjizz.com/videos/embed/[0-9]+)', -            webpage, 'embed page') -        webpage = self._download_webpage( -            embed_page_url, video_id, note='downloading embed page') - -        # Get the video URL -        m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage) -        if m_playlist is not None: -            playlist_url = m_playlist.group('playlist') -            playlist_page = self._download_webpage(playlist_url, video_id, -                                                   'Downloading playlist page') -            m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page)) -            if len(m_levels) == 0: -                raise ExtractorError('Unable to extract video url') -            videos = [(int(m.group(1)), m.group(2)) for m in m_levels] -            (_, video_url) = sorted(videos)[0] -            video_url = video_url.replace('%252F', '%2F') -        else: -            video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', -                                           webpage, 'video URL') +        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] -        return { +        info_dict.update({              'id': video_id, -            'url': video_url,              'title': video_title, -            'ext': 'flv', -            'format': 'flv', -            'player_url': embed_page_url,              'age_limit': age_limit, -        } +        }) + +        return info_dict diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 0df2d76ee..0265a64a7 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor):              'age_limit': 18,          },      }, { -        # Anonymous User uploader +        # Unknown uploader          'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',          'info_dict': {              'id': '561726', @@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor):              'title': 'Big Tits Awesome Brunette On amazing webcam show',              'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',              'thumbnail': 're:^https?://.*\.jpg$', -            'uploader': 'Anonymous User', +            'uploader': 'Unknown',              'upload_date': '20111125',              'average_rating': int,              'view_count': int, @@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor):              r'>All [Cc]omments? \(([\d,.]+)\)',              webpage, 'comment count', fatal=False)) -        def extract_tag_box(title): -            tag_box = self._search_regex( -                (r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*' -                 '<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title), -                webpage, '%s tag box' % title, default=None) +        def extract_tag_box(regex, title): +            tag_box = self._search_regex(regex, webpage, title, default=None)              if not tag_box:                  return []              return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box) -        categories = extract_tag_box('Category') -        tags = extract_tag_box('Tags') +        categories = extract_tag_box( +            r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories') +        tags = extract_tag_box( +            r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', +            'tags')          return {              'id': video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d5d5b7334..8fc26bd02 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -264,7 +264,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                           )                       )?                                                       # all until now is optional -> you can pass the naked ID                       ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID -                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE +                     (?!.*?\blist=)                                            # combined list/video URLs are handled by the playlist IE                       (?(1).+)?                                                # if we found the ID, everything can follow                       $"""      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' @@ -844,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)              'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',              'only_matching': True, +        }, +        { +            # Rental video preview +            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', +            'info_dict': { +                'id': 'uGpuVWrhIzE', +                'ext': 'mp4', +                'title': 'Piku - Trailer', +                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', +                'upload_date': '20150811', +                'uploader': 'FlixMatrix', +                'uploader_id': 'FlixMatrixKaravan', +                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', +                'license': 'Standard YouTube License', +            }, +            'params': { +                'skip_download': True, +            },          }      ] @@ -1254,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                      # Convert to the same format returned by compat_parse_qs                      video_info = dict((k, [v]) for k, v in args.items())                      add_dash_mpd(video_info) +                # Rental video is not rented but preview is available (e.g. +                # https://www.youtube.com/watch?v=yYr8q0y5Jfg, +                # https://github.com/rg3/youtube-dl/issues/10532) +                if not video_info and args.get('ypc_vid'): +                    return self.url_result( +                        args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])                  if args.get('livestream') == '1' or args.get('live_playback') == 1:                      is_live = True              if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): @@ -1754,11 +1778,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):      _VALID_URL = r"""(?x)(?:                          (?:https?://)?                          (?:\w+\.)? -                        youtube\.com/                          (?: -                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) -                           \? (?:.*?[&;])*? (?:p|a|list)= -                        |  p/ +                            youtube\.com/ +                            (?: +                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) +                               \? (?:.*?[&;])*? (?:p|a|list)= +                            |  p/ +                            )| +                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=                          )                          (                              (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} @@ -1841,6 +1868,31 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):              'id': 'UUXw-G3eDE9trcvY2sBMM_aA',          },          'playlist_mincout': 21, +    }, { +        # Playlist URL that does not actually serve a playlist +        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', +        'info_dict': { +            'id': 'FqZTN594JQw', +            'ext': 'webm', +            'title': "Smiley's People 01 detective, Adventure Series, Action", +            'uploader': 'STREEM', +            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', +            'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', +            'upload_date': '20150526', +            'license': 'Standard YouTube License', +            'description': 'md5:507cdcb5a49ac0da37a920ece610be80', +            'categories': ['People & Blogs'], +            'tags': list, +            'like_count': int, +            'dislike_count': int, +        }, +        'params': { +            'skip_download': True, +        }, +        'add_ie': [YoutubeIE.ie_key()], +    }, { +        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', +        'only_matching': True,      }]      def _real_initialize(self): @@ -1901,9 +1953,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):          playlist_title = self._html_search_regex(              r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>', -            page, 'title') +            page, 'title', default=None) + +        has_videos = True -        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) +        if not playlist_title: +            try: +                # Some playlist URLs don't actually serve a playlist (e.g. +                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4) +                next(self._entries(page, playlist_id)) +            except StopIteration: +                has_videos = False + +        return has_videos, self.playlist_result( +            self._entries(page, playlist_id), playlist_id, playlist_title)      def _check_download_just_video(self, url, playlist_id):          # Check if it's a video-specific URL @@ -1912,9 +1975,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):              video_id = query_dict['v'][0]              if self._downloader.params.get('noplaylist'):                  self.to_screen('Downloading just video %s because of --no-playlist' % video_id) -                return self.url_result(video_id, 'Youtube', video_id=video_id) +                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)              else:                  self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) +                return video_id, None +        return None, None      def _real_extract(self, url):          # Extract playlist id @@ -1923,7 +1988,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):              raise ExtractorError('Invalid URL: %s' % url)          playlist_id = mobj.group(1) or mobj.group(2) -        video = self._check_download_just_video(url, playlist_id) +        video_id, video = self._check_download_just_video(url, playlist_id)          if video:              return video @@ -1931,7 +1996,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):              # Mixes require a custom extraction process              return self._extract_mix(playlist_id) -        return self._extract_playlist(playlist_id) +        has_videos, playlist = self._extract_playlist(playlist_id) +        if has_videos or not video_id: +            return playlist + +        # Some playlist URLs don't actually serve a playlist (see +        # https://github.com/rg3/youtube-dl/issues/10537). +        # Fallback to plain video extraction if there is a video id +        # along with playlist id. +        return self.url_result(video_id, 'Youtube', video_id=video_id)  class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @@ -2309,10 +2382,11 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):      }]      def _real_extract(self, url): -        video = self._check_download_just_video(url, 'WL') +        _, video = self._check_download_just_video(url, 'WL')          if video:              return video -        return self._extract_playlist('WL') +        _, playlist = self._extract_playlist('WL') +        return playlist  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5d62deef4..56f312f57 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -423,7 +423,15 @@ def parseOpts(overrideArguments=None):      downloader.add_option(          '--fragment-retries',          dest='fragment_retries', metavar='RETRIES', default=10, -        help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)') +        help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)') +    downloader.add_option( +        '--skip-unavailable-fragments', +        action='store_true', dest='skip_unavailable_fragments', default=True, +        help='Skip unavailable fragments (DASH and hlsnative only)') +    general.add_option( +        '--abort-on-unavailable-fragment', +        action='store_false', dest='skip_unavailable_fragments', +        help='Abort downloading when some fragment is not available')      downloader.add_option(          '--buffer-size',          dest='buffersize', metavar='SIZE', default='1024', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1091f17f3..ed199c4ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2148,7 +2148,7 @@ def mimetype2ext(mt):          return ext      _, _, res = mt.rpartition('/') -    res = res.lower() +    res = res.split(';')[0].strip().lower()      return {          '3gpp': '3gp', @@ -2168,6 +2168,7 @@ def mimetype2ext(mt):          'f4m+xml': 'f4m',          'hds+xml': 'f4m',          'vnd.ms-sstr+xml': 'ism', +        'quicktime': 'mov',      }.get(res, res) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ee30ca2ad..b2ea6dac6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2016.08.28' +__version__ = '2016.09.04.1' | 
