diff options
| -rw-r--r-- | .travis.yml | 1 | ||||
| -rw-r--r-- | youtube_dl/downloader/hls.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 21 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/gogoanime.py | 76 | ||||
| -rw-r--r-- | youtube_dl/extractor/khanacademy.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/lrt.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/normalboots.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/play44.py | 149 | ||||
| -rw-r--r-- | youtube_dl/extractor/rtlnl.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/ted.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/tf1.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/videofun.py | 36 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimple.py | 23 | ||||
| -rw-r--r-- | youtube_dl/extractor/webofstories.py | 102 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 46 | ||||
| -rw-r--r-- | youtube_dl/postprocessor/ffmpeg.py | 2 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 1 | 
18 files changed, 132 insertions, 368 deletions
| diff --git a/.travis.yml b/.travis.yml index c6cc7a994..f14014414 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications:    email:      - filippo.valsorda@gmail.com      - phihag@phihag.de -    - jaime.marquinez.ferrandiz+travis@gmail.com      - yasoob.khld@gmail.com  #  irc:  #    channels: diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import (      compat_urllib_request,  )  from ..utils import ( -    check_executable,      encodeFilename,  ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader):              '-bsf:a', 'aac_adtstoasc',              encodeFilename(tmpfilename, for_subprocess=True)] -        for program in ['avconv', 'ffmpeg']: -            if check_executable(program, ['-version']): -                break -        else: +        ffpp = FFmpegPostProcessor(downloader=self) +        program = ffpp._executable +        if program is None:              self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')              return False -        cmd = [program] + args - -        ffpp = FFmpegPostProcessor(downloader=self)          ffpp.check_version() +        cmd = [program] + args          retval = subprocess.call(cmd)          if retval == 0: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 349f4fe71..8e47bd60d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -164,10 +164,6 @@ from .globo import GloboIE  from .godtube import GodTubeIE  from .goldenmoustache import GoldenMoustacheIE  from .golem import GolemIE -from .gogoanime import ( -    GoGoAnimeIE, -    GoGoAnimeSearchIE -)  from .googleplus import GooglePlusIE  from .googlesearch import GoogleSearchIE  from .gorillavid import GorillaVidIE @@ -317,16 +313,6 @@ from .phoenix import PhoenixIE  from .photobucket import PhotobucketIE  from .planetaplay import PlanetaPlayIE  from .played import PlayedIE -from .play44 import ( -    Play44IE, -    ByZooIE, -    Video44IE, -    VideoWingIE, -    PlayPandaIE, -    VideoZooIE, -    PlayBBIE, -    EasyVideoIE -)  from .playfm import PlayFMIE  from .playvid import PlayvidIE  from .podomatic import PodomaticIE @@ -389,10 +375,6 @@ from .smotri import (  from .snotr import SnotrIE  from .sockshare import SockshareIE  from .sohu import SohuIE -from .soulanime import ( -    SoulAnimeWatchingIE, -    SoulAnimeSeriesIE -)  from .soundcloud import (      SoundcloudIE,      SoundcloudSetIE, @@ -487,7 +469,6 @@ from .viddler import ViddlerIE  from .videobam import VideoBamIE  from .videodetective import VideoDetectiveIE  from .videolecturesnet import VideoLecturesNetIE -from .videofun import VideoFunIE  from .videofyme import VideofyMeIE  from .videomega import VideoMegaIE  from .videopremium import VideoPremiumIE @@ -531,6 +512,7 @@ from .wdr import (      WDRMobileIE,      WDRMausIE,  ) +from .webofstories import WebOfStoriesIE  from .weibo import WeiboIE  from .wimp import WimpIE  from .wistia import WistiaIE @@ -566,7 +548,6 @@ from .youtube import (      YoutubeSearchURLIE,      YoutubeShowIE,      YoutubeSubscriptionsIE, -    YoutubeTopListIE,      YoutubeTruncatedIDIE,      YoutubeTruncatedURLIE,      YoutubeUserIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 493afb57d..2d871f8b4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -131,12 +131,13 @@ class GenericIE(InfoExtractor):          # ooyala video          {              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', -            'md5': '5644c6ca5d5782c1d0d350dad9bd840c', +            'md5': '166dd577b433b4d4ebfee10b0824d8ff',              'info_dict': {                  'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',                  'ext': 'mp4',                  'title': '2cc213299525360.mov',  # that's what we get              }, +            'add_ie': ['Ooyala'],          },          # google redirect          { @@ -146,7 +147,7 @@ class GenericIE(InfoExtractor):                  'ext': 'mp4',                  'upload_date': '20130224',                  'uploader_id': 'TheVerge', -                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', +                'description': 're:^Chris Ziegler takes a look at the\.*',                  'uploader': 'The Verge',                  'title': 'First Firefox OS phones side-by-side',              }, @@ -925,7 +926,7 @@ class GenericIE(InfoExtractor):          # Look for embedded TED player          mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) +                r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)          if mobj is not None:              return self.url_result(mobj.group('url'), 'TED') diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py deleted file mode 100644 index d4f4ecc58..000000000 --- a/youtube_dl/extractor/gogoanime.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -    compat_urllib_parse, -    get_element_by_attribute, -    unescapeHTML -) - - -class GoGoAnimeIE(InfoExtractor): -    IE_NAME = 'gogoanime' -    IE_DESC = 'GoGoAnime' -    _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)' - -    _TEST = { -        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', -        'info_dict': { -            'id': 'mahou-shoujo-madoka-magica-movie-1' -        }, -        'playlist_count': 3 -    } - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        page = self._download_webpage(url, video_id) - -        if 'Oops! Page Not Found</font>' in page: -            raise ExtractorError('Video does not exist', expected=True) - -        content = get_element_by_attribute("class", "postcontent", page) -        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content) -        vids = [ -            unescapeHTML(compat_urllib_parse.unquote(x)) -            for x in vids if not re.search(r".*videofun.*", x)] - -        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page): -            return self.playlist_result([self.url_result(vid) for vid in vids], video_id) - -        title = self._html_search_regex( -            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title') - -        return { -            '_type': 'url', -            'id': video_id, -            'url': vids[0], -            'title': title, -        } - - -class GoGoAnimeSearchIE(InfoExtractor): -    IE_NAME = 'gogoanime:search' -    IE_DESC = 'GoGoAnime Search' - -    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)' -    _TEST = { -        'url': 'http://www.gogoanime.com/?s=bokusatsu', -        'info_dict': { -            'id': 'bokusatsu' -        }, -        'playlist_count': 6 -    } - -    def _real_extract(self, url): -        playlist_id = self._match_id(url) -        webpage = self._download_webpage(url, playlist_id) - -        posts = re.findall( -            r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"', -            webpage) - -        return self.playlist_result( -            [self.url_result(p) for p in posts], playlist_id) diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 408d00944..08a671fa8 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):              'description': 'The perfect cipher',              'duration': 176,              'uploader': 'Brit Cruise', +            'uploader_id': 'khanacademy',              'upload_date': '20120411', -        } +        }, +        'add_ie': ['Youtube'],      }, {          'url': 'https://www.khanacademy.org/math/applied-math/cryptography',          'info_dict': { diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index d72d470aa..9c2fbdd96 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -2,7 +2,6 @@  from __future__ import unicode_literals  import re -import json  from .common import InfoExtractor  from ..utils import ( @@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):          'params': {              'skip_download': True,  # HLS download          }, -      }      def _real_extract(self, url): @@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):          formats = []          for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): -            data = json.loads(js_to_json(js)) +            data = self._parse_json(js, video_id, transform_source=js_to_json) +            if 'provider' not in data: +                continue              if data['provider'] == 'rtmp':                  formats.append({                      'format_id': 'rtmp', diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 3d35b11ac..c13ff0d65 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):              'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',              'uploader': 'JonTron',              'upload_date': '20140125', -        } +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        },      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py deleted file mode 100644 index b8696e516..000000000 --- a/youtube_dl/extractor/play44.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( -    compat_urllib_parse -) - - -class Play44IE(InfoExtractor): -    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)' - -    _TESTS = [{ -        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', -        'md5': 'e37e99d665f503dd2db952f7c4dba9e6', -        'info_dict': { -            'id': 'mahou-shoujo-madoka-magica-07', -            'ext': 'flv', -            'title': 'mahou-shoujo-madoka-magica-07', -        } -    }] - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        page = self._download_webpage(url, video_id) - -        video_url = compat_urllib_parse.unquote(self._html_search_regex( -            r'_url = "(https?://[^"]+?)";', page, 'url')) -        title = self._search_regex(r'.*/(?P<title>[^.]*).', video_url, 'title') - -        return { -            'id': title, -            'url': video_url, -            'title': title, -        } - - -class ByZooIE(Play44IE): -    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' - -    _TESTS = [{ -        'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', -        'md5': '455c83dabe2cd9fd74a87612b01fe017', -        'info_dict': { -            'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', -            'ext': 'mp4', -            'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', -        } -    }] - - -class Video44IE(Play44IE): -    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' - -    _TESTS = [{ -        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', -        'md5': '43eaec6d0beb10e8d42459b9f108aff3', -        'info_dict': { -            'id': 'chaoshead-12', -            'ext': 'mp4', -            'title': 'chaoshead-12', -        } -    }] - - -class VideoWingIE(Play44IE): -    _VALID_URL = r'''(?x) -        http://[w.]*videowing\.[^/]*/ -        (?: -            .*video=/* -            |embed/ -        ) -        (?P<id>[^&?.]+) -    ''' - -    _TESTS = [{ -        'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', -        'md5': '4ed320e353ed26c742c4f12a9c210b60', -        'info_dict': { -            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', -            'ext': 'mp4', -            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', -        } -    }, { -        'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', -        'md5': '33fdd71581357018c226f95c5cedcfd7', -        'info_dict': { -            'id': 'mahoushoujomadokamagicamovie1part1', -            'ext': 'flv', -            'title': 'mahoushoujomadokamagicamovie1part1', -        } -    }] - - -class PlayPandaIE(Play44IE): -    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' - -    _TESTS = [{ -        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', -        'md5': '4ed320e353ed26c742c4f12a9c210b60', -        'info_dict': { -            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', -            'ext': 'mp4', -            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', -            'description': 'boku_wa_tomodachi_ga_sukunai_-_05' -        } -    }] - - -class VideoZooIE(Play44IE): -    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' - -    _TESTS = [{ -        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', -        'md5': '4ed320e353ed26c742c4f12a9c210b60', -        'info_dict': { -            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', -            'ext': 'mp4', -            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', -        } -    }] - - -class PlayBBIE(Play44IE): -    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' - -    _TESTS = [{ -        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', -        'md5': '4ed320e353ed26c742c4f12a9c210b60', -        'info_dict': { -            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', -            'ext': 'mp4', -            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', -        } -    }] - - -class EasyVideoIE(Play44IE): -    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' - -    _TESTS = [{ -        'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', -        'md5': '26178b57629b7650106d72b191137176', -        'info_dict': { -            'id': 'bokuwatomodachigasukunai-04', -            'ext': 'mp4', -            'title': 'bokuwatomodachigasukunai-04', -        }, -        'skip': 'Blocked in Germany', -    }] diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration  class RtlXlIE(InfoExtractor):      IE_NAME = 'rtlxl.nl' -    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' +    _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'      _TEST = {          'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import (  class TEDIE(SubtitlesInfoExtractor):      _VALID_URL = r'''(?x)          (?P<proto>https?://) -        (?P<type>www|embed)(?P<urlmain>\.ted\.com/ +        (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/          (              (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist              | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):      def _real_extract(self, url):          m = re.match(self._VALID_URL, url, re.VERBOSE) -        if m.group('type') == 'embed': +        if m.group('type').startswith('embed'):              desktop_url = m.group('proto') + 'www' + m.group('urlmain')              return self.url_result(desktop_url, 'TED')          name = m.group('name') diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 07cc81226..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,8 +1,6 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py deleted file mode 100644 index 0364b9d32..000000000 --- a/youtube_dl/extractor/videofun.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( -    compat_urllib_parse -) - - -class VideoFunIE(InfoExtractor): -    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' - -    _TEST = { -        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', -        'md5': 'e37e99d665f503dd2db952f7c4dba9e6', -        'info_dict': { -            'id': 'Mahou-Shoujo-Madoka-Magica-07', -            'ext': 'flv', -            'title': 'Mahou-Shoujo-Madoka-Magica-07', -        } -    } - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage( -            url, video_id, 'Downloading video page') - -        video_url_encoded = self._html_search_regex( -            r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') -        video_url = compat_urllib_parse.unquote(video_url_encoded) -        title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') - -        return { -            'id': title, -            'url': video_url, -            'title': title, -        } diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor):      IE_DESC = 'Vimple.ru'      _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'      _TESTS = [ -        # Quality: Large, from iframe          { -            'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', +            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', +            'md5': '2e750a330ed211d3fd41821c6ad9a279',              'info_dict': { -                'id': 'b132bdfd71b546d3972f9ab9a25f201c', -                'title': 'great-escape-minecraft.flv', +                'id': 'c0f6b1687dcd4000a97ebe70068039cf',                  'ext': 'mp4', -                'duration': 352, -                'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', +                'title': 'Sunset', +                'duration': 20, +                'thumbnail': 're:https?://.*?\.jpg',              },          }, -        # Quality: Medium, from mainpage -        { -            'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', -            'info_dict': { -                'id': 'a15950562888453b8e6f9572dc8600cd', -                'title': 'DB 01', -                'ext': 'flv', -                'duration': 1484, -                'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', -            } -        },      ]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' +    _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' +    _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' +    _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' +    _TESTS = [ +        { +            'url': 'http://www.webofstories.com/play/hans.bethe/71', +            'md5': '373e4dd915f60cfe3116322642ddf364', +            'info_dict': { +                'id': '4536', +                'ext': 'mp4', +                'title': 'The temperature of the sun', +                'thumbnail': 're:^https?://.*\.jpg$', +                'description': 'Hans Bethe talks about calculating the temperature of the sun', +                'duration': 238, +            } +        }, +        { +            'url': 'http://www.webofstories.com/play/55908', +            'md5': '2985a698e1fe3211022422c4b5ed962c', +            'info_dict': { +                'id': '55908', +                'ext': 'mp4', +                'title': 'The story of Gemmata obscuriglobus', +                'thumbnail': 're:^https?://.*\.jpg$', +                'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', +                'duration': 169, +            } +        }, +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) +        title = self._og_search_title(webpage) +        description = self._html_search_meta('description', webpage) +        thumbnail = self._og_search_thumbnail(webpage) + +        story_filename = self._search_regex( +            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') +        speaker_id = self._search_regex( +            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') +        story_id = self._search_regex( +            r'\.storyId\((\d+)\)', webpage, 'story ID') +        speaker_type = self._search_regex( +            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') +        great_life = self._search_regex( +            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') +        is_great_life_series = great_life == 'true' +        duration = int_or_none(self._search_regex( +            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + +        # URL building, see: http://www.webofstories.com/scripts/player.js +        ms_prefix = '' +        if speaker_type.lower() == 'ms': +            ms_prefix = 'mini_sites/' + +        if is_great_life_series: +            mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( +                self._VIDEO_DOMAIN, speaker_id, story_filename) +            rtmp_ext = 'flv' +            streamer = self._GREAT_LIFE_STREAMER +            play_path = 'stories/{0:}/{1:}'.format( +                speaker_id, story_filename) +        else: +            mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( +                self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) +            rtmp_ext = 'mp4' +            streamer = self._USER_STREAMER +            play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( +                ms_prefix, speaker_id, story_filename) + +        formats = [{ +            'format_id': 'mp4_sd', +            'url': mp4_url, +        }, { +            'format_id': 'rtmp_sd', +            'page_url': url, +            'url': streamer, +            'ext': rtmp_ext, +            'play_path': play_path, +        }] + +        self._sort_formats(formats) + +        return { +            'id': story_id, +            'title': title, +            'formats': formats, +            'thumbnail': thumbnail, +            'description': description, +            'duration': duration, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e9bf39a00..d1bbf0b01 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1206,9 +1206,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          if playlist_id.startswith('RD'):              # Mixes require a custom extraction process              return self._extract_mix(playlist_id) -        if playlist_id.startswith('TL'): -            raise ExtractorError('For downloading YouTube.com top lists, use ' -                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)          url = self._TEMPLATE_URL % playlist_id          page = self._download_webpage(url, playlist_id) @@ -1254,49 +1251,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): -    IE_NAME = 'youtube:toplist' -    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' -               ' (Example: "yttoplist:music:Top Tracks")') -    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' -    _TESTS = [{ -        'url': 'yttoplist:music:Trending', -        'playlist_mincount': 5, -        'skip': 'Only works for logged-in users', -    }] - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        channel = mobj.group('chann') -        title = mobj.group('title') -        query = compat_urllib_parse.urlencode({'title': title}) -        channel_page = self._download_webpage( -            'https://www.youtube.com/%s' % channel, title) -        link = self._html_search_regex( -            r'''(?x) -                <a\s+href="([^"]+)".*?>\s* -                <span\s+class="branded-page-module-title-text">\s* -                <span[^>]*>.*?%s.*?</span>''' % re.escape(query), -            channel_page, 'list') -        url = compat_urlparse.urljoin('https://www.youtube.com/', link) - -        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' -        ids = [] -        # sometimes the webpage doesn't contain the videos -        # retry until we get them -        for i in itertools.count(0): -            msg = 'Downloading Youtube mix' -            if i > 0: -                msg += ', retry #%d' % i - -            webpage = self._download_webpage(url, title, msg) -            ids = orderedSet(re.findall(video_re, webpage)) -            if ids: -                break -        url_results = self._ids_to_results(ids) -        return self.playlist_result(url_results, playlist_title=title) - -  class YoutubeChannelIE(InfoExtractor):      IE_DESC = 'YouTube.com channels'      _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):  class FFmpegMergerPP(FFmpegPostProcessor):      def run(self, info):          filename = info['filepath'] -        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] +        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']          self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)          self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)          return True, info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bdfe053a7..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,4 +1560,3 @@ def urlhandle_detect_ext(url_handle):          getheader = url_handle.info().getheader      return getheader('Content-Type').split("/")[1] - | 
