diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/embedly.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/patreon.py | 26 | 
3 files changed, 40 insertions, 3 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index c712b907f..84a7edffc 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -121,6 +121,7 @@ from .ellentv import (
     EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
 from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py
new file mode 100644
index 000000000..1cdb11e34
--- /dev/null
+++ b/youtube_dl/extractor/embedly.py
@@ -0,0 +1,16 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EmbedlyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
+    _TESTS = [{
+        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
index e5c9822c5..f179ea200 100644
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -1,9 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     js_to_json,
@@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*$',
             },
         },
+        {
+            'url': 'https://www.patreon.com/creation?hid=1682498',
+            'info_dict': {
+                'id': 'SU4fj_aEMVw',
+                'ext': 'mp4',
+                'title': 'I\'m on Patreon!',
+                'uploader': 'TraciJHines',
+                'thumbnail': 're:^https?://.*$',
+                'upload_date': '20150211',
+                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+                'uploader_id': 'TraciJHines',
+            },
+            'params': {
+                'noplaylist': True,
+                'skip_download': True,
+            }
+        }
     ]
 
     # Currently Patreon exposes download URL via hidden CSS, so login is not
@@ -72,11 +86,17 @@ class PatreonIE(InfoExtractor):
         attach_fn = self._html_search_regex(
             r'<div class="attach"><a target="_blank" href="([^"]+)">',
             webpage, 'attachment URL', default=None)
+        embed = self._html_search_regex(
+            r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+            webpage, 'embedded URL', default=None)
+
         if attach_fn is not None:
             video_url = 'http://www.patreon.com' + attach_fn
             thumbnail = self._og_search_thumbnail(webpage)
             uploader = self._html_search_regex(
                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+        elif embed is not None:
+            return self.url_result(embed)
         else:
             playlist = self._parse_json(self._search_regex(
                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
