diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2013-12-20 17:05:28 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2013-12-20 17:05:39 +0100 | 
| commit | aa94a6d3159af8333b56d16f3ed0bc3a164a882a (patch) | |
| tree | 34c838d89031442e68d941bd7f25e93098ea1ec0 | |
| parent | 768df745385a283f4df3a38ee4734feec518ec87 (diff) | |
[aparat] Add support (Fixes #2012)

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 |
| -rw-r--r-- | youtube_dl/extractor/aparat.py | 56 |
| -rw-r--r-- | youtube_dl/extractor/common.py | 2 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 15 |
| -rw-r--r-- | youtube_dl/utils.py | 5 |

5 files changed, 73 insertions, 6 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f2f8806e..7de9d594a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,6 +1,7 @@  from .academicearth import AcademicEarthCourseIE  from .addanime import AddAnimeIE  from .anitube import AnitubeIE +from .aparat import AparatIE  from .appletrailers import AppleTrailersIE  from .archiveorg import ArchiveOrgIE  from .ard import ARDIE diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py new file mode 100644 index 000000000..7e93bc4df --- /dev/null +++ b/youtube_dl/extractor/aparat.py @@ -0,0 +1,56 @@ +#coding: utf-8 + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    HEADRequest, +) + + +class AparatIE(InfoExtractor): +    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' + +    _TEST = { +        u'url': u'http://www.aparat.com/v/wP8On', +        u'file': u'wP8On.mp4', +        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1', +        u'info_dict': { +            u"title": u"تیم گلکسی 11 - زومیت", +        }, +        #u'skip': u'Extremely unreliable', +    } + +    def _real_extract(self, url): +        m = re.match(self._VALID_URL, url) +        video_id = m.group('id') + +        # Note: There is an easier-to-parse configuration at +        # http://www.aparat.com/video/video/config/videohash/%video_id +        # but the URL in there does not work +        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' + +                     video_id + u'/vt/frame') +        webpage = self._download_webpage(embed_url, video_id) + +        video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) +        for i, video_url in enumerate(video_urls): +            req = HEADRequest(video_url) +            res = self._request_webpage( +                req, video_id, note=u'Testing video URL %d' % i, 
errnote=False) +            if res: +                break +        else: +            raise ExtractorError(u'No working video URLs found') + +        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title') +        thumbnail = self._search_regex( +            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False) + +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +            'ext': 'mp4', +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f89df57ed..ba46a7bc7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -170,6 +170,8 @@ class InfoExtractor(object):          try:              return self._downloader.urlopen(url_or_request)          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +            if errnote is False: +                return False              if errnote is None:                  errnote = u'Unable to download webpage'              errmsg = u'%s: %s' % (errnote, compat_str(err)) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 75cb96eb7..bdb4f58d6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,7 @@ from ..utils import (      compat_urlparse,      ExtractorError, +    HEADRequest,      smuggle_url,      unescapeHTML,      unified_strdate, @@ -109,21 +110,18 @@ class GenericIE(InfoExtractor):      def _send_head(self, url):          """Check if it is a redirect, like url shorteners, in case return the new url.""" -        class HeadRequest(compat_urllib_request.Request): -            def get_method(self): -                return "HEAD"          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):              """              Subclass the HTTPRedirectHandler to make it use our -            HeadRequest also on the 
redirected URL +            HEADRequest also on the redirected URL              """              def redirect_request(self, req, fp, code, msg, headers, newurl):                  if code in (301, 302, 303, 307):                      newurl = newurl.replace(' ', '%20')                      newheaders = dict((k,v) for k,v in req.headers.items()                                        if k.lower() not in ("content-length", "content-type")) -                    return HeadRequest(newurl, +                    return HEADRequest(newurl,                                         headers=newheaders,                                         origin_req_host=req.get_origin_req_host(),                                         unverifiable=True) @@ -152,7 +150,7 @@ class GenericIE(InfoExtractor):                          compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:              opener.add_handler(handler()) -        response = opener.open(HeadRequest(url)) +        response = opener.open(HEADRequest(url))          if response is None:              raise ExtractorError(u'Invalid URL protocol')          return response @@ -296,6 +294,11 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return OoyalaIE._build_url_result(mobj.group(1)) +        # Look for Aparat videos +        mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage) +        if mobj is not None: +            return self.url_result(mobj.group(1), 'Aparat') +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cc391bddd..2e48f187e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1093,3 +1093,8 @@ def remove_start(s, start):  def url_basename(url):      path = compat_urlparse.urlparse(url).path      return path.strip(u'/').split(u'/')[-1] + + +class 
HEADRequest(compat_urllib_request.Request): +    def get_method(self): +        return "HEAD"  | 
