diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/yesjapan.py | 62 | 
2 files changed, 63 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3ae7a8a52..caf8c04f1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -510,6 +510,7 @@ from .yahoo import (      YahooIE,      YahooSearchIE,  ) +from .yesjapan import YesJapanIE  from .ynet import YnetIE  from .youjizz import YouJizzIE  from .youku import YoukuIE diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dl/extractor/yesjapan.py new file mode 100644 index 000000000..fec1ad1ac --- /dev/null +++ b/youtube_dl/extractor/yesjapan.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    HEADRequest, +    get_element_by_attribute, +    parse_iso8601, +) + + +class YesJapanIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html' +    _TEST = { +        'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html', +        'md5': 'f0be416314e5be21a12b499b330c21cf', +        'info_dict': { +            'id': '726497834', +            'title': 'Japanese in 5! #20 - WA And GA Particle Usages', +            'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....', +            'ext': 'mp4', +            'timestamp': 1416391590, +            'upload_date': '20141119', +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) +        title = self._og_search_title(webpage) +        video_url = self._og_search_video_url(webpage) +        description = self._og_search_description(webpage) +        thumbnail = self._og_search_thumbnail(webpage) + +        timestamp = None +        submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage) +        if submit_info: +            timestamp = parse_iso8601(self._search_regex( +                r'datetime="([^"]+)"', webpage, 'upload date', fatal=False, default=None)) + +        # attempt to resolve the final URL in order to get a proper extension +        redirect_req = HEADRequest(video_url) +        req = self._request_webpage( +            redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False) +        if req: +            video_url = req.geturl() + +        formats = [{ +            'format_id': 'sd', +            'url': video_url, +        }] + +        return { +            'id': video_id, +            'title': title, +            'formats': formats, +            'description': description, +            'timestamp': timestamp, +            'thumbnail': thumbnail, +        } | 
