diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/naver.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/tagesschau.py | 79 | ||||
| -rw-r--r-- | youtube_dl/extractor/teachingchannel.py | 33 | ||||
| -rw-r--r-- | youtube_dl/extractor/xvideos.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 7 | 
6 files changed, 150 insertions, 22 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2ad1db555..a19e85543 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -266,10 +266,12 @@ from .streamcz import StreamCZIE  from .swrmediathek import SWRMediathekIE  from .syfy import SyfyIE  from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE  from .teachertube import (      TeacherTubeIE,      TeacherTubeClassroomIE,  ) +from .teachingchannel import TeachingChannelIE  from .teamcoco import TeamcocoIE  from .techtalks import TechTalksIE  from .ted import TEDIE diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 4cab30631..c0231c197 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,4 +1,6 @@  # encoding: utf-8 +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):      _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'      _TEST = { -        u'url': u'http://tvcast.naver.com/v/81652', -        u'file': u'81652.mp4', -        u'info_dict': { -            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', -            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', -            u'upload_date': u'20130903', +        'url': 'http://tvcast.naver.com/v/81652', +        'info_dict': { +            'id': '81652', +            'ext': 'mp4', +            'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', +            'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', +            'upload_date': '20130903',          },      } @@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):          m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',              webpage)          if m_id is None: -            raise ExtractorError(u'couldn\'t extract vid and key') +            raise ExtractorError('couldn\'t extract vid and key')          vid = m_id.group(1)          key = m_id.group(2)          query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) @@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):          })          info = self._download_xml(              'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, -            video_id, u'Downloading video info') +            video_id, 'Downloading video info')          urls = self._download_xml(              'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, -            video_id, u'Downloading video formats info') +            video_id, 'Downloading video formats info')          formats = []          for format_el in urls.findall('EncodingOptions/EncodingOption'):              domain = format_el.find('Domain').text -            if domain.startswith('rtmp'): -                continue -            formats.append({ +            f = {                  'url': domain + format_el.find('uri').text,                  'ext': 'mp4',                  'width': int(format_el.find('width').text),                  'height': int(format_el.find('height').text), -            }) +            } +            if domain.startswith('rtmp'): +                f.update({ +                    'ext': 'flv', +                    'rtmp_protocol': '1', # rtmpt +                }) +            formats.append(f) +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py new file mode 100644 index 000000000..36331529e --- /dev/null +++ b/youtube_dl/extractor/tagesschau.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class TagesschauIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html' + +    _TESTS = [{ +        'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html', +        'md5': 'bcdeac2194fb296d599ce7929dfa4009', +        'info_dict': { +            'id': '1399128', +            'ext': 'mp4', +            'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen', +            'description': 'md5:69da3c61275b426426d711bde96463ab', +            'thumbnail': 're:^http:.*\.jpg$', +        }, +    }, { +        'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', +        'md5': '8aaa8bf3ae1ca2652309718c03019128', +        'info_dict': { +            'id': '196', +            'ext': 'mp4', +            'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', +            'description': 'md5:f22e4af75821d174fa6c977349682691', +            'thumbnail': 're:http://.*\.jpg', +        }, +    }] + +    _FORMATS = { +        's': {'width': 256, 'height': 144, 'quality': 1}, +        'm': {'width': 512, 'height': 288, 'quality': 2}, +        'l': {'width': 960, 'height': 544, 'quality': 3}, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        if video_id.startswith('-'): +            display_id = video_id.strip('-') +        else: +            display_id = video_id + +        webpage = self._download_webpage(url, display_id) + +        playerpage = self._download_webpage( +            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, +            display_id, 'Downloading player page') + +        medias = re.findall( +            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', +            playerpage) + +        formats = [] +        for url, ext, res in medias: +            f = { +                'format_id': res + '_' + ext, +                'url': url, +                'ext': ext, +            } +            f.update(self._FORMATS.get(res, {})) +            formats.append(f) + +        self._sort_formats(formats) + +        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1] + +        return { +            'id': display_id, +            'title': self._og_search_title(webpage).strip(), +            'thumbnail': 'http://www.tagesschau.de' + thumbnail, +            'formats': formats, +            'description': self._og_search_description(webpage).strip(), +        } diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py new file mode 100644 index 000000000..117afa9bf --- /dev/null +++ b/youtube_dl/extractor/teachingchannel.py @@ -0,0 +1,33 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class TeachingChannelIE(InfoExtractor): +    _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)' + +    _TEST = { +        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', +        'info_dict': { +            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', +            'ext': 'mp4', +            'title': 'A History of Teaming', +            'description': 'md5:2a9033db8da81f2edffa4c99888140b3', +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        title = mobj.group('title') +        webpage = self._download_webpage(url, title) +        ooyala_code = self._search_regex( +            r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code') + +        return OoyalaIE._build_url_result(ooyala_code) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 85e99e1b0..7e0044824 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -5,18 +5,21 @@ import re  from .common import InfoExtractor  from ..utils import (      compat_urllib_parse, +    ExtractorError, +    clean_html,  )  class XVideosIE(InfoExtractor):      _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'      _TEST = { -        'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1', -        'file': '939581.flv', -        'md5': '1d0c835822f0a71a7bf011855db929d0', +        'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', +        'md5': '4b46ae6ea5e6e9086e714d883313c0c9',          'info_dict': { -            "title": "Funny Porns By >>>>S<<<<<< -1", -            "age_limit": 18, +            'id': '4588838', +            'ext': 'flv', +            'title': 'Biker Takes his Girl', +            'age_limit': 18,          }      } @@ -28,6 +31,10 @@ class XVideosIE(InfoExtractor):          self.report_extraction(video_id) +        mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage) +        if mobj: +            raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) +          # Extract video URL          video_url = compat_urllib_parse.unquote(              self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8327fb146..7c50881c4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, +        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          # Dash webm audio          '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, @@ -1414,11 +1415,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          title_span = (search_title('playlist-title') or              search_title('title long-title') or search_title('title'))          title = clean_html(title_span) -        video_re = r'''(?x)data-video-username="(.*?)".*? +        video_re = r'''(?x)data-video-username=".*?".*?                         href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) -        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) -        # Some of the videos may have been deleted, their username field is empty -        ids = [video_id for (username, video_id) in matches if username] +        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))          url_results = self._ids_to_results(ids)          return self.playlist_result(url_results, playlist_id, title)  | 
