diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/empflix.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/gamekings.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/streamcz.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/swrmediathek.py | 104 | 
6 files changed, 139 insertions, 14 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..4a4eb4b74 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -260,6 +260,7 @@ from .stanfordoc import StanfordOpenClassroomIE  from .steam import SteamIE  from .streamcloud import StreamcloudIE  from .streamcz import StreamCZIE +from .swrmediathek import SWRMediathekIE  from .syfy import SyfyIE  from .sztvhu import SztvHuIE  from .teamcoco import TeamcocoIE diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index eaeee5a51..e6952588f 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -3,20 +3,18 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -)  class EmpflixIE(InfoExtractor):      _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'      _TEST = {          'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', -        'md5': '5e5cc160f38ca9857f318eb97146e13e', +        'md5': 'b1bc15b6412d33902d6e5952035fcabc',          'info_dict': {              'id': '33051', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Amateur Finger Fuck', +            'description': 'Amateur solo finger fucking.',              'age_limit': 18,          }      } @@ -30,6 +28,8 @@ class EmpflixIE(InfoExtractor):          video_title = self._html_search_regex(              r'name="title" value="(?P<title>[^"]*)"', webpage, 'title') +        video_description = self._html_search_regex( +            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)          cfg_url = self._html_search_regex(              r'flashvars\.config = escape\("([^"]+)"', @@ -37,12 +37,18 @@ class EmpflixIE(InfoExtractor):          cfg_xml = self._download_xml(              cfg_url, video_id, note='Downloading metadata') -        video_url = cfg_xml.find('videoLink').text + +        formats = [ +            { +                'url': item.find('videoLink').text, +                'format_id': item.find('res').text, +            } for item in cfg_xml.findall('./quality/item') +        ]          return {              'id': video_id, -            'url': video_url, -            'ext': 'flv',              'title': video_title, +            'description': video_description, +            'formats': formats,              'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 233398966..11fee3d31 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -15,7 +15,7 @@ class GamekingsIE(InfoExtractor):              'id': '20130811',              'ext': 'mp4',              'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', -            'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', +            'description': 'md5:36fd701e57e8c15ac8682a2374c99731',          }      } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7dd3dca0d..4118ee956 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor):          video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')          video_uploader = self._html_search_regex( -            r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', +            r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',              webpage, 'uploader', fatal=False)          thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)          if thumbnail: diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index 7362904db..73efe9542 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -5,13 +5,16 @@ import re  import json  from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( +    int_or_none, +    compat_str, +)  class StreamCZIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)' -    _TEST = { +    _TESTS = [{          'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',          'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',          'info_dict': { @@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor):              'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',              'duration': 256,          }, -    } +    }, { +        'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', +        'md5': '246272e753e26bbace7fcd9deca0650c', +        'info_dict': { +            'id': '10002447', +            'ext': 'mp4', +            'title': 'Kancelář Blaník: Tři roky pro Mazánka', +            'description': 'md5:9177695a8b756a0a8ab160de4043b392', +            'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000', +            'duration': 368, +        }, +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor):          self._sort_formats(formats)          return { -            'id': str(jsonData['id']), +            'id': compat_str(jsonData['episode_id']),              'title': self._og_search_title(webpage),              'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),              'formats': formats, diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py new file mode 100644 index 000000000..6c688c520 --- /dev/null +++ b/youtube_dl/extractor/swrmediathek.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class SWRMediathekIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + +    _TESTS = [{ +        'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6', +        'md5': '8c5f6f0172753368547ca8413a7768ac', +        'info_dict': { +            'id': '849790d0-dab8-11e3-a953-0026b975f2e6', +            'ext': 'mp4', +            'title': 'SWR odysso', +            'description': 'md5:2012e31baad36162e97ce9eb3f157b8a', +            'thumbnail': 're:^http:.*\.jpg$', +            'duration': 2602, +            'upload_date': '20140515', +            'uploader': 'SWR Fernsehen', +            'uploader_id': '990030', +        }, +    }, { +        'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6', +        'md5': 'b10ab854f912eecc5a6b55cd6fc1f545', +        'info_dict': { +            'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6', +            'ext': 'mp4', +            'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen', +            'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2', +            'thumbnail': 're:http://.*\.jpg', +            'duration': 5305, +            'upload_date': '20140516', +            'uploader': 'SWR Fernsehen', +            'uploader_id': '990030', +        }, +    }, { +        'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6', +        'md5': '4382e4ef2c9d7ce6852535fa867a0dd3', +        'info_dict': { +            'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6', +            'ext': 'mp3', +            'title': 'Saša Stanišic: Vor dem Fest', +            'description': 'md5:5b792387dc3fbb171eb709060654e8c9', +            'thumbnail': 're:http://.*\.jpg', +            'duration': 3366, +            'upload_date': '20140520', +            'uploader': 'SWR 2', +            'uploader_id': '284670', +        } +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        video = self._download_json( +            'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON') + +        attr = video['attr'] +        media_type = attr['entry_etype'] + +        formats = [] +        for entry in video['sub']: +            if entry['name'] != 'entry_media': +                continue + +            entry_attr = entry['attr'] +            codec = entry_attr['val0'] +            quality = int(entry_attr['val1']) + +            fmt = { +                'url': entry_attr['val2'], +                'quality': quality, +            } + +            if media_type == 'Video': +                fmt.update({ +                    'format_note': ['144p', '288p', '544p'][quality-1], +                    'vcodec': codec, +                }) +            elif media_type == 'Audio': +                fmt.update({ +                    'acodec': codec, +                }) +            formats.append(fmt) + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': attr['entry_title'], +            'description': attr['entry_descl'], +            'thumbnail': attr['entry_image_16_9'], +            'duration': parse_duration(attr['entry_durat']), +            'upload_date': attr['entry_pdatet'][:-4], +            'uploader': attr['channel_title'], +            'uploader_id': attr['channel_idkey'], +            'formats': formats, +        }
\ No newline at end of file | 
