diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 39 | ||||
| -rw-r--r-- | youtube_dl/extractor/toutv.py | 90 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | 
4 files changed, 132 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2679d1a8f..b0df1cef7 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -133,6 +133,7 @@ from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
+from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .tube8 import Tube8IE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f787d0a3c..eb3435c77 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -350,6 +350,24 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
+    def _html_search_meta(self, name, html, display_name=None):
+        """Return the content attribute of the <meta> tag called name.
+
+        The tag is looked up by its name or property attribute.
+        display_name (name when omitted) is the label handed to
+        _html_search_regex, which is called with fatal=False.
+        """
+        if display_name is None:
+            display_name = name
+        return self._html_search_regex(
+            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
+                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
+            html, display_name, fatal=False)
+
+    def _dc_search_uploader(self, html):
+        """Return the uploader taken from the dc.creator <meta> tag."""
+        return self._html_search_meta('dc.creator', html, 'uploader')
+
     def _rta_search(self, html):
         # See http://www.rtalabel.org/index.php?content=howtofaq#single
         if re.search(r'(?ix)<meta\s+name="rating"\s+'
@@ -358,6 +376,27 @@ class InfoExtractor(object):
             return 18
         return 0
 
+    def _media_rating_search(self, html):
+        """Return an age limit derived from the rating <meta> tag.
+
+        Returns None when the tag is absent or holds a value that is
+        not listed in RATING_TABLE.
+        """
+        # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+        rating = self._html_search_meta('rating', html)
+
+        if not rating:
+            return None
+
+        RATING_TABLE = {
+            'safe for kids': 0,
+            'general': 8,
+            '14 years': 14,
+            'mature': 17,
+            'restricted': 19,
+        }
+        return RATING_TABLE.get(rating.lower())
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
new file mode 100644
index 000000000..73ea67da9
--- /dev/null
+++ b/youtube_dl/extractor/toutv.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
+
+
+class TouTvIE(InfoExtractor):
+    """Information extractor for episode pages on www.tou.tv."""
+
+    IE_NAME = u'tou.tv'
+    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
+
+    _TEST = {
+        u'url': u'http://www.tou.tv/30-vies/S04E41',
+        u'file': u'30-vies_S04E41.mp4',
+        u'info_dict': {
+            u'title': u'30 vies Saison 4 / Épisode 41',
+            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
+            u'age_limit': 8,
+            u'uploader': u'Groupe des Nouveaux Médias',
+            u'duration': 1296,
+            u'upload_date': u'20131118',
+            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
+        },
+        u'params': {
+            u'skip_download': True,  # Requires rtmpdump
+        },
+        u'skip': 'Only available in Canada'
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the tou.tv episode at url.
+
+        Raises ExtractorError when the stream list offers no usable URL
+        or when the selected URL is the Unavailable.flv placeholder.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        media_id = self._search_regex(
+            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
+
+        # TODO test from de (presumably: from Germany, i.e. outside Canada)
+        streams_url = u'http://release.theplatform.com/content.select?pid=' + media_id
+        streams_webpage = self._download_webpage(
+            streams_url, video_id, note=u'Downloading stream list')
+
+        streams_doc = xml.etree.ElementTree.fromstring(
+            streams_webpage.encode('utf-8'))
+        # Take the first URL that is not an advertisement.  Elements with
+        # no text are skipped, and next() gets a default so that an empty
+        # candidate list is reported as an ExtractorError instead of
+        # leaking a TypeError or StopIteration.
+        video_url = next(
+            (n.text for n in streams_doc.findall('.//choice/url')
+             if n.text and u'//ad.doubleclick' not in n.text),
+            None)
+        if video_url is None:
+            raise ExtractorError(
+                u'Unable to find a video URL in the stream list')
+        if video_url.endswith('/Unavailable.flv'):
+            raise ExtractorError(
+                u'Access to this video is blocked from outside of Canada',
+                expected=True)
+
+        duration_str = self._html_search_meta(
+            'video:duration', webpage, u'duration')
+        duration = int(duration_str) if duration_str else None
+        upload_date_str = self._html_search_meta(
+            'video:release_date', webpage, u'upload date')
+        upload_date = unified_strdate(upload_date_str) if upload_date_str else None
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': video_url,
+            'description': self._og_search_description(webpage),
+            'uploader': self._dc_search_uploader(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'age_limit': self._media_rating_search(webpage),
+            'duration': duration,
+            'upload_date': upload_date,
+            'ext': 'mp4',
+        }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1d9785341..b50c8166f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -734,6 +734,8 @@ def unified_strdate(date_str):
         '%Y/%m/%d %H:%M:%S',
         '%d.%m.%Y %H:%M',
         '%Y-%m-%dT%H:%M:%SZ',
+        '%Y-%m-%dT%H:%M:%S.%fZ',
+        '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
     ]
     for expression in format_expressions:
