diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-03-22 22:55:28 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-03-22 23:07:31 +0700 | 
| commit | 8b7340a45eb0e3aeaa996896ff8690b6c3a32af6 (patch) | |
| tree | 5bd61864c3b72d6b0aca69ce8da3d28c79c81e29 /youtube_dl | |
| parent | 1d4a0520ba8b5fe4748ea74e1e6dda4e11f1e2f7 (diff) | |
[lenta] Add extractor (closes #15953)
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/lenta.py | 53 | 
3 files changed, 56 insertions, 20 deletions
```diff
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 3bde40eb3..de48a37ad 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -532,13 +532,14 @@ from .lcp import (
 )
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
-from .lego import LEGOIE
-from .lemonde import LemondeIE
 from .leeco import (
     LeIE,
     LePlaylistIE,
     LetvCloudIE,
 )
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
 from .libraryofcongress import LibraryOfCongressIE
 from .libsyn import LibsynIE
 from .lifenews import (
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 1cc491b19..cf64398e3 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1270,24 +1270,6 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
-        # EaglePlatform embed (generic URL)
-        {
-            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
-            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
-            'info_dict': {
-                'id': '227304',
-                'ext': 'mp4',
-                'title': 'Навальный вышел на свободу',
-                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'duration': 87,
-                'view_count': int,
-                'age_limit': 0,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
         # referrer protected EaglePlatform embed
         {
             'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
diff --git a/youtube_dl/extractor/lenta.py b/youtube_dl/extractor/lenta.py
new file mode 100644
index 000000000..2ebd4e577
--- /dev/null
+++ b/youtube_dl/extractor/lenta.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LentaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
+        'info_dict': {
+            'id': '964400',
+            'ext': 'mp4',
+            'title': 'Надежду Савченко задержали',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 61,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # EaglePlatform iframe embed
+        'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+        'info_dict': {
+            'id': '227304',
+            'ext': 'mp4',
+            'title': 'Навальный вышел на свободу',
+            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 87,
+            'view_count': int,
+            'age_limit': 0,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id',
+            default=None)
+        if video_id:
+            return self.url_result(
+                'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id,
+                ie='EaglePlatform', video_id=video_id)
+
+        return self.url_result(url, ie='Generic')
```
