diff options
30 files changed, 925 insertions, 340 deletions
@@ -126,3 +126,4 @@ Matthias Küch Julian Richen Ping O. Mister Hat +Peter Ding @@ -168,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like. --no-progress Do not print progress bar --console-title Display progress in console titlebar -v, --verbose Print various debugging information - --dump-pages Print downloaded pages to debug problems (very verbose) + --dump-pages Print downloaded pages encoded using base64 to debug problems (very verbose) --write-pages Write downloaded intermediary pages to files in the current directory to debug problems --print-traffic Display sent and read HTTP traffic -C, --call-home Contact the youtube-dl server for debugging @@ -220,7 +220,7 @@ which means you can modify it, redistribute it or use it however you like. --embed-thumbnail Embed thumbnail in the audio as cover art --add-metadata Write metadata to the video file --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed - parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - + parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise" --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a4879bd9a..a421ae62b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -26,8 +26,7 @@ - **anitube.se** - **AnySex** - **Aparat** - - **AppleDailyAnimationNews** - - **AppleDailyRealtimeNews** + - **AppleDaily** - **AppleTrailers** - **archive.org**: archive.org videos - **ARD** @@ -152,7 +151,6 @@ - **fc2** - **fernsehkritik.tv** - **fernsehkritik.tv:postecke** - - **Firedrive** - **Firstpost** - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) @@ -230,6 +228,7 @@ - **KanalPlay**: Kanal 5/9/11 Play - **Kankan** - **Karaoketv** + - **KarriereVideos** - **keek** - **KeezMovies** - **KhanAcademy** @@ -322,6 +321,7 @@ - **NosVideo** - **novamov**: NovaMov - **Nowness** + - **NowTV** - **nowvideo**: NowVideo - **npo.nl** - **npo.nl:live** @@ -393,7 +393,6 @@ - **Rte** - **rtl.nl**: rtl.nl and rtlxl.nl - **RTL2** - - **RTLnow** - **RTP** - **RTS**: RTS.ch - **rtve.es:alacarta**: RTVE a la carta @@ -431,7 +430,6 @@ - **smotri:community**: Smotri.com community videos - **smotri:user**: Smotri.com user videos - **Snotr** - - **Sockshare** - **Sohu** - **soundcloud** - **soundcloud:playlist** @@ -564,6 +562,7 @@ - **vier:videos** - **Viewster** - **viki** + - **viki:channel** - **vimeo** - **vimeo:album** - **vimeo:channel** diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d1953c18f..aa6ec9d9a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -49,6 +49,7 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, + HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -923,8 +924,9 @@ class YoutubeDL(object): if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: return audiovideo_formats[format_idx] - # for audio only urls, select the best/worst audio format - elif all(f.get('acodec') != 'none' for f in available_formats): + # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format + elif (all(f.get('acodec') != 'none' for f in available_formats) or + all(f.get('vcodec') != 'none' for f in available_formats)): return available_formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ @@ -1047,6 +1049,8 @@ class YoutubeDL(object): if not formats: raise ExtractorError('No video formats found!') + formats_dict = {} + # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): if 'url' not in format: @@ -1054,6 +1058,18 @@ class YoutubeDL(object): if format.get('format_id') is None: format['format_id'] = compat_str(i) + format_id = format['format_id'] + if format_id not in formats_dict: + formats_dict[format_id] = [] + formats_dict[format_id].append(format) + + # Make sure all formats have unique format_id + for format_id, ambiguous_formats in formats_dict.items(): + if len(ambiguous_formats) > 1: + for i, format in enumerate(ambiguous_formats): + format['format_id'] = '%s-%d' % (format_id, i) + + for i, format in enumerate(formats): if format.get('format') is None: format['format'] = '{id} - {res}{note}'.format( id=format['format_id'], @@ -1706,7 +1722,8 @@ class YoutubeDL(object): if req_is_string: req = url_escaped else: - req = compat_urllib_request.Request( + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + req = req_type( url_escaped, data=req.data, headers=req.headers, origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 85c1b1a3a..be464271a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -353,8 +353,10 @@ from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE +from .nova import NovaIE from .novamov import NovaMovIE from .nowness import NownessIE +from .nowtv import NowTVIE from .nowvideo import NowVideoIE from .npo import ( NPOIE, @@ -401,6 +403,7 @@ from .playfm import PlayFMIE from .playvid import PlayvidIE from .playwire import PlaywireIE from .podomatic import PodomaticIE +from .porn91 import Porn91IE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, @@ -438,7 +441,6 @@ from .roxwel import RoxwelIE from .rtbf import RTBFIE from .rte import RteIE from .rtlnl import RtlNlIE -from .rtlnow import RTLnowIE from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE @@ -481,6 +483,10 @@ from .smotri import ( ) from .snotr import SnotrIE from .sohu import SohuIE +from .soompi import ( + SoompiIE, + SoompiShowIE, +) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, @@ -566,6 +572,7 @@ from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trutube import TruTubeIE from .tube8 import Tube8IE +from .tubitv import TubiTvIE from .tudou import TudouIE from .tumblr import TumblrIE from .tunein import TuneInIE diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index a117502bc..e0518cf26 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -6,11 +6,11 @@ from ..utils import int_or_none class AftonbladetIE(InfoExtractor): - _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])' + _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)' _TEST = { - 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', + 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', 'info_dict': { - 'id': 'article36015', + 'id': '36015', 'ext': 'mp4', 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', 'description': 'Jupiters måne mest aktiv av alla himlakroppar', @@ -25,8 +25,9 @@ class AftonbladetIE(InfoExtractor): # find internal video meta data meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' - internal_meta_id = self._html_search_regex( - r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') + player_config = self._parse_json(self._html_search_regex( + r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) + internal_meta_id = player_config['videoId'] internal_meta_url = meta_url % internal_meta_id internal_meta_json = self._download_json( internal_meta_url, video_id, 'Downloading video meta data') diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 1c77df47e..41f0c736d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -76,8 +76,8 @@ class CrunchyrollIE(InfoExtractor): self._login() def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(data) - iv = bytes_to_intlist(iv) + data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) + iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8'))) id = int(id) def obfuscate_key_aux(count, modulo, start): @@ -179,6 +179,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return output + def _extract_subtitles(self, subtitle): + sub_root = xml.etree.ElementTree.fromstring(subtitle) + return [{ + 'ext': 'srt', + 'data': self._convert_subtitles_to_srt(sub_root), + }, { + 'ext': 'ass', + 'data': self._convert_subtitles_to_ass(sub_root), + }] + def _get_subtitles(self, video_id, webpage): subtitles = {} for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): @@ -190,25 +200,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) if not id or not iv or not data: continue - id = int(id) - iv = base64.b64decode(iv) - data = base64.b64decode(data) - subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue - sub_root = xml.etree.ElementTree.fromstring(subtitle) - subtitles[lang_code] = [ - { - 'ext': 'srt', - 'data': self._convert_subtitles_to_srt(sub_root), - }, - { - 'ext': 'ass', - 'data': self._convert_subtitles_to_ass(sub_root), - }, - ] + subtitles[lang_code] = self._extract_subtitles(subtitle) return subtitles def _real_extract(self, url): diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index e8d682716..82dc27bc6 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -152,7 +152,7 @@ class FacebookIE(InfoExtractor): raise ExtractorError('Cannot find video formats') video_title = self._html_search_regex( - r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', + r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title', default=None) if not video_title: video_title = self._html_search_regex( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9a7b0d25d..96ca398de 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -9,6 +9,8 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( compat_urllib_parse, + compat_urllib_parse_unquote, + compat_urllib_request, compat_urlparse, compat_xml_parse_error, ) @@ -46,6 +48,97 @@ class GenericIE(InfoExtractor): _VALID_URL = r'.*' IE_NAME = 'generic' _TESTS = [ + # Direct link to a video + { + 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', + 'md5': '67d406c2bcb6af27fa886f31aa934bbe', + 'info_dict': { + 'id': 'trailer', + 'ext': 'mp4', + 'title': 'trailer', + 'upload_date': '20100513', + } + }, + # Direct link to media delivered compressed (until Accept-Encoding is *) + { + 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', + 'md5': '128c42e68b13950268b648275386fc74', + 'info_dict': { + 'id': 'FictionJunction-Parallel_Hearts', + 'ext': 'flac', + 'title': 'FictionJunction-Parallel_Hearts', + 'upload_date': '20140522', + }, + 'expected_warnings': [ + 'URL could be a direct video link, returning it as such.' + ] + }, + # Direct download with broken HEAD + { + 'url': 'http://ai-radio.org:8000/radio.opus', + 'info_dict': { + 'id': 'radio', + 'ext': 'opus', + 'title': 'radio', + }, + 'params': { + 'skip_download': True, # infinite live stream + }, + 'expected_warnings': [ + r'501.*Not Implemented' + ], + }, + # Direct link with incorrect MIME type + { + 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', + 'md5': '4ccbebe5f36706d85221f204d7eb5913', + 'info_dict': { + 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', + 'id': '5_Lennart_Poettering_-_Systemd', + 'ext': 'webm', + 'title': '5_Lennart_Poettering_-_Systemd', + 'upload_date': '20141120', + }, + 'expected_warnings': [ + 'URL could be a direct video link, returning it as such.' + ] + }, + # RSS feed + { + 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'info_dict': { + 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'title': 'Zero Punctuation', + 'description': 're:.*groundbreaking video review series.*' + }, + 'playlist_mincount': 11, + }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + }, + # google redirect + { + 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', + 'info_dict': { + 'id': 'cmQHVoWB5FY', + 'ext': 'mp4', + 'upload_date': '20130224', + 'uploader_id': 'TheVerge', + 'description': 're:^Chris Ziegler takes a look at the\.*', + 'uploader': 'The Verge', + 'title': 'First Firefox OS phones side-by-side', + }, + 'params': { + 'skip_download': False, + } + }, { 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', 'md5': '85b90ccc9d73b4acd9138d3af4c27f89', @@ -125,17 +218,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, - # Direct link to a video - { - 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', - 'md5': '67d406c2bcb6af27fa886f31aa934bbe', - 'info_dict': { - 'id': 'trailer', - 'ext': 'mp4', - 'title': 'trailer', - 'upload_date': '20100513', - } - }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', @@ -160,22 +242,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, - # google redirect - { - 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', - 'info_dict': { - 'id': 'cmQHVoWB5FY', - 'ext': 'mp4', - 'upload_date': '20130224', - 'uploader_id': 'TheVerge', - 'description': 're:^Chris Ziegler takes a look at the\.*', - 'uploader': 'The Verge', - 'title': 'First Firefox OS phones side-by-side', - }, - 'params': { - 'skip_download': False, - } - }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -407,16 +473,6 @@ class GenericIE(InfoExtractor): 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com', } }, - # RSS feed - { - 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', - 'info_dict': { - 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', - 'title': 'Zero Punctuation', - 'description': 're:.*groundbreaking video review series.*' - }, - 'playlist_mincount': 11, - }, # Multiple brightcove videos # https://github.com/rg3/youtube-dl/issues/2283 { @@ -470,21 +526,6 @@ class GenericIE(InfoExtractor): 'uploader': 'thoughtworks.wistia.com', }, }, - # Direct download with broken HEAD - { - 'url': 'http://ai-radio.org:8000/radio.opus', - 'info_dict': { - 'id': 'radio', - 'ext': 'opus', - 'title': 'radio', - }, - 'params': { - 'skip_download': True, # infinite live stream - }, - 'expected_warnings': [ - r'501.*Not Implemented' - ], - }, # Soundcloud embed { 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', @@ -516,21 +557,6 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, - # Direct link with incorrect MIME type - { - 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', - 'md5': '4ccbebe5f36706d85221f204d7eb5913', - 'info_dict': { - 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', - 'id': '5_Lennart_Poettering_-_Systemd', - 'ext': 'webm', - 'title': '5_Lennart_Poettering_-_Systemd', - 'upload_date': '20141120', - }, - 'expected_warnings': [ - 'URL could be a direct video link, returning it as such.' - ] - }, # Cinchcast embed { 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', @@ -689,16 +715,6 @@ class GenericIE(InfoExtractor): 'age_limit': 0, }, }, - # RSS feed with enclosure - { - 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', - 'info_dict': { - 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', - 'ext': 'm4v', - 'upload_date': '20150228', - 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', - } - }, # Crooks and Liars embed { 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', @@ -894,7 +910,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] + video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) self.to_screen('%s: Requesting header' % video_id) @@ -916,7 +932,9 @@ class GenericIE(InfoExtractor): full_response = None if head_response is False: - full_response = self._request_webpage(url, video_id) + request = compat_urllib_request.Request(url) + request.add_header('Accept-Encoding', '*') + full_response = self._request_webpage(request, video_id) head_response = full_response # Check for direct link to a video @@ -927,7 +945,7 @@ class GenericIE(InfoExtractor): head_response.headers.get('Last-Modified')) return { 'id': video_id, - 'title': os.path.splitext(url_basename(url))[0], + 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), 'direct': True, 'formats': [{ 'format_id': m.group('format_id'), @@ -941,7 +959,17 @@ class GenericIE(InfoExtractor): self._downloader.report_warning('Falling back on generic information extractor.') if not full_response: - full_response = self._request_webpage(url, video_id) + request = compat_urllib_request.Request(url) + # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) + # making it impossible to download only chunk of the file (yet we need only 512kB to + # test whether it's HTML or not). According to youtube-dl default Accept-Encoding + # that will always result in downloading the whole file that is not desirable. + # Therefore for extraction pass we have to override Accept-Encoding to any in order + # to accept raw bytes and being able to download only a chunk. + # It may probably better to solve this by checking Content-Type for application/octet-stream + # after HEAD request finishes, but not sure if we can rely on this. + request.add_header('Accept-Encoding', '*') + full_response = self._request_webpage(request, video_id) # Maybe it's a direct link to a video? # Be careful not to download the whole thing! @@ -953,7 +981,7 @@ class GenericIE(InfoExtractor): head_response.headers.get('Last-Modified')) return { 'id': video_id, - 'title': os.path.splitext(url_basename(url))[0], + 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), 'direct': True, 'url': url, 'upload_date': upload_date, diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index fe5d95e2c..d692ea79a 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, js_to_json, @@ -12,7 +13,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -34,7 +35,8 @@ class ImgurIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + compat_urlparse.urljoin(url, video_id), video_id) width = int_or_none(self._search_regex( r'<param name="width" value="([0-9]+)"', diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 8529bedfc..821c8ec10 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -11,11 +11,12 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + remove_end, ) class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)' + _VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)' _TESTS = [{ 'url': 'http://play.iprima.cz/particka/particka-92', @@ -23,7 +24,7 @@ class IPrimaIE(InfoExtractor): 'id': '39152', 'ext': 'flv', 'title': 'Partička (92)', - 'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6', + 'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45', 'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', }, 'params': { @@ -35,13 +36,14 @@ class IPrimaIE(InfoExtractor): 'id': '9718337', 'ext': 'flv', 'title': 'Tchibo Partička - Jarní móda', - 'description': 'md5:589f8f59f414220621ff8882eb3ce7be', 'thumbnail': 're:^http:.*\.jpg$', }, 'params': { 'skip_download': True, # requires rtmpdump }, - 'skip': 'Do not have permission to access this page', + }, { + 'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015', + 'only_matching': True, }] def _real_extract(self, url): @@ -102,8 +104,10 @@ class IPrimaIE(InfoExtractor): return { 'id': real_id, - 'title': self._og_search_title(webpage), + 'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'), 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, - 'description': self._og_search_description(webpage), + 'description': self._search_regex( + r'<p[^>]+itemprop="description"[^>]*>([^<]+)', + webpage, 'description', default=None), } diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py new file mode 100644 index 000000000..4e999b237 --- /dev/null +++ b/youtube_dl/extractor/nova.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class NovaIE(InfoExtractor): + IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' + _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+)(?:\.html|/?)' + _TESTS = [{ + 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html', + 'info_dict': { + 'id': '1608920', + 'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou', + 'ext': 'flv', + 'title': 'Duel: Michal Hrdlička a Petr Suchoň', + 'description': 'md5:d0cc509858eee1b1374111c588c6f5d5', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html', + 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', + 'info_dict': { + 'id': '1757139', + 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', + 'ext': 'mp4', + 'title': 'Podzemní nemocnice v pražské Krči', + 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + } + }, { + 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove/', + 'info_dict': { + 'id': '1756825', + 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', + 'ext': 'flv', + 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', + 'description': 'md5:d804ba6b30bc7da2705b1fea961bddfe', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', + 'only_matching': True, + }, { + 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', + 'only_matching': True, + }, { + 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html', + 'only_matching': True, + }, { + 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html', + 'only_matching': True, + }, { + 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + site = mobj.group('site') + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + [r"(?:media|video_id)\s*:\s*'(\d+)'", + r'media=(\d+)', + r'id="article_video_(\d+)"', + r'id="player_(\d+)"'], + webpage, 'video id') + + config_url = self._search_regex( + r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', + webpage, 'config url', default=None) + + if not config_url: + DEFAULT_SITE_ID = '23000' + SITES = { + 'tvnoviny': DEFAULT_SITE_ID, + 'novaplus': DEFAULT_SITE_ID, + 'vymena': DEFAULT_SITE_ID, + 'krasna': DEFAULT_SITE_ID, + 'fanda': '30', + 'tn': '30', + 'doma': '30', + } + + site_id = self._search_regex( + r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID) + + config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig' + % (site_id, video_id)) + + config = self._download_json( + config_url, display_id, + 'Downloading config JSON', + transform_source=lambda s: re.sub(r'var\s+[\da-zA-Z_]+\s*=\s*({.+?});', r'\1', s)) + + mediafile = config['mediafile'] + video_url = mediafile['src'] + + m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url) + if m: + formats = [{ + 'url': m.group('url'), + 'app': m.group('app'), + 'play_path': m.group('playpath'), + 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf', + 'ext': 'flv', + }] + else: + formats = [{ + 'url': video_url, + }] + self._sort_formats(formats) + + title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = config.get('poster') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py new file mode 100644 index 000000000..173e46cd8 --- /dev/null +++ b/youtube_dl/extractor/nowtv.py @@ -0,0 +1,192 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + parse_duration, + remove_start, +) + + +class NowTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player' + + _TESTS = [{ + # rtl + 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player', + 'info_dict': { + 'id': '203519', + 'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', + 'ext': 'mp4', + 'title': 'Die neuen Bauern und eine Hochzeit', + 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432580700, + 'upload_date': '20150525', + 'duration': 2786, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # rtl2 + 'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player', + 'info_dict': { + 'id': '203481', + 'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934', + 'ext': 'mp4', + 'title': 'Berlin - Tag & Nacht (Folge 934)', + 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432666800, + 'upload_date': '20150526', + 'duration': 2641, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # rtlnitro + 'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player', + 'info_dict': { + 'id': '165780', + 'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00', + 'ext': 'mp4', + 'title': 'Hals- und Beinbruch', + 'description': 'md5:b50d248efffe244e6f56737f0911ca57', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432415400, + 'upload_date': '20150523', + 'duration': 2742, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # superrtl + 'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player', + 'info_dict': { + 'id': '99205', + 'display_id': 'medicopter-117/angst', + 'ext': 'mp4', + 'title': 'Angst!', + 'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1222632900, + 'upload_date': '20080928', + 'duration': 3025, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # ntv + 'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player', + 'info_dict': { + 'id': '203521', + 'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch', + 'ext': 'mp4', + 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch', + 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432751700, + 'upload_date': '20150527', + 'duration': 1083, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # vox + 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player', + 'info_dict': { + 'id': '128953', + 'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel', + 'ext': 'mp4', + 'title': "Büro-Fall / Chihuahua 'Joel'", + 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432408200, + 'upload_date': '20150523', + 'duration': 3092, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + station = mobj.group('station') + + info = self._download_json( + 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id, + display_id) + + video_id = compat_str(info['id']) + + files = info['files'] + if not files: + if info.get('geoblocked', False): + raise ExtractorError( + 'Video %s is not available from your location due to geo restriction' % video_id, + expected=True) + if not info.get('free', True): + raise ExtractorError( + 'Video %s is not available for free' % video_id, expected=True) + + f = info.get('format', {}) + station = f.get('station') or station + + STATIONS = { + 'rtl': 'rtlnow', + 'rtl2': 'rtl2now', + 'vox': 'voxnow', + 'nitro': 'rtlnitronow', + 'ntv': 'n-tvnow', + 'superrtl': 'superrtlnow' + } + + formats = [] + for item in files['items']: + item_path = remove_start(item['path'], '/') + tbr = int_or_none(item['bitrate']) + m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path) + m3u8_url = m3u8_url.replace('now/', 'now/videos/') + formats.append({ + 'url': m3u8_url, + 'format_id': '%s-%sk' % (item['id'], tbr), + 'ext': 'mp4', + 'tbr': tbr, + }) + self._sort_formats(formats) + + title = info['title'] + description = info.get('articleLong') or info.get('articleShort') + timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') + duration = parse_duration(info.get('duration')) + thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index f179ea200..6cdc2638b 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -87,7 +87,7 @@ class PatreonIE(InfoExtractor): r'<div class="attach"><a target="_blank" href="([^"]+)">', webpage, 'attachment URL', default=None) embed = self._html_search_regex( - r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', + r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"', webpage, 'embedded URL', default=None) if attach_fn is not None: diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py new file mode 100644 index 000000000..72d1b2718 --- /dev/null +++ b/youtube_dl/extractor/porn91.py @@ -0,0 +1,71 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from ..compat import compat_urllib_parse +from .common import InfoExtractor +from ..utils import ( + parse_duration, + int_or_none, + ExtractorError, +) + + +class Porn91IE(InfoExtractor): + IE_NAME = '91porn' + _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)' + + _TEST = { + 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', + 'md5': '6df8f6d028bc8b14f5dbd73af742fb20', + 'info_dict': { + 'id': '7e42283b4f5ab36da134', + 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', + 'ext': 'mp4', + 'duration': 431, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id + self._set_cookie('91porn.com', 'language', 'cn_CN') + webpage = self._download_webpage(url, video_id, 'get HTML content') + + if '作为游客,你每天只可观看10个视频' in webpage: + raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) + + title = self._search_regex( + r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title') + title = title.replace('\n', '') + + # get real url + file_id = self._search_regex( + r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id') + sec_code = self._search_regex( + r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code') + max_vid = self._search_regex( + r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid') + url_params = compat_urllib_parse.urlencode({ + 'VID': file_id, + 'mp4': '1', + 'seccode': sec_code, + 'max_vid': max_vid, + }) + info_cn = self._download_webpage( + 'http://91porn.com/getfile.php?' + url_params, video_id, + 'get real video url') + video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url') + + duration = parse_duration(self._search_regex( + r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False)) + + comment_count = int_or_none(self._search_regex( + r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'duration': duration, + 'comment_count': comment_count, + } diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py deleted file mode 100644 index 785a8045e..000000000 --- a/youtube_dl/extractor/rtlnow.py +++ /dev/null @@ -1,174 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, - unified_strdate, - int_or_none, -) - - -class RTLnowIE(InfoExtractor): - """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" - _VALID_URL = r'''(?x) - (?:https?://)? - (?P<url> - (?P<domain> - rtl-now\.rtl\.de| - rtl2now\.rtl2\.de| - (?:www\.)?voxnow\.de| - (?:www\.)?rtlnitronow\.de| - (?:www\.)?superrtlnow\.de| - (?:www\.)?n-tvnow\.de) - /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\? - (?:container_id|film_id)=(?P<video_id>[0-9]+)& - player=1(?:&season=[0-9]+)?(?:&.*)? - )''' - - _TESTS = [ - { - 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', - 'info_dict': { - 'id': '90419', - 'ext': 'flv', - 'title': 'Ahornallee - Folge 1 - Der Einzug', - 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de', - 'upload_date': '20070416', - 'duration': 1685, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Only works from Germany', - }, - { - 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', - 'info_dict': { - 'id': '69756', - 'ext': 'flv', - 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', - 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0', - 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', - 'upload_date': '20120519', - 'duration': 1245, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Only works from Germany', - }, - { - 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', - 'info_dict': { - 'id': '13883', - 'ext': 'flv', - 'title': 'Voxtours - Südafrika-Reporter II', - 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00', - 'upload_date': '20090627', - 'duration': 1800, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', - 'info_dict': { - 'id': '99205', - 'ext': 'flv', - 'title': 'Medicopter 117 - Angst!', - 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin', - 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg', - 'upload_date': '20080928', - 'duration': 2691, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5', - 'info_dict': { - 'id': '188729', - 'ext': 'flv', - 'upload_date': '20150204', - 'description': 'md5:5e1ce23095e61a79c166d134b683cecc', - 'title': 'Der Bachelor - Folge 4', - } - }, { - 'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', - 'only_matching': True, - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_page_url = 'http://%s/' % mobj.group('domain') - video_id = mobj.group('video_id') - - webpage = self._download_webpage('http://' + mobj.group('url'), video_id) - - mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage) - if mobj: - raise ExtractorError(clean_html(mobj.group(1)), expected=True) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage, default=None) - - upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date')) - - mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage) - duration = int(mobj.group('seconds')) if mobj else None - - playerdata_url = self._html_search_regex( - r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url') - - playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML') - - videoinfo = playerdata.find('./playlist/videoinfo') - - formats = [] - for filename in videoinfo.findall('filename'): - mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text) - if mobj: - fmt = { - 'url': mobj.group('url'), - 'play_path': 'mp4:' + mobj.group('play_path'), - 'page_url': video_page_url, - 'player_url': video_page_url + 'includes/vodplayer.swf', - } - else: - mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text) - if mobj: - fmt = { - 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'), - 'play_path': 'mp4:' + mobj.group('play_path'), - 'page_url': url, - 'player_url': video_page_url + 'includes/vodplayer.swf', - } - else: - fmt = { - 'url': filename.text, - } - fmt.update({ - 'width': int_or_none(filename.get('width')), - 'height': int_or_none(filename.get('height')), - 'vbr': int_or_none(filename.get('bitrate')), - 'ext': 'flv', - }) - formats.append(fmt) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - } diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index d3b8a1be4..9c53704ea 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ["arch", "", "http://ussenate-f.akamaihd.net/"] ] _IE_NAME = 'senate.gov' - _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)' + _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { @@ -72,12 +72,16 @@ class SenateISVPIE(InfoExtractor): 'ext': 'mp4', 'title': 'Integrated Senate Video Player' } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, }] @staticmethod def _search_iframe_url(webpage): mobj = re.search( - r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]", + r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", webpage) if mobj: return mobj.group('url') diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py new file mode 100644 index 000000000..5da66ca9e --- /dev/null +++ b/youtube_dl/extractor/soompi.py @@ -0,0 +1,146 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .crunchyroll import CrunchyrollIE + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + remove_start, + xpath_text, +) + + +class SoompiBaseIE(InfoExtractor): + def _get_episodes(self, webpage, episode_filter=None): + episodes = self._parse_json( + self._search_regex( + r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'), + None) + return list(filter(episode_filter, episodes)) + + +class SoompiIE(SoompiBaseIE, CrunchyrollIE): + IE_NAME = 'soompi' + _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://tv.soompi.com/en/watch/29235', + 'info_dict': { + 'id': '29235', + 'ext': 'mp4', + 'title': 'Episode 1096', + 'description': '2015-05-20' + }, + 'params': { + 'skip_download': True, + }, + }] + + def _get_episode(self, webpage, video_id): + return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0] + + def _get_subtitles(self, config, video_id): + sub_langs = {} + for subtitle in config.findall('./{default}preload/subtitles/subtitle'): + sub_langs[subtitle.attrib['id']] = subtitle.attrib['title'] + + subtitles = {} + for s in config.findall('./{default}preload/subtitle'): + lang_code = sub_langs.get(s.attrib['id']) + if not lang_code: + continue + sub_id = s.get('id') + data = xpath_text(s, './data', 'data') + iv = xpath_text(s, './iv', 'iv') + if not id or not iv or not data: + continue + subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8') + subtitles[lang_code] = self._extract_subtitles(subtitle) + return subtitles + + def _real_extract(self, url): + video_id = self._match_id(url) + + try: + webpage = self._download_webpage( + url, video_id, 'Downloading episode page') + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: + webpage = ee.cause.read() + block_message = self._html_search_regex( + r'(?s)<div class="block-message">(.+?)</div>', webpage, + 'block message', default=None) + if block_message: + raise ExtractorError(block_message, expected=True) + raise + + formats = [] + config = None + for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage): + config = self._download_xml( + 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id), + video_id, 'Downloading %s XML' % format_id) + m3u8_url = xpath_text( + config, './{default}preload/stream_info/file', + '%s m3u8 URL' % format_id) + if not m3u8_url: + continue + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', m3u8_id=format_id)) + self._sort_formats(formats) + + episode = self._get_episode(webpage, video_id) + + title = episode['name'] + description = episode.get('description') + duration = int_or_none(episode.get('duration')) + + thumbnails = [{ + 'id': thumbnail_id, + 'url': thumbnail_url, + } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()] + + subtitles = self.extract_subtitles(config, video_id) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnails': thumbnails, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles + } + + +class SoompiShowIE(SoompiBaseIE): + IE_NAME = 'soompi:show' + _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)' + _TESTS = [{ + 'url': 'http://tv.soompi.com/en/shows/liar-game', + 'info_dict': { + 'id': 'liar-game', + 'title': 'Liar Game', + 'description': 'md5:52c02bce0c1a622a95823591d0589b66', + }, + 'playlist_count': 14, + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + + webpage = self._download_webpage( + url, show_id, 'Downloading show page') + + title = remove_start(self._og_search_title(webpage), 'SoompiTV | ') + description = self._og_search_description(webpage) + + entries = [ + self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi') + for episode in self._get_episodes(webpage)] + + return self.playlist_result(entries, show_id, title, description) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index 98cf92d89..359722ad6 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -51,9 +51,9 @@ class SpiegeltvIE(InfoExtractor): is_wide = media_json['is_wide'] server_json = self._download_json( - 'http://www.spiegel.tv/streaming_servers/', video_id, - note='Downloading server information') - server = server_json[0]['endpoint'] + 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json', + video_id, note='Downloading server information') + server = server_json['streamingserver'][0]['endpoint'] thumbnails = [] for image in media_json['images']: @@ -76,5 +76,6 @@ class SpiegeltvIE(InfoExtractor): 'ext': 'm4v', 'description': description, 'duration': duration, - 'thumbnails': thumbnails + 'thumbnails': thumbnails, + 'rtmp_live': True, } diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 656410528..3a68eaa80 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' + _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { @@ -35,6 +35,9 @@ class TF1IE(InfoExtractor): }, { 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html', 'only_matching': True, + }, { + 'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index d73ad3762..6ca8840b0 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor): webpage = self._download_webpage(req, display_id) flashvars = json.loads(self._html_search_regex( - r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars')) + r'flashvars\s*=\s*({.+?})', webpage, 'flashvars')) video_url = flashvars['video_url'] if flashvars.get('encrypted') is True: @@ -58,19 +58,19 @@ class Tube8IE(InfoExtractor): thumbnail = flashvars.get('image_url') title = self._html_search_regex( - r'videotitle\s*=\s*"([^"]+)', webpage, 'title') + r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( - r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False) + r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False) uploader = self._html_search_regex( - r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>', + r'<span class="username">\s*(.+?)\s*<', webpage, 'uploader', fatal=False) like_count = int_or_none(self._html_search_regex( - r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False)) + r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False)) dislike_count = int_or_none(self._html_search_regex( - r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False)) + r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) view_count = self._html_search_regex( - r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False) + r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False) if view_count: view_count = str_to_int(view_count) comment_count = self._html_search_regex( diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py new file mode 100644 index 000000000..2c4b21807 --- /dev/null +++ b/youtube_dl/extractor/tubitv.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import codecs +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, + compat_urllib_request +) +from ..utils import ( + ExtractorError, + int_or_none, +) + + +class TubiTvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)' + _LOGIN_URL = 'http://tubitv.com/login' + _NETRC_MACHINE = 'tubitv' + _TEST = { + 'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01', + 'info_dict': { + 'id': '54411', + 'ext': 'mp4', + 'title': 'The Kitchen Musical - EP01', + 'thumbnail': 're:^https?://.*\.png$', + 'description': 'md5:37532716166069b353e8866e71fefae7', + 'duration': 2407, + }, + 'params': { + 'skip_download': 'HLS download', + }, + } + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + self.report_login() + form_data = { + 'username': username, + 'password': password, + } + payload = compat_urllib_parse.urlencode(form_data).encode('utf-8') + request = compat_urllib_request.Request(self._LOGIN_URL, payload) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + login_page = self._download_webpage( + request, None, False, 'Wrong login info') + if not re.search(r'id="tubi-logout"', login_page): + raise ExtractorError( + 'Login failed (invalid username/password)', expected=True) + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage): + raise ExtractorError( + 'This video requires login, use --username and --password ' + 'options to provide account credentials.', expected=True) + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration')) + + apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu') + m3u8_url = codecs.decode(apu, 'rot_13')[::-1] + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 828c808a6..e6218808f 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -28,6 +28,17 @@ class TumblrIE(InfoExtractor): 'description': 'md5:dba62ac8639482759c8eb10ce474586a', 'thumbnail': 're:http://.*\.jpg', } + }, { + 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching', + 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab', + 'info_dict': { + 'id': 'Wmur', + 'ext': 'mp4', + 'title': 'naked smoking & stretching', + 'upload_date': '20150506', + 'timestamp': 1430931613, + }, + 'add_ie': ['Vidme'], }] def _real_extract(self, url): @@ -38,6 +49,12 @@ class TumblrIE(InfoExtractor): url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) webpage = self._download_webpage(url, video_id) + vid_me_embed_url = self._search_regex( + r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', + webpage, 'vid.me embed', default=None) + if vid_me_embed_url is not None: + return self.url_result(vid_me_embed_url, 'Vidme') + iframe_url = self._search_regex( r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', webpage, 'iframe url') diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 102362b29..dc3a8334a 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -5,7 +5,9 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, float_or_none, + int_or_none, parse_age_limit, ) @@ -24,22 +26,24 @@ class TvigleIE(InfoExtractor): 'display_id': 'sokrat', 'ext': 'flv', 'title': 'Сократ', - 'description': 'md5:a05bd01be310074d5833efc6743be95e', + 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17', 'duration': 6586, - 'age_limit': 0, + 'age_limit': 12, }, + 'skip': 'georestricted', }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', - 'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574', + 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', 'info_dict': { 'id': '5142516', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком', 'description': 'md5:027f7dc872948f14c96d19b4178428a4', 'duration': 186.080, 'age_limit': 0, }, + 'skip': 'georestricted', }, { 'url': 'https://cloud.tvigle.ru/video/5267604/', 'only_matching': True, @@ -54,7 +58,7 @@ class TvigleIE(InfoExtractor): if not video_id: webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( - r'<li class="video-preview current_playing" id="(\d+)">', + r'class="video-preview current_playing" id="(\d+)">', webpage, 'video id') video_data = self._download_json( @@ -62,21 +66,34 @@ class TvigleIE(InfoExtractor): item = video_data['playlist']['items'][0] + videos = item.get('videos') + + error_message = item.get('errorMessage') + if not videos and error_message: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + title = item['title'] - description = item['description'] - thumbnail = item['thumbnail'] + description = item.get('description') + thumbnail = item.get('thumbnail') duration = float_or_none(item.get('durationMilliseconds'), 1000) age_limit = parse_age_limit(item.get('ageRestrictions')) formats = [] for vcodec, fmts in item['videos'].items(): - for quality, video_url in fmts.items(): + for format_id, video_url in fmts.items(): + if format_id == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=vcodec)) + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) formats.append({ 'url': video_url, - 'format_id': '%s-%s' % (vcodec, quality), + 'format_id': '%s-%s' % (vcodec, format_id), 'vcodec': vcodec, - 'height': int(quality[:-1]), - 'filesize': item['video_files_size'][vcodec][quality], + 'height': int_or_none(height), + 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)), }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 67e8bfea0..c1ee1decc 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -15,7 +15,7 @@ class TwentyFourVideoIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.24video.net/video/view/1044982', - 'md5': '48dd7646775690a80447a8dca6a2df76', + 'md5': 'd041af8b5b4246ea466226a0d6693345', 'info_dict': { 'id': '1044982', 'ext': 'mp4', @@ -54,7 +54,7 @@ class TwentyFourVideoIE(InfoExtractor): webpage, 'upload date')) uploader = self._html_search_regex( - r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>', + r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>', webpage, 'uploader', fatal=False) view_count = int_or_none(self._html_search_regex( diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index e6ee1e471..f38a72fde 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + ExtractorError, + float_or_none, +) class VGTVIE(InfoExtractor): @@ -59,16 +62,16 @@ class VGTVIE(InfoExtractor): }, { # streamType: live - 'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen', + 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', 'info_dict': { - 'id': '100015', + 'id': '113063', 'ext': 'flv', - 'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!', - 'description': 'md5:9a60cc23fa349f761628924e56eeec2d', + 'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:b3743425765355855f88e096acc93231', 'thumbnail': 're:^https?://.*\.jpg', 'duration': 0, - 'timestamp': 1407423348, - 'upload_date': '20140807', + 'timestamp': 1432975582, + 'upload_date': '20150530', 'view_count': int, }, 'params': { @@ -97,7 +100,12 @@ class VGTVIE(InfoExtractor): % (host, video_id, HOST_WEBSITES[host]), video_id, 'Downloading media JSON') + if data.get('status') == 'inactive': + raise ExtractorError( + 'Video %s is no longer available' % video_id, expected=True) + streams = data['streamUrls'] + stream_type = data.get('streamType') formats = [] @@ -107,7 +115,8 @@ class VGTVIE(InfoExtractor): hls_url, video_id, 'mp4', m3u8_id='hls')) hds_url = streams.get('hds') - if hds_url: + # wasLive hds are always 404 + if hds_url and stream_type != 'wasLive': formats.extend(self._extract_f4m_formats( hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds')) @@ -135,13 +144,14 @@ class VGTVIE(InfoExtractor): return { 'id': video_id, - 'title': data['title'], + 'title': self._live_title(data['title']), 'description': data['description'], 'thumbnail': data['images']['main'] + '?t[]=900x506q80', 'timestamp': data['published'], 'duration': float_or_none(data['duration'], 1000), 'view_count': data['displays'], 'formats': formats, + 'is_live': True if stream_type == 'live' else False, } diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index bd953fb4c..e0b55078b 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -10,7 +10,7 @@ from ..utils import ( class VidmeIE(InfoExtractor): _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vid.me/QNB', 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82', 'info_dict': { @@ -23,9 +23,14 @@ class VidmeIE(InfoExtractor): 'upload_date': '20140725', 'thumbnail': 're:^https?://.*\.jpg', }, - } + }, { + # From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching + 'url': 'https://vid.me/e/Wmur', + 'only_matching': True, + }] def _real_extract(self, url): + url = url.replace('vid.me/e/', 'vid.me/') video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0301682b8..aacb999ce 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1399,6 +1399,22 @@ class YoutubeChannelIE(InfoExtractor): channel_id = self._match_id(url) url = self._TEMPLATE_URL % channel_id + + # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778) + # Workaround by extracting as a playlist if managed to obtain channel playlist URL + # otherwise fallback on channel by page extraction + channel_page = self._download_webpage( + url + '?view=57', channel_id, + 'Downloading channel page', fatal=False) + channel_playlist_id = self._search_regex( + [r'<meta itemprop="channelId" content="([^"]+)">', + r'data-channel-external-id="([^"]+)"'], + channel_page, 'channel id', default=None) + if channel_playlist_id and channel_playlist_id.startswith('UC'): + playlist_id = 'UU' + channel_playlist_id[2:] + return self.url_result( + compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist') + channel_page = self._download_webpage(url, channel_id, 'Downloading page #1') autogenerated = re.search(r'''(?x) class="[^"]*?(?: diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 8f825f785..774494efd 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -49,7 +49,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - elif info['ext'] == 'm4a': + elif info['ext'] in ['m4a', 'mp4']: if not check_executable('AtomicParsley', ['-v']): raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.') @@ -82,6 +82,6 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) else: - raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.') + raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.') return [], info diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b33385153..653710131 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.20' +__version__ = '2015.05.29' |