32 files changed, 846 insertions, 319 deletions
| @@ -91,3 +91,4 @@ t0mm0  Tithen-Firion  Zack Fernandes  cryptonaut +Adrian Kretz @@ -1,7 +1,7 @@  all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish  clean: -	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part +	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json  cleanall: clean  	rm -f youtube-dl youtube-dl.exe diff --git a/test/test_utils.py b/test/test_utils.py index aaa293ff8..d42df6d96 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -144,6 +144,9 @@ class TestUtil(unittest.TestCase):          self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')          self.assertEqual(unified_strdate('1968-12-10'), '19681210')          self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') +        self.assertEqual( +            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), +            '20141126')      def test_find_xpath_attr(self):          testxml = '''<root> diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 56dc3d461..31531855e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -622,23 +622,17 @@ class YoutubeDL(object):                  ie_result['url'], ie_key=ie_result.get('ie_key'),                  extra_info=extra_info, download=False, process=False) -            def make_result(embedded_info): -                new_result = ie_result.copy() -                for f in ('_type', 'url', 'ext', 'player_url', 'formats', -                          'entries', 'ie_key', 'duration', -                          'subtitles', 'annotations', 'format', -                          'thumbnail', 'thumbnails'): -                    if f in new_result: -                        del new_result[f] -                    if f in embedded_info: -                        new_result[f] = embedded_info[f] -                return new_result -            new_result = make_result(info) +            new_result = ie_result.copy() +            for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats', +                      'entries', 'ie_key', 'duration', +                      'subtitles', 'annotations', 'format', +                      'thumbnail', 'thumbnails'): +                if f in new_result: +                    del new_result[f] +                if f in info: +                    new_result[f] = info[f]              assert new_result.get('_type') != 'url_transparent' -            if new_result.get('_type') == 'compat_list': -                new_result['entries'] = [ -                    make_result(e) for e in new_result['entries']]              return self.process_ie_result(                  new_result, download=download, extra_info=extra_info) @@ -942,8 +936,12 @@ class YoutubeDL(object):          if self.params.get('forceid', False):              self.to_stdout(info_dict['id'])          if self.params.get('forceurl', False): -            # For RTMP URLs, also include the playpath -            self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) +            if info_dict.get('requested_formats') is not None: +                for f in info_dict['requested_formats']: +                    self.to_stdout(f['url'] + f.get('play_path', '')) +            else: +         
       # For RTMP URLs, also include the playpath +                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))          if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:              self.to_stdout(info_dict['thumbnail'])          if self.params.get('forcedescription', False) and info_dict.get('description') is not None: diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 27596687d..f4a85443e 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -247,7 +247,7 @@ else:                  userhome = compat_getenv('HOME')              elif 'USERPROFILE' in os.environ:                  userhome = compat_getenv('USERPROFILE') -            elif not 'HOMEPATH' in os.environ: +            elif 'HOMEPATH' not in os.environ:                  return path              else:                  try: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4c9c44b31..746ee69e4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -30,6 +30,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE  from .bbccouk import BBCCoUkIE  from .beeg import BeegIE  from .behindkink import BehindKinkIE +from .bet import BetIE  from .bild import BildIE  from .bilibili import BiliBiliIE  from .blinkx import BlinkxIE @@ -50,7 +51,7 @@ from .cbsnews import CBSNewsIE  from .ceskatelevize import CeskaTelevizeIE  from .channel9 import Channel9IE  from .chilloutzone import ChilloutzoneIE -from .cinemassacre import CinemassacreIE +from .cinchcast import CinchcastIE  from .clipfish import ClipfishIE  from .cliphunter import CliphunterIE  from .clipsyndicate import ClipsyndicateIE @@ -307,6 +308,7 @@ from .promptfile import PromptFileIE  from .prosiebensat1 import ProSiebenSat1IE  from .pyvideo import PyvideoIE  from .quickvid import QuickVidIE +from .radiode import RadioDeIE  from .radiofrance import RadioFranceIE  from .rai import RaiIE  from .rbmaradio import RBMARadioIE @@ -334,6 +336,7 @@ from .savefrom import SaveFromIE  from .sbs import SBSIE  from .scivee import SciVeeIE  from .screencast import ScreencastIE +from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE  from .servingsys import ServingSysIE  from .sexu import SexuIE  from .sexykarma import SexyKarmaIE @@ -524,7 +527,7 @@ from .youtube import (      YoutubeUserIE,      YoutubeWatchLaterIE,  ) -from .zdf import ZDFIE +from .zdf import ZDFIE, ZDFChannelIE  from .zingmp3 import (      ZingMp3SongIE,      ZingMp3AlbumIE, diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index 31fdc0dcc..1bdc25812 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -10,15 +10,15 @@ from ..utils import url_basename  class BehindKinkIE(InfoExtractor):      _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'      _TEST = { -        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', -        'md5': '41ad01222b8442089a55528fec43ec01', +        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', +        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',          'info_dict': { -            'id': '36370', +            'id': '37127',              'ext': 'mp4', -            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!', -            'description': 'The adult industry voice was 
finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...', -            'upload_date': '20140814', -            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg', +            'title': 'What are you passionate about – Marley Blaze', +            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', +            'upload_date': '20141205', +            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',              'age_limit': 18,          }      } @@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          display_id = mobj.group('id') -        year = mobj.group('year') -        month = mobj.group('month') -        day = mobj.group('day') -        upload_date = year + month + day          webpage = self._download_webpage(url, display_id)          video_url = self._search_regex( -            r"'file':\s*'([^']+)'", -            webpage, 'URL base') - -        video_id = url_basename(video_url) -        video_id = video_id.split('_')[0] +            r'<source src="([^"]+)"', webpage, 'video URL') +        video_id = url_basename(video_url).split('_')[0] +        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')          return {              'id': video_id, +            'display_id': display_id,              'url': video_url, -            'ext': 'mp4',              'title': self._og_search_title(webpage), -            'display_id': display_id,              'thumbnail': self._og_search_thumbnail(webpage),              'description': self._og_search_description(webpage),              'upload_date': upload_date, diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py new file mode 100644 index 000000000..c1fc433f7 --- /dev/null +++ b/youtube_dl/extractor/bet.py @@ -0,0 +1,108 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse, +    xpath_text, +    xpath_with_ns, +    int_or_none, +    parse_iso8601, +) + + +class BetIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' +    _TESTS = [ +        { +            'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', +            'info_dict': { +                'id': '417cd61c-c793-4e8e-b006-e445ecc45add', +                'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', +                'ext': 'flv', +                'title': 'BET News Presents: A Conversation With President Obama', +                'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6', +                'duration': 1534, +                'timestamp': 1418075340, +                'upload_date': '20141208', +                'uploader': 'admin', +                'thumbnail': 're:(?i)^https?://.*\.jpg$', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', +            'info_dict': { +                'id': '4160e53b-ad41-43b1-980f-8d85f63121f4', +                'display_id': 'justice-for-ferguson-a-community-reacts', +                'ext': 'flv', +                'title': 'Justice for 
Ferguson: A Community Reacts', +                'description': 'A BET News special.', +                'duration': 1696, +                'timestamp': 1416942360, +                'upload_date': '20141125', +                'uploader': 'admin', +                'thumbnail': 're:(?i)^https?://.*\.jpg$', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        } +    ] + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        media_url = compat_urllib_parse.unquote(self._search_regex( +            [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], +            webpage, 'media URL')) + +        mrss = self._download_xml(media_url, display_id) + +        item = mrss.find('./channel/item') + +        NS_MAP = { +            'dc': 'http://purl.org/dc/elements/1.1/', +            'media': 'http://search.yahoo.com/mrss/', +            'ka': 'http://kickapps.com/karss', +        } + +        title = xpath_text(item, './title', 'title') +        description = xpath_text( +            item, './description', 'description', fatal=False) + +        video_id = xpath_text(item, './guid', 'video id', fatal=False) + +        timestamp = parse_iso8601(xpath_text( +            item, xpath_with_ns('./dc:date', NS_MAP), +            'upload date', fatal=False)) +        uploader = xpath_text( +            item, xpath_with_ns('./dc:creator', NS_MAP), +            'uploader', fatal=False) + +        media_content = item.find( +            xpath_with_ns('./media:content', NS_MAP)) +        duration = int_or_none(media_content.get('duration')) +        smil_url = media_content.get('url') + +        thumbnail = media_content.find( +            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') + +        formats = self._extract_smil_formats(smil_url, display_id) + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'timestamp': timestamp, +            'uploader': uploader, +            'duration': duration, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index da47f27bd..14b814120 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -4,13 +4,17 @@ import re  from .common import InfoExtractor  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( + +from ..compat import ( +    compat_str,      compat_urllib_request, -    unescapeHTML, -    parse_iso8601,      compat_urlparse, +) +from ..utils import (      clean_html, -    compat_str, +    int_or_none, +    parse_iso8601, +    unescapeHTML,  ) @@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):                  'uploader': 'NostalgiaCritic',                  'uploader_id': '246467',              } -        } +        }, +        { +            # https://github.com/rg3/youtube-dl/pull/4404 +            'note': 'Audio only', +            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982', +            'md5': '76c0a56f24e769ceaab21fbb6416a351', +            'info_dict': { +                'id': '7103299', +                'ext': 'flv', +                'title': 'Weekly Manga Recap: Kingdom', +                'description': 'And then Shin breaks the enemy line, and he's all 
like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?', +                'timestamp': 1417660321, +                'upload_date': '20141204', +                'uploader': 'The Rollo T', +                'uploader_id': '407429', +                'duration': 7251, +                'vcodec': 'none', +            } +        },      ]      def _real_extract(self, url): @@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):                      'url': real_url,                      'format_id': role,                      'format_note': media_type, -                    'vcodec': media_content.get(blip('vcodec')), +                    'vcodec': media_content.get(blip('vcodec')) or 'none',                      'acodec': media_content.get(blip('acodec')),                      'filesize': media_content.get('filesize'), -                    'width': int(media_content.get('width')), -                    'height': int(media_content.get('height')), +                    'width': int_or_none(media_content.get('width')), +                    'height': int_or_none(media_content.get('height')),                  })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py new file mode 100644 index 000000000..0c9a24bef --- /dev/null +++ b/youtube_dl/extractor/cinchcast.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    unified_strdate, +    xpath_text, +) + + +class CinchcastIE(InfoExtractor): +    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)' +    _TEST = { +        # Actual test is run in generic, look for undergroundwellness +        'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', +        'only_matching': True, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        doc = self._download_xml( +            'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, +            video_id) + +        item = doc.find('.//item') +        title = xpath_text(item, './title', fatal=True) +        date_str = xpath_text( +            item, './{http://developer.longtailvideo.com/trac/}date') +        upload_date = unified_strdate(date_str, day_first=False) +        # duration is present but wrong +        formats = [] +        formats.append({ +            'format_id': 'main', +            'url': item.find( +                './{http://search.yahoo.com/mrss/}content').attrib['url'], +        }) +        backup_url = xpath_text( +            item, './{http://developer.longtailvideo.com/trac/}backupContent') +        if backup_url: +            formats.append({ +                'preference': 2,  # seems to be more reliable +                'format_id': 'backup', +                'url': backup_url, +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'upload_date': upload_date, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cb6081dd0..d302fe45f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -118,6 +118,7 @@ class InfoExtractor(object):      The following fields are optional: +    alt_title:      A secondary 
title of the video.      display_id      An alternative identifier for the video, not necessarily                      unique, but available before title. Typically, id is                      something like "4234987", title "Dancing naked mole rats", @@ -129,7 +130,7 @@ class InfoExtractor(object):                          * "resolution" (optional, string "{width}x{height"},                                          deprecated)      thumbnail:      Full URL to a video thumbnail image. -    description:    One-line video description. +    description:    Full video description.      uploader:       Full name of the video uploader.      timestamp:      UNIX timestamp of the moment the video became available.      upload_date:    Video upload date (YYYYMMDD). @@ -174,9 +175,10 @@ class InfoExtractor(object):      _type "url" indicates that the video must be extracted from another      location, possibly by a different extractor. Its only required key is:      "url" - the next URL to extract. - -    Additionally, it may have properties believed to be identical to the -    resolved entity, for example "title" if the title of the referred video is +    The key "ie_key" can be set to the class name (minus the trailing "IE", +    e.g. "Youtube") if the extractor class is known in advance. +    Additionally, the dictionary may have any properties of the resolved entity +    known in advance, for example "title" if the title of the referred video is      known ahead of time. @@ -390,6 +392,10 @@ class InfoExtractor(object):              url_or_request, video_id, note, errnote, fatal=fatal)          if (not fatal) and json_string is False:              return None +        return self._parse_json( +            json_string, video_id, transform_source=transform_source, fatal=fatal) + +    def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):          if transform_source:              json_string = transform_source(json_string)          try: @@ -439,7 +445,7 @@ class InfoExtractor(object):          return video_info      @staticmethod -    def playlist_result(entries, playlist_id=None, playlist_title=None): +    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):          """Returns a playlist"""          video_info = {'_type': 'playlist',                        'entries': entries} @@ -447,6 +453,8 @@ class InfoExtractor(object):              video_info['id'] = playlist_id          if playlist_title:              video_info['title'] = playlist_title +        if playlist_description: +            video_info['description'] = playlist_description          return video_info      def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): @@ -790,6 +798,49 @@ class InfoExtractor(object):          self._sort_formats(formats)          return formats +    # TODO: improve extraction +    def _extract_smil_formats(self, smil_url, video_id): +        smil = self._download_xml( +            smil_url, video_id, 'Downloading SMIL file', +            'Unable to download SMIL file') + +        base = smil.find('./head/meta').get('base') + +        formats = [] +        rtmp_count = 0 +        for video in smil.findall('./body/switch/video'): +            src = video.get('src') +            if not src: +                continue +            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) +            width = int_or_none(video.get('width')) +            height = 
int_or_none(video.get('height')) +            proto = video.get('proto') +            if not proto: +                if base: +                    if base.startswith('rtmp'): +                        proto = 'rtmp' +                    elif base.startswith('http'): +                        proto = 'http' +            ext = video.get('ext') +            if proto == 'm3u8': +                formats.extend(self._extract_m3u8_formats(src, video_id, ext)) +            elif proto == 'rtmp': +                rtmp_count += 1 +                streamer = video.get('streamer') or base +                formats.append({ +                    'url': streamer, +                    'play_path': src, +                    'ext': 'flv', +                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), +                    'tbr': bitrate, +                    'width': width, +                    'height': height, +                }) +        self._sort_formats(formats) + +        return formats +      def _live_title(self, name):          """ Generate the title for a live video """          now = datetime.datetime.now() diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 2139f68aa..1ad4e77a8 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -13,9 +13,10 @@ from ..compat import (      compat_urllib_request,  )  from ..utils import ( -    urlencode_postdata,      ExtractorError, +    int_or_none,      limit_length, +    urlencode_postdata,  ) @@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):          'info_dict': {              'id': '637842556329505',              'ext': 'mp4', -            'duration': 38,              'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',          }      }, { @@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):          self._login()      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          url = 'https://www.facebook.com/video/video.php?v=%s' % video_id          webpage = self._download_webpage(url, video_id) @@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):              'id': video_id,              'title': video_title,              'url': video_url, -            'duration': int(video_data['video_duration']), -            'thumbnail': video_data['thumbnail_src'], +            'duration': int_or_none(video_data.get('video_duration')), +            'thumbnail': video_data.get('thumbnail_src'),          } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 328301de3..2b4d8c62f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -467,8 +467,17 @@ class GenericIE(InfoExtractor):              'expected_warnings': [                  'URL could be a direct video link, returning it as such.'              
] -        } - +        }, +        # Cinchcast embed +        { +            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', +            'info_dict': { +                'id': '7141703', +                'ext': 'mp3', +                'upload_date': '20141126', +                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', +            } +        },      ]      def report_following_redirect(self, new_url): @@ -962,6 +971,13 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'SBS') +        # Look for embedded Cinchcast player +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1', +            webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url'), 'Cinchcast') +          mobj = re.search(              r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',              webpage) diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index fccc23884..e97339121 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -1,12 +1,12 @@  from __future__ import unicode_literals -import re -import json -import random -import string -  from .common import InfoExtractor -from ..utils import find_xpath_attr +from ..utils import ( +    find_xpath_attr, +    int_or_none, +    js_to_json, +    unescapeHTML, +)  class HowStuffWorksIE(InfoExtractor): @@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor):              'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',              'info_dict': {                  'id': '450221', -                'display_id': 'cool-jobs-iditarod-musher',                  'ext': 'flv',                  'title': 'Cool Jobs - Iditarod Musher', -                'description': 'md5:82bb58438a88027b8186a1fccb365f90', +                'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.', +                'display_id': 'cool-jobs-iditarod-musher',                  'thumbnail': 're:^https?://.*\.jpg$', +                'duration': 161,              }, -            'params': { -                # md5 is not consistent -                'skip_download': True -            }          },          {              'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',              'info_dict': {                  'id': '453464', -                'display_id': 'survival-zone-food-and-water-in-the-savanna',                  'ext': 'mp4',                  'title': 'Survival Zone: Food and Water In the Savanna', -                'description': 'md5:7e1c89f6411434970c15fa094170c371', +                'description': 'Learn how to find both food and water while trekking in the African savannah. 
In this video from the Discovery Channel.', +                'display_id': 'survival-zone-food-and-water-in-the-savanna',                  'thumbnail': 're:^https?://.*\.jpg$',              }, -            'params': { -                # md5 is not consistent -                'skip_download': True -            }          },          {              'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',              'info_dict': {                  'id': '440011', -                'display_id': 'sword-swallowing-1-by-dan-meyer',                  'ext': 'flv',                  'title': 'Sword Swallowing #1 by Dan Meyer', -                'description': 'md5:b2409e88172913e2e7d3d1159b0ef735', +                'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>', +                'display_id': 'sword-swallowing-1-by-dan-meyer',                  'thumbnail': 're:^https?://.*\.jpg$',              }, -            'params': { -                # md5 is not consistent -                'skip_download': True -            }          },      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        display_id = mobj.group('id') +        display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) +        clip_js = self._search_regex( +            r'(?s)var clip = ({.*?});', webpage, 'clip info') +        clip_info = self._parse_json( +            clip_js, display_id, transform_source=js_to_json) -        content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id') - -        mp4 = self._search_regex( -            r'''(?xs)var\s+clip\s*=\s*{\s* -                .+?\s* -                content_id\s*:\s*%s\s*,\s* -                .+?\s* -                mp4\s*:\s*\[(.*?),?\]\s* -                };\s* -                videoData\.push\(clip\);''' % content_id, -            webpage, 'mp4', fatal=False, default=None) - -        smil = self._download_xml( -            'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id, -            content_id, 'Downloading video SMIL') - -        http_base = find_xpath_attr( -            smil, -            './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), -            'name', -            'httpBase').get('content') - -        def random_string(str_len=0): -            return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)]) - -        URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12)) - +        video_id = clip_info['content_id']          formats = [] +        m3u8_url = clip_info.get('m3u8') +        if m3u8_url: +            formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') +        for video in clip_info.get('mp4', []): +            formats.append({ +                'url': video['src'], +                'format_id': video['bitrate'], +                'vbr': int(video['bitrate'].rstrip('k')), +            }) + +        if not formats: +            smil = self._download_xml( +                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id, +                video_id, 'Downloading video SMIL') + +            http_base = find_xpath_attr( +                smil, +                './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), +                'name', +        
        'httpBase').get('content') + +            URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A' -        if mp4: -            for video in json.loads('[%s]' % mp4): -                bitrate = video['bitrate'] -                fmt = { -                    'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX, -                    'format_id': bitrate, -                } -                m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate) -                if m: -                    fmt['vbr'] = int(m.group('vbr')) -                formats.append(fmt) -        else:              for video in smil.findall( -                    './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): -                vbr = int(video.attrib['system-bitrate']) / 1000 +                    './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): +                vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)                  formats.append({                      'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),                      'format_id': '%dk' % vbr, @@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor):          self._sort_formats(formats) -        title = self._og_search_title(webpage) -        TITLE_SUFFIX = ' : HowStuffWorks' -        if title.endswith(TITLE_SUFFIX): -            title = title[:-len(TITLE_SUFFIX)] - -        description = self._og_search_description(webpage) -        thumbnail = self._og_search_thumbnail(webpage) -          return { -            'id': content_id, +            'id': '%s' % video_id,              'display_id': display_id, -            'title': title, -            'description': description, -            'thumbnail': thumbnail, +            'title': unescapeHTML(clip_info['clip_title']), +            'description': unescapeHTML(clip_info.get('caption')), +            'thumbnail': clip_info.get('video_still_url'), +            'duration': clip_info.get('duration'),              'formats': formats,          } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bb8937c4d..55cc33a3e 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor):              raise ExtractorError('Unable to extract track url')          PREFIX = ( -            r'<div class="cloudcast-play-button-container[^"]*?"' +            r'<span class="play-button[^"]*?"'              r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')          title = self._html_search_regex(              PREFIX + r'm-title="([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 0244368e9..b2f40344f 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals  import re  import json +import os  from .common import InfoExtractor  from ..compat import ( @@ -26,7 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):          initial_video_url = info['publishPoint']          if info['formats'] == '1':              parsed_url = compat_urllib_parse_urlparse(initial_video_url) -            path = parsed_url.path.replace('.', '_sd.', 1) +            filename, ext = os.path.splitext(parsed_url.path) +            path = '%s_sd%s' % (filename, ext)              data = compat_urllib_parse.urlencode({                  'type': 'fvod',                  'path': compat_urlparse.urlunparse(parsed_url[:2] + 
(path,) + parsed_url[3:]) diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py index 13c8d79cd..ee740cd9c 100644 --- a/youtube_dl/extractor/ntv.py +++ b/youtube_dl/extractor/ntv.py @@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):                  'rtmp_conn': 'B:1',                  'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',                  'page_url': 'http://www.ntv.ru', -                'flash_ver': 'LNX 11,2,202,341', +                'flash_version': 'LNX 11,2,202,341',                  'rtmp_live': True,                  'ext': 'flv',                  'filesize': int(size.text), diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index bac484c67..954dfccb7 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -8,7 +8,6 @@ from ..utils import (      int_or_none,      js_to_json,      qualities, -    determine_ext,  ) @@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):          thumbnail = self._search_regex(              r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) -        quality = qualities(['SD', 'HD']) -        formats = [{ -            'url': source['file'], -            'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])), -            'quality': quality(source['label']), -        } for source in json.loads(js_to_json(self._search_regex( -            r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))] +        quality = qualities(['sd', 'hd']) +        sources = json.loads(js_to_json(self._search_regex( +            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) +        formats = [] +        for container, s in sources.items(): +            for qname, video_url in s.items(): +                formats.append({ +                    'url': video_url, +                    'container': container, +                    'format_id': '%s-%s' % (container, qname), +                    'quality': quality(qname), +                })          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 32d747ede..1262793c8 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Im Interview: Kai Wiesinger',                  'description': 'md5:e4e5370652ec63b95023e914190b4eb9', -                'upload_date': '20140225', +                'upload_date': '20140203',                  'duration': 522.56,              },              'params': { @@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',                  'description': 'md5:2669cde3febe9bce13904f701e774eb6', -                'upload_date': '20140225', +                'upload_date': '20141014',                  'duration': 2410.44,              },              'params': { @@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):                  'skip_download': True,              },          }, +        { +            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', +            'info_dict': { +                'id': '439664', +                'title': 'Episode 8 - Ganze Folge - Playlist', +                'description': 'md5:63b8963e71f481782aeea877658dec84', +            }, +            
'playlist_count': 2, +        },      ]      _CLIPID_REGEXES = [          r'"clip_id"\s*:\s+"(\d+)"',          r'clipid: "(\d+)"',          r'clip[iI]d=(\d+)', +        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",      ]      _TITLE_REGEXES = [          r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', @@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):          r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',          r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',      ] +    _PAGE_TYPE_REGEXES = [ +        r'<meta name="page_type" content="([^"]+)">', +        r"'itemType'\s*:\s*'([^']*)'", +    ] +    _PLAYLIST_ID_REGEXES = [ +        r'content[iI]d=(\d+)', +        r"'itemId'\s*:\s*'([^']*)'", +    ] +    _PLAYLIST_CLIP_REGEXES = [ +        r'(?s)data-qvt=.+?<a href="([^"]+)"', +    ] -    def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - +    def _extract_clip(self, url, webpage):          clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')          access_token = 'testclient' @@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):              'duration': duration,              'formats': formats,          } + +    def _extract_playlist(self, url, webpage): +        playlist_id = self._html_search_regex( +            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') +        for regex in self._PLAYLIST_CLIP_REGEXES: +            playlist_clips = re.findall(regex, webpage) +            if playlist_clips: +                title = self._html_search_regex( +                    self._TITLE_REGEXES, webpage, 'title') +                description = self._html_search_regex( +                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) +                entries = [ +                    self.url_result( +                        re.match('(.+?//.+?)/', url).group(1) + clip_path, +                        'ProSiebenSat1') +                    for clip_path in playlist_clips] +                return self.playlist_result(entries, playlist_id, title, description) + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        page_type = self._search_regex( +            self._PAGE_TYPE_REGEXES, webpage, +            'page type', default='clip').lower() +        if page_type == 'clip': +            return self._extract_clip(url, webpage) +        elif page_type == 'playlist': +            return self._extract_playlist(url, webpage) diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py new file mode 100644 index 000000000..f95bc9454 --- /dev/null +++ b/youtube_dl/extractor/radiode.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor + + +class RadioDeIE(InfoExtractor): +    IE_NAME = 'radio.de' +    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' +    _TEST = { +        'url': 'http://ndr2.radio.de/', +        'md5': '3b4cdd011bc59174596b6145cda474a4', +        'info_dict': { +            'id': 'ndr2', +            'ext': 'mp3', +            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +            'description': 'md5:591c49c702db1a33751625ebfb67f273', +            'thumbnail': 're:^https?://.*\.png', +        }, +        'params': { +            'skip_download': True, +        } 
+    }
+
+    def _real_extract(self, url):
+        radio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, radio_id)
+
+        broadcast = json.loads(self._search_regex(
+            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
+            webpage, 'broadcast'))
+
+        title = self._live_title(broadcast['name'])
+        description = broadcast.get('description') or broadcast.get('shortDescription')
+        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+
+        formats = [{
+            'url': stream['streamUrl'],
+            'ext': stream['streamContentFormat'].lower(),
+            'acodec': stream['streamContentFormat'],
+            'abr': stream['bitRate'],
+            'asr': stream['sampleRate']
+        } for stream in broadcast['streamUrls']]
+        self._sort_formats(formats)
+
+        return {
+            'id': radio_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'is_live': True,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/screenwavemedia.py
index b7fa73c3b..6c9fdb7c1 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/screenwavemedia.py
@@ -5,61 +5,27 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
+    unified_strdate,
 )
 
 
-class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': '19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': '521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-        video_id = mobj.group('video_id')
-        full_video_id = mobj.group('full_video_id')
+class ScreenwaveMediaIE(InfoExtractor):
+    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
 
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
+    _TESTS = [{
+        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+        'only_matching': True,
+    }]
 
-        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
 
+        vidtitle = self._search_regex(
+            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
         vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
 
         videolist_url = None
 
@@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
         if mobj:
             videoserver = mobj.group('videoserver')
             mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
-            vidid = mobj.group('vidid') if mobj else full_video_id
+            vidid = mobj.group('vidid') if mobj else video_id
             videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
         else:
             mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):
                 file_ = src.partition(':')[-1]
                 width = int_or_none(video.get('width'))
                 height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'))
+                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                 format = {
                     'url': baseurl + file_,
                     'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                 }
                 if width or height:
                     format.update({
-                        'tbr': bitrate // 1000 if bitrate else None,
+                        'tbr': bitrate,
                         'width': width,
                         'height': height,
                     })
                 else:
                     format.update({
-                        'abr': bitrate // 1000 if bitrate else None,
+                        'abr': bitrate,
                         'vcodec': 'none',
                     })
                 formats.append(format)
-            self._sort_formats(formats)
         else:
             formats = [{
                 'url': vidurl,
             }]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video_title,
+            'title': vidtitle,
             'formats': formats,
+        }
+
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b', +            }, +        }, +        { +            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', +            'md5': 'd72f10cd39eac4215048f62ab477a511', +            'info_dict': { +                'id': 'Cinemassacre-521be8ef82b16', +                'ext': 'mp4', +                'upload_date': '20131002', +                'title': 'The Mummy’s Hand (1940)', +            }, +        } +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        display_id = mobj.group('display_id') +        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') + +        webpage = self._download_webpage(url, display_id) + +        playerdata_url = self._search_regex( +            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', +            webpage, 'player data URL') +        video_title = self._html_search_regex( +            r'<title>(?P<title>.+?)\|', webpage, 'title') +        video_description = self._html_search_regex( +            r'<div class="entry-content">(?P<description>.+?)</div>', +            webpage, 'description', flags=re.DOTALL, fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage) + +        return { +            '_type': 'url_transparent', +            'display_id': display_id, +            'title': video_title, +            'description': video_description, +            'upload_date': video_date, +            'thumbnail': video_thumbnail, +            'url': playerdata_url, +        } + + +class TeamFourIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' +    _TEST = { +        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', +        'info_dict': { +            'id': 'TeamFourStar-5292a02f20bfa', +            'ext': 'mp4', +            'upload_date': '20130401', +            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', +            'title': 'A Moment With TFS Episode 4', +        } +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) + +        playerdata_url = self._search_regex( +            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', +            webpage, 'player data URL') + +        video_title = self._html_search_regex( +            r'<div class="heroheadingtitle">(?P<title>.+?)</div>', +            webpage, 'title') +        video_date = unified_strdate(self._html_search_regex( +            r'<div class="heroheadingdate">(?P<date>.+?)</div>', +            webpage, 'date', fatal=False)) +        video_description = self._html_search_regex( +            r'(?s)<div class="postcontent">(?P<description>.+?)</div>', +            webpage, 'description', fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage) + +        return { +            '_type': 'url_transparent', +            'display_id': display_id, +            'title': video_title,              'description': video_description,              'upload_date': video_date,              'thumbnail': video_thumbnail, +            'url': playerdata_url,          } diff --git a/youtube_dl/extractor/smotri.py 
b/youtube_dl/extractor/smotri.py index 0751efc61..646af3cc9 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):          broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')          if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: -            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) +            raise ExtractorError( +                'Broadcast %s does not exist' % broadcast_id, expected=True)          # Adult content          if re.search('EroConfirmText">', broadcast_page) is not None:              (username, password) = self._get_login_info()              if username is None: -                raise ExtractorError('Erotic broadcasts allowed only for registered users, ' -                                     'use --username and --password options to provide account credentials.', expected=True) +                raise ExtractorError( +                    'Erotic broadcasts allowed only for registered users, ' +                    'use --username and --password options to provide account credentials.', +                    expected=True)              login_form = {                  'login-hint53': '1', @@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):                  'password': password,              } -            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) +            request = compat_urllib_request.Request( +                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))              request.add_header('Content-Type', 'application/x-www-form-urlencoded') -            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') +            broadcast_page = self._download_webpage( +                request, broadcast_id, 'Logging in and confirming age')              if re.search('>Неверный логин или пароль<', broadcast_page) is not None:                  raise ExtractorError('Unable to log in: bad username or password', expected=True) @@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):              adult_content = False          ticket = self._html_search_regex( -            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', +            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",              broadcast_page, 'broadcast ticket')          url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket @@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):          if broadcast_password:              url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() -        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') +        broadcast_json_page = self._download_webpage( +            url, broadcast_id, 'Downloading broadcast JSON')          try:              broadcast_json = json.loads(broadcast_json_page)              protected_broadcast = broadcast_json['_pass_protected'] == 1              if protected_broadcast and not broadcast_password: -                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) +                raise ExtractorError( +                    'This broadcast is protected by a password, use 
the --video-password option',
+                    expected=True)

             broadcast_offline = broadcast_json['is_play'] == 0
             if broadcast_offline:
                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

             rtmp_url = broadcast_json['_server']
-            if not rtmp_url.startswith('rtmp://'):
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
                 raise ExtractorError('Unexpected broadcast rtmp URL')

             broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
             broadcast_thumbnail = broadcast_json['_imgURL']
-            broadcast_title = broadcast_json['title']
+            broadcast_title = self._live_title(broadcast_json['title'])
             broadcast_description = broadcast_json['description']
             broadcaster_nick = broadcast_json['nick']
             broadcaster_login = broadcast_json['login']
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
             'age_limit': 18 if adult_content else 0,
             'ext': 'flv',
             'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
             'rtmp_live': True,
-            'rtmp_conn': rtmp_conn
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
         }
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index d81d1d1a6..ba65996dc 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -1,32 +1,30 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
-    str_to_int,
+    parse_age_limit,
 )


 class TvigleIE(InfoExtractor):
     IE_NAME = 'tvigle'
     IE_DESC = 'Интернет-телевидение Tvigle.ru'

-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'

     _TESTS = [
         {
-            'url': 'http://www.tvigle.ru/video/brat/',
-            'md5': 'ff4344a4894b0524441fb6f8218dc716',
+            'url': 'http://www.tvigle.ru/video/sokrat/',
+            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
             'info_dict': {
-                'id': '5118490',
-                'display_id': 'brat',
-                'ext': 'mp4',
-                'title': 'Брат',
-                'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
-                'duration': 5722.6,
-                'age_limit': 16,
+                'id': '1848932',
+                'display_id': 'sokrat',
+                'ext': 'flv',
+                'title': 'Сократ',
+                'description': 'md5:a05bd01be310074d5833efc6743be95e',
+                'duration': 6586,
+                'age_limit': 0,
             },
         },
         {
@@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
     ]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)

         webpage = self._download_webpage(url, display_id)

@@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
         title = item['title']
         description = item['description']
         thumbnail = item['thumbnail']
-        duration = float_or_none(item['durationMilliseconds'], 1000)
-        age_limit = str_to_int(item['ageRestrictions'])
+        duration = float_or_none(item.get('durationMilliseconds'), 1000)
+        age_limit = parse_age_limit(item.get('ageRestrictions'))

         formats = []
         for vcodec, fmts in item['videos'].items():
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index eb9473754..9a53a3c74 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     parse_iso8601,
     qualities,
 )
@@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor):
             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')

         if video['is_geo_blocked']:
-            raise ExtractorError(
-                'This content is not available in your country due to copyright reasons', expected=True)
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')

         streams = self._download_json(
             'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 36aa1ad6e..397d167e8 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals

 import itertools
@@ -5,6 +6,8 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
     ExtractorError,
     parse_iso8601,
 )
@@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor):
         """
     _PAGE_LIMIT = 100
     _API_BASE = 'https://api.twitch.tv'
+    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
     _TESTS = [{
         'url': 'http://www.twitch.tv/riotgames/b/577357806',
         'info_dict': {
@@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor):
             'view_count': info['views'],
         }

+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        authenticity_token = self._search_regex(
+            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+            login_page, 'authenticity token')
+
+        login_form = {
+            'utf8': '✓'.encode('utf-8'),
+            'authenticity_token': authenticity_token,
+            'redirect_on_login': '',
+            'embed_form': 'false',
+            'mp_source_action': '',
+            'follow': '',
+            'user[login]': username,
+            'user[password]': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Referer', self._LOGIN_URL)
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        m = re.search(
+            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+        if m:
+            raise ExtractorError(
+                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj.group('chapterid'):
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 4970b2f23..0b58fe0fe 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -17,6 +17,7 @@ class VineIE(InfoExtractor):
             'id': 'b9KOOWX7HUx',
             'ext': 'mp4',
             'title': 'Chicken.',
+            'alt_title': 'Vine by Jack Dorsey',
             'description': 'Chicken.',
             'upload_date': '20130519',
             'uploader': 'Jack Dorsey',
@@ -25,30 +26,26 @@
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)

         data = json.loads(self._html_search_regex(
             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))

-        formats = [
-            {
-                'url': data['videoLowURL'],
-                'ext': 'mp4',
-                'format_id': 'low',
-            },
-            {
-                'url': data['videoUrl'],
-                'ext': 'mp4',
-                'format_id': 'standard',
-            }
-        ]
+        formats = [{
+            'url': data['videoLowURL'],
+            'ext': 'mp4',
+            'format_id': 'low',
+        }, {
+            'url': data['videoUrl'],
+            'ext': 'mp4',
+            'format_id': 'standard',
+        }]

         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
+            'alt_title': self._og_search_description(webpage),
             'description': data['description'],
             'thumbnail': data['thumbnailUrl'],
             'upload_date': unified_strdate(data['created']),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 8b6e591a4..7b6179a2a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
     compat_chr,
     compat_parse_qs,
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
     compat_str,
-
+)
+from ..utils import (
     clean_html,
-    get_element_by_id,
-    get_element_by_attribute,
     ExtractorError,
+    get_element_by_attribute,
+    get_element_by_id,
     int_or_none,
     OnDemandPagedList,
+    orderedSet,
     unescapeHTML,
     unified_strdate,
-    orderedSet,
     uppercase_escape,
 )
@@ -417,6 +418,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'upload_date': '20140605',
             },
         },
+        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+        {
+            'url': '__2ABJjxzNo',
+            'info_dict': {
+                'id': '__2ABJjxzNo',
+                'ext': 'mp4',
+                'upload_date': '20100430',
+                'uploader_id': 'deadmau5',
+                'description': 'md5:12c56784b8032162bb936a5f76d55360',
+                'uploader': 'deadmau5',
+                'title': 'Deadmau5 - Some Chords (HD)',
+            },
+            'expected_warnings': [
+                'DASH manifest missing',
+            ]
+        },
+        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+        {
+            'url': 'lqQg6PlCWgI',
+            'info_dict': {
+                'id': 'lqQg6PlCWgI',
+                'ext': 'mp4',
+                'upload_date': '20120731',
+                'uploader_id': 'olympic',
+                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': 'Olympics',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
     ]

     def __init__(self, *args, **kwargs):
@@ -666,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')

+    def _parse_dash_manifest(
+            self, video_id, dash_manifest_url, player_url, age_gate):
+        def decrypt_sig(mobj):
+            s = mobj.group(1)
+            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+            return '/signature/%s' % dec_s
+        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+        dash_doc = self._download_xml(
+            dash_manifest_url, video_id,
+            note='Downloading DASH manifest',
+            errnote='Could not download DASH manifest')
+
+        formats = []
+        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+            if url_el is None:
+                continue
+            format_id = r.attrib['id']
+            video_url = url_el.text
+            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+            f = {
+                'format_id': format_id,
+                'url': video_url,
+                'width': int_or_none(r.attrib.get('width')),
+                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                'filesize': filesize,
+                'fps': int_or_none(r.attrib.get('frameRate')),
+            }
+            try:
+                existing_format = next(
+                    fo for fo in formats
+                    if fo['format_id'] == format_id)
+            except StopIteration:
+                f.update(self._formats.get(format_id, {}))
+                formats.append(f)
+            else:
+                existing_format.update(f)
+        return formats
+
     def _real_extract(self, url):
         proto = (
             'http' if self._downloader.params.get('prefer_insecure', False)
@@ -800,7 +873,7 @@

         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', fatal=False)
+            video_webpage, 'categories', default=None)
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -878,7 +951,7 @@
                 'url': video_info['conn'][0],
                 'player_url': player_url,
            }]
-        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -943,51 +1016,17 @@

         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
-            try:
-                # The DASH manifest used needs to be the one from the original video_webpage.
-                # The one found in get_video_info seems to be using different signatures.
-                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
-                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
-                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                dash_manifest_url = video_info.get('dashmpd')[0]
-
-                def decrypt_sig(mobj):
-                    s = mobj.group(1)
-                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
-                    return '/signature/%s' % dec_s
-                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
-                dash_doc = self._download_xml(
-                    dash_manifest_url, video_id,
-                    note='Downloading DASH manifest',
-                    errnote='Could not download DASH manifest')
-                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-                    if url_el is None:
-                        continue
-                    format_id = r.attrib['id']
-                    video_url = url_el.text
-                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-                    f = {
-                        'format_id': format_id,
-                        'url': video_url,
-                        'width': int_or_none(r.attrib.get('width')),
-                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                        'filesize': filesize,
-                        'fps': int_or_none(r.attrib.get('frameRate')),
-                    }
-                    try:
-                        existing_format = next(
-                            fo for fo in formats
-                            if fo['format_id'] == format_id)
-                    except StopIteration:
-                        f.update(self._formats.get(format_id, {}))
-                        formats.append(f)
-                    else:
-                        existing_format.update(f)
-
-            except (ExtractorError, KeyError) as e:
-                self.report_warning('Skipping DASH manifest: %r' % e, video_id)
+            dash_mpd = video_info.get('dashmpd')
+            if dash_mpd:
+                dash_manifest_url = dash_mpd[0]
+                try:
+                    dash_formats = self._parse_dash_manifest(
+                        video_id, dash_manifest_url, player_url, age_gate)
+                except (ExtractorError, KeyError) as e:
+                    self.report_warning(
+                        'Skipping DASH manifest: %r' % e, video_id)
+                else:
+                    formats.extend(dash_formats)

         self._sort_formats(formats)
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 9ff00e26c..74c76a9a0 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import functools
 import re

 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     unified_strdate,
+    OnDemandPagedList,
 )
@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):


 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

     _TEST = {
         'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+        'info_dict': {
+            'id': '1586442',
+        },
+        'playlist_count': 4,
+    }
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, channel_id, page):
+        offset = page * self._PAGE_SIZE
+        xml_url = (
+            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+            % (offset, self._PAGE_SIZE, channel_id))
+        doc = self._download_xml(
+            xml_url, channel_id,
+            note='Downloading channel info',
+            errnote='Failed to download channel info')
+
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        for asset in doc.findall('.//teasers/teaser'):
+            a_type = asset.find('./type').text
+            a_id = asset.find('./details/assetId').text
+            if a_type not in ('video', 'topic'):
+                continue
+            yield {
+                '_type': 'url',
+                'playlist_title': title,
+                'playlist_description': description,
+                'url': 'zdf:%s:%s' % (a_type, a_id),
+            }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'entries': entries,
+        }
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py
index 09db43611..75c0f7bbe 100644
--- a/youtube_dl/postprocessor/execafterdownload.py
+++ b/youtube_dl/postprocessor/execafterdownload.py
@@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):

     def run(self, information):
         cmd = self.exec_cmd
-        if not '{}' in cmd:
+        if '{}' not in cmd:
             cmd += ' {}'

         cmd = cmd.replace('{}', shlex_quote(information['filepath']))
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 4c07a558e..2d2703368 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -79,7 +79,7 @@ def update_self(to_screen, verbose):
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t obtain versions info. Please try again later.')
         return
-    if not 'signature' in versions_info:
+    if 'signature' not in versions_info:
         to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
         return
     signature = versions_info['signature']
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2b0f4e589..51a822e4f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -166,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
         xpath = xpath.encode('ascii')

     n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
         if fatal:
             name = xpath if name is None else name
             raise ExtractorError('Could not find XML element %s' % name)
@@ -644,17 +644,19 @@ def parse_iso8601(date_str, delimiter='T'):
     return calendar.timegm(dt.timetuple())


-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
     """Return a string with the date in the format YYYYMMDD"""

     if date_str is None:
         return None
-
     upload_date = None
     # Replace commas
     date_str = date_str.replace(',', ' ')
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
     format_expressions = [
         '%d %B %Y',
         '%d %b %Y',
@@ -669,7 +671,6 @@ def unified_strdate(date_str):
         '%d/%m/%Y',
         '%d/%m/%y',
         '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
         '%Y-%m-%d %H:%M:%S',
         '%Y-%m-%d %H:%M:%S.%f',
         '%d.%m.%Y %H:%M',
@@ -681,6 +682,14 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%f',
         '%Y-%m-%dT%H:%M',
     ]
+    if day_first:
+        format_expressions.extend([
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m/%d/%Y %H:%M:%S',
+        ])
     for expression in format_expressions:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -712,8 +721,10 @@ def date_from_str(date_str):
     Return a datetime object from a string in the format YYYYMMDD or
     (now|today)[+-][0-9](day|week|month|year)(s)?"""
     today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
         return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
     if match is not None:
         sign = match.group('sign')
@@ -1024,7 +1035,7 @@ def smuggle_url(url, data):


 def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
         return smug_url, default
     url, _, sdata = smug_url.rpartition('#')
     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 3ac525557..7289ea4d2 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2014.12.06.1'
+__version__ = '2014.12.12.1'
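
The smotri.com broadcast change above stops merely checking for an rtmp:// prefix and instead derives the RTMP app from the server URL plus the broadcast's _vidURL. A minimal sketch of just that parsing step, with made-up values standing in for the broadcast JSON fields:

import re


def rtmp_app(rtmp_url, vid_url):
    # Everything after the host becomes the base app; _vidURL is appended to it.
    mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
    if not mobj:
        raise ValueError('Unexpected broadcast rtmp URL')
    return '%s/%s' % (mobj.group('app'), vid_url)


# Hypothetical inputs shaped like broadcast_json['_server'] and ['_vidURL']:
print(rtmp_app('rtmp://broadcast.example.com/live', 'abc123'))  # -> 'live/abc123'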
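
The new Twitch _login() follows a common scrape-and-post pattern: download the login form, pull out the CSRF authenticity_token, then POST the credentials with the login page as Referer and surface any inline error message. A rough standalone sketch of the same flow using only the Python standard library; the URL, field names and regexes come from the diff, while cookie handling and the extra form fields are simplified:

import re
import urllib.parse
import urllib.request

LOGIN_URL = 'https://secure.twitch.tv/user/login'


def login(username, password):
    # Fetch the login form and extract the CSRF token embedded in it.
    login_page = urllib.request.urlopen(LOGIN_URL).read().decode('utf-8')
    token = re.search(
        r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
        login_page)
    if not token:
        raise RuntimeError('Could not find authenticity token')

    form = {
        'utf8': '✓',
        'authenticity_token': token.group(1),
        'user[login]': username,
        'user[password]': password,
    }
    data = urllib.parse.urlencode(form).encode('utf-8')

    # POST the form back, pretending to submit it from the login page itself.
    request = urllib.request.Request(LOGIN_URL, data, {'Referer': LOGIN_URL})
    response = urllib.request.urlopen(request).read().decode('utf-8')

    # Failures are reported inline on the page rather than via HTTP status.
    err = re.search(r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
    if err:
        raise RuntimeError('Unable to login: %s' % err.group('msg').strip())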
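
The _parse_dash_manifest() helper factored out above walks the MPD's Representation elements with namespaced ElementTree lookups and maps their attributes onto format fields. A self-contained sketch of that parsing step against an in-memory manifest; the XML below is a made-up minimal example, not a real YouTube response:

import xml.etree.ElementTree as ET

NS = '{urn:mpeg:DASH:schema:MPD:2011}'

SAMPLE_MPD = '''<?xml version="1.0"?>
<MPD xmlns="urn:mpeg:DASH:schema:MPD:2011">
  <Period><AdaptationSet>
    <Representation id="137" width="1920" bandwidth="4000000" frameRate="30">
      <BaseURL>http://example.com/video_137</BaseURL>
    </Representation>
    <Representation id="140" audioSamplingRate="44100" bandwidth="128000">
      <BaseURL>http://example.com/audio_140</BaseURL>
    </Representation>
  </AdaptationSet></Period>
</MPD>'''


def int_attr(el, name, scale=1):
    # Mirrors the int_or_none(..., scale) pattern: missing attributes become None.
    value = el.attrib.get(name)
    return int(value) // scale if value is not None else None


def parse_dash_formats(mpd_xml):
    formats = []
    for r in ET.fromstring(mpd_xml).findall('.//%sRepresentation' % NS):
        url_el = r.find('%sBaseURL' % NS)
        if url_el is None:
            continue  # nothing downloadable for this representation
        formats.append({
            'format_id': r.attrib['id'],
            'url': url_el.text,
            'width': int_attr(r, 'width'),
            'tbr': int_attr(r, 'bandwidth', 1000),
            'asr': int_attr(r, 'audioSamplingRate'),
            'fps': int_attr(r, 'frameRate'),
        })
    return formats


print(parse_dash_formats(SAMPLE_MPD))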
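
ZDFChannelIE pages through the channel listing lazily: functools.partial binds the channel id to _fetch_page, and OnDemandPagedList only requests the 50-item pages a given slice actually needs. A rough illustration of that idea with a fake page function in place of the ZDF XML service and a simplified stand-in for the paged-list slicing:

import functools

PAGE_SIZE = 50


def fetch_page(channel_id, page):
    # Stand-in for the XML request; pretend the channel has 120 items in total.
    offset = page * PAGE_SIZE
    total = 120
    for i in range(offset, min(offset + PAGE_SIZE, total)):
        yield {'_type': 'url', 'url': 'zdf:video:%s-%d' % (channel_id, i)}


def get_slice(page_func, page_size, start, end):
    # Simplified version of on-demand slicing: only pages overlapping
    # [start, end) are ever fetched.
    entries = []
    for page in range(start // page_size, (end + page_size - 1) // page_size):
        page_entries = list(page_func(page))
        first = max(start - page * page_size, 0)
        last = min(end - page * page_size, len(page_entries))
        entries.extend(page_entries[first:last])
    return entries


page_func = functools.partial(fetch_page, '1586442')
print(len(get_slice(page_func, PAGE_SIZE, 0, 60)))     # 60 entries, two pages fetched
print(len(get_slice(page_func, PAGE_SIZE, 100, 200)))  # only 20 items exist past offset 100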
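
The unified_strdate() change in utils.py handles US-style timestamps: it first strips an "AM/PM + timezone" suffix and then, when day_first=False, prefers the month-first %m/%d/%Y pattern over the default day-first one. A condensed sketch of just that path:

import datetime
import re


def to_yyyymmdd(date_str, day_first=True):
    # Drop a trailing "AM/PM <TZ>" chunk, e.g. "11:30:00 AM PST" -> "11:30:00".
    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
    # day_first decides whether 11/26/2014 means Nov 26 or (invalid) month 26.
    fmt = '%d/%m/%Y %H:%M:%S' if day_first else '%m/%d/%Y %H:%M:%S'
    return datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')


print(to_yyyymmdd('11/26/2014 11:30:00 AM PST', day_first=False))  # 20141126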
