diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-11-23 21:33:31 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-11-23 21:33:31 +0100 |
commit | 784b6d3a9bc79fe55a8b132fd10555c1e9a61c31 (patch) | |
tree | e9a7996c44fd8d07f0bef5cdebbafdbe9c0b7bb4 /youtube_dl/extractor | |
parent | f3a34072266c0a7595f73fa032685cf8a50d2ab4 (diff) | |
parent | 2514d2635e0c8ff0fb72d00a093a849001df2bdd (diff) |
Merge remote-tracking branch 'jtwaleson/master'
Diffstat (limited to 'youtube_dl/extractor')
130 files changed, 312 insertions, 373 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fcb75af34..43185345e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -529,4 +529,4 @@ def gen_extractors(): def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" - return globals()[ie_name+'IE'] + return globals()[ie_name + 'IE'] diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index b4b40f2d4..0d05cbb4b 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor + class AdultSwimIE(InfoExtractor): _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' _TEST = { diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 748608826..56775289b 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 4359b88d1..567a76cf0 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -70,11 +70,13 @@ class AppleTrailersIE(InfoExtractor): uploader_id = mobj.group('company') playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + def fix_html(s): s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s) # The ' in the onClick attributes are not escaped, it couldn't be parsed # like: http://trailers.apple.com/trailers/wb/gravity/ + def _clean_json(m): return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') s = re.sub(self._JSON_RE, _clean_json, s) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 630b1faa9..967bd865c 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -192,4 +192,3 @@ class ARDIE(InfoExtractor): 'upload_date': upload_date, 'thumbnail': thumbnail, } - diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 3a57ce527..219631b9b 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -13,7 +13,7 @@ from ..utils import ( qualities, ) -# There are different sources of video in arte.tv, the extraction process +# There are different sources of video in arte.tv, the extraction process # is different for each one. The videos usually expire in 7 days, so we can't # add tests. diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 6232d2cd0..eeeec768f 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -12,17 +12,17 @@ class AudiomackIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)' IE_NAME = 'audiomack' _TESTS = [ - #hosted on audiomack + # hosted on audiomack { 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', 'info_dict': { - 'id' : 'roosh-williams/extraordinary', + 'id': 'roosh-williams/extraordinary', 'ext': 'mp3', 'title': 'Roosh Williams - Extraordinary' } }, - #hosted on soundcloud via audiomack + # hosted on soundcloud via audiomack { 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', 'file': '172419696.mp3', @@ -49,7 +49,7 @@ class AudiomackIE(InfoExtractor): raise ExtractorError("Unable to deduce api url of song") realurl = api_response["url"] - #Audiomack wraps a lot of soundcloud tracks in their branded wrapper + # Audiomack wraps a lot of soundcloud tracks in their branded wrapper # - if so, pass the work off to the soundcloud extractor if SoundcloudIE.suitable(realurl): return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'} diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index de5d4faf3..eab99faaa 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor): _TEST = { 'url': 'http://bambuser.com/v/4050584', # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 - #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641', + # u'md5': 'fba8f7693e48fd4e8641b3fd5539a641', 'info_dict': { 'id': '4050584', 'ext': 'flv', diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 1b8da43ca..acddbc8f1 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -83,12 +83,12 @@ class BandcampIE(InfoExtractor): initial_url = mp3_info['url'] re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' m_url = re.match(re_url, initial_url) - #We build the url we will use to get the final track url + # We build the url we will use to get the final track url # This url is build in Bandcamp in the script download_bunde_*.js request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') # If we could correctly generate the .rand field the url would be - #in the "download_url" key + # in the "download_url" key final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) return { diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 75e608f99..fa15bf19c 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -195,7 +195,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): duration = int(item.get('duration')) media_selection = self._download_xml( - 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, programme_id, 'Downloading media selection XML') for media in self._extract_medias(media_selection): @@ -220,4 +220,4 @@ class BBCCoUkIE(SubtitlesInfoExtractor): 'duration': duration, 'formats': formats, 'subtitles': subtitles, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 314e37f8b..4e79fea8f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -40,7 +40,7 @@ class BeegIE(InfoExtractor): title = self._html_search_regex( r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title') - + description = self._html_search_regex( r'<meta name="description" content="([^"]*)"', webpage, 'description', fatal=False) diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py index 0269d1174..77b562d99 100644 --- a/youtube_dl/extractor/bild.py +++ b/youtube_dl/extractor/bild.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index dddeaa59c..9873728df 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -112,4 +112,4 @@ class CanalplusIE(InfoExtractor): 'like_count': int(infos.find('NB_LIKES').text), 'comment_count': int(infos.find('NB_COMMENTS').text), 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 0bce7937f..7e47960ab 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -84,4 +84,4 @@ class CBSNewsIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 90a3dddb9..97feb6704 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -92,7 +92,7 @@ class CeskaTelevizeIE(InfoExtractor): req.add_header('Referer', url) playlist = self._download_xml(req, video_id) - + formats = [] for i in playlist.find('smilRoot/body'): if 'AD' not in i.attrib['id']: diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 16d800512..82a48870a 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ExtractorError + class Channel9IE(InfoExtractor): ''' Common extractor for channel9.msdn.com. @@ -31,7 +32,7 @@ class Channel9IE(InfoExtractor): 'session_code': 'KOS002', 'session_day': 'Day 1', 'session_room': 'Arena 1A', - 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ], + 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'], }, }, { @@ -44,7 +45,7 @@ class Channel9IE(InfoExtractor): 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', 'duration': 1540, 'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', - 'authors': [ 'Mike Wilmot' ], + 'authors': ['Mike Wilmot'], }, } ] @@ -83,7 +84,7 @@ class Channel9IE(InfoExtractor): 'format_id': x.group('quality'), 'format_note': x.group('note'), 'format': '%s (%s)' % (x.group('quality'), x.group('note')), - 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate + 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate 'preference': self._known_formats.index(x.group('quality')), 'vcodec': 'none' if x.group('note') == 'Audio only' else None, } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] @@ -202,17 +203,17 @@ class Channel9IE(InfoExtractor): if slides is not None: d = common.copy() - d.update({ 'title': title + '-Slides', 'url': slides }) + d.update({'title': title + '-Slides', 'url': slides}) result.append(d) if zip_ is not None: d = common.copy() - d.update({ 'title': title + '-Zip', 'url': zip_ }) + d.update({'title': title + '-Zip', 'url': zip_}) result.append(d) if len(formats) > 0: d = common.copy() - d.update({ 'title': title, 'formats': formats }) + d.update({'title': title, 'formats': formats}) result.append(d) return result @@ -270,5 +271,5 @@ class Channel9IE(InfoExtractor): else: raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) - else: # Assuming list + else: # Assuming list return self._extract_list(content_path) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 31fe906b4..b7fa73c3b 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -77,7 +77,7 @@ class CinemassacreIE(InfoExtractor): if videolist_url: videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') formats = [] - baseurl = vidurl[:vidurl.rfind('/')+1] + baseurl = vidurl[:vidurl.rfind('/') + 1] for video in videolist.findall('.//video'): src = video.get('src') if not src: diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index 02a1667fa..d07d544ea 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -39,6 +39,7 @@ class ClipsyndicateIE(InfoExtractor): transform_source=fix_xml_ampersands) track_doc = pdoc.find('trackList/track') + def find_param(name): node = find_xpath_attr(track_doc, './/param', 'name', name) if node is not None: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 93a5a3d57..f0489ede4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -423,17 +423,18 @@ class InfoExtractor(object): """Report attempt to log in.""" self.to_screen('Logging in') - #Methods for following #608 + # Methods for following #608 @staticmethod def url_result(url, ie=None, video_id=None): """Returns a url that points to a page that should be processed""" - #TODO: ie should be the class used for getting the info + # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', 'url': url, 'ie_key': ie} if video_id is not None: video_info['id'] = video_id return video_info + @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" @@ -517,7 +518,7 @@ class InfoExtractor(object): raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError) as err: self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) - + return (username, password) def _get_tfa_info(self): diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index 74b880ffc..cf763ee7e 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -54,7 +54,7 @@ class CrackedIE(InfoExtractor): return { 'id': video_id, - 'url':video_url, + 'url': video_url, 'title': title, 'description': description, 'timestamp': timestamp, @@ -62,4 +62,4 @@ class CrackedIE(InfoExtractor): 'comment_count': comment_count, 'height': height, 'width': width, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index fe1324fe3..c3c4d114a 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -69,11 +69,9 @@ class CrunchyrollIE(SubtitlesInfoExtractor): login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(login_request, None, False, 'Wrong login info') - def _real_initialize(self): self._login() - def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(data) iv = bytes_to_intlist(iv) @@ -99,8 +97,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor): return shaHash + [0] * 12 key = obfuscate_key(id) + class Counter: __value = iv + def next_value(self): temp = self.__value self.__value = inc(self.__value) @@ -183,7 +183,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return output - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('video_id') @@ -226,10 +226,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text formats = [] for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): stream_quality, stream_format = self._FORMAT_IDS[fmt] - video_format = fmt+'p' + video_format = fmt + 'p' streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') # urlencode doesn't work! - streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format + streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) streamdata = self._download_xml( @@ -248,8 +248,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text subtitles = {} sub_format = self._downloader.params.get('subtitlesformat', 'srt') for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): - sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ - video_id, note='Downloading subtitles for '+sub_name) + sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,\ + video_id, note='Downloading subtitles for ' + sub_name) id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) @@ -274,14 +274,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return return { - 'id': video_id, - 'title': video_title, + 'id': video_id, + 'title': video_title, 'description': video_description, - 'thumbnail': video_thumbnail, - 'uploader': video_uploader, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, 'upload_date': video_upload_date, - 'subtitles': subtitles, - 'formats': formats, + 'subtitles': subtitles, + 'formats': formats, } diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index fd4bc75b2..22cdcdfa5 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -18,6 +18,7 @@ from ..utils import ( unescapeHTML, ) + class DailymotionBaseInfoExtractor(InfoExtractor): @staticmethod def _build_request(url): @@ -27,6 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): request.add_header('Cookie', 'ff=off') return request + class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): """Information Extractor for Dailymotion""" diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index c5529f8d4..3ffed3d44 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -26,13 +26,13 @@ class DefenseGouvFrIE(InfoExtractor): video_id = self._search_regex( r"flashvars.pvg_id=\"(\d+)\";", webpage, 'ID') - + json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' + video_id) info = self._download_webpage(json_url, title, 'Downloading JSON config') video_url = json.loads(info)['renditions'][0]['url'] - + return {'id': video_id, 'ext': 'mp4', 'url': video_url, diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index 5ae0ad5b6..638bb33cd 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -27,7 +27,7 @@ class DotsubIE(InfoExtractor): video_id = mobj.group('id') info_url = "https://dotsub.com/api/media/%s/metadata" % video_id info = self._download_json(info_url, video_id) - date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds + date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds return { 'id': video_id, diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c663a0f81..6f5d23559 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -40,7 +40,7 @@ class FC2IE(InfoExtractor): info_url = ( "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". - format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E'))) + format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E'))) info_webpage = self._download_webpage( info_url, video_id, note='Downloading info page') diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index c2e987ff7..3410daa98 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -57,4 +57,4 @@ class FirstTVIE(InfoExtractor): 'duration': int_or_none(duration), 'like_count': int_or_none(like_count), 'dislike_count': int_or_none(dislike_count), - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index e09982e88..5b0bc9d21 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -17,8 +17,8 @@ class FlickrIE(InfoExtractor): 'info_dict': { 'id': '5645318632', 'ext': 'mp4', - "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", - "uploader_id": "forestwander-nature-pictures", + "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", + "uploader_id": "forestwander-nature-pictures", "title": "Dark Hollow Waterfalls" } } diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index 7d56b9be9..24d4e9754 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -92,4 +92,4 @@ class FourTubeIE(InfoExtractor): 'duration': duration, 'age_limit': 18, 'webpage_url': webpage_url, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c7a824c29..109dd20db 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -733,7 +733,7 @@ class GenericIE(InfoExtractor): 'title': video_title, 'id': video_id, } - + match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) if match: return { @@ -748,7 +748,7 @@ class GenericIE(InfoExtractor): # Look for embedded blip.tv player mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) if mobj: - return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV') + return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV') mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage) if mobj: return self.url_result(mobj.group(1), 'BlipTV') @@ -1025,4 +1025,3 @@ class GenericIE(InfoExtractor): '_type': 'playlist', 'entries': entries, } - diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 66ca37918..6949a57c7 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -397,4 +397,4 @@ class GloboIE(InfoExtractor): 'uploader_id': uploader_id, 'like_count': like_count, 'formats': formats - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index e21e57510..65b153417 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -69,7 +69,7 @@ class GorillaVidIE(InfoExtractor): (?:id="[^"]+"\s+)? value="([^"]*)" ''', webpage)) - + if fields['op'] == 'download1': post = compat_urllib_parse.urlencode(fields) diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py index 7e7714438..5b6efb27e 100644 --- a/youtube_dl/extractor/hornbunny.py +++ b/youtube_dl/extractor/hornbunny.py @@ -37,7 +37,7 @@ class HornBunnyIE(InfoExtractor): webpage2 = self._download_webpage(redirect_url, video_id) video_url = self._html_search_regex( r'flvMask:(.*?);', webpage2, 'video_url') - + duration = parse_duration(self._search_regex( r'<strong>Runtime:</strong>\s*([0-9:]+)</div>', webpage, 'duration', fatal=False)) diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 6ae04782c..4ddf06409 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -13,7 +13,7 @@ class HowcastIE(InfoExtractor): 'info_dict': { 'id': '390161', 'ext': 'mp4', - 'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.', + 'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.', 'title': 'How to Tie a Square Knot Properly', } } diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 6108ed552..f2c1c10f5 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -71,7 +71,7 @@ class ImdbListIE(InfoExtractor): }, 'playlist_count': 7, } - + def _real_extract(self, url): list_id = self._match_id(url) webpage = self._download_webpage(url, list_id) diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 53f9a5f75..c137f4a5d 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -32,7 +32,7 @@ class InternetVideoArchiveIE(InfoExtractor): def _clean_query(query): NEEDED_ARGS = ['publishedid', 'customerid'] query_dic = compat_urlparse.parse_qs(query) - cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS) + cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS) # Other player ids return m3u8 urls cleaned_dic['playerid'] = '247' cleaned_dic['videokbrate'] = '100000' diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index d1defd363..4247d6391 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -54,7 +54,7 @@ class IPrimaIE(InfoExtractor): player_url = ( 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % - (floor(random()*1073741824), floor(random()*1073741824)) + (floor(random() * 1073741824), floor(random() * 1073741824)) ) req = compat_urllib_request.Request(player_url) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 75b543b7c..06af73417 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -102,7 +102,7 @@ class IviIE(InfoExtractor): compilation = result['compilation'] title = result['title'] - title = '%s - %s' % (compilation, title) if compilation is not None else title + title = '%s - %s' % (compilation, title) if compilation is not None else title previews = result['preview'] previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) @@ -152,17 +152,17 @@ class IviCompilationIE(InfoExtractor): compilation_id = mobj.group('compilationid') season_id = mobj.group('seasonid') - if season_id is not None: # Season link + if season_id is not None: # Season link season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id) playlist_id = '%s/season%s' % (compilation_id, season_id) playlist_title = self._html_search_meta('title', season_page, 'title') entries = self._extract_entries(season_page, compilation_id) - else: # Compilation link + else: # Compilation link compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page') playlist_id = compilation_id playlist_title = self._html_search_meta('title', compilation_page, 'title') seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page) - if len(seasons) == 0: # No seasons in this compilation + if len(seasons) == 0: # No seasons in this compilation entries = self._extract_entries(compilation_page, compilation_id) else: entries = [] @@ -172,4 +172,4 @@ class IviCompilationIE(InfoExtractor): compilation_id, 'Downloading season %s web page' % season_id) entries.extend(self._extract_entries(season_page, compilation_id)) - return self.playlist_result(entries, playlist_id, playlist_title)
\ No newline at end of file + return self.playlist_result(entries, playlist_id, playlist_title) diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py index ace08769b..063e86de4 100644 --- a/youtube_dl/extractor/jadorecettepub.py +++ b/youtube_dl/extractor/jadorecettepub.py @@ -45,4 +45,3 @@ class JadoreCettePubIE(InfoExtractor): 'title': title, 'description': description, } - diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 188165966..8094cc2e4 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -29,7 +29,7 @@ class JeuxVideoIE(InfoExtractor): xml_link = self._html_search_regex( r'<param name="flashvars" value="config=(.*?)" />', webpage, 'config URL') - + video_id = self._search_regex( r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', xml_link, 'video ID') @@ -38,7 +38,7 @@ class JeuxVideoIE(InfoExtractor): xml_link, title, 'Downloading XML config') info_json = config.find('format.json').text info = json.loads(info_json)['versions'][0] - + video_url = 'http://video720.jeuxvideo.com/' + info['file'] return { diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py index 23103b163..dbfe4cc03 100644 --- a/youtube_dl/extractor/kankan.py +++ b/youtube_dl/extractor/kankan.py @@ -10,7 +10,7 @@ _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class KankanIE(InfoExtractor): _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml' - + _TEST = { 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', 'file': '48863.flv', diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 8a73ecfa0..41fd62009 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -63,4 +63,4 @@ class KontrTubeIE(InfoExtractor): 'duration': duration, 'view_count': int_or_none(view_count), 'comment_count': int_or_none(comment_count), - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py index 89013e522..a602980a1 100644 --- a/youtube_dl/extractor/ku6.py +++ b/youtube_dl/extractor/ku6.py @@ -30,4 +30,3 @@ class Ku6IE(InfoExtractor): 'title': title, 'url': downloadUrl } - diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 102e29f7a..2fd3b4699 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -75,4 +75,3 @@ class Laola1TvIE(InfoExtractor): 'categories': categories, 'ext': 'mp4', } - diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 8d9491f23..1dfe7f77f 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -52,7 +52,7 @@ class LifeNewsIE(InfoExtractor): r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False) upload_date = self._html_search_regex( - r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False) + r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False) if upload_date is not None: upload_date = unified_strdate(upload_date) @@ -71,4 +71,4 @@ class LifeNewsIE(InfoExtractor): if len(videos) == 1: return make_entry(video_id, videos[0]) else: - return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
\ No newline at end of file + return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)] diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 8e50e8f79..b04be1e8c 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -19,8 +19,7 @@ class LiveLeakIE(InfoExtractor): 'uploader': 'ljfriel2', 'title': 'Most unlucky car accident' } - }, - { + }, { 'url': 'http://www.liveleak.com/view?i=f93_1390833151', 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', 'info_dict': { @@ -30,8 +29,7 @@ class LiveLeakIE(InfoExtractor): 'uploader': 'ARD_Stinkt', 'title': 'German Television does first Edward Snowden Interview (ENGLISH)', } - }, - { + }, { 'url': 'http://www.liveleak.com/view?i=4f7_1392687779', 'md5': '42c6d97d54f1db107958760788c5f48f', 'info_dict': { diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 33f34f4e9..97ca4337b 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -109,7 +109,7 @@ class LyndaIE(SubtitlesInfoExtractor): 'password': password, 'remember': 'false', 'stayPut': 'false' - } + } request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) login_page = self._download_webpage(request, None, 'Logging in as %s' % username) @@ -117,7 +117,7 @@ class LyndaIE(SubtitlesInfoExtractor): m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) if m is not None: response = m.group('json') - response_json = json.loads(response) + response_json = json.loads(response) state = response_json['state'] if state == 'notlogged': @@ -187,7 +187,7 @@ class LyndaCourseIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) course_path = mobj.group('coursepath') course_id = mobj.group('courseid') - + page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, course_id, 'Downloading course JSON') course_json = json.loads(page) @@ -221,4 +221,4 @@ class LyndaCourseIE(InfoExtractor): course_title = course_json['Title'] - return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index 1a26b5d57..3d806323a 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -53,4 +53,4 @@ class M6IE(InfoExtractor): 'duration': duration, 'view_count': view_count, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py index 8c1966ab2..1abf6e4f8 100644 --- a/youtube_dl/extractor/malemotion.py +++ b/youtube_dl/extractor/malemotion.py @@ -7,6 +7,7 @@ from ..utils import ( compat_urllib_parse, ) + class MalemotionIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)' _TEST = { diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 1b8c4a32e..5fdd19027 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class MDRIE(InfoExtractor): _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)' - + # No tests, MDR regularily deletes its videos _TEST = { 'url': 'http://www.mdr.de/fakt/video189002.html', diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 1a896b536..f68add6c0 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -22,7 +22,7 @@ class MetacafeIE(InfoExtractor): # Youtube video { 'add_ie': ['Youtube'], - 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/', + 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/', 'info_dict': { 'id': '_aUehQsCQtM', 'ext': 'mp4', diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dl/extractor/mojvideo.py index 90b460d65..0ba435dc5 100644 --- a/youtube_dl/extractor/mojvideo.py +++ b/youtube_dl/extractor/mojvideo.py @@ -55,4 +55,4 @@ class MojvideoIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail, 'duration': duration, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 79bb2ca59..1c4f589cc 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -54,7 +54,7 @@ class MonikerIE(InfoExtractor): title = os.path.splitext(data['fname'])[0] - #Could be several links with different quality + # Could be several links with different quality links = re.findall(r'"file" : "?(.+?)",', webpage) # Assume the links are ordered in quality formats = [{ diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 7d21ea18f..f50e432a3 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 3621ff99e..97d5da626 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -72,7 +72,7 @@ class MotherlessIE(InfoExtractor): like_count = str_to_int(self._html_search_regex( r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like count', fatal=False)) - + upload_date = self._html_search_regex( r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date') if 'Ago' in upload_date: diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index 43146180a..f130b75c4 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -27,7 +27,7 @@ class MoviezineIE(InfoExtractor): webpage = self._download_webpage(url, video_id) jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') - formats =[{ + formats = [{ 'format_id': 'sd', 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'), 'quality': 0, diff --git a/youtube_dl/extractor/movshare.py b/youtube_dl/extractor/movshare.py index 4191cf7a0..6101063f2 100644 --- a/youtube_dl/extractor/movshare.py +++ b/youtube_dl/extractor/movshare.py @@ -24,4 +24,4 @@ class MovShareIE(NovaMovIE): 'title': 'dissapeared image', 'description': 'optical illusion dissapeared image magic illusion', } - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 387935d4d..88c9501cd 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -44,7 +44,7 @@ class MporaIE(InfoExtractor): r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'], False, default=None) vcodec = src['type'].partition('/')[2] - + formats.append({ 'format_id': encoding_id + '-' + vcodec, 'url': src['src'], diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5f0f476b6..b6755ff01 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -60,7 +60,7 @@ class MTVServicesInfoExtractor(InfoExtractor): url = response.geturl() # Transform the url to get the best quality: url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) - return [{'url': url,'ext': 'mp4'}] + return [{'url': url, 'ext': 'mp4'}] def _extract_video_formats(self, mdoc, mtvn_id): if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: @@ -240,15 +240,15 @@ class MTVIE(MTVServicesInfoExtractor): uri = mobj.groupdict().get('mgid') if uri is None: webpage = self._download_webpage(url, video_id) - + # Some videos come from Vevo.com m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage, re.DOTALL) if m_vevo: - vevo_id = m_vevo.group(1); + vevo_id = m_vevo.group(1) self.to_screen('Vevo video detected: %s' % vevo_id) return self.url_result('vevo:%s' % vevo_id, ie='Vevo') - + uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') return self._get_videos_info(uri) diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py index c7f6beb9c..b4e8ad17e 100644 --- a/youtube_dl/extractor/muenchentv.py +++ b/youtube_dl/extractor/muenchentv.py @@ -73,4 +73,3 @@ class MuenchenTVIE(InfoExtractor): 'is_live': True, 'thumbnail': thumbnail, } - diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py index 42d7a82a5..50d92b50a 100644 --- a/youtube_dl/extractor/musicplayon.py +++ b/youtube_dl/extractor/musicplayon.py @@ -72,4 +72,4 @@ class MusicPlayOnIE(InfoExtractor): 'duration': int_or_none(duration), 'view_count': int_or_none(view_count), 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py index 1772b7f9a..97edd3ef6 100644 --- a/youtube_dl/extractor/muzu.py +++ b/youtube_dl/extractor/muzu.py @@ -37,7 +37,7 @@ class MuzuTVIE(InfoExtractor): player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id, video_id, u'Downloading player info') video_info = json.loads(player_info_page)['videos'][0] - for quality in ['1080' , '720', '480', '360']: + for quality in ['1080', '720', '480', '360']: if video_info.get('v%s' % quality): break diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index a89153985..956cf8b86 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -33,7 +33,7 @@ class MyVideoIE(InfoExtractor): # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git # Released into the Public Domain by Tristan Fischer on 2013-05-19 # https://github.com/rg3/youtube-dl/pull/842 - def __rc4crypt(self,data, key): + def __rc4crypt(self, data, key): x = 0 box = list(range(256)) for i in list(range(256)): @@ -49,10 +49,10 @@ class MyVideoIE(InfoExtractor): out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]) return out - def __md5(self,s): + def __md5(self, s): return hashlib.md5(s).hexdigest().encode() - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -173,4 +173,3 @@ class MyVideoIE(InfoExtractor): 'play_path': video_playpath, 'player_url': video_swfobj, } - diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index cb1af9ecc..5ce35dbf5 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -40,7 +40,7 @@ class NaverIE(InfoExtractor): raise ExtractorError('couldn\'t extract vid and key') vid = m_id.group(1) key = m_id.group(2) - query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) + query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, }) query_urls = compat_urllib_parse.urlencode({ 'masterVid': vid, 'protocol': 'p2p', @@ -65,7 +65,7 @@ class NaverIE(InfoExtractor): if domain.startswith('rtmp'): f.update({ 'ext': 'flv', - 'rtmp_protocol': '1', # rtmpt + 'rtmp_protocol': '1', # rtmpt }) formats.append(f) self._sort_formats(formats) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 78e650b2d..f69fe0925 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -39,7 +39,6 @@ class NBAIE(InfoExtractor): duration = parse_duration( self._html_search_meta('duration', webpage, 'duration', fatal=False)) - return { 'id': shortened_video_id, 'url': video_url, diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index c31ff8160..f49c66690 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -91,4 +91,4 @@ class NDRIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 2e72e8915..cd117b04e 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -23,12 +23,12 @@ class NewgroundsIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) music_id = mobj.group('id') webpage = self._download_webpage(url, music_id) - + title = self._html_search_regex( r',"name":"([^"]+)",', webpage, 'music title') uploader = self._html_search_regex( r',"artist":"([^"]+)",', webpage, 'music uploader') - + music_url_json_string = self._html_search_regex( r'({"url":"[^"]+"),', webpage, 'music url') + '}' music_url_json = json.loads(music_url_json_string) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 551bd4d7a..85fcad06b 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -89,4 +89,4 @@ class NewstubeIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index ba7b77a46..09dcc8c84 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -93,4 +93,4 @@ class NFBIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 82af6e330..31813eb68 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -31,7 +31,7 @@ class NHLBaseInfoExtractor(InfoExtractor): path_url, video_id, 'Downloading final video url') video_url = path_doc.find('path').text else: - video_url = initial_video_url + video_url = initial_video_url join = compat_urlparse.urljoin return { diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 7f1bc6377..18c869b39 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -163,4 +163,4 @@ class NocoIE(InfoExtractor): 'uploader_id': uploader_id, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 2e7ab1e4f..38d05e466 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -66,4 +66,4 @@ class NovaMovIE(InfoExtractor): 'url': video_url, 'title': title, 'description': description - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py index bfba18418..ecb38de2d 100644 --- a/youtube_dl/extractor/nowvideo.py +++ b/youtube_dl/extractor/nowvideo.py @@ -25,4 +25,4 @@ class NowVideoIE(NovaMovIE): 'title': 'youtubedl test video _BaW_jenozKc.mp4', 'description': 'Description', } - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py index ed60314ec..13c8d79cd 100644 --- a/youtube_dl/extractor/ntv.py +++ b/youtube_dl/extractor/ntv.py @@ -145,4 +145,4 @@ class NTVIE(InfoExtractor): 'duration': duration, 'view_count': view_count, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index 58ec81f91..449c8a6a3 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -71,4 +71,4 @@ class NuvidIE(InfoExtractor): 'upload_date': upload_date, 'age_limit': 18, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 7bf105d38..56e1cad3b 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -74,4 +74,4 @@ class NYTimesIE(InfoExtractor): 'duration': duration, 'formats': formats, 'thumbnails': thumbnails, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 2044e107e..30d80fa6b 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -97,4 +97,3 @@ class OoyalaIE(InfoExtractor): } else: return self._extract_result(videos_info[0], videos_more_info) - diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 011e6be13..572a234ad 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -178,4 +178,4 @@ class ORFFM4IE(InfoExtractor): 'title': data['title'], 'description': data['subtitle'], 'entries': entries - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py index ffafd2380..f20946a2b 100644 --- a/youtube_dl/extractor/podomatic.py +++ b/youtube_dl/extractor/podomatic.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import int_or_none + class PodomaticIE(InfoExtractor): IE_NAME = 'podomatic' _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 618e8f5dd..2ca15b717 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment') - video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) + video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 04bd3d979..5253aa3d3 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -38,7 +38,7 @@ class PornotubeIE(InfoExtractor): video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url') video_url = compat_urllib_parse.unquote(video_url) - #Get the uploaded date + # Get the uploaded date VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False) if upload_date: diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 619496de7..32d747ede 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -280,4 +280,4 @@ class ProSiebenSat1IE(InfoExtractor): 'upload_date': upload_date, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index ba3dd707f..2d39ecfe4 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -119,4 +119,4 @@ class RaiIE(SubtitlesInfoExtractor): if captions.endswith(STL_EXT): captions = captions[:-len(STL_EXT)] + SRT_EXT subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) - return subtitles
\ No newline at end of file + return subtitles diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py index 9fbdb9fcb..59dc137cc 100644 --- a/youtube_dl/extractor/ringtv.py +++ b/youtube_dl/extractor/ringtv.py @@ -41,4 +41,3 @@ class RingTVIE(InfoExtractor): 'thumbnail': thumbnail_url, 'description': description, } - diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index 4a188e5d4..d029b0ec5 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -44,7 +44,7 @@ class RtlXlIE(InfoExtractor): formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4') - video_urlpart = videopath.split('/flash/')[1][:-4] + video_urlpart = videopath.split('/flash/')[1][:-5] PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4' formats.extend([ diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 1a41cbe40..285c3c4be 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -122,7 +122,7 @@ class RTLnowIE(InfoExtractor): playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML') videoinfo = playerdata.find('./playlist/videoinfo') - + formats = [] for filename in videoinfo.findall('filename'): mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text) @@ -153,4 +153,4 @@ class RTLnowIE(InfoExtractor): 'upload_date': upload_date, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 4dd35a47b..0ce22d60c 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -54,7 +54,6 @@ def _decrypt_url(png): return url - class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index f737b4e5f..a73e6f331 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -191,4 +191,4 @@ class RUTVIE(InfoExtractor): 'view_count': view_count, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/scivee.py b/youtube_dl/extractor/scivee.py index 55a481cc0..3bf93c870 100644 --- a/youtube_dl/extractor/scivee.py +++ b/youtube_dl/extractor/scivee.py @@ -53,4 +53,4 @@ class SciVeeIE(InfoExtractor): 'description': description, 'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py index 1dc551d5c..16dc3736b 100644 --- a/youtube_dl/extractor/servingsys.py +++ b/youtube_dl/extractor/servingsys.py @@ -67,5 +67,3 @@ class ServingSysIE(InfoExtractor): 'title': title, 'entries': entries, } - -
\ No newline at end of file diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index badba2ac6..fdc31603a 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -54,4 +54,4 @@ class SharedIE(InfoExtractor): 'filesize': filesize, 'title': title, 'thumbnail': thumbnail, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 9bd5defa7..b6a71305f 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,7 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import os.path import re import json import hashlib @@ -12,15 +11,15 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, ExtractorError, - url_basename, int_or_none, + unified_strdate, ) class SmotriIE(InfoExtractor): IE_DESC = 'Smotri.com' IE_NAME = 'smotri' - _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' + _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' _NETRC_MACHINE = 'smotri' _TESTS = [ @@ -35,7 +34,6 @@ class SmotriIE(InfoExtractor): 'uploader': 'rbc2008', 'uploader_id': 'rbc08', 'upload_date': '20131118', - 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', }, }, @@ -50,7 +48,6 @@ class SmotriIE(InfoExtractor): 'uploader': 'Support Photofile@photofile', 'uploader_id': 'support-photofile', 'upload_date': '20070704', - 'description': 'test, видео test', 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', }, }, @@ -66,7 +63,6 @@ class SmotriIE(InfoExtractor): 'uploader_id': 'timoxa40', 'upload_date': '20100404', 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', - 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', }, 'params': { 'videopassword': 'qwerty', @@ -85,7 +81,6 @@ class SmotriIE(InfoExtractor): 'upload_date': '20101001', 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', 'age_limit': 18, - 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', }, 'params': { 'videopassword': '333' @@ -102,17 +97,11 @@ class SmotriIE(InfoExtractor): 'uploader': 'HannahL', 'uploader_id': 'lisaha95', 'upload_date': '20090331', - 'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother', 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg', }, }, ] - _SUCCESS = 0 - _PASSWORD_NOT_VERIFIED = 1 - _PASSWORD_DETECTED = 2 - _VIDEO_NOT_FOUND = 3 - @classmethod def _extract_url(cls, webpage): mobj = re.search( @@ -137,44 +126,44 @@ class SmotriIE(InfoExtractor): return self._html_search_meta(name, html, display_name) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - real_video_id = mobj.group('realvideoid') + video_id = self._match_id(url) + + video_form = { + 'ticket': video_id, + 'video_url': '1', + 'frame_url': '1', + 'devid': 'LoadupFlashPlayer', + 'getvideoinfo': '1', + } - # Download video JSON data - video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id - video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON') - video_json = json.loads(video_json_page) + request = compat_urllib_request.Request( + 'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') - status = video_json['status'] - if status == self._VIDEO_NOT_FOUND: + video = self._download_json(request, video_id, 'Downloading video JSON') + + if video.get('_moderate_no') or not video.get('moderated'): + raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True) + + if video.get('error'): raise ExtractorError('Video %s does not exist' % video_id, expected=True) - elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with - # video-password set - video_password = self._downloader.params.get('videopassword', None) - if not video_password: - raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest() - video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)') - video_json = json.loads(video_json_page) - status = video_json['status'] - if status == self._PASSWORD_NOT_VERIFIED: - raise ExtractorError('Video password is invalid', expected=True) - - if status != self._SUCCESS: - raise ExtractorError('Unexpected status value %s' % status) - - # Extract the URL of the video - video_url = video_json['file_data'] + + video_url = video.get('_vidURL') or video.get('_vidURL_mp4') + title = video['title'] + thumbnail = video['_imgURL'] + upload_date = unified_strdate(video['added']) + uploader = video['userNick'] + uploader_id = video['userLogin'] + duration = int_or_none(video['duration']) # Video JSON does not provide enough meta data # We will extract some from the video web page instead - video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id - video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page') + webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id + webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page') # Warning if video is unavailable warning = self._html_search_regex( - r'<div class="videoUnModer">(.*?)</div>', video_page, + r'<div class="videoUnModer">(.*?)</div>', webpage, 'warning message', default=None) if warning is not None: self._downloader.report_warning( @@ -182,84 +171,32 @@ class SmotriIE(InfoExtractor): (video_id, warning)) # Adult content - if re.search('EroConfirmText">', video_page) is not None: + if re.search('EroConfirmText">', webpage) is not None: self.report_age_confirmation() confirm_string = self._html_search_regex( r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id, - video_page, 'confirm string') - confirm_url = video_page_url + '&confirm=%s' % confirm_string - video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)') + webpage, 'confirm string') + confirm_url = webpage_url + '&confirm=%s' % confirm_string + webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)') adult_content = True else: adult_content = False - # Extract the rest of meta data - video_title = self._search_meta('name', video_page, 'title') - if not video_title: - video_title = os.path.splitext(url_basename(video_url))[0] - - video_description = self._search_meta('description', video_page) - END_TEXT = ' на сайте Smotri.com' - if video_description and video_description.endswith(END_TEXT): - video_description = video_description[:-len(END_TEXT)] - START_TEXT = 'Смотреть онлайн ролик ' - if video_description and video_description.startswith(START_TEXT): - video_description = video_description[len(START_TEXT):] - video_thumbnail = self._search_meta('thumbnail', video_page) - - upload_date_str = self._search_meta('uploadDate', video_page, 'upload date') - if upload_date_str: - upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) - video_upload_date = ( - ( - upload_date_m.group('year') + - upload_date_m.group('month') + - upload_date_m.group('day') - ) - if upload_date_m else None - ) - else: - video_upload_date = None - - duration_str = self._search_meta('duration', video_page) - if duration_str: - duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) - video_duration = ( - ( - (int(duration_m.group('hours')) * 60 * 60) + - (int(duration_m.group('minutes')) * 60) + - int(duration_m.group('seconds')) - ) - if duration_m else None - ) - else: - video_duration = None - - video_uploader = self._html_search_regex( - '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', - video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) - - video_uploader_id = self._html_search_regex( - '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', - video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) - - video_view_count = self._html_search_regex( + view_count = self._html_search_regex( 'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', - video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) + webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL) return { 'id': video_id, 'url': video_url, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'uploader': video_uploader, - 'upload_date': video_upload_date, - 'uploader_id': video_uploader_id, - 'duration': video_duration, - 'view_count': int_or_none(video_view_count), + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'duration': duration, + 'view_count': int_or_none(view_count), 'age_limit': 18 if adult_content else 0, - 'video_page_url': video_page_url } @@ -275,7 +212,7 @@ class SmotriCommunityIE(InfoExtractor): }, 'playlist_mincount': 4, } - + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) community_id = mobj.group('communityid') diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index bebcafb62..0348e7460 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -69,7 +69,7 @@ class SohuIE(InfoExtractor): (allot, prot, clipsURL[i], su[i])) part_str = self._download_webpage( part_url, video_id, - note=u'Downloading part %d of %d' % (i+1, part_count)) + note=u'Downloading part %d of %d' % (i + 1, part_count)) part_info = part_str.split('|') video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 54256e1a2..f92f7fa24 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -224,14 +224,14 @@ class SoundcloudIE(InfoExtractor): # extract uploader (which is in the url) uploader = mobj.group('uploader') # extract simple title (uploader + slug of song title) - slug_title = mobj.group('title') + slug_title = mobj.group('title') token = mobj.group('token') full_title = resolve_title = '%s/%s' % (uploader, slug_title) if token: resolve_title += '/%s' % token - + self.report_resolve(full_title) - + url = 'http://soundcloud.com/%s' % resolve_title info_json_url = self._resolv_url(url) info = self._download_json(info_json_url, full_title, 'Downloading info JSON') diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index ef5e7c08b..98cf92d89 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -77,4 +77,4 @@ class SpiegeltvIE(InfoExtractor): 'description': description, 'duration': duration, 'thumbnails': thumbnails - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py index 3f680bfc6..dfe50ed45 100644 --- a/youtube_dl/extractor/sport5.py +++ b/youtube_dl/extractor/sport5.py @@ -89,4 +89,4 @@ class Sport5IE(InfoExtractor): 'duration': duration, 'categories': categories, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index abb827783..057ef5251 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -93,4 +93,3 @@ class SportDeutschlandIE(InfoExtractor): 'rtmp_live': asset.get('live'), 'timestamp': parse_iso8601(asset.get('date')), } - diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index db33745c1..41d740e87 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -50,7 +50,7 @@ class SubtitlesInfoExtractor(InfoExtractor): sub_lang_list = {} for sub_lang in requested_langs: - if not sub_lang in available_subs_list: + if sub_lang not in available_subs_list: self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) continue sub_lang_list[sub_lang] = available_subs_list[sub_lang] diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py index bf430d870..58073eefe 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor): if media_type == 'Video': fmt.update({ - 'format_note': ['144p', '288p', '544p', '720p'][quality-1], + 'format_note': ['144p', '288p', '544p', '720p'][quality - 1], 'vcodec': codec, }) elif media_type == 'Audio': diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 8a95fd656..6c3445d79 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -121,7 +121,7 @@ class TeacherTubeUserIE(InfoExtractor): urls = [] webpage = self._download_webpage(url, user_id) urls.extend(re.findall(self._MEDIA_RE, webpage)) - + pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1] for p in pages: more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index fa796ce72..d5dda34f6 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - + video_id = mobj.group("video_id") if not video_id: video_id = self._html_search_regex( diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index db9788c18..2a2fff5e1 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .mitele import MiTeleIE diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index a04925633..522a095a2 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -35,11 +35,12 @@ class ThePlatformIE(InfoExtractor): 'skip_download': True, }, } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') if mobj.group('config'): - config_url = url+ '&form=json' + config_url = url + '&form=json' config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') config = self._download_json(config_url, video_id, 'Downloading config') @@ -48,7 +49,6 @@ class ThePlatformIE(InfoExtractor): smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' 'format=smil&mbr=true'.format(video_id)) - meta = self._download_xml(smil_url, video_id) try: error_msg = next( @@ -118,5 +118,5 @@ class ThePlatformIE(InfoExtractor): 'formats': formats, 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], - 'duration': info['duration']//1000, + 'duration': info['duration'] // 1000, } diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index bfb9d2fc9..7f323c938 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -36,12 +36,12 @@ class ThisAVIE(InfoExtractor): r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', webpage, 'uploader id', fatal=False) ext = determine_ext(video_url) - + return { - 'id': video_id, - 'url': video_url, - 'uploader': uploader, + 'id': video_id, + 'url': video_url, + 'uploader': uploader, 'uploader_id': uploader_id, - 'title': title, - 'ext': ext, + 'title': title, + 'ext': ext, } diff --git a/youtube_dl/extractor/tinypic.py b/youtube_dl/extractor/tinypic.py index a4aa25f66..6ba5dc5f1 100644 --- a/youtube_dl/extractor/tinypic.py +++ b/youtube_dl/extractor/tinypic.py @@ -26,7 +26,7 @@ class TinyPicIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id, 'Downloading page') - + mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n' '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage) if mobj is None: @@ -47,4 +47,4 @@ class TinyPicIE(InfoExtractor): 'url': video_url, 'thumbnail': thumbnail, 'title': title - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 4956f8577..0ecd695f8 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -71,7 +71,7 @@ class TNAFlixIE(InfoExtractor): fmt['height'] = int(m.group(1)) formats.append(fmt) self._sort_formats(formats) - + return { 'id': video_id, 'display_id': display_id, diff --git a/youtube_dl/extractor/traileraddict.py b/youtube_dl/extractor/traileraddict.py index 11407428b..b1a440e79 100644 --- a/youtube_dl/extractor/traileraddict.py +++ b/youtube_dl/extractor/traileraddict.py @@ -25,7 +25,7 @@ class TrailerAddictIE(InfoExtractor): webpage = self._download_webpage(url, name) title = self._search_regex(r'<title>(.+?)</title>', - webpage, 'video title').replace(' - Trailer Addict','') + webpage, 'video title').replace(' - Trailer Addict', '') view_count_str = self._search_regex( r'<span class="views_n">([0-9,.]+)</span>', webpage, 'view count', fatal=False) @@ -43,10 +43,10 @@ class TrailerAddictIE(InfoExtractor): fvar = "fvar" info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id)) - info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage") + info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage") final_url = self._search_regex(r'&fileurl=(.+)', - info_webpage, 'Download url').replace('%3F','?') + info_webpage, 'Download url').replace('%3F', '?') thumbnail_url = self._search_regex(r'&image=(.+?)&', info_webpage, 'thumbnail url') diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index d64aaa41f..2f2c8aeda 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -39,7 +39,7 @@ class TriluliluIE(InfoExtractor): format_url, video_id, note=u'Downloading formats', errnote=u'Error while downloading formats') - + video_url_template = ( u'http://fs%(server)s.trilulilu.ro/stream.php?type=video' u'&source=site&hash=%(hash)s&username=%(userid)s&' @@ -63,4 +63,3 @@ class TriluliluIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, } - diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index dcd823d08..b6e4a432b 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -37,11 +37,11 @@ class TudouIE(InfoExtractor): }] def _url_for_id(self, id, quality = None): - info_url = "http://v2.tudou.com/f?id="+str(id) + info_url = "http://v2.tudou.com/f?id=" + str(id) if quality: info_url += '&hd' + quality webpage = self._download_webpage(info_url, id, "Opening the info webpage") - final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url') + final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url') return final_url def _real_extract(self, url): diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 27962b5fe..d81d1d1a6 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -84,4 +84,4 @@ class TvigleIE(InfoExtractor): 'duration': duration, 'age_limit': age_limit, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 054f42725..7df11fc19 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -161,4 +161,4 @@ class UdemyCourseIE(UdemyIE): for asset in response if asset.get('assetType') == 'Video' ] - return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index 27f9acb67..5aebcecd7 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -118,4 +118,4 @@ class VestiIE(InfoExtractor): if rutv_url: return self.url_result(rutv_url, 'RUTV') - raise ExtractorError('No video found', expected=True)
\ No newline at end of file + raise ExtractorError('No video found', expected=True) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index d3fa70e0e..70578a4cc 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -114,4 +114,4 @@ class VGTVIE(InfoExtractor): 'duration': float_or_none(data['duration'], 1000), 'view_count': data['displays'], 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index f11ca8217..71f520fb5 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -35,4 +35,3 @@ class ViceIE(InfoExtractor): except ExtractorError: raise ExtractorError('The page doesn\'t contain a video', expected=True) return self.url_result(ooyala_url, ie='Ooyala') - diff --git a/youtube_dl/extractor/videobam.py b/youtube_dl/extractor/videobam.py index fed95ef71..0eb3d9414 100644 --- a/youtube_dl/extractor/videobam.py +++ b/youtube_dl/extractor/videobam.py @@ -78,4 +78,4 @@ class VideoBamIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py index f75169041..d69fe1e77 100644 --- a/youtube_dl/extractor/videofyme.py +++ b/youtube_dl/extractor/videofyme.py @@ -6,13 +6,14 @@ from ..utils import ( determine_ext, ) + class VideofyMeIE(InfoExtractor): _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)' IE_NAME = u'videofy.me' _TEST = { u'url': u'http://www.videofy.me/thisisvideofyme/1100701', - u'file': u'1100701.mp4', + u'file': u'1100701.mp4', u'md5': u'c77d700bdc16ae2e9f3c26019bd96143', u'info_dict': { u'title': u'This is VideofyMe', @@ -20,7 +21,7 @@ class VideofyMeIE(InfoExtractor): u'uploader': u'VideofyMe', u'uploader_id': u'thisisvideofyme', }, - + } def _real_extract(self, url): @@ -30,7 +31,7 @@ class VideofyMeIE(InfoExtractor): video_id) video = config.find('video') sources = video.find('sources') - url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) + url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) for key in ['on', 'av', 'off']] if node is not None) video_url = url_node.find('url').text diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py index 65463c733..9f2cdb6ef 100644 --- a/youtube_dl/extractor/videopremium.py +++ b/youtube_dl/extractor/videopremium.py @@ -35,11 +35,11 @@ class VideoPremiumIE(InfoExtractor): r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title') return { - 'id': video_id, - 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16), - 'play_path': "mp4:%s.f4v" % video_id, - 'page_url': "http://videopremium.tv/" + video_id, - 'player_url': "http://videopremium.tv/uplayer/uppod.swf", - 'ext': 'f4v', - 'title': video_title, + 'id': video_id, + 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16), + 'play_path': "mp4:%s.f4v" % video_id, + 'page_url': "http://videopremium.tv/" + video_id, + 'player_url': "http://videopremium.tv/uplayer/uppod.swf", + 'ext': 'f4v', + 'title': video_title, } diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index a647807d0..1f938838c 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -58,4 +58,4 @@ class VideoTtIE(InfoExtractor): 'like_count': int_or_none(video['liked']), 'dislike_count': int_or_none(video['disliked']), 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/videoweed.py b/youtube_dl/extractor/videoweed.py index 4a08ddd43..ca2e50935 100644 --- a/youtube_dl/extractor/videoweed.py +++ b/youtube_dl/extractor/videoweed.py @@ -23,4 +23,4 @@ class VideoWeedIE(NovaMovIE): 'title': 'optical illusion dissapeared image magic illusion', 'description': '' }, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 669979e13..08a5a7b8d 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor @@ -18,16 +18,15 @@ class VidziIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - + webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex( r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url') title = self._html_search_regex( r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') - + return { 'id': video_id, 'title': title, 'url': video_url, } -
\ No newline at end of file diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 57ef8dc30..bbd3bbf7b 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -36,7 +36,7 @@ class VRTIE(InfoExtractor): 'timestamp': 1413835980.560, 'upload_date': '20141020', 'duration': 3238, - } + } }, # cobra.be { @@ -92,4 +92,4 @@ class VRTIE(InfoExtractor): 'timestamp': timestamp, 'duration': duration, 'formats': formats, - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 54d37da61..93a6e6454 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -224,4 +224,4 @@ class WDRMausIE(InfoExtractor): 'upload_date': upload_date, } -# TODO test _1
\ No newline at end of file +# TODO test _1 diff --git a/youtube_dl/extractor/worldstarhiphop.py b/youtube_dl/extractor/worldstarhiphop.py index bda3870db..d5c26a032 100644 --- a/youtube_dl/extractor/worldstarhiphop.py +++ b/youtube_dl/extractor/worldstarhiphop.py @@ -51,4 +51,3 @@ class WorldStarHipHopIE(InfoExtractor): 'title': video_title, 'thumbnail': thumbnail, } - diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index 71bd7c463..1b4e88365 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -47,4 +47,3 @@ class XBefIE(InfoExtractor): 'thumbnail': thumbnail, 'age_limit': 18, } - diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 4e8fbde8d..1e6c94623 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -42,7 +42,7 @@ class XHamsterIE(InfoExtractor): } ] - def _real_extract(self,url): + def _real_extract(self, url): def extract_video_url(webpage): mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage) if mp4 is None: diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index c3bb9b2cf..38448e7c0 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -97,7 +97,7 @@ class XTubeUserIE(InfoExtractor): url, username, note='Retrieving profile page') video_count = int(self._search_regex( - r'<strong>%s\'s Videos \(([0-9]+)\)</strong>'%username, profile_page, + r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % username, profile_page, 'video count')) PAGE_SIZE = 25 diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 117f0856a..39caf60f2 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -229,7 +229,7 @@ class YahooSearchIE(SearchInfoExtractor): for pagenum in itertools.count(0): result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) info = self._download_json(result_url, query, - note='Downloading results page '+str(pagenum+1)) + note='Downloading results page ' + str(pagenum + 1)) m = info['m'] results = info['results'] diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 9cd7989cc..7b621a9e3 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -47,4 +47,4 @@ class YnetIE(InfoExtractor): 'title': title, 'formats': self._extract_f4m_formats(f4m_url, video_id), 'thumbnail': self._og_search_thumbnail(webpage), - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 48d47a245..830ae6cd9 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -35,21 +35,21 @@ class YoukuIE(InfoExtractor): def _gen_sid(self): nowTime = int(time.time() * 1000) - random1 = random.randint(1000,1998) - random2 = random.randint(1000,9999) + random1 = random.randint(1000, 1998) + random2 = random.randint(1000, 9999) - return "%d%d%d" %(nowTime,random1,random2) + return "%d%d%d" % (nowTime, random1, random2) def _get_file_ID_mix_string(self, seed): mixed = [] source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890") seed = float(seed) for i in range(len(source)): - seed = (seed * 211 + 30031) % 65536 - index = math.floor(seed / 65536 * len(source)) + seed = (seed * 211 + 30031) % 65536 + index = math.floor(seed / 65536 * len(source)) mixed.append(source[int(index)]) source.remove(source[int(index)]) - #return ''.join(mixed) + # return ''.join(mixed) return mixed def _get_file_id(self, fileId, seed): @@ -100,12 +100,12 @@ class YoukuIE(InfoExtractor): keys = [s['k'] for s in config['data'][0]['segs'][format]] # segs is usually a dictionary, but an empty *list* if an error occured. - files_info=[] + files_info = [] sid = self._gen_sid() fileid = self._get_file_id(fileid, seed) - #column 8,9 of fileid represent the segment number - #fileid[7:9] should be changed + # column 8,9 of fileid represent the segment number + # fileid[7:9] should be changed for index, key in enumerate(keys): temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 7bfda45e7..ee61e2381 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -73,7 +73,7 @@ class YouPornIE(InfoExtractor): for encrypted_link in encrypted_links: link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8') links.append(link) - + formats = [] for link in links: # A link looks like this: @@ -104,7 +104,7 @@ class YouPornIE(InfoExtractor): if not formats: raise ExtractorError(u'ERROR: no known formats available for video') - + return { 'id': video_id, 'uploader': video_uploader, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b4bbaf902..45c85ed01 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -33,6 +33,7 @@ from ..utils import ( uppercase_escape, ) + class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -76,30 +77,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # Log in login_form_strs = { - 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', - 'Email': username, - 'GALX': galx, - 'Passwd': password, - - 'PersistentCookie': 'yes', - '_utf8': '霱', - 'bgresponse': 'js_disabled', - 'checkConnection': '', - 'checkedDomains': 'youtube', - 'dnConn': '', - 'pstMsg': '0', - 'rmShown': '1', - 'secTok': '', - 'signIn': 'Sign in', - 'timeStmp': '', - 'service': 'youtube', - 'uilel': '3', - 'hl': 'en_US', + 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', + 'Email': username, + 'GALX': galx, + 'Passwd': password, + + 'PersistentCookie': 'yes', + '_utf8': '霱', + 'bgresponse': 'js_disabled', + 'checkConnection': '', + 'checkedDomains': 'youtube', + 'dnConn': '', + 'pstMsg': '0', + 'rmShown': '1', + 'secTok': '', + 'signIn': 'Sign in', + 'timeStmp': '', + 'service': 'youtube', + 'uilel': '3', + 'hl': 'en_US', } # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode # chokes on unicode - login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') req = compat_urllib_request.Request(self._LOGIN_URL, login_data) @@ -149,7 +150,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'service': 'youtube', 'hl': 'en_US', } - tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items()) + tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items()) tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii') tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) @@ -180,8 +181,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'next_url': '/', 'action_confirm': 'Confirm', } - req = compat_urllib_request.Request(self._AGE_URL, - compat_urllib_parse.urlencode(age_form).encode('ascii')) + req = compat_urllib_request.Request( + self._AGE_URL, + compat_urllib_parse.urlencode(age_form).encode('ascii') + ) self._download_webpage( req, None, @@ -491,7 +494,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def gen_sig_code(idxs): def _genslice(start, end, step): starts = '' if start == 0 else str(start) - ends = (':%d' % (end+step)) if end + step >= 0 else ':' + ends = (':%d' % (end + step)) if end + step >= 0 else ':' steps = '' if step == 1 else (':%d' % step) return 's[%s%s%s]' % (starts, ends, steps) @@ -529,7 +532,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode, - 'Initial JS player signature function name') + 'Initial JS player signature function name') jsi = JSInterpreter(jscode) initial_function = jsi.extract_function(funcname) @@ -618,7 +621,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): list_url = caption_url + '&' + list_params caption_list = self._download_xml(list_url, video_id) original_lang_node = caption_list.find('track') - if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : + if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr': self._downloader.report_warning('Video doesn\'t have automatic captions') return {} original_lang = original_lang_node.attrib['lang_code'] @@ -651,10 +654,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _extract_from_m3u8(self, manifest_url, video_id): url_map = {} + def _get_urls(_manifest): lines = _manifest.split('\n') urls = filter(lambda l: l and not l.startswith('#'), - lines) + lines) return urls manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest') formats_urls = _get_urls(manifest) @@ -721,10 +725,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): age_gate = False for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + % (video_id, el_type)) video_info_webpage = self._download_webpage(video_info_url, video_id, - note=False, - errnote='unable to download video info webpage') + note=False, + errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) if 'token' in video_info: break @@ -850,7 +854,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # annotations video_annotations = None if self._downloader.params.get('writeannotations', False): - video_annotations = self._extract_annotations(video_id) + video_annotations = self._extract_annotations(video_id) # Decide which formats to download try: @@ -900,7 +904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'player_url': player_url, }] elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: - encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] + encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) url_map = {} @@ -974,6 +978,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): dash_manifest_url = video_info.get('dashmpd')[0] else: dash_manifest_url = ytplayer_config['args']['dashmpd'] + def decrypt_sig(mobj): s = mobj.group(1) dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) @@ -1014,25 +1019,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._sort_formats(formats) return { - 'id': video_id, - 'uploader': video_uploader, - 'uploader_id': video_uploader_id, - 'upload_date': upload_date, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'categories': video_categories, - 'subtitles': video_subtitles, - 'duration': video_duration, - 'age_limit': 18 if age_gate else 0, - 'annotations': video_annotations, + 'id': video_id, + 'uploader': video_uploader, + 'uploader_id': video_uploader_id, + 'upload_date': upload_date, + 'title': video_title, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'categories': video_categories, + 'subtitles': video_subtitles, + 'duration': video_duration, + 'age_limit': 18 if age_gate else 0, + 'annotations': video_annotations, 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id, - 'view_count': view_count, + 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, - 'formats': formats, + 'formats': formats, } + class YoutubePlaylistIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: @@ -1046,7 +1052,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): ) ( (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} - # Top tracks, they can also include dots + # Top tracks, they can also include dots |(?:MC)[\w\.]* ) .* @@ -1163,7 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self._extract_mix(playlist_id) if playlist_id.startswith('TL'): raise ExtractorError('For downloading YouTube.com top lists, use ' - 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) + 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1230,7 +1236,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): <span[^>]*>.*?%s.*?</span>''' % re.escape(query), channel_page, 'list') url = compat_urlparse.urljoin('https://www.youtube.com/', link) - + video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' ids = [] # sometimes the webpage doesn't contain the videos @@ -1298,7 +1304,7 @@ class YoutubeChannelIE(InfoExtractor): ids_in_page = self.extract_videos_from_page(page['content_html']) video_ids.extend(ids_in_page) - + if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: break @@ -1333,8 +1339,10 @@ class YoutubeUserIE(InfoExtractor): # Don't return True if the url can be extracted with other youtube # extractor, the regex would is too permissive and it would match. other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls) - if any(ie.suitable(url) for ie in other_ies): return False - else: return super(YoutubeUserIE, cls).suitable(url) + if any(ie.suitable(url) for ie in other_ies): + return False + else: + return super(YoutubeUserIE, cls).suitable(url) def _real_extract(self, url): # Extract username @@ -1540,8 +1548,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): paging = 0 for i in itertools.count(1): info = self._download_json(self._FEED_TEMPLATE % paging, - '%s feed' % self._FEED_NAME, - 'Downloading page %s' % i) + '%s feed' % self._FEED_NAME, + 'Downloading page %s' % i) feed_html = info.get('feed_html') or info.get('content_html') load_more_widget_html = info.get('load_more_widget_html') or feed_html m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) @@ -1557,12 +1565,14 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): paging = mobj.group('paging') return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) + class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' _PLAYLIST_TITLE = 'Youtube Recommended videos' + class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' @@ -1570,6 +1580,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): _PLAYLIST_TITLE = 'Youtube Watch Later' _PERSONAL_FEED = True + class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' @@ -1577,6 +1588,7 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): _PERSONAL_FEED = True _PLAYLIST_TITLE = 'Youtube Watch History' + class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)' |