diff options
-rw-r--r-- | youtube_dl/extractor/channel9.py | 70 | ||||
-rw-r--r-- | youtube_dl/extractor/smotri.py | 247 |
2 files changed, 156 insertions, 161 deletions
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 574881b70..3867d7850 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +from __future__ import unicode_literals import re @@ -11,38 +11,38 @@ class Channel9IE(InfoExtractor): The type of provided URL (video or playlist) is determined according to meta Search.PageType from web page HTML rather than URL itself, as it is - not always possible to do. + not always possible to do. ''' - IE_DESC = u'Channel 9' - IE_NAME = u'channel9' + IE_DESC = 'Channel 9' + IE_NAME = 'channel9' _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?' _TESTS = [ { - u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - u'file': u'Events_TechEd_Australia_2013_KOS002.mp4', - u'md5': u'bbd75296ba47916b754e73c3a4bbdf10', - u'info_dict': { - u'title': u'Developer Kick-Off Session: Stuff We Love', - u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f', - u'duration': 4576, - u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', - u'session_code': u'KOS002', - u'session_day': u'Day 1', - u'session_room': u'Arena 1A', - u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ], + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'file': 'Events_TechEd_Australia_2013_KOS002.mp4', + 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'info_dict': { + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'duration': 4576, + 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', + 'session_code': 'KOS002', + 'session_day': 'Day 1', + 'session_room': 'Arena 1A', + 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ], }, }, { - u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4', - u'md5': u'b43ee4529d111bc37ba7ee4f34813e68', - u'info_dict': { - u'title': u'Self-service BI with Power BI - nuclear testing', - u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', - u'duration': 1540, - u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', - u'authors': [ u'Mike Wilmot' ], + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4', + 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'info_dict': { + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'duration': 1540, + 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', + 'authors': [ 'Mike Wilmot' ], }, } ] @@ -60,7 +60,7 @@ class Channel9IE(InfoExtractor): return 0 units = m.group('units') try: - exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper()) + exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper()) except ValueError: return 0 size = float(m.group('size')) @@ -80,7 +80,7 @@ class Channel9IE(InfoExtractor): 'url': x.group('url'), 'format_id': x.group('quality'), 'format_note': x.group('note'), - 'format': u'%s (%s)' % (x.group('quality'), x.group('note')), + 'format': '%s (%s)' % (x.group('quality'), x.group('note')), 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate 'preference': self._known_formats.index(x.group('quality')), 'vcodec': 'none' if x.group('note') == 'Audio only' else None, @@ -91,10 +91,10 @@ class Channel9IE(InfoExtractor): return formats def _extract_title(self, html): - title = self._html_search_meta(u'title', html, u'title') + title = self._html_search_meta('title', html, 'title') if title is None: title = self._og_search_title(html) - TITLE_SUFFIX = u' (Channel 9)' + TITLE_SUFFIX = ' (Channel 9)' if title is not None and title.endswith(TITLE_SUFFIX): title = title[:-len(TITLE_SUFFIX)] return title @@ -110,7 +110,7 @@ class Channel9IE(InfoExtractor): m = re.search(DESCRIPTION_REGEX, html) if m is not None: return m.group('description') - return self._html_search_meta(u'description', html, u'description') + return self._html_search_meta('description', html, 'description') def _extract_duration(self, html): m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) @@ -172,7 +172,7 @@ class Channel9IE(InfoExtractor): # Nothing to download if len(formats) == 0 and slides is None and zip_ is None: - self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path) + self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path) return # Extract meta @@ -244,7 +244,7 @@ class Channel9IE(InfoExtractor): return contents def _extract_list(self, content_path): - rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS') + rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') entries = [self.url_result(session_url.text, 'Channel9') for session_url in rss.findall('./channel/item/link')] title_text = rss.find('./channel/title').text @@ -254,11 +254,11 @@ class Channel9IE(InfoExtractor): mobj = re.match(self._VALID_URL, url) content_path = mobj.group('contentpath') - webpage = self._download_webpage(url, content_path, u'Downloading web page') + webpage = self._download_webpage(url, content_path, 'Downloading web page') page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage) if page_type_m is None: - raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True) + raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True) page_type = page_type_m.group('pagetype') if page_type == 'List': # List page, may contain list of 'item'-like objects @@ -268,4 +268,4 @@ class Channel9IE(InfoExtractor): elif page_type == 'Session': # Event session page, may contain downloadable content return self._extract_session(webpage, content_path) else: - raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
\ No newline at end of file + raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)
\ No newline at end of file diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 99f5b19d2..f249f013c 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,4 +1,5 @@ # encoding: utf-8 +from __future__ import unicode_literals import os.path import re @@ -16,76 +17,76 @@ from ..utils import ( class SmotriIE(InfoExtractor): - IE_DESC = u'Smotri.com' - IE_NAME = u'smotri' + IE_DESC = 'Smotri.com' + IE_NAME = 'smotri' _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))' _TESTS = [ # real video id 2610366 { - u'url': u'http://smotri.com/video/view/?id=v261036632ab', - u'file': u'v261036632ab.mp4', - u'md5': u'2a7b08249e6f5636557579c368040eb9', - u'info_dict': { - u'title': u'катастрофа с камер видеонаблюдения', - u'uploader': u'rbc2008', - u'uploader_id': u'rbc08', - u'upload_date': u'20131118', - u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', - u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', + 'url': 'http://smotri.com/video/view/?id=v261036632ab', + 'file': 'v261036632ab.mp4', + 'md5': '2a7b08249e6f5636557579c368040eb9', + 'info_dict': { + 'title': 'катастрофа с камер видеонаблюдения', + 'uploader': 'rbc2008', + 'uploader_id': 'rbc08', + 'upload_date': '20131118', + 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', + 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', }, }, # real video id 57591 { - u'url': u'http://smotri.com/video/view/?id=v57591cb20', - u'file': u'v57591cb20.flv', - u'md5': u'830266dfc21f077eac5afd1883091bcd', - u'info_dict': { - u'title': u'test', - u'uploader': u'Support Photofile@photofile', - u'uploader_id': u'support-photofile', - u'upload_date': u'20070704', - u'description': u'test, видео test', - u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', + 'url': 'http://smotri.com/video/view/?id=v57591cb20', + 'file': 'v57591cb20.flv', + 'md5': '830266dfc21f077eac5afd1883091bcd', + 'info_dict': { + 'title': 'test', + 'uploader': 'Support Photofile@photofile', + 'uploader_id': 'support-photofile', + 'upload_date': '20070704', + 'description': 'test, видео test', + 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', }, }, # video-password { - u'url': u'http://smotri.com/video/view/?id=v1390466a13c', - u'file': u'v1390466a13c.mp4', - u'md5': u'f6331cef33cad65a0815ee482a54440b', - u'info_dict': { - u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', - u'uploader': u'timoxa40', - u'uploader_id': u'timoxa40', - u'upload_date': u'20100404', - u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', - u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', + 'url': 'http://smotri.com/video/view/?id=v1390466a13c', + 'file': 'v1390466a13c.mp4', + 'md5': 'f6331cef33cad65a0815ee482a54440b', + 'info_dict': { + 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', + 'uploader': 'timoxa40', + 'uploader_id': 'timoxa40', + 'upload_date': '20100404', + 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', + 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', }, - u'params': { - u'videopassword': u'qwerty', + 'params': { + 'videopassword': 'qwerty', }, }, # age limit + video-password { - u'url': u'http://smotri.com/video/view/?id=v15408898bcf', - u'file': u'v15408898bcf.flv', - u'md5': u'91e909c9f0521adf5ee86fbe073aad70', - u'info_dict': { - u'title': u'этот ролик не покажут по ТВ', - u'uploader': u'zzxxx', - u'uploader_id': u'ueggb', - u'upload_date': u'20101001', - u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', - u'age_limit': 18, - u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', + 'url': 'http://smotri.com/video/view/?id=v15408898bcf', + 'file': 'v15408898bcf.flv', + 'md5': '91e909c9f0521adf5ee86fbe073aad70', + 'info_dict': { + 'title': 'этот ролик не покажут по ТВ', + 'uploader': 'zzxxx', + 'uploader_id': 'ueggb', + 'upload_date': '20101001', + 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', + 'age_limit': 18, + 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', }, - u'params': { - u'videopassword': u'333' + 'params': { + 'videopassword': '333' } } ] - + _SUCCESS = 0 _PASSWORD_NOT_VERIFIED = 1 _PASSWORD_DETECTED = 2 @@ -106,71 +107,71 @@ class SmotriIE(InfoExtractor): # Download video JSON data video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id - video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON') + video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON') video_json = json.loads(video_json_page) - + status = video_json['status'] if status == self._VIDEO_NOT_FOUND: - raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) - elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with # video-password set video_password = self._downloader.params.get('videopassword', None) if not video_password: - raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True) + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest() - video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)') + video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)') video_json = json.loads(video_json_page) status = video_json['status'] if status == self._PASSWORD_NOT_VERIFIED: - raise ExtractorError(u'Video password is invalid', expected=True) - + raise ExtractorError('Video password is invalid', expected=True) + if status != self._SUCCESS: - raise ExtractorError(u'Unexpected status value %s' % status) - + raise ExtractorError('Unexpected status value %s' % status) + # Extract the URL of the video video_url = video_json['file_data'] - + # Video JSON does not provide enough meta data # We will extract some from the video web page instead video_page_url = 'http://' + mobj.group('url') - video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') + video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page') # Warning if video is unavailable warning = self._html_search_regex( r'<div class="videoUnModer">(.*?)</div>', video_page, - u'warning message', default=None) + 'warning message', default=None) if warning is not None: self._downloader.report_warning( - u'Video %s may not be available; smotri said: %s ' % + 'Video %s may not be available; smotri said: %s ' % (video_id, warning)) # Adult content - if re.search(u'EroConfirmText">', video_page) is not None: + if re.search('EroConfirmText">', video_page) is not None: self.report_age_confirmation() confirm_string = self._html_search_regex( r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id, - video_page, u'confirm string') + video_page, 'confirm string') confirm_url = video_page_url + '&confirm=%s' % confirm_string - video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)') + video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)') adult_content = True else: adult_content = False - + # Extract the rest of meta data - video_title = self._search_meta(u'name', video_page, u'title') + video_title = self._search_meta('name', video_page, 'title') if not video_title: video_title = os.path.splitext(url_basename(video_url))[0] - video_description = self._search_meta(u'description', video_page) - END_TEXT = u' на сайте Smotri.com' + video_description = self._search_meta('description', video_page) + END_TEXT = ' на сайте Smotri.com' if video_description and video_description.endswith(END_TEXT): video_description = video_description[:-len(END_TEXT)] - START_TEXT = u'Смотреть онлайн ролик ' + START_TEXT = 'Смотреть онлайн ролик ' if video_description and video_description.startswith(START_TEXT): video_description = video_description[len(START_TEXT):] - video_thumbnail = self._search_meta(u'thumbnail', video_page) + video_thumbnail = self._search_meta('thumbnail', video_page) - upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') + upload_date_str = self._search_meta('uploadDate', video_page, 'upload date') if upload_date_str: upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) video_upload_date = ( @@ -183,8 +184,8 @@ class SmotriIE(InfoExtractor): ) else: video_upload_date = None - - duration_str = self._search_meta(u'duration', video_page) + + duration_str = self._search_meta('duration', video_page) if duration_str: duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) video_duration = ( @@ -197,19 +198,19 @@ class SmotriIE(InfoExtractor): ) else: video_duration = None - + video_uploader = self._html_search_regex( - u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', - video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) - + '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', + video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) + video_uploader_id = self._html_search_regex( - u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', - video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) - + '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', + video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) + video_view_count = self._html_search_regex( - u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', - video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) - + 'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', + video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) + return { 'id': video_id, 'url': video_url, @@ -227,8 +228,8 @@ class SmotriIE(InfoExtractor): class SmotriCommunityIE(InfoExtractor): - IE_DESC = u'Smotri.com community videos' - IE_NAME = u'smotri:community' + IE_DESC = 'Smotri.com community videos' + IE_NAME = 'smotri:community' _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' def _real_extract(self, url): @@ -236,21 +237,21 @@ class SmotriCommunityIE(InfoExtractor): community_id = mobj.group('communityid') url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id - rss = self._download_xml(url, community_id, u'Downloading community RSS') + rss = self._download_xml(url, community_id, 'Downloading community RSS') entries = [self.url_result(video_url.text, 'Smotri') for video_url in rss.findall('./channel/item/link')] description_text = rss.find('./channel/description').text community_title = self._html_search_regex( - u'^Видео сообщества "([^"]+)"$', description_text, u'community title') + '^Видео сообщества "([^"]+)"$', description_text, 'community title') return self.playlist_result(entries, community_id, community_title) class SmotriUserIE(InfoExtractor): - IE_DESC = u'Smotri.com user videos' - IE_NAME = u'smotri:user' + IE_DESC = 'Smotri.com user videos' + IE_NAME = 'smotri:user' _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' def _real_extract(self, url): @@ -258,22 +259,22 @@ class SmotriUserIE(InfoExtractor): user_id = mobj.group('userid') url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id - rss = self._download_xml(url, user_id, u'Downloading user RSS') + rss = self._download_xml(url, user_id, 'Downloading user RSS') entries = [self.url_result(video_url.text, 'Smotri') for video_url in rss.findall('./channel/item/link')] description_text = rss.find('./channel/description').text user_nickname = self._html_search_regex( - u'^Видео режиссера (.*)$', description_text, - u'user nickname') + '^Видео режиссера (.*)$', description_text, + 'user nickname') return self.playlist_result(entries, user_id, user_nickname) class SmotriBroadcastIE(InfoExtractor): - IE_DESC = u'Smotri.com broadcasts' - IE_NAME = u'smotri:broadcast' + IE_DESC = 'Smotri.com broadcasts' + IE_NAME = 'smotri:broadcast' _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*' def _real_extract(self, url): @@ -281,46 +282,40 @@ class SmotriBroadcastIE(InfoExtractor): broadcast_id = mobj.group('broadcastid') broadcast_url = 'http://' + mobj.group('url') - broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page') + broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') - if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: - raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True) + if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: + raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) # Adult content - if re.search(u'EroConfirmText">', broadcast_page) is not None: + if re.search('EroConfirmText">', broadcast_page) is not None: (username, password) = self._get_login_info() if username is None: - raise ExtractorError(u'Erotic broadcasts allowed only for registered users, ' - u'use --username and --password options to provide account credentials.', expected=True) - - # Log in - login_form_strs = { - u'login-hint53': '1', - u'confirm_erotic': '1', - u'login': username, - u'password': password, + raise ExtractorError('Erotic broadcasts allowed only for registered users, ' + 'use --username and --password options to provide account credentials.', expected=True) + + login_form = { + 'login-hint53': '1', + 'confirm_erotic': '1', + 'login': username, + 'password': password, } - # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode - # chokes on unicode - login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) - login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') - login_url = broadcast_url + '/?no_redirect=1' - request = compat_urllib_request.Request(login_url, login_data) + + request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') - broadcast_page = self._download_webpage( - request, broadcast_id, note=u'Logging in and confirming age') + broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') - if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None: - raise ExtractorError(u'Unable to log in: bad username or password', expected=True) + if re.search('>Неверный логин или пароль<', broadcast_page) is not None: + raise ExtractorError('Unable to log in: bad username or password', expected=True) adult_content = True else: adult_content = False ticket = self._html_search_regex( - u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', - broadcast_page, u'broadcast ticket') + 'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', + broadcast_page, 'broadcast ticket') url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket @@ -328,22 +323,22 @@ class SmotriBroadcastIE(InfoExtractor): if broadcast_password: url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() - broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON') + broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') try: broadcast_json = json.loads(broadcast_json_page) protected_broadcast = broadcast_json['_pass_protected'] == 1 if protected_broadcast and not broadcast_password: - raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True) + raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) broadcast_offline = broadcast_json['is_play'] == 0 if broadcast_offline: - raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True) + raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) rtmp_url = broadcast_json['_server'] if not rtmp_url.startswith('rtmp://'): - raise ExtractorError(u'Unexpected broadcast rtmp URL') + raise ExtractorError('Unexpected broadcast rtmp URL') broadcast_playpath = broadcast_json['_streamName'] broadcast_thumbnail = broadcast_json['_imgURL'] @@ -354,8 +349,8 @@ class SmotriBroadcastIE(InfoExtractor): rtmp_conn = 'S:%s' % uuid.uuid4().hex except KeyError: if protected_broadcast: - raise ExtractorError(u'Bad broadcast password', expected=True) - raise ExtractorError(u'Unexpected broadcast JSON') + raise ExtractorError('Bad broadcast password', expected=True) + raise ExtractorError('Unexpected broadcast JSON') return { 'id': broadcast_id, |