diff options
Diffstat (limited to 'youtube_dl/extractor')
114 files changed, 425 insertions, 371 deletions
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 398e93bfb..623aeaf34 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -5,10 +5,9 @@ import re import json from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - compat_str, qualities, - determine_ext, ) @@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id), 'url': v, - 'ext': determine_ext(v), }) - self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 185ee3693..b51eafc45 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -68,4 +68,3 @@ class AolIE(InfoExtractor): 'title': title, 'entries': entries, } - diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 0c01fa1a1..7cd0482c7 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,8 +4,8 @@ import re import json from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( - compat_urlparse, int_or_none, ) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 1c765532a..014a21952 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - compat_urllib_parse, determine_ext, ExtractorError, ) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 1ca0b7cf2..98e1443ab 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,7 +5,7 @@ import json import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, ) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index acddbc8f1..9fb770cb1 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -4,9 +4,11 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urlparse, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index c1fc433f7..003e50002 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -1,8 +1,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - compat_urllib_parse, xpath_text, xpath_with_ns, int_or_none, diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 0d5889f5d..241b904a9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,8 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_parse_qs from ..utils import ( - compat_parse_qs, ExtractorError, int_or_none, unified_strdate, @@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + video_code = self._search_regex( r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code') diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index bf18a97e0..1eca00470 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -6,20 +6,21 @@ import json import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse, - find_xpath_attr, - fix_xml_ampersands, - compat_urlparse, - compat_str, - compat_urllib_request, +from ..compat import ( compat_parse_qs, + compat_str, + compat_urllib_parse, compat_urllib_parse_urlparse, - + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( determine_ext, ExtractorError, - unsmuggle_url, + find_xpath_attr, + fix_xml_ampersands, unescapeHTML, + unsmuggle_url, ) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 97feb6704..2f866f3ef 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, compat_urllib_parse_urlparse, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 2e3ef3fda..48e2410b6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .mtv import MTVServicesInfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse, +) +from ..utils import ( ExtractorError, float_or_none, unified_strdate, diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 7a7e79360..3db4db4e4 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,12 +5,14 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - orderedSet, compat_urllib_parse_urlparse, compat_urlparse, ) +from ..utils import ( + orderedSet, +) class CondeNastIE(InfoExtractor): diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d7e2b841e..8f1ea02e7 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -10,10 +10,12 @@ import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, bytes_to_intlist, intlist_to_bytes, unified_strdate, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 936c13cd6..cf5841a7c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -8,13 +8,15 @@ import itertools from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - compat_urllib_request, +from ..compat import ( compat_str, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, + int_or_none, orderedSet, str_to_int, - int_or_none, - ExtractorError, unescapeHTML, ) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 45d66e2e6..c6b813f58 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -5,7 +5,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py index b766e17f2..9cb1bf301 100644 --- a/youtube_dl/extractor/ehow.py +++ b/youtube_dl/extractor/ehow.py @@ -1,8 +1,6 @@ from __future__ import unicode_literals -import re - -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) from .common import InfoExtractor @@ -24,11 +22,10 @@ class EHowIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', - webpage, 'video URL') + video_url = self._search_regex( + r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') final_url = compat_urllib_parse.unquote(video_url) uploader = self._html_search_meta('uploader', webpage) title = self._og_search_title(webpage).replace(' | eHow', '') diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index f4c1e2a72..a30a1f330 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -6,7 +6,7 @@ import random import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, ) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 476fc22b9..e240cb859 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py index d237a8281..d872d828f 100644 --- a/youtube_dl/extractor/everyonesmixtape.py +++ b/youtube_dl/extractor/everyonesmixtape.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index aacbf1414..36ba33128 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -3,16 +3,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, +) +from ..utils import ( str_to_int, ) class ExtremeTubeIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', @@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 6f5d23559..81ceace53 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,19 +1,20 @@ #! -*- coding: utf-8 -*- from __future__ import unicode_literals -import re import hashlib from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_request, compat_urlparse, ) +from ..utils import ( + ExtractorError, +) class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)' IE_NAME = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', @@ -26,9 +27,7 @@ class FC2IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index af439ccfe..3191116d9 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class FiredriveIE(InfoExtractor): @@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://firedrive.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) if re.search(self._FILE_DELETED_REGEX, webpage) is not None: diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index b22ce2acb..7187e0752 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, - unified_strdate, - str_to_int, - parse_duration, +) +from ..utils import ( clean_html, + parse_duration, + str_to_int, + unified_strdate, ) @@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') + video_id = self._match_id(url) webpage_url = 'http://www.4tube.com/videos/' + video_id webpage = self._download_webpage(webpage_url, video_id) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 898e0dda7..0c2972162 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -5,7 +5,7 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urlparse, ) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e0420a48f..bbc760a49 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -6,13 +6,15 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse_urlparse, compat_urlparse, - ExtractorError, +) +from ..utils import ( clean_html, - parse_duration, - compat_urllib_parse_urlparse, + ExtractorError, int_or_none, + parse_duration, ) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index d570e3f6a..47373e215 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -4,9 +4,11 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( unescapeHTML, ) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index de14ae1fb..d453ec010 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index 53714f47f..2bfb99040 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -2,8 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( determine_ext, ) diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 469e1f935..498304cb2 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -4,7 +4,7 @@ import itertools import re from .common import SearchInfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 1ac1da856..ae24aff84 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -4,11 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, int_or_none, ) @@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor): formats = [{ 'format_id': 'sd', 'url': video_url, - 'ext': determine_ext(video_url), 'quality': 1, }] diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index 8e812b669..704d0285d 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, int_or_none, urlencode_postdata, ) @@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://hostingbulk.com/{0:}.html'.format(video_id) # Custom request with cookie to set language to English, so our file diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 6d0d847c6..aa0724a02 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -1,20 +1,20 @@ from __future__ import unicode_literals import json -import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, ) class HypemIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/' _TEST = { 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', 'md5': 'b9cc91b5af8995e9f0c1cee04c575828', @@ -27,8 +27,7 @@ class HypemIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - track_id = mobj.group(1) + track_id = self._match_id(url) data = {'ax': 1, 'ts': time.time()} data_encoded = compat_urllib_parse.urlencode(data) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f2c1c10f5..13a53a0cb 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -4,7 +4,7 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, ) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index e76dd222d..f25f43664 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals import base64 -import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 1e4799187..c813d4b82 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, compat_urllib_parse, +) +from ..utils import ( xpath_with_ns, ) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 4247d6391..8529bedfc 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -6,8 +6,10 @@ from random import random from math import floor from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index f0fba1adb..7a400323d 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -5,8 +5,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 75b63cffb..97dcb518a 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -4,7 +4,7 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, @@ -15,7 +15,7 @@ from ..aes import ( class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)' + _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)' _TEST = { 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'file': '1214711.mp4', @@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = self._match_id(url) req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 03c4691c6..5247c6f58 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -4,10 +4,12 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse_urlparse, compat_urlparse, +) +from ..utils import ( ExtractorError, find_xpath_attr, int_or_none, diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 2160d6cb0..26e84970d 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -5,12 +5,14 @@ import json from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_str, compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, int_or_none, - compat_str, ) diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py index 1abf6e4f8..0b85a59d1 100644 --- a/youtube_dl/extractor/malemotion.py +++ b/youtube_dl/extractor/malemotion.py @@ -1,43 +1,33 @@ +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) class MalemotionIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)' + _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)' _TEST = { - 'url': 'http://malemotion.com/video/bien-dur.10ew', - 'file': '10ew.mp4', - 'md5': 'b3cc49f953b107e4a363cdff07d100ce', + 'url': 'http://malemotion.com/video/bete-de-concours.ltc', + 'md5': '3013e53a0afbde2878bc39998c33e8a5', 'info_dict': { - "title": "Bien dur", - "age_limit": 18, + 'id': 'ltc', + 'ext': 'mp4', + 'title': 'Bête de Concours', + 'age_limit': 18, }, - 'skip': 'This video has been deleted.' } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group("id") - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - - # Extract video URL - video_url = compat_urllib_parse.unquote( - self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')) - - # Extract title + video_url = compat_urllib_parse.unquote(self._search_regex( + r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')) video_title = self._html_search_regex( r'<title>(.*?)</title', webpage, 'title') - - # Extract video thumbnail video_thumbnail = self._search_regex( r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False) @@ -47,14 +37,12 @@ class MalemotionIE(InfoExtractor): 'format_id': 'mp4', 'preference': 1, }] + self._sort_formats(formats) return { 'id': video_id, 'formats': formats, - 'uploader': None, - 'upload_date': None, 'title': video_title, 'thumbnail': video_thumbnail, - 'description': None, 'age_limit': 18, } diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 858c1c0c3..8bc333b02 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( determine_ext, ExtractorError, int_or_none, diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 807b1dc89..d354702d1 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -5,8 +5,10 @@ import json from .common import InfoExtractor from .youtube import YoutubeIE -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( clean_html, ExtractorError, get_element_by_id, diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 6691521e5..256758323 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals -import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( get_element_by_attribute, parse_duration, strip_jsonp, @@ -15,7 +16,7 @@ from ..utils import ( class MiTeleIE(InfoExtractor): IE_NAME = 'mitele.es' - _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/' + _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' _TEST = { 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', @@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - episode = mobj.group('episode') + episode = self._match_id(url) webpage = self._download_webpage(url, episode) embed_data_json = self._search_regex( - r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', - flags=re.DOTALL + r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', ).replace('\'', '"') embed_data = json.loads(embed_data_json) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 55cc33a3e..07d194562 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( ExtractorError, HEADRequest, int_or_none, diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 2ff79b9b8..184f9c2c9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,10 +5,12 @@ import json import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, int_or_none, ) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index d658647e6..2cec12d35 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -4,7 +4,7 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, @@ -12,7 +12,7 @@ from ..utils import ( class MofosexIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)' + _VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)' _TEST = { 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', 'md5': '1b2eb47ac33cc75d4a80e3026b613c5a', @@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 1c4f589cc..5de719bdc 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,7 +5,7 @@ import os.path import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) @@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 34a4bec3a..9f2853fa3 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -4,11 +4,13 @@ import re import time from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_request, compat_urllib_parse, ) +from ..utils import ( + ExtractorError, +) class MooshareIE(InfoExtractor): @@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) page = self._download_webpage(url, video_id, 'Downloading page') if re.search(r'>Video Not Found or Deleted<', page) is not None: diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 7c0ec6a12..f5ca74e97 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -3,13 +3,14 @@ from __future__ import unicode_literals import hashlib import json -import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_str, +) +from ..utils import ( int_or_none, ) @@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + flashvars_code = self._html_search_regex( r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars') flashvars = compat_parse_qs(flashvars_code) diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py index 456807dd1..04e17d055 100644 --- a/youtube_dl/extractor/movieclips.py +++ b/youtube_dl/extractor/movieclips.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_str, +) from ..utils import ( ExtractorError, - compat_str, clean_html, ) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index b482d6d4d..5ebc78033 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, find_xpath_attr, fix_xml_ampersands, diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 51e540814..5b9b9fbcd 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -2,9 +2,10 @@ from __future__ import unicode_literals import os.path from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, - +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index fbe34defd..c10405f04 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( ExtractorError, clean_html, ) @@ -26,9 +28,9 @@ class NaverIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index bf5132721..690c46b6a 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, +) +from ..utils import ( ExtractorError, find_xpath_attr, ) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 7ce1d481d..ea077254b 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, ) @@ -12,7 +10,7 @@ from ..utils import ( class NFBIE(InfoExtractor): IE_NAME = 'nfb' IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' _TEST = { 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ -32,10 +30,10 @@ class NFBIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') + video_id = self._match_id(url) + page = self._download_webpage( + 'https://www.nfb.ca/film/%s' % video_id, video_id, + 'Downloading film page') uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', page, 'director id', fatal=False) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index cc7c921c3..606e2294e 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, - compat_urllib_parse_urlparse, int_or_none, remove_end, ) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 1d9c1a096..4c1890416 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -5,14 +5,16 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - unified_strdate, - parse_duration, - int_or_none, +) +from ..utils import ( ExtractorError, + int_or_none, + parse_duration, + unified_strdate, ) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 7d2ff7b9a..251e6da07 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -6,13 +6,15 @@ import time import hashlib from .common import InfoExtractor -from ..utils import ( - compat_urllib_request, +from ..compat import ( + compat_str, compat_urllib_parse, - ExtractorError, + compat_urllib_request, +) +from ..utils import ( clean_html, + ExtractorError, unified_strdate, - compat_str, ) diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index f3be8f552..f5ef856db 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, urlencode_postdata, xpath_text, xpath_with_ns, @@ -32,8 +34,7 @@ class NosVideoIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) fields = { 'id': video_id, diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 38d05e466..04d779890 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urlparse, +) from ..utils import ( ExtractorError, - compat_urlparse ) diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index 449c8a6a3..57928f2ae 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -3,15 +3,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( parse_duration, unified_strdate, - compat_urllib_request, ) class NuvidIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://m.nuvid.com/video/1310741/', 'md5': 'eab207b7ac4fccfb4e23c86201f11277', @@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) formats = [] diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index b4389e0b6..c66db3cdc 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -4,16 +4,17 @@ import json import re from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse class PhotobucketIE(InfoExtractor): _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' _TEST = { 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', - 'file': 'zpsc0c3b9fa.mp4', 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', 'info_dict': { + 'id': 'zpsc0c3b9fa', + 'ext': 'mp4', 'timestamp': 1367669341, 'upload_date': '20130504', 'uploader': 'rachaneronas', diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 17880471d..449d4836c 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,11 +5,13 @@ import re import os.path from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class PlayedIE(InfoExtractor): @@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - orig_webpage = self._download_webpage(url, video_id) m_error = re.search( diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index ebc046804..9576aed0e 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, float_or_none, int_or_none, diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index cd3905acb..c3e667e9e 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -3,31 +3,31 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, +) from ..utils import ( - ExtractorError, clean_html, - compat_urllib_parse, + ExtractorError, ) class PlayvidIE(InfoExtractor): - _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' _TEST = { - 'url': 'http://www.playvid.com/watch/agbDDi7WZTV', - 'md5': '44930f8afa616efdf9482daf4fe53e1e', + 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', + 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', 'info_dict': { - 'id': 'agbDDi7WZTV', + 'id': 'RnmBNgtrrJu', 'ext': 'mp4', - 'title': 'Michelle Lewin in Miami Beach', - 'duration': 240, + 'title': 'md5:9256d01c6317e3f703848b5906880dc8', + 'duration': 82, 'age_limit': 18, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) m_error = re.search( diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 2ca15b717..634142d0d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -4,10 +4,12 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, - compat_urllib_parse, +) +from ..utils import ( str_to_int, ) from ..aes import ( @@ -16,7 +18,7 @@ from ..aes import ( class PornHubIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)' _TEST = { 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': '882f488fa1f0026f023f33576004a2ed', diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 7fcde086c..f536e6e6c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + determine_ext, + ExtractorError, +) class PromptFileIE(InfoExtractor): diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1262793c8..385681d06 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,8 +5,10 @@ import re from hashlib import sha1 from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( unified_strdate, ) diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py index 3bc78060d..af7d76cf4 100644 --- a/youtube_dl/extractor/quickvid.py +++ b/youtube_dl/extractor/quickvid.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( determine_ext, int_or_none, ) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 2d39ecfe4..aa26b7e0b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals import re from .subtitles import SubtitlesInfoExtractor +from ..compat import ( + compat_urllib_parse, +) from ..utils import ( parse_duration, unified_strdate, - compat_urllib_parse, ) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index dc59a5e5c..5e84c1098 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_str, +) from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, - compat_str, ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 6941d96fb..b72b5a586 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -5,10 +5,12 @@ import re import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, - unified_strdate, +) +from ..utils import ( ExtractorError, + unified_strdate, ) @@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) video = self._download_json( 'http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') @@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE): _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - movie_id = mobj.group('id') + movie_id = self._match_id(url) movie = self._download_json( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c145f6fc7..dfd897ba3 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_parse_qs, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class ScreencastIE(InfoExtractor): @@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex( diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index fdc31603a..26ced716e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -4,10 +4,12 @@ import re import base64 from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, - compat_urllib_parse, int_or_none, ) @@ -26,26 +28,30 @@ class SharedIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage(url, video_id) - - if re.search(r'>File does not exist<', page) is not None: - raise ExtractorError('Video %s does not exist' % video_id, expected=True) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page)) + if '>File does not exist<' in webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) - request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form)) + download_form = dict(re.findall( + r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage)) + request = compat_urllib_request.Request( + url, compat_urllib_parse.urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') - video_page = self._download_webpage(request, video_id, 'Downloading video page') + video_page = self._download_webpage( + request, video_id, 'Downloading video page') - video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL') - title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8') - filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False)) + video_url = self._html_search_regex( + r'data-url="([^"]+)"', video_page, 'video URL') + title = base64.b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') + filesize = int_or_none(self._html_search_meta( + 'full:size', webpage, 'file size', fatal=False)) thumbnail = self._html_search_regex( - r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None) + r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None) return { 'id': video_id, diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py index 7531e8325..ac3e3adf2 100644 --- a/youtube_dl/extractor/sharesix.py +++ b/youtube_dl/extractor/sharesix.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( parse_duration, ) diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index 5eadbb7ea..a63d126d4 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, ) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 5864b9936..e7d776e7b 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 646af3cc9..d031fe401 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,9 +7,11 @@ import hashlib import uuid from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, int_or_none, unified_strdate, diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py index c663e56d4..7d3c0e937 100644 --- a/youtube_dl/extractor/sockshare.py +++ b/youtube_dl/extractor/sockshare.py @@ -1,13 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -from ..utils import ( - ExtractorError, +import re + +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( determine_ext, + ExtractorError, ) -import re from .common import InfoExtractor @@ -27,9 +30,7 @@ class SockshareIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://sockshare.com/file/%s' % video_id webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index ab9483d2d..5d60c4939 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -5,11 +5,12 @@ import re import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urlparse, compat_urllib_parse, - +) +from ..utils import ( ExtractorError, int_or_none, unified_strdate, diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 94602e89e..b936202f6 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, - compat_urllib_parse, - unified_strdate, +) +from ..utils import ( str_to_int, + unified_strdate, ) from ..aes import aes_decrypt_text diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index 057ef5251..2f57f5b7c 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( parse_iso8601, ) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index c1178f26d..38176498d 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -5,7 +5,7 @@ import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index 778704839..c3ceb5f76 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,13 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( int_or_none, ) diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py index 283e11350..f1f43d0a7 100644 --- a/youtube_dl/extractor/tapely.py +++ b/youtube_dl/extractor/tapely.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( - ExtractorError, clean_html, - compat_urllib_request, + ExtractorError, float_or_none, parse_iso8601, ) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 72160503c..944177426 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -5,7 +5,7 @@ import re from .subtitles import SubtitlesInfoExtractor -from ..utils import ( +from ..compat import ( compat_str, ) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index e2653d62d..af6ef0033 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, +) +from ..utils import ( determine_ext, ExtractorError, xpath_with_ns, diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 66d159e99..9f9e388c5 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveIE from .discovery import DiscoveryIE -from ..utils import compat_urlparse +from ..compat import compat_urlparse class TlcIE(DiscoveryIE): diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 64a1e9030..d73ad3762 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -4,9 +4,11 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, +) +from ..utils import ( int_or_none, str_to_int, ) diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index d516b6427..4de0aac52 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals import base64 -import re from .common import InfoExtractor -from ..utils import compat_parse_qs +from ..compat import compat_parse_qs class TutvIE(InfoExtractor): @@ -20,10 +19,9 @@ class TutvIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') data_content = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 397d167e8..715f9930c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -5,9 +5,11 @@ import itertools import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, parse_iso8601, ) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5271611ac..4667ed83b 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/urort.py b/youtube_dl/extractor/urort.py index 249a34c86..8872cfcb2 100644 --- a/youtube_dl/extractor/urort.py +++ b/youtube_dl/extractor/urort.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( unified_strdate, ) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 53dc3a496..68d03b999 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, ) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 455b6d9da..dd026748d 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,19 +1,18 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, ) class Vbox7IE(InfoExtractor): - _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)' + _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)' _TEST = { 'url': 'http://vbox7.com/play:249bb972c2', 'md5': '99f65c0c9ef9b682b97313e052734c3f', @@ -25,8 +24,7 @@ class Vbox7IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) redirect_page, urlh = self._download_webpage_handle(url, video_id) new_location = self._search_regex(r'window\.location = \'(.*)\';', diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 94647d1c8..815f58468 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -4,10 +4,12 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, - get_element_by_id, +) +from ..utils import ( clean_html, + get_element_by_id, ) @@ -26,8 +28,7 @@ class VeeHDIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) # VeeHD seems to send garbage on the first request. # See https://github.com/rg3/youtube-dl/issues/2102 diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index a7953a7e7..01e258e32 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( int_or_none, ExtractorError, ) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c912c3cbe..43f6b029d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,8 +4,10 @@ import re import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index ac6c25537..0ffc7ff7d 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,10 +1,8 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import compat_urlparse from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import compat_urlparse class VideoDetectiveIE(InfoExtractor): @@ -17,13 +15,12 @@ class VideoDetectiveIE(InfoExtractor): 'ext': 'mp4', 'title': 'KICK-ASS 2', 'description': 'md5:65ba37ad619165afac7d432eaded6013', - 'duration': 135, + 'duration': 138, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 29c4e0101..7a78f0d26 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( remove_start, ) @@ -27,9 +27,7 @@ class VideoMegaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ca6b0d5b3..542e9198a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -5,14 +5,17 @@ import re import json from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse, + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, - compat_urllib_parse, - compat_str, + orderedSet, unescapeHTML, unified_strdate, - orderedSet) +) class VKIE(InfoExtractor): diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index affef6507..1c0966a79 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,8 +2,9 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) @@ -24,8 +25,7 @@ class VodlockerIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x)<input\s+ diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 1b2f731e9..405cb9db4 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_str, +) from ..utils import ( int_or_none, - compat_str, ExtractorError, ) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index ec3c010ad..c3fde53f5 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, +) +from ..utils import ( ExtractorError, parse_duration, qualities, @@ -25,10 +27,9 @@ class VuClipIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + ad_m = re.search( r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage) if ad_m: diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 93a6e6454..8e25ecf28 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urlparse, +) +from ..utils import ( determine_ext, unified_strdate, ) @@ -141,7 +143,6 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'ext': determine_ext(url), 'user_agent': 'mobile', } diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 748443f81..13a079151 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,9 +1,8 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ExtractorError, compat_urllib_request +from ..compat import compat_urllib_request +from ..utils import ExtractorError class WistiaIE(InfoExtractor): @@ -22,8 +21,7 @@ class WistiaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) request = compat_urllib_request.Request(self._API_URL.format(video_id)) request.add_header('Referer', url) # Some videos require this. diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index 1b4e88365..80c48c37d 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -23,10 +21,9 @@ class XBefIE(InfoExtractor): } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = self._html_search_regex( r'<h1[^>]*>(.*?)</h1>', webpage, 'title') diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 53ed7ef5a..79ed6c744 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,10 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -23,10 +21,7 @@ class XNXXIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # Get webpage content + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._search_regex(r'flv_url=(.*?)&', diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 38448e7c0..f9d98b83f 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -4,15 +4,17 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( parse_duration, str_to_int, ) class XTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&]+))' _TEST = { 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', @@ -29,7 +31,7 @@ class XTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 7e0044824..2a45dc574 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -3,15 +3,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - ExtractorError, +) +from ..utils import ( clean_html, + ExtractorError, ) class XVideosIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' + _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)' _TEST = { 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'md5': '4b46ae6ea5e6e9086e714d883313c0c9', @@ -24,37 +26,25 @@ class XVideosIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage) if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - # Extract video URL video_url = compat_urllib_parse.unquote( self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) - - # Extract title video_title = self._html_search_regex( r'<title>(.*?)\s+-\s+XVID', webpage, 'title') - - # Extract video thumbnail video_thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'url': video_url, - 'uploader': None, - 'upload_date': None, 'title': video_title, 'ext': 'flv', 'thumbnail': video_thumbnail, - 'description': None, 'age_limit': 18, } diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 0fdb12243..031226f27 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -6,11 +6,13 @@ import json import re from .common import InfoExtractor, SearchInfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( clean_html, + ExtractorError, int_or_none, ) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 7b621a9e3..894678a23 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -5,7 +5,7 @@ import re import json from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse class YnetIE(InfoExtractor): diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d9c06a2ee..107c9ac36 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -6,10 +6,11 @@ import re import sys from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, unescapeHTML, unified_strdate, |