diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-12-13 12:24:42 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-12-13 12:35:45 +0100 | 
| commit | 1cc79574fc5df21bf35dccf61eac0e9e75ed8d20 (patch) | |
| tree | 5cc2bd2a2faba0ac5e8371892e3ef09ab5338080 | |
| parent | 20e35880bf7413c5085c1ade96af0fc6e56d84d1 (diff) | |
Fix imports and general cleanup
· Import compat_* names from compat rather than from utils. Yes, some of these names are also available in utils, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent _VALID_URL patterns (always match the whole protocol; no ^ is required at the start)
· Use modern test definitions
120 files changed, 437 insertions, 378 deletions
| diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 2a566eabe..8181bca09 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -5,8 +5,8 @@ import re  import sys  import time +from ..compat import compat_str  from ..utils import ( -    compat_str,      encodeFilename,      format_bytes,      timeconvert, diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 7cd22c504..00f3a026c 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree  from .common import FileDownloader  from .http import HttpFD +from ..compat import ( +    compat_urlparse, +)  from ..utils import (      struct_pack,      struct_unpack, -    compat_urlparse,      format_bytes,      encodeFilename,      sanitize_open, diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index ad26cfa40..5bb0f3cfd 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -6,9 +6,11 @@ import subprocess  from ..postprocessor.ffmpeg import FFmpegPostProcessor  from .common import FileDownloader -from ..utils import ( +from ..compat import (      compat_urlparse,      compat_urllib_request, +) +from ..utils import (      check_executable,      encodeFilename,  ) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 224962e86..e68f20c9f 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,11 +4,12 @@ import os  import time  from .common import FileDownloader -from ..utils import ( +from ..compat import (      compat_urllib_request,      compat_urllib_error, +) +from ..utils import (      ContentTooShortError, -      encodeFilename,      sanitize_open,      format_bytes, diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 58ae2005c..575912675 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -7,9 
+7,9 @@ import sys  import time  from .common import FileDownloader +from ..compat import compat_str  from ..utils import (      check_executable, -    compat_str,      encodeFilename,      format_bytes,      get_exe_version, diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 398e93bfb..623aeaf34 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -5,10 +5,9 @@ import re  import json  from .common import InfoExtractor +from ..compat import compat_str  from ..utils import ( -    compat_str,      qualities, -    determine_ext,  ) @@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor):                      'format_id': format_id,                      'quality': quality(format_id),                      'url': v, -                    'ext': determine_ext(v),                  }) -          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 185ee3693..b51eafc45 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -68,4 +68,3 @@ class AolIE(InfoExtractor):              'title': title,              'entries': entries,          } - diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 0c01fa1a1..7cd0482c7 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,8 +4,8 @@ import re  import json  from .common import InfoExtractor +from ..compat import compat_urlparse  from ..utils import ( -    compat_urlparse,      int_or_none,  ) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 1c765532a..014a21952 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_urllib_parse  from ..utils import ( -    compat_urllib_parse,      determine_ext,  
    ExtractorError,  ) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 1ca0b7cf2..98e1443ab 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,7 +5,7 @@ import json  import itertools  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request,  ) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index acddbc8f1..9fb770cb1 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -4,9 +4,11 @@ import json  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str,      compat_urlparse, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index c1fc433f7..003e50002 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -1,8 +1,8 @@  from __future__ import unicode_literals  from .common import InfoExtractor +from ..compat import compat_urllib_parse  from ..utils import ( -    compat_urllib_parse,      xpath_text,      xpath_with_ns,      int_or_none, diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 0d5889f5d..241b904a9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,8 +4,8 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_parse_qs  from ..utils import ( -    compat_parse_qs,      ExtractorError,      int_or_none,      unified_strdate, @@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +          video_code = self._search_regex(              r'(?s)<div 
itemprop="video".*?>(.*?)</div>', webpage, 'video code') diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index bf18a97e0..1eca00470 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -6,20 +6,21 @@ import json  import xml.etree.ElementTree  from .common import InfoExtractor -from ..utils import ( -    compat_urllib_parse, -    find_xpath_attr, -    fix_xml_ampersands, -    compat_urlparse, -    compat_str, -    compat_urllib_request, +from ..compat import (      compat_parse_qs, +    compat_str, +    compat_urllib_parse,      compat_urllib_parse_urlparse, - +    compat_urllib_request, +    compat_urlparse, +) +from ..utils import (      determine_ext,      ExtractorError, -    unsmuggle_url, +    find_xpath_attr, +    fix_xml_ampersands,      unescapeHTML, +    unsmuggle_url,  ) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 97feb6704..2f866f3ef 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request,      compat_urllib_parse,      compat_urllib_parse_urlparse, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 2e3ef3fda..48e2410b6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .mtv import MTVServicesInfoExtractor -from ..utils import ( +from ..compat import (      compat_str,      compat_urllib_parse, +) +from ..utils import (      ExtractorError,      float_or_none,      unified_strdate, diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 7a7e79360..3db4db4e4 100644 --- 
a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,12 +5,14 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, -    orderedSet,      compat_urllib_parse_urlparse,      compat_urlparse,  ) +from ..utils import ( +    orderedSet, +)  class CondeNastIE(InfoExtractor): diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d7e2b841e..8f1ea02e7 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -10,10 +10,12 @@ import xml.etree.ElementTree  from hashlib import sha1  from math import pow, sqrt, floor  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import ( +    ExtractorError,      bytes_to_intlist,      intlist_to_bytes,      unified_strdate, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 936c13cd6..cf5841a7c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -8,13 +8,15 @@ import itertools  from .common import InfoExtractor  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( -    compat_urllib_request, +from ..compat import (      compat_str, +    compat_urllib_request, +) +from ..utils import ( +    ExtractorError, +    int_or_none,      orderedSet,      str_to_int, -    int_or_none, -    ExtractorError,      unescapeHTML,  ) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 45d66e2e6..c6b813f58 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -5,7 +5,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,  ) diff --git a/youtube_dl/extractor/ehow.py 
b/youtube_dl/extractor/ehow.py index b766e17f2..9cb1bf301 100644 --- a/youtube_dl/extractor/ehow.py +++ b/youtube_dl/extractor/ehow.py @@ -1,8 +1,6 @@  from __future__ import unicode_literals -import re - -from ..utils import ( +from ..compat import (      compat_urllib_parse,  )  from .common import InfoExtractor @@ -24,11 +22,10 @@ class EHowIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', -                                       webpage, 'video URL') +        video_url = self._search_regex( +            r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')          final_url = compat_urllib_parse.unquote(video_url)          uploader = self._html_search_meta('uploader', webpage)          title = self._og_search_title(webpage).replace(' | eHow', '') diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index f4c1e2a72..a30a1f330 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -6,7 +6,7 @@ import random  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str,  ) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 476fc22b9..e240cb859 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, - +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py index d237a8281..d872d828f 100644 --- a/youtube_dl/extractor/everyonesmixtape.py +++ 
b/youtube_dl/extractor/everyonesmixtape.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index aacbf1414..36ba33128 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -3,16 +3,18 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urllib_parse, +) +from ..utils import (      str_to_int,  )  class ExtremeTubeIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' +    _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'      _TESTS = [{          'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',          'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', @@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('videoid') +        video_id = mobj.group('id')          url = 'http://www.' + mobj.group('url')          req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 6f5d23559..81ceace53 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,19 +1,20 @@  #! 
-*- coding: utf-8 -*-  from __future__ import unicode_literals -import re  import hashlib  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_request,      compat_urlparse,  ) +from ..utils import ( +    ExtractorError, +)  class FC2IE(InfoExtractor): -    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)' +    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'      IE_NAME = 'fc2'      _TEST = {          'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', @@ -26,9 +27,7 @@ class FC2IE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          self._downloader.cookiejar.clear_session_cookies()  # must clear diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index af439ccfe..3191116d9 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) +from ..utils import ( +    ExtractorError, +)  class FiredriveIE(InfoExtractor): @@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor):      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          url = 'http://firedrive.com/file/%s' % video_id -          webpage = self._download_webpage(url, video_id)          if re.search(self._FILE_DELETED_REGEX, webpage) is not None: diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index b22ce2acb..7187e0752 100644 --- 
a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, -    unified_strdate, -    str_to_int, -    parse_duration, +) +from ..utils import (      clean_html, +    parse_duration, +    str_to_int, +    unified_strdate,  ) @@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) - -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage_url = 'http://www.4tube.com/videos/' + video_id          webpage = self._download_webpage(webpage_url, video_id) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 898e0dda7..0c2972162 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -5,7 +5,7 @@ import json  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_parse_qs,      compat_urlparse,  ) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e0420a48f..bbc760a49 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -6,13 +6,15 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import ( +    compat_urllib_parse_urlparse,      compat_urlparse, -    ExtractorError, +) +from ..utils import (      clean_html, -    parse_duration, -    compat_urllib_parse_urlparse, +    ExtractorError,      int_or_none, +    parse_duration,  ) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index d570e3f6a..47373e215 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -4,9 +4,11 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( 
+from ..compat import (      compat_urllib_parse,      compat_urlparse, +) +from ..utils import (      unescapeHTML,  ) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index de14ae1fb..d453ec010 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index 53714f47f..2bfb99040 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -2,8 +2,10 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse, +) +from ..utils import (      determine_ext,  ) diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 469e1f935..498304cb2 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -4,7 +4,7 @@ import itertools  import re  from .common import SearchInfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,  ) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 1ac1da856..ae24aff84 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -4,11 +4,12 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -    determine_ext, +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import ( +    ExtractorError,      int_or_none,  ) @@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor):          formats = [{              'format_id': 'sd',              'url': video_url, -            'ext': determine_ext(video_url),              
'quality': 1,          }] diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index 8e812b669..704d0285d 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_request, +)  from ..utils import (      ExtractorError, -    compat_urllib_request,      int_or_none,      urlencode_postdata,  ) @@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          url = 'http://hostingbulk.com/{0:}.html'.format(video_id)          # Custom request with cookie to set language to English, so our file diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 6d0d847c6..aa0724a02 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -1,20 +1,20 @@  from __future__ import unicode_literals  import json -import re  import time  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, - +) +from ..utils import (      ExtractorError,  )  class HypemIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' +    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'      _TEST = {          'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',          'md5': 'b9cc91b5af8995e9f0c1cee04c575828', @@ -27,8 +27,7 @@ class HypemIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        track_id = mobj.group(1) +        track_id = self._match_id(url)          data = {'ax': 1, 'ts': time.time()}          data_encoded = compat_urllib_parse.urlencode(data) diff --git a/youtube_dl/extractor/imdb.py 
b/youtube_dl/extractor/imdb.py index f2c1c10f5..13a53a0cb 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -4,7 +4,7 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse,  ) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index e76dd222d..f25f43664 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,10 +1,9 @@  from __future__ import unicode_literals  import base64 -import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,  ) @@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 1e4799187..c813d4b82 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse,      compat_urllib_parse, +) +from ..utils import (      xpath_with_ns,  ) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 4247d6391..8529bedfc 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -6,8 +6,10 @@ from random import random  from math import floor  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 
f0fba1adb..7a400323d 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -5,8 +5,10 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 75b63cffb..97dcb518a 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -4,7 +4,7 @@ import os  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urllib_parse, @@ -15,7 +15,7 @@ from ..aes import (  class KeezMoviesIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)' +    _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'      _TEST = {          'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',          'file': '1214711.mp4', @@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('videoid') +        video_id = self._match_id(url)          req = compat_urllib_request.Request(url)          req.add_header('Cookie', 'age_verified=1') diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 03c4691c6..5247c6f58 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -4,10 +4,12 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str,      compat_urllib_parse_urlparse,      compat_urlparse, +) +from ..utils import (      ExtractorError,      find_xpath_attr,      int_or_none, diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py 
index 2160d6cb0..26e84970d 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -5,12 +5,14 @@ import json  from .subtitles import SubtitlesInfoExtractor  from .common import InfoExtractor -from ..utils import ( +from ..compat import ( +    compat_str,      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      ExtractorError,      int_or_none, -    compat_str,  ) diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py index 1abf6e4f8..0b85a59d1 100644 --- a/youtube_dl/extractor/malemotion.py +++ b/youtube_dl/extractor/malemotion.py @@ -1,43 +1,33 @@ +# coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,  )  class MalemotionIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)' +    _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'      _TEST = { -        'url': 'http://malemotion.com/video/bien-dur.10ew', -        'file': '10ew.mp4', -        'md5': 'b3cc49f953b107e4a363cdff07d100ce', +        'url': 'http://malemotion.com/video/bete-de-concours.ltc', +        'md5': '3013e53a0afbde2878bc39998c33e8a5',          'info_dict': { -            "title": "Bien dur", -            "age_limit": 18, +            'id': 'ltc', +            'ext': 'mp4', +            'title': 'Bête de Concours', +            'age_limit': 18,          }, -        'skip': 'This video has been deleted.'      
}      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group("id") - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        self.report_extraction(video_id) - -        # Extract video URL -        video_url = compat_urllib_parse.unquote( -            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')) - -        # Extract title +        video_url = compat_urllib_parse.unquote(self._search_regex( +            r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))          video_title = self._html_search_regex(              r'<title>(.*?)</title', webpage, 'title') - -        # Extract video thumbnail          video_thumbnail = self._search_regex(              r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False) @@ -47,14 +37,12 @@ class MalemotionIE(InfoExtractor):              'format_id': 'mp4',              'preference': 1,          }] +        self._sort_formats(formats)          return {              'id': video_id,              'formats': formats, -            'uploader': None, -            'upload_date': None,              'title': video_title,              'thumbnail': video_thumbnail, -            'description': None,              'age_limit': 18,          } diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 858c1c0c3..8bc333b02 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_parse_qs,      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      determine_ext,      ExtractorError,      int_or_none, diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 807b1dc89..d354702d1 100644 --- a/youtube_dl/extractor/mit.py +++ 
b/youtube_dl/extractor/mit.py @@ -5,8 +5,10 @@ import json  from .common import InfoExtractor  from .youtube import YoutubeIE -from ..utils import ( +from ..compat import (      compat_urlparse, +) +from ..utils import (      clean_html,      ExtractorError,      get_element_by_id, diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 6691521e5..256758323 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -1,12 +1,13 @@  from __future__ import unicode_literals -import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urlparse, +) +from ..utils import (      get_element_by_attribute,      parse_duration,      strip_jsonp, @@ -15,7 +16,7 @@ from ..utils import (  class MiTeleIE(InfoExtractor):      IE_NAME = 'mitele.es' -    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/' +    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'      _TEST = {          'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', @@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        episode = mobj.group('episode') +        episode = self._match_id(url)          webpage = self._download_webpage(url, episode)          embed_data_json = self._search_regex( -            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', -            flags=re.DOTALL +            r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',          ).replace('\'', '"')          embed_data = json.loads(embed_data_json) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 55cc33a3e..07d194562 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals  import re  from 
.common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, +) +from ..utils import (      ExtractorError,      HEADRequest,      int_or_none, diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 2ff79b9b8..184f9c2c9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,10 +5,12 @@ import json  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import ( +    ExtractorError,      int_or_none,  ) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index d658647e6..2cec12d35 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -4,7 +4,7 @@ import os  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urllib_parse, @@ -12,7 +12,7 @@ from ..utils import (  class MofosexIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)' +    _VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'      _TEST = {          'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',          'md5': '1b2eb47ac33cc75d4a80e3026b613c5a', @@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('videoid') +        video_id = mobj.group('id')          url = 'http://www.' 
+ mobj.group('url')          req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 1c4f589cc..5de719bdc 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,7 +5,7 @@ import os.path  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) @@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor):      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          orig_webpage = self._download_webpage(url, video_id) +          fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)          data = dict(fields) diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 34a4bec3a..9f2853fa3 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -4,11 +4,13 @@ import re  import time  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_request,      compat_urllib_parse,  ) +from ..utils import ( +    ExtractorError, +)  class MooshareIE(InfoExtractor): @@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          page = self._download_webpage(url, video_id, 'Downloading page')          if re.search(r'>Video Not Found or Deleted<', page) is not None: diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 7c0ec6a12..f5ca74e97 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -3,13 +3,14 @@ from __future__ import unicode_literals  import hashlib  import json -import re  import time 
 from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_parse_qs,      compat_str, +) +from ..utils import (      int_or_none,  ) @@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        display_id = mobj.group('id') - +        display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) +          flashvars_code = self._html_search_regex(              r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')          flashvars = compat_parse_qs(flashvars_code) diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py index 456807dd1..04e17d055 100644 --- a/youtube_dl/extractor/movieclips.py +++ b/youtube_dl/extractor/movieclips.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_str, +)  from ..utils import (      ExtractorError, -    compat_str,      clean_html,  ) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index b482d6d4d..5ebc78033 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      ExtractorError,      find_xpath_attr,      fix_xml_ampersands, diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 51e540814..5b9b9fbcd 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -2,9 +2,10 @@ from __future__ import unicode_literals  import os.path  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse, - +) +from ..utils import (      ExtractorError,  ) diff --git 
a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index fbe34defd..c10405f04 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, +) +from ..utils import (      ExtractorError,      clean_html,  ) @@ -26,9 +28,9 @@ class NaverIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group(1) +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +          m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',                           webpage)          if m_id is None: diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index bf5132721..690c46b6a 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,8 +4,10 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str, +) +from ..utils import (      ExtractorError,      find_xpath_attr,  ) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 7ce1d481d..ea077254b 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,9 +1,7 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request,      compat_urllib_parse,  ) @@ -12,7 +10,7 @@ from ..utils import (  class NFBIE(InfoExtractor):      IE_NAME = 'nfb'      IE_DESC = 'National Film Board of Canada' -    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' +    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'      _TEST = {          'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ 
-32,10 +30,10 @@ class NFBIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') +        video_id = self._match_id(url) +        page = self._download_webpage( +            'https://www.nfb.ca/film/%s' % video_id, video_id, +            'Downloading film page')          uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',                                                page, 'director id', fatal=False) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index cc7c921c3..606e2294e 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_parse_urlparse, +)  from ..utils import (      ExtractorError, -    compat_urllib_parse_urlparse,      int_or_none,      remove_end,  ) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 1d9c1a096..4c1890416 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -5,14 +5,16 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request,      compat_urlparse, -    unified_strdate, -    parse_duration, -    int_or_none, +) +from ..utils import (      ExtractorError, +    int_or_none, +    parse_duration, +    unified_strdate,  ) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 7d2ff7b9a..251e6da07 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -6,13 +6,15 @@ import time  import hashlib  from .common import InfoExtractor -from ..utils import ( -    compat_urllib_request, +from 
..compat import ( +    compat_str,      compat_urllib_parse, -    ExtractorError, +    compat_urllib_request, +) +from ..utils import (      clean_html, +    ExtractorError,      unified_strdate, -    compat_str,  ) diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index f3be8f552..f5ef856db 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_request, +)  from ..utils import (      ExtractorError, -    compat_urllib_request,      urlencode_postdata,      xpath_text,      xpath_with_ns, @@ -32,8 +34,7 @@ class NosVideoIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          fields = {              'id': video_id, diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 38d05e466..04d779890 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urlparse, +)  from ..utils import (      ExtractorError, -    compat_urlparse  ) diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index 449c8a6a3..57928f2ae 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -3,15 +3,17 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_request, +)  from ..utils import (      parse_duration,      unified_strdate, -    compat_urllib_request,  )  class NuvidIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'      
_TEST = {          'url': 'http://m.nuvid.com/video/1310741/',          'md5': 'eab207b7ac4fccfb4e23c86201f11277', @@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          formats = [] diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index b4389e0b6..c66db3cdc 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -4,16 +4,17 @@ import json  import re  from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse  class PhotobucketIE(InfoExtractor):      _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'      _TEST = {          'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', -        'file': 'zpsc0c3b9fa.mp4',          'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',          'info_dict': { +            'id': 'zpsc0c3b9fa', +            'ext': 'mp4',              'timestamp': 1367669341,              'upload_date': '20130504',              'uploader': 'rachaneronas', diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 17880471d..449d4836c 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,11 +5,13 @@ import re  import os.path  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) +from ..utils import ( +    ExtractorError, +)  class PlayedIE(InfoExtractor): @@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) -          
orig_webpage = self._download_webpage(url, video_id)          m_error = re.search( diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index ebc046804..9576aed0e 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      ExtractorError,      float_or_none,      int_or_none, diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index cd3905acb..c3e667e9e 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -3,31 +3,31 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_parse, +)  from ..utils import ( -    ExtractorError,      clean_html, -    compat_urllib_parse, +    ExtractorError,  )  class PlayvidIE(InfoExtractor): -    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' +    _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'      _TEST = { -        'url': 'http://www.playvid.com/watch/agbDDi7WZTV', -        'md5': '44930f8afa616efdf9482daf4fe53e1e', +        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', +        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',          'info_dict': { -            'id': 'agbDDi7WZTV', +            'id': 'RnmBNgtrrJu',              'ext': 'mp4', -            'title': 'Michelle Lewin in Miami Beach', -            'duration': 240, +            'title': 'md5:9256d01c6317e3f703848b5906880dc8', +            'duration': 82,              'age_limit': 18,          }      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, 
video_id)          m_error = re.search( diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 2ca15b717..634142d0d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -4,10 +4,12 @@ import os  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import ( +    compat_urllib_parse,      compat_urllib_parse_urlparse,      compat_urllib_request, -    compat_urllib_parse, +) +from ..utils import (      str_to_int,  )  from ..aes import ( @@ -16,7 +18,7 @@ from ..aes import (  class PornHubIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)' +    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'      _TEST = {          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',          'md5': '882f488fa1f0026f023f33576004a2ed', diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 7fcde086c..f536e6e6c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -    determine_ext, +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) +from ..utils import ( +    determine_ext, +    ExtractorError, +)  class PromptFileIE(InfoExtractor): diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1262793c8..385681d06 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,8 +5,10 @@ import re  from hashlib import sha1  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, +) +from ..utils import (      unified_strdate,  ) diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py 
index 3bc78060d..af7d76cf4 100644 --- a/youtube_dl/extractor/quickvid.py +++ b/youtube_dl/extractor/quickvid.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse, +) +from ..utils import (      determine_ext,      int_or_none,  ) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 2d39ecfe4..aa26b7e0b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals  import re  from .subtitles import SubtitlesInfoExtractor +from ..compat import ( +    compat_urllib_parse, +)  from ..utils import (      parse_duration,      unified_strdate, -    compat_urllib_parse,  ) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index dc59a5e5c..5e84c1098 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_str, +)  from ..utils import (      int_or_none,      parse_duration,      parse_iso8601,      unescapeHTML, -    compat_str,  ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 6941d96fb..b72b5a586 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -5,10 +5,12 @@ import re  import itertools  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str, -    unified_strdate, +) +from ..utils import (      ExtractorError, +    unified_strdate,  ) @@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          video = self._download_json(              'http://rutube.ru/api/video/%s/?format=json' % video_id,              
video_id, 'Downloading video JSON') @@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE):      _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        movie_id = mobj.group('id') +        movie_id = self._match_id(url)          movie = self._download_json(              self._MOVIE_TEMPLATE % movie_id, movie_id,              'Downloading movie JSON') diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c145f6fc7..dfd897ba3 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,14 +1,14 @@  # -*- coding: utf-8 -*-  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_parse_qs,      compat_urllib_request,  ) +from ..utils import ( +    ExtractorError, +)  class ScreencastIE(InfoExtractor): @@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          video_url = self._html_search_regex( diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index fdc31603a..26ced716e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -4,10 +4,12 @@ import re  import base64  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_parse, +    compat_urllib_request, +)  from ..utils import (      ExtractorError, -    compat_urllib_request, -    compat_urllib_parse,      int_or_none,  ) @@ -26,26 +28,30 @@ class SharedIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        page = 
self._download_webpage(url, video_id) - -        if re.search(r'>File does not exist<', page) is not None: -            raise ExtractorError('Video %s does not exist' % video_id, expected=True) +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) -        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page)) +        if '>File does not exist<' in webpage: +            raise ExtractorError( +                'Video %s does not exist' % video_id, expected=True) -        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form)) +        download_form = dict(re.findall( +            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage)) +        request = compat_urllib_request.Request( +            url, compat_urllib_parse.urlencode(download_form))          request.add_header('Content-Type', 'application/x-www-form-urlencoded') -        video_page = self._download_webpage(request, video_id, 'Downloading video page') +        video_page = self._download_webpage( +            request, video_id, 'Downloading video page') -        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL') -        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8') -        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False)) +        video_url = self._html_search_regex( +            r'data-url="([^"]+)"', video_page, 'video URL') +        title = base64.b64decode(self._html_search_meta( +            'full:title', webpage, 'title')).decode('utf-8') +        filesize = int_or_none(self._html_search_meta( +            'full:size', webpage, 'file size', fatal=False))          thumbnail = self._html_search_regex( -            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None) +            r'data-poster="([^"]+)"', video_page, 
'thumbnail', default=None)          return {              'id': video_id, diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py index 7531e8325..ac3e3adf2 100644 --- a/youtube_dl/extractor/sharesix.py +++ b/youtube_dl/extractor/sharesix.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      parse_duration,  ) diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index 5eadbb7ea..a63d126d4 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request,      compat_urllib_parse,  ) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 5864b9936..e7d776e7b 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -4,8 +4,10 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 646af3cc9..d031fe401 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,9 +7,11 @@ import hashlib  import uuid  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      ExtractorError,      int_or_none,      unified_strdate, diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py index c663e56d4..7d3c0e937 100644 --- a/youtube_dl/extractor/sockshare.py +++ b/youtube_dl/extractor/sockshare.py @@ -1,13 +1,16 @@  # coding: utf-8  from 
__future__ import unicode_literals -from ..utils import ( -    ExtractorError, +import re + +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      determine_ext, +    ExtractorError,  ) -import re  from .common import InfoExtractor @@ -27,9 +30,7 @@ class SockshareIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          url = 'http://sockshare.com/file/%s' % video_id          webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index ab9483d2d..5d60c4939 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -5,11 +5,12 @@ import re  import itertools  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str,      compat_urlparse,      compat_urllib_parse, - +) +from ..utils import (      ExtractorError,      int_or_none,      unified_strdate, diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 94602e89e..b936202f6 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import ( +    compat_urllib_parse,      compat_urllib_parse_urlparse,      compat_urllib_request, -    compat_urllib_parse, -    unified_strdate, +) +from ..utils import (      str_to_int, +    unified_strdate,  )  from ..aes import aes_decrypt_text diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index 057ef5251..2f57f5b7c 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals  import re  from .common 
import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      parse_iso8601,  ) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index c1178f26d..38176498d 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -5,7 +5,7 @@ import re  import time  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index 778704839..c3ceb5f76 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,13 +1,7 @@  # -*- coding: utf-8 -*-  from __future__ import unicode_literals -import re -import json -  from .common import InfoExtractor -from ..compat import ( -    compat_str, -)  from ..utils import (      int_or_none,  ) diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py index 283e11350..f1f43d0a7 100644 --- a/youtube_dl/extractor/tapely.py +++ b/youtube_dl/extractor/tapely.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_urllib_request, +)  from ..utils import ( -    ExtractorError,      clean_html, -    compat_urllib_request, +    ExtractorError,      float_or_none,      parse_iso8601,  ) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 72160503c..944177426 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -5,7 +5,7 @@ import re  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( +from ..compat import (      compat_str,  ) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index e2653d62d..af6ef0033 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -4,8 +4,10 @@ import re  
import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str, +) +from ..utils import (      determine_ext,      ExtractorError,      xpath_with_ns, diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 66d159e99..9f9e388c5 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,7 +5,7 @@ import re  from .common import InfoExtractor  from .brightcove import BrightcoveIE  from .discovery import DiscoveryIE -from ..utils import compat_urlparse +from ..compat import compat_urlparse  class TlcIE(DiscoveryIE): diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 64a1e9030..d73ad3762 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -4,9 +4,11 @@ import json  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse,      compat_urllib_request, +) +from ..utils import (      int_or_none,      str_to_int,  ) diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index d516b6427..4de0aac52 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,10 +1,9 @@  from __future__ import unicode_literals  import base64 -import re  from .common import InfoExtractor -from ..utils import compat_parse_qs +from ..compat import compat_parse_qs  class TutvIE(InfoExtractor): @@ -20,10 +19,9 @@ class TutvIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +          internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')          data_content = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 397d167e8..715f9930c 100644 --- 
a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -5,9 +5,11 @@ import itertools  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      ExtractorError,      parse_iso8601,  ) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5271611ac..4667ed83b 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/urort.py b/youtube_dl/extractor/urort.py index 249a34c86..8872cfcb2 100644 --- a/youtube_dl/extractor/urort.py +++ b/youtube_dl/extractor/urort.py @@ -1,11 +1,11 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, +) +from ..utils import (      unified_strdate,  ) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 53dc3a496..68d03b999 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse,  ) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 455b6d9da..dd026748d 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,19 +1,18 @@  # encoding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request, - +) +from ..utils import (      
ExtractorError,  )  class Vbox7IE(InfoExtractor): -    _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)' +    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'      _TEST = {          'url': 'http://vbox7.com/play:249bb972c2',          'md5': '99f65c0c9ef9b682b97313e052734c3f', @@ -25,8 +24,7 @@ class Vbox7IE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          redirect_page, urlh = self._download_webpage_handle(url, video_id)          new_location = self._search_regex(r'window\.location = \'(.*)\';', diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 94647d1c8..815f58468 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -4,10 +4,12 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urlparse, -    get_element_by_id, +) +from ..utils import (      clean_html, +    get_element_by_id,  ) @@ -26,8 +28,7 @@ class VeeHDIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          # VeeHD seems to send garbage on the first request.          
# See https://github.com/rg3/youtube-dl/issues/2102 diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index a7953a7e7..01e258e32 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -4,8 +4,10 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      int_or_none,      ExtractorError,  ) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c912c3cbe..43f6b029d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,8 +4,10 @@ import re  import xml.etree.ElementTree  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      ExtractorError,  ) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index ac6c25537..0ffc7ff7d 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,10 +1,8 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor +from ..compat import compat_urlparse  from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import compat_urlparse  class VideoDetectiveIE(InfoExtractor): @@ -17,13 +15,12 @@ class VideoDetectiveIE(InfoExtractor):              'ext': 'mp4',              'title': 'KICK-ASS 2',              'description': 'md5:65ba37ad619165afac7d432eaded6013', -            'duration': 135, +            'duration': 138,          },      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          og_video = self._og_search_video_url(webpage)          query = compat_urlparse.urlparse(og_video).query diff --git a/youtube_dl/extractor/videomega.py 
b/youtube_dl/extractor/videomega.py index 29c4e0101..7a78f0d26 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -1,11 +1,11 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, +) +from ..utils import (      remove_start,  ) @@ -27,9 +27,7 @@ class VideoMegaIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)          webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ca6b0d5b3..542e9198a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -5,14 +5,17 @@ import re  import json  from .common import InfoExtractor +from ..compat import ( +    compat_str, +    compat_urllib_parse, +    compat_urllib_request, +)  from ..utils import (      ExtractorError, -    compat_urllib_request, -    compat_urllib_parse, -    compat_str, +    orderedSet,      unescapeHTML,      unified_strdate, -    orderedSet) +)  class VKIE(InfoExtractor): diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index affef6507..1c0966a79 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,8 +2,9 @@  from __future__ import unicode_literals  import re +  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,      compat_urllib_request,  ) @@ -24,8 +25,7 @@ class VodlockerIE(InfoExtractor):      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)         
 fields = dict(re.findall(r'''(?x)<input\s+ diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 1b2f731e9..405cb9db4 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_str, +)  from ..utils import (      int_or_none, -    compat_str,      ExtractorError,  ) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index ec3c010ad..c3fde53f5 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse, +) +from ..utils import (      ExtractorError,      parse_duration,      qualities, @@ -25,10 +27,9 @@ class VuClipIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +          ad_m = re.search(              r'''value="No.*?" 
onClick="location.href='([^"']+)'"''', webpage)          if ad_m: diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 93a6e6454..8e25ecf28 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_parse_qs,      compat_urlparse, +) +from ..utils import (      determine_ext,      unified_strdate,  ) @@ -141,7 +143,6 @@ class WDRMobileIE(InfoExtractor):              'title': mobj.group('title'),              'age_limit': int(mobj.group('age_limit')),              'url': url, -            'ext': determine_ext(url),              'user_agent': 'mobile',          } diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 748443f81..13a079151 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,9 +1,8 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ExtractorError, compat_urllib_request +from ..compat import compat_urllib_request +from ..utils import ExtractorError  class WistiaIE(InfoExtractor): @@ -22,8 +21,7 @@ class WistiaIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          request = compat_urllib_request.Request(self._API_URL.format(video_id))          request.add_header('Referer', url)  # Some videos require this. 
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index 1b4e88365..80c48c37d 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -1,9 +1,7 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,  ) @@ -23,10 +21,9 @@ class XBefIE(InfoExtractor):      }      def _real_extract(self, url): -        m = re.match(self._VALID_URL, url) -        video_id = m.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +          title = self._html_search_regex(              r'<h1[^>]*>(.*?)</h1>', webpage, 'title') diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 53ed7ef5a..79ed6c744 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,10 +1,8 @@  # encoding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse,  ) @@ -23,10 +21,7 @@ class XNXXIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        # Get webpage content +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          video_url = self._search_regex(r'flv_url=(.*?)&', diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 38448e7c0..f9d98b83f 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -4,15 +4,17 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_request, +) +from ..utils import (      parse_duration,      str_to_int,  )  class XTubeIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))' +    
_VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&]+))'      _TEST = {          'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',          'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', @@ -29,7 +31,7 @@ class XTubeIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('videoid') +        video_id = mobj.group('id')          url = 'http://www.' + mobj.group('url')          req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 7e0044824..2a45dc574 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -3,15 +3,17 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse, -    ExtractorError, +) +from ..utils import (      clean_html, +    ExtractorError,  )  class XVideosIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' +    _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'      _TEST = {          'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',          'md5': '4b46ae6ea5e6e9086e714d883313c0c9', @@ -24,37 +26,25 @@ class XVideosIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group(1) - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        self.report_extraction(video_id) -          mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)          if mobj:              raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) -        # Extract video URL          video_url = compat_urllib_parse.unquote(              self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) - -        # 
Extract title          video_title = self._html_search_regex(              r'<title>(.*?)\s+-\s+XVID', webpage, 'title') - -        # Extract video thumbnail          video_thumbnail = self._search_regex(              r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)          return {              'id': video_id,              'url': video_url, -            'uploader': None, -            'upload_date': None,              'title': video_title,              'ext': 'flv',              'thumbnail': video_thumbnail, -            'description': None,              'age_limit': 18,          } diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 0fdb12243..031226f27 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -6,11 +6,13 @@ import json  import re  from .common import InfoExtractor, SearchInfoExtractor -from ..utils import ( -    ExtractorError, +from ..compat import (      compat_urllib_parse,      compat_urlparse, +) +from ..utils import (      clean_html, +    ExtractorError,      int_or_none,  ) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 7b621a9e3..894678a23 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -5,7 +5,7 @@ import re  import json  from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse  class YnetIE(InfoExtractor): diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d9c06a2ee..107c9ac36 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -6,10 +6,11 @@ import re  import sys  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_urllib_parse_urlparse,      compat_urllib_request, - +) +from ..utils import (      ExtractorError,      unescapeHTML,      unified_strdate, diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 2bd264b30..e60505ace 
100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,8 +4,8 @@ import collections  import io  import zlib +from .compat import compat_str  from .utils import ( -    compat_str,      ExtractorError,      struct_unpack,  ) | 
