aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2018-05-02 07:18:01 +0700
committer: Sergey M․ <dstftw@gmail.com> 2018-05-02 07:20:59 +0700
commit: 5f95927a62a533b9e616abb5f1481cedeaa16a4a (patch)
tree: 13e1cbddf07dd4259211ede5df0d2705604ca5b1 /youtube_dl
parent: a93ce61bd5cbe7779e4eff0f8ab74a8a02211285 (diff)
Improve geo bypass mechanism
* Introduce geo bypass context
* Add ability to bypass based on IP blocks in CIDR notation
* Introduce --geo-bypass-ip-block
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x  youtube_dl/YoutubeDL.py  3
-rw-r--r--  youtube_dl/__init__.py  1
-rw-r--r--  youtube_dl/extractor/anvato.py  4
-rw-r--r--  youtube_dl/extractor/brightcove.py  5
-rw-r--r--  youtube_dl/extractor/common.py  97
-rw-r--r--  youtube_dl/extractor/dplay.py  4
-rw-r--r--  youtube_dl/extractor/go.py  2
-rw-r--r--  youtube_dl/extractor/limelight.py  4
-rw-r--r--  youtube_dl/extractor/tvplay.py  6
-rw-r--r--  youtube_dl/options.py  4
-rw-r--r--  youtube_dl/utils.py  11
11 files changed, 113 insertions, 28 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ad3598805..f1a359011 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -286,6 +286,9 @@ class YoutubeDL(object):
Two-letter ISO 3166-2 country code that will be used for
explicit geographic restriction bypassing via faking
X-Forwarded-For HTTP header (experimental)
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country (experimental)
The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call.
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 9bb952457..ba435ea42 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -430,6 +430,7 @@ def _real_main(argv=None):
'config_location': opts.config_location,
'geo_bypass': opts.geo_bypass,
'geo_bypass_country': opts.geo_bypass_country,
+ 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
# just for deprecation check
'autonumber': opts.autonumber if opts.autonumber is True else None,
'usetitle': opts.usetitle if opts.usetitle is True else None,
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 7a29cd2c6..f6a78eb5d 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -277,7 +277,9 @@ class AnvatoIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0e4eaef65..ab62e54d6 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -669,7 +669,10 @@ class BrightcoveNewIE(AdobePassIE):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
+ })
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a9939b0fd..3ef5af13c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -346,6 +346,11 @@ class InfoExtractor(object):
geo restriction bypass mechanism right away in order to bypass
geo restriction, of course, if the mechanism is not disabled. (experimental)
+ _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+ IP blocks in CIDR notation for this extractor. One of these IP blocks
+ will be used by geo restriction bypass mechanism similarly
+ to _GEO_COUNTRIES. (experimental)
+
NB: both these geo attributes are experimental and may change in future
or be completely removed.
@@ -358,6 +363,7 @@ class InfoExtractor(object):
_x_forwarded_for_ip = None
_GEO_BYPASS = True
_GEO_COUNTRIES = None
+ _GEO_IP_BLOCKS = None
_WORKING = True
def __init__(self, downloader=None):
@@ -392,12 +398,15 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
- self._initialize_geo_bypass(self._GEO_COUNTRIES)
+ self._initialize_geo_bypass({
+ 'countries': self._GEO_COUNTRIES,
+ 'ip_blocks': self._GEO_IP_BLOCKS,
+ })
if not self._ready:
self._real_initialize()
self._ready = True
- def _initialize_geo_bypass(self, countries):
+ def _initialize_geo_bypass(self, geo_bypass_context):
"""
Initialize geo restriction bypass mechanism.
@@ -408,28 +417,82 @@ class InfoExtractor(object):
HTTP requests.
This method will be used for initial geo bypass mechanism initialization
- during the instance initialization with _GEO_COUNTRIES.
+ during the instance initialization with _GEO_COUNTRIES and
+ _GEO_IP_BLOCKS.
- You may also manually call it from extractor's code if geo countries
+ You may also manually call it from extractor's code if geo bypass
information is not available beforehand (e.g. obtained during
- extraction) or due to some another reason.
+ extraction) or due to some other reason. In this case you should pass
+ this information in geo bypass context passed as first argument. It may
+ contain following fields:
+
+ countries: List of geo unrestricted countries (similar
+ to _GEO_COUNTRIES)
+ ip_blocks: List of geo unrestricted IP blocks in CIDR notation
+ (similar to _GEO_IP_BLOCKS)
+
"""
if not self._x_forwarded_for_ip:
- country_code = self._downloader.params.get('geo_bypass_country', None)
- # If there is no explicit country for geo bypass specified and
- # the extractor is known to be geo restricted let's fake IP
- # as X-Forwarded-For right away.
- if (not country_code and
- self._GEO_BYPASS and
- self._downloader.params.get('geo_bypass', True) and
- countries):
- country_code = random.choice(countries)
- if country_code:
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+
+ # Geo bypass mechanism is explicitly disabled by user
+ if not self._downloader.params.get('geo_bypass', True):
+ return
+
+ if not geo_bypass_context:
+ geo_bypass_context = {}
+
+ # Backward compatibility: previously _initialize_geo_bypass
+ # expected a list of countries, some 3rd party code may still use
+ # it this way
+ if isinstance(geo_bypass_context, (list, tuple)):
+ geo_bypass_context = {
+ 'countries': geo_bypass_context,
+ }
+
+ # The whole point of geo bypass mechanism is to fake IP
+ # as X-Forwarded-For HTTP header based on some IP block or
+ # country code.
+
+ # Path 1: bypassing based on IP block in CIDR notation
+
+ # Explicit IP block specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+
+ # Otherwise use random IP block from geo bypass context but only
+ # if extractor is known as geo bypassable
+ if not ip_block:
+ ip_blocks = geo_bypass_context.get('ip_blocks')
+ if self._GEO_BYPASS and ip_blocks:
+ ip_block = random.choice(ip_blocks)
+
+ if ip_block:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s as X-Forwarded-For.'
+ % self._x_forwarded_for_ip)
+ return
+
+ # Path 2: bypassing based on country code
+
+ # Explicit country code specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ country = self._downloader.params.get('geo_bypass_country', None)
+
+ # Otherwise use random country code from geo bypass context but
+ # only if extractor is known as geo bypassable
+ if not country:
+ countries = geo_bypass_context.get('countries')
+ if self._GEO_BYPASS and countries:
+ country = random.choice(countries)
+
+ if country:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
if self._downloader.params.get('verbose', False):
self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country_code.upper()))
+ % (self._x_forwarded_for_ip, country.upper()))
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py
index b73446773..8e0374320 100644
--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@@ -102,7 +102,9 @@ class DPlayIE(InfoExtractor):
display_id = mobj.group('id')
domain = mobj.group('domain')
- self._initialize_geo_bypass([mobj.group('country').upper()])
+ self._initialize_geo_bypass({
+ 'countries': [mobj.group('country').upper()],
+ })
webpage = self._download_webpage(url, display_id)
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index 9c7b1bd37..e781405f2 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -123,7 +123,7 @@ class GoIE(AdobePassIE):
'adobe_requestor_id': requestor_id,
})
else:
- self._initialize_geo_bypass(['US'])
+ self._initialize_geo_bypass({'countries': ['US']})
entitlement = self._download_json(
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
video_id, data=urlencode_postdata(data))
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index 2803d7e8d..729d8de50 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -282,7 +282,9 @@ class LimelightMediaIE(LimelightBaseIE):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
pc, mobile, metadata = self._extract(
video_id, 'getPlaylistByMediaId',
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index 84597b55e..e09b5f804 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -227,14 +227,16 @@ class TVPlayIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
video_id = self._match_id(url)
geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url,
'geo country', default=None)
if geo_country:
- self._initialize_geo_bypass([geo_country.upper()])
+ self._initialize_geo_bypass({'countries': [geo_country.upper()]})
video = self._download_json(
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 3e4ac03a2..f3f8f23b6 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -249,6 +249,10 @@ def parseOpts(overrideArguments=None):
'--geo-bypass-country', metavar='CODE',
dest='geo_bypass_country', default=None,
help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
+ geo.add_option(
+ '--geo-bypass-ip-block', metavar='IP_BLOCK',
+ dest='geo_bypass_ip_block', default=None,
+ help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation (experimental)')
selection = optparse.OptionGroup(parser, 'Video Selection')
selection.add_option(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b460393bf..f9ca63c58 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3534,10 +3534,13 @@ class GeoUtils(object):
}
@classmethod
- def random_ipv4(cls, code):
- block = cls._country_ip_map.get(code.upper())
- if not block:
- return None
+ def random_ipv4(cls, code_or_block):
+ if len(code_or_block) == 2:
+ block = cls._country_ip_map.get(code_or_block.upper())
+ if not block:
+ return None
+ else:
+ block = code_or_block
addr, preflen = block.split('/')
addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))