aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2017-02-04 18:49:58 +0700
committerSergey M <dstftw@gmail.com>2017-02-19 05:10:08 +0800
commit773f291dcbce486fefe24e1abd29735d374d0a9e (patch)
tree0485bd4536891278d6928c66dc59cfda21664ee1
parentbf5b9d859a1f2a68fda0dc57eb839448c7571dfa (diff)
downloadyoutube-dl-773f291dcbce486fefe24e1abd29735d374d0a9e.tar.xz
Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header
-rwxr-xr-xyoutube_dl/YoutubeDL.py17
-rw-r--r--youtube_dl/__init__.py2
-rw-r--r--youtube_dl/extractor/common.py48
-rw-r--r--youtube_dl/options.py12
-rw-r--r--youtube_dl/utils.py267
5 files changed, 340 insertions, 6 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a7bf5a1b0..ebace6b57 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -56,6 +56,8 @@ from .utils import (
ExtractorError,
format_bytes,
formatSeconds,
+ GeoRestrictedError,
+ ISO3166Utils,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
@@ -272,6 +274,13 @@ class YoutubeDL(object):
If it returns None, the video is downloaded.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
+ bypass_geo_restriction:
+ Bypass geographic restriction via faking X-Forwarded-For
+ HTTP header (experimental)
+ bypass_geo_restriction_as_country:
+ Two-letter ISO 3166-2 country code that will be used for
+ explicit geographic restriction bypassing via faking
+ X-Forwarded-For HTTP header (experimental)
The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call.
@@ -707,6 +716,14 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+ self.report_error(msg)
+ break
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
break
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 5c5b8094b..94f461a78 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -414,6 +414,8 @@ def _real_main(argv=None):
'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy,
'config_location': opts.config_location,
+ 'bypass_geo_restriction': opts.bypass_geo_restriction,
+ 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f6ff56eda..96815099d 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -6,6 +6,7 @@ import hashlib
import json
import netrc
import os
+import random
import re
import socket
import sys
@@ -39,6 +40,8 @@ from ..utils import (
ExtractorError,
fix_xml_ampersands,
float_or_none,
+ GeoRestrictedError,
+ GeoUtils,
int_or_none,
js_to_json,
parse_iso8601,
@@ -320,17 +323,25 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
+ _BYPASS_GEO attribute may be set to False in order to disable
+ geo restriction bypass mechanisms for a particular extractor.
+ Though it won't disable explicit geo restriction bypass based on
+ country code provided with bypass_geo_restriction_as_country.
+
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
_ready = False
_downloader = None
+ _x_forwarded_for_ip = None
+ _BYPASS_GEO = True
_WORKING = True
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
self._ready = False
+ self._x_forwarded_for_ip = None
self.set_downloader(downloader)
@classmethod
@@ -359,6 +370,10 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
+ if not self._x_forwarded_for_ip:
+ country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None)
+ if country_code:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if not self._ready:
self._real_initialize()
self._ready = True
@@ -366,8 +381,22 @@ class InfoExtractor(object):
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
try:
- self.initialize()
- return self._real_extract(url)
+ for _ in range(2):
+ try:
+ self.initialize()
+ return self._real_extract(url)
+ except GeoRestrictedError as e:
+ if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and
+ self._BYPASS_GEO and
+ self._downloader.params.get('bypass_geo_restriction', True) and
+ not self._x_forwarded_for_ip and
+ e.countries):
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+ continue
+ raise
except ExtractorError:
raise
except compat_http_client.IncompleteRead as e:
@@ -434,6 +463,15 @@ class InfoExtractor(object):
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
+ # Some sites check X-Forwarded-For HTTP header in order to figure out
+ # the origin of the client behind proxy. This allows bypassing geo
+ # restriction by faking this header's value to IP that belongs to some
+ # geo unrestricted country. We will do so once we encounter any
+ # geo restriction error.
+ if self._x_forwarded_for_ip:
+ if 'X-Forwarded-For' not in headers:
+ headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
if urlh is False:
assert not fatal
@@ -609,10 +647,8 @@ class InfoExtractor(object):
expected=True)
@staticmethod
- def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
- raise ExtractorError(
- '%s. You might want to use --proxy to workaround.' % msg,
- expected=True)
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+ raise GeoRestrictedError(msg, countries=countries)
# Methods for following #608
@staticmethod
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index deff54324..2e194f6dc 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -549,6 +549,18 @@ def parseOpts(overrideArguments=None):
'Upper bound of a range for randomized sleep before each download '
'(maximum possible number of seconds to sleep). Must only be used '
'along with --min-sleep-interval.'))
+ workarounds.add_option(
+ '--bypass-geo',
+ action='store_true', dest='bypass_geo_restriction', default=True,
+ help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+ workarounds.add_option(
+ '--no-bypass-geo',
+ action='store_false', dest='bypass_geo_restriction', default=True,
+ help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+ workarounds.add_option(
+ '--bypass-geo-as-country', metavar='CODE',
+ dest='bypass_geo_restriction_as_country', default=None,
+ help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
verbosity.add_option(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 3f9e592e3..4e76b6b7b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -23,6 +23,7 @@ import operator
import os
import pipes
import platform
+import random
import re
import socket
import ssl
@@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError):
pass
+class GeoRestrictedError(ExtractorError):
+ """Geographic restriction Error exception.
+
+ This exception may be thrown when a video is not available from your
+ geographic location due to geographic restrictions imposed by a website.
+ """
+ def __init__(self, msg, countries=None):
+ super(GeoRestrictedError, self).__init__(msg, expected=True)
+ self.msg = msg
+ self.countries = countries
+
+
class DownloadError(YoutubeDLError):
"""Download Error exception.
@@ -3027,6 +3040,260 @@ class ISO3166Utils(object):
return cls._country_map.get(code.upper())
+class GeoUtils(object):
+ # Major IPv4 address blocks per country
+ _country_ip_map = {
+ 'AD': '85.94.160.0/19',
+ 'AE': '94.200.0.0/13',
+ 'AF': '149.54.0.0/17',
+ 'AG': '209.59.64.0/18',
+ 'AI': '204.14.248.0/21',
+ 'AL': '46.99.0.0/16',
+ 'AM': '46.70.0.0/15',
+ 'AO': '105.168.0.0/13',
+ 'AP': '159.117.192.0/21',
+ 'AR': '181.0.0.0/12',
+ 'AS': '202.70.112.0/20',
+ 'AT': '84.112.0.0/13',
+ 'AU': '1.128.0.0/11',
+ 'AW': '181.41.0.0/18',
+ 'AZ': '5.191.0.0/16',
+ 'BA': '31.176.128.0/17',
+ 'BB': '65.48.128.0/17',
+ 'BD': '114.130.0.0/16',
+ 'BE': '57.0.0.0/8',
+ 'BF': '129.45.128.0/17',
+ 'BG': '95.42.0.0/15',
+ 'BH': '37.131.0.0/17',
+ 'BI': '154.117.192.0/18',
+ 'BJ': '137.255.0.0/16',
+ 'BL': '192.131.134.0/24',
+ 'BM': '196.12.64.0/18',
+ 'BN': '156.31.0.0/16',
+ 'BO': '161.56.0.0/16',
+ 'BQ': '161.0.80.0/20',
+ 'BR': '152.240.0.0/12',
+ 'BS': '24.51.64.0/18',
+ 'BT': '119.2.96.0/19',
+ 'BW': '168.167.0.0/16',
+ 'BY': '178.120.0.0/13',
+ 'BZ': '179.42.192.0/18',
+ 'CA': '99.224.0.0/11',
+ 'CD': '41.243.0.0/16',
+ 'CF': '196.32.200.0/21',
+ 'CG': '197.214.128.0/17',
+ 'CH': '85.0.0.0/13',
+ 'CI': '154.232.0.0/14',
+ 'CK': '202.65.32.0/19',
+ 'CL': '152.172.0.0/14',
+ 'CM': '165.210.0.0/15',
+ 'CN': '36.128.0.0/10',
+ 'CO': '181.240.0.0/12',
+ 'CR': '201.192.0.0/12',
+ 'CU': '152.206.0.0/15',
+ 'CV': '165.90.96.0/19',
+ 'CW': '190.88.128.0/17',
+ 'CY': '46.198.0.0/15',
+ 'CZ': '88.100.0.0/14',
+ 'DE': '53.0.0.0/8',
+ 'DJ': '197.241.0.0/17',
+ 'DK': '87.48.0.0/12',
+ 'DM': '192.243.48.0/20',
+ 'DO': '152.166.0.0/15',
+ 'DZ': '41.96.0.0/12',
+ 'EC': '186.68.0.0/15',
+ 'EE': '90.190.0.0/15',
+ 'EG': '156.160.0.0/11',
+ 'ER': '196.200.96.0/20',
+ 'ES': '88.0.0.0/11',
+ 'ET': '196.188.0.0/14',
+ 'EU': '2.16.0.0/13',
+ 'FI': '91.152.0.0/13',
+ 'FJ': '144.120.0.0/16',
+ 'FM': '119.252.112.0/20',
+ 'FO': '88.85.32.0/19',
+ 'FR': '90.0.0.0/9',
+ 'GA': '41.158.0.0/15',
+ 'GB': '25.0.0.0/8',
+ 'GD': '74.122.88.0/21',
+ 'GE': '31.146.0.0/16',
+ 'GF': '161.22.64.0/18',
+ 'GG': '62.68.160.0/19',
+ 'GH': '45.208.0.0/14',
+ 'GI': '85.115.128.0/19',
+ 'GL': '88.83.0.0/19',
+ 'GM': '160.182.0.0/15',
+ 'GN': '197.149.192.0/18',
+ 'GP': '104.250.0.0/19',
+ 'GQ': '105.235.224.0/20',
+ 'GR': '94.64.0.0/13',
+ 'GT': '168.234.0.0/16',
+ 'GU': '168.123.0.0/16',
+ 'GW': '197.214.80.0/20',
+ 'GY': '181.41.64.0/18',
+ 'HK': '113.252.0.0/14',
+ 'HN': '181.210.0.0/16',
+ 'HR': '93.136.0.0/13',
+ 'HT': '148.102.128.0/17',
+ 'HU': '84.0.0.0/14',
+ 'ID': '39.192.0.0/10',
+ 'IE': '87.32.0.0/12',
+ 'IL': '79.176.0.0/13',
+ 'IM': '5.62.80.0/20',
+ 'IN': '117.192.0.0/10',
+ 'IO': '203.83.48.0/21',
+ 'IQ': '37.236.0.0/14',
+ 'IR': '2.176.0.0/12',
+ 'IS': '82.221.0.0/16',
+ 'IT': '79.0.0.0/10',
+ 'JE': '87.244.64.0/18',
+ 'JM': '72.27.0.0/17',
+ 'JO': '176.29.0.0/16',
+ 'JP': '126.0.0.0/8',
+ 'KE': '105.48.0.0/12',
+ 'KG': '158.181.128.0/17',
+ 'KH': '36.37.128.0/17',
+ 'KI': '103.25.140.0/22',
+ 'KM': '197.255.224.0/20',
+ 'KN': '198.32.32.0/19',
+ 'KP': '175.45.176.0/22',
+ 'KR': '175.192.0.0/10',
+ 'KW': '37.36.0.0/14',
+ 'KY': '64.96.0.0/15',
+ 'KZ': '2.72.0.0/13',
+ 'LA': '115.84.64.0/18',
+ 'LB': '178.135.0.0/16',
+ 'LC': '192.147.231.0/24',
+ 'LI': '82.117.0.0/19',
+ 'LK': '112.134.0.0/15',
+ 'LR': '41.86.0.0/19',
+ 'LS': '129.232.0.0/17',
+ 'LT': '78.56.0.0/13',
+ 'LU': '188.42.0.0/16',
+ 'LV': '46.109.0.0/16',
+ 'LY': '41.252.0.0/14',
+ 'MA': '105.128.0.0/11',
+ 'MC': '88.209.64.0/18',
+ 'MD': '37.246.0.0/16',
+ 'ME': '178.175.0.0/17',
+ 'MF': '74.112.232.0/21',
+ 'MG': '154.126.0.0/17',
+ 'MH': '117.103.88.0/21',
+ 'MK': '77.28.0.0/15',
+ 'ML': '154.118.128.0/18',
+ 'MM': '37.111.0.0/17',
+ 'MN': '49.0.128.0/17',
+ 'MO': '60.246.0.0/16',
+ 'MP': '202.88.64.0/20',
+ 'MQ': '109.203.224.0/19',
+ 'MR': '41.188.64.0/18',
+ 'MS': '208.90.112.0/22',
+ 'MT': '46.11.0.0/16',
+ 'MU': '105.16.0.0/12',
+ 'MV': '27.114.128.0/18',
+ 'MW': '105.234.0.0/16',
+ 'MX': '187.192.0.0/11',
+ 'MY': '175.136.0.0/13',
+ 'MZ': '197.218.0.0/15',
+ 'NA': '41.182.0.0/16',
+ 'NC': '101.101.0.0/18',
+ 'NE': '197.214.0.0/18',
+ 'NF': '203.17.240.0/22',
+ 'NG': '105.112.0.0/12',
+ 'NI': '186.76.0.0/15',
+ 'NL': '145.96.0.0/11',
+ 'NO': '84.208.0.0/13',
+ 'NP': '36.252.0.0/15',
+ 'NR': '203.98.224.0/19',
+ 'NU': '49.156.48.0/22',
+ 'NZ': '49.224.0.0/14',
+ 'OM': '5.36.0.0/15',
+ 'PA': '186.72.0.0/15',
+ 'PE': '186.160.0.0/14',
+ 'PF': '123.50.64.0/18',
+ 'PG': '124.240.192.0/19',
+ 'PH': '49.144.0.0/13',
+ 'PK': '39.32.0.0/11',
+ 'PL': '83.0.0.0/11',
+ 'PM': '70.36.0.0/20',
+ 'PR': '66.50.0.0/16',
+ 'PS': '188.161.0.0/16',
+ 'PT': '85.240.0.0/13',
+ 'PW': '202.124.224.0/20',
+ 'PY': '181.120.0.0/14',
+ 'QA': '37.210.0.0/15',
+ 'RE': '139.26.0.0/16',
+ 'RO': '79.112.0.0/13',
+ 'RS': '178.220.0.0/14',
+ 'RU': '5.136.0.0/13',
+ 'RW': '105.178.0.0/15',
+ 'SA': '188.48.0.0/13',
+ 'SB': '202.1.160.0/19',
+ 'SC': '154.192.0.0/11',
+ 'SD': '154.96.0.0/13',
+ 'SE': '78.64.0.0/12',
+ 'SG': '152.56.0.0/14',
+ 'SI': '188.196.0.0/14',
+ 'SK': '78.98.0.0/15',
+ 'SL': '197.215.0.0/17',
+ 'SM': '89.186.32.0/19',
+ 'SN': '41.82.0.0/15',
+ 'SO': '197.220.64.0/19',
+ 'SR': '186.179.128.0/17',
+ 'SS': '105.235.208.0/21',
+ 'ST': '197.159.160.0/19',
+ 'SV': '168.243.0.0/16',
+ 'SX': '190.102.0.0/20',
+ 'SY': '5.0.0.0/16',
+ 'SZ': '41.84.224.0/19',
+ 'TC': '65.255.48.0/20',
+ 'TD': '154.68.128.0/19',
+ 'TG': '196.168.0.0/14',
+ 'TH': '171.96.0.0/13',
+ 'TJ': '85.9.128.0/18',
+ 'TK': '27.96.24.0/21',
+ 'TL': '180.189.160.0/20',
+ 'TM': '95.85.96.0/19',
+ 'TN': '197.0.0.0/11',
+ 'TO': '175.176.144.0/21',
+ 'TR': '78.160.0.0/11',
+ 'TT': '186.44.0.0/15',
+ 'TV': '202.2.96.0/19',
+ 'TW': '120.96.0.0/11',
+ 'TZ': '156.156.0.0/14',
+ 'UA': '93.72.0.0/13',
+ 'UG': '154.224.0.0/13',
+ 'US': '3.0.0.0/8',
+ 'UY': '167.56.0.0/13',
+ 'UZ': '82.215.64.0/18',
+ 'VA': '212.77.0.0/19',
+ 'VC': '24.92.144.0/20',
+ 'VE': '186.88.0.0/13',
+ 'VG': '172.103.64.0/18',
+ 'VI': '146.226.0.0/16',
+ 'VN': '14.160.0.0/11',
+ 'VU': '202.80.32.0/20',
+ 'WF': '117.20.32.0/21',
+ 'WS': '202.4.32.0/19',
+ 'YE': '134.35.0.0/16',
+ 'YT': '41.242.116.0/22',
+ 'ZA': '41.0.0.0/11',
+ 'ZM': '165.56.0.0/13',
+ 'ZW': '41.85.192.0/19',
+ }
+
+ @classmethod
+ def random_ipv4(cls, code):
+ block = cls._country_ip_map.get(code.upper())
+ if not block:
+ return None
+ addr, preflen = block.split('/')
+ addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_max = addr_min | (0xffffffff >> int(preflen))
+ return socket.inet_ntoa(
+ compat_struct_pack('!I', random.randint(addr_min, addr_max)))
+
+
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers