aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2017-02-19 03:53:23 +0700
committer: Sergey M <dstftw@gmail.com> 2017-02-19 05:10:08 +0800
commit: 4248dad92bd87650c791194276296b148f668e68 (patch)
tree: e4c26e41802f4119b67830a61976c39c494b377e /youtube_dl/extractor
parent: 0a840f584c3f1fedb6957c05587dec697143f2d5 (diff)
Improve geo bypass mechanism
* Rename options to match --geo-verification-proxy by prefix
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/common.py | 57
-rw-r--r-- youtube_dl/extractor/dramafever.py | 3
-rw-r--r-- youtube_dl/extractor/go.py | 3
-rw-r--r-- youtube_dl/extractor/itv.py | 4
-rw-r--r-- youtube_dl/extractor/nrk.py | 4
-rw-r--r-- youtube_dl/extractor/ondemandkorea.py | 3
-rw-r--r-- youtube_dl/extractor/pbs.py | 5
-rw-r--r-- youtube_dl/extractor/srgssr.py | 6
-rw-r--r-- youtube_dl/extractor/svt.py | 4
-rw-r--r-- youtube_dl/extractor/vbox7.py | 3
-rw-r--r-- youtube_dl/extractor/vgtv.py | 5
-rw-r--r-- youtube_dl/extractor/viki.py | 2
12 files changed, 70 insertions, 29 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6eb6a25b8..272da74b6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -323,10 +323,15 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
- _BYPASS_GEO attribute may be set to False in order to disable
+ _GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
Though it won't disable explicit geo restriction bypass based on
- country code provided with geo_bypass_country.
+ country code provided with geo_bypass_country. (experimental)
+
+ _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+ countries for this extractor. One of these countries will be used by
+ geo restriction bypass mechanism right away in order to bypass
+ geo restriction, of course, if the mechanism is not disabled. (experimental)
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
@@ -335,7 +340,8 @@ class InfoExtractor(object):
_ready = False
_downloader = None
_x_forwarded_for_ip = None
- _BYPASS_GEO = True
+ _GEO_BYPASS = True
+ _GEO_COUNTRIES = None
_WORKING = True
def __init__(self, downloader=None):
@@ -370,13 +376,27 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
+ self.__initialize_geo_bypass()
+ if not self._ready:
+ self._real_initialize()
+ self._ready = True
+
+ def __initialize_geo_bypass(self):
if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None)
+ # If there is no explicit country for geo bypass specified and
+ # the extractor is known to be geo restricted let's fake IP
+ # as X-Forwarded-For right away.
+ if (not country_code and
+ self._GEO_BYPASS and
+ self._downloader.params.get('geo_bypass', True) and
+ self._GEO_COUNTRIES):
+ country_code = random.choice(self._GEO_COUNTRIES)
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
- if not self._ready:
- self._real_initialize()
- self._ready = True
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_stdout(
+ '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
@@ -389,16 +409,8 @@ class InfoExtractor(object):
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
return ie_result
except GeoRestrictedError as e:
- if (not self._downloader.params.get('geo_bypass_country', None) and
- self._BYPASS_GEO and
- self._downloader.params.get('geo_bypass', True) and
- not self._x_forwarded_for_ip and
- e.countries):
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
- if self._x_forwarded_for_ip:
- self.report_warning(
- 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
- continue
+ if self.__maybe_fake_ip_and_retry(e.countries):
+ continue
raise
except ExtractorError:
raise
@@ -407,6 +419,19 @@ class InfoExtractor(object):
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
+ def __maybe_fake_ip_and_retry(self, countries):
+ if (not self._downloader.params.get('geo_bypass_country', None) and
+ self._GEO_BYPASS and
+ self._downloader.params.get('geo_bypass', True) and
+ not self._x_forwarded_for_ip and
+ countries):
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+ return True
+ return False
+
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
self._downloader = downloader
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index 755db806a..e7abc8889 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -20,6 +20,7 @@ from ..utils import (
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
+ _GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
if isinstance(e.cause, compat_HTTPError):
self.raise_geo_restricted(
msg='Currently unavailable in your country',
- countries=['US', 'CA'])
+ countries=self._GEO_COUNTRIES)
raise
series_id, episode_number = video_id.split('.')
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index ec902c670..b205bfc7c 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+ _GEO_COUNTRIES = ['US']
_TESTS = [{
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
'info_dict': {
@@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
for error in errors:
if error.get('code') == 1002:
self.raise_geo_restricted(
- error['message'], countries=['US'])
+ error['message'], countries=self._GEO_COUNTRIES)
error_message = ', '.join([error['message'] for error in errors])
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
asset_url += '?' + entitlement['uplynkData']['sessionKey']
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index aabde15f3..021c6b278 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -24,6 +24,7 @@ from ..utils import (
class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+ _GEO_COUNTRIES = ['GB']
_TEST = {
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
'info_dict': {
@@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
fault_code = xpath_text(resp_env, './/faultcode')
fault_string = xpath_text(resp_env, './/faultstring')
if fault_code == 'InvalidGeoRegion':
- self.raise_geo_restricted(msg=fault_string, countries=['GB'])
+ self.raise_geo_restricted(
+ msg=fault_string, countries=self._GEO_COUNTRIES)
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 78ece33e1..13af9ed1f 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -14,6 +14,7 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['NO']
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
- msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
+ msg=MESSAGES.get('ProgramIsGeoBlocked'),
+ countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py
index 0c85d549e..df1ce3c1d 100644
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dl/extractor/ondemandkorea.py
@@ -10,6 +10,7 @@ from ..utils import (
class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+ _GEO_COUNTRIES = ['US', 'CA']
_TEST = {
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
'info_dict': {
@@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
if 'msg_block_01.png' in webpage:
self.raise_geo_restricted(
msg='This content is not available in your region',
- countries=['US', 'CA'])
+ countries=self._GEO_COUNTRIES)
if 'This video is only available to ODK PLUS members.' in webpage:
raise ExtractorError(
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 64f47bae3..3e51b4dd7 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
)
''' % '|'.join(list(zip(*_STATIONS))[0])
+ _GEO_COUNTRIES = ['US']
+
_TESTS = [
{
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
message = self._ERRORS.get(
redirect_info['http_code'], redirect_info['message'])
if redirect_info['http_code'] == 403:
- self.raise_geo_restricted(msg=message, countries=['US'])
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index a35a0a538..bb73eb1d5 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -14,7 +14,8 @@ from ..utils import (
class SRGSSRIE(InfoExtractor):
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
- _BYPASS_GEO = False
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['CH']
_ERRORS = {
'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
if media_data.get('block') and media_data['block'] in self._ERRORS:
message = self._ERRORS[media_data['block']]
if media_data['block'] == 'GEOBLOCK':
- self.raise_geo_restricted(msg=message, countries=['CH'])
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index f2a2200bf..9e2c9fcc6 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -13,6 +13,7 @@ from ..utils import (
class SVTBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['SE']
def _extract_video(self, video_info, video_id):
formats = []
for vr in video_info['videoReferences']:
@@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
})
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
self.raise_geo_restricted(
- 'This video is only available in Sweden', countries=['SE'])
+ 'This video is only available in Sweden',
+ countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
subtitles = {}
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index f86d804c1..8152acefd 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
)
(?P<id>[\da-fA-F]+)
'''
+ _GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
video_url = video['src']
if '/na.mp4' in video_url:
- self.raise_geo_restricted(countries=['BG'])
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
uploader = video.get('uploader')
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 1709fd6bb..0f8c156a7 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -14,7 +14,7 @@ from ..utils import (
class VGTVIE(XstreamIE):
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
- _BYPASS_GEO = False
+ _GEO_BYPASS = False
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
@@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
properties = try_get(
data, lambda x: x['streamConfiguration']['properties'], list)
if properties and 'geoblocked' in properties:
- raise self.raise_geo_restricted(countries=['NO'])
+ raise self.raise_geo_restricted(
+ countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
self._sort_formats(info['formats'])
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 68a74e246..e9c8bf824 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
_APP_VERSION = '2.2.5.1428709186'
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
- _BYPASS_GEO = False
+ _GEO_BYPASS = False
_NETRC_MACHINE = 'viki'
_token = None