about summary refs log tree commit diff
diff options
context:
space:
mode:
author Filippo Valsorda <filippo.valsorda@gmail.com> 2013-10-28 01:50:17 -0400
committer Filippo Valsorda <filippo.valsorda@gmail.com> 2013-10-28 01:50:17 -0400
commit 750e9833b83c6e17a4efa8d5dac5b3cd848f4603 (patch)
tree e9380854bb2d946aae957507cea63a09adc6d76d
parent 82f0ac657c0399659863b0bdec3afea2020ca5a9 (diff)
download youtube-dl-750e9833b83c6e17a4efa8d5dac5b3cd848f4603.tar.xz
Add the missing age_limit tags; added a devscript to do a superficial check for porn sites without the age_limit tag in the test
-rw-r--r-- devscripts/check-porn.py 39
-rw-r--r-- youtube_dl/extractor/keezmovies.py 5
-rw-r--r-- youtube_dl/extractor/pornhub.py 2
-rw-r--r-- youtube_dl/extractor/pornotube.py 3
-rw-r--r-- youtube_dl/extractor/spankwire.py 4
-rw-r--r-- youtube_dl/extractor/tube8.py 2
-rw-r--r-- youtube_dl/extractor/youjizz.py 8
7 files changed, 59 insertions, 4 deletions
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
new file mode 100644
index 000000000..63401fe18
--- /dev/null
+++ b/devscripts/check-porn.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to flag
+test cases that look like porn sites but are not tagged with 'age_limit': 18
+"""
+
+# Allow direct execution: put the parent of this script's directory on sys.path
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_request
+
+for test in get_testcases():
+ try:
+ webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+ except:  # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit
+ print('\nFail: {0}'.format(test['name']))
+ continue
+
+ webpage = webpage.decode('utf8', 'replace')  # undecodable bytes are replaced, not raised
+
+ if 'porn' in webpage.lower() and ('info_dict' not in test
+ or 'age_limit' not in test['info_dict']
+ or test['info_dict']['age_limit'] != 18):
+ print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+ elif 'porn' not in webpage.lower() and ('info_dict' in test and
+ 'age_limit' in test['info_dict'] and
+ test['info_dict']['age_limit'] == 18):
+ print('\nPotential false negative: {0}'.format(test['name']))
+
+ else:
+ sys.stdout.write('.')  # progress dot: heuristic and test metadata agree
+ sys.stdout.flush()
+
+print()  # ends the line of dots; NOTE(review): Python 2 without print_function prints '()'
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py
index 23d5209d9..5e05900da 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -6,7 +6,6 @@ from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
- unescapeHTML,
)
from ..aes import (
aes_decrypt_text
@@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor):
u'md5': u'6e297b7e789329923fcf83abb67c9289',
u'info_dict': {
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
+ u"age_limit": 18,
}
}
@@ -48,6 +48,8 @@ class KeezMoviesIE(InfoExtractor):
format = path.split('/')[4].split('_')[:2]
format = "-".join( format )
+ age_limit = self._rta_search(webpage)
+
return {
'id': video_id,
'title': video_title,
@@ -55,4 +57,5 @@ class KeezMoviesIE(InfoExtractor):
'ext': extension,
'format': format,
'format_id': format,
+ 'age_limit': age_limit,
}
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3dbd2ab69..5e2454f1b 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor):
u'info_dict': {
u"uploader": u"BABES-COM",
u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
+ u"age_limit": 18
}
}
@@ -64,4 +65,5 @@ class PornHubIE(InfoExtractor):
'title': video_title,
'thumbnail': thumbnail,
'formats': formats,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py
index 5d770ec28..35dc5a9ff 100644
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
u'md5': u'374dd6dcedd24234453b295209aa69b6',
u'info_dict': {
u"upload_date": u"20090708",
- u"title": u"Marilyn-Monroe-Bathing"
+ u"title": u"Marilyn-Monroe-Bathing",
+ u"age_limit": 18
}
}
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index f0d5009c7..32df0a7fb 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor):
u"uploader": u"oreusz",
u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
u"description": u"Crazy Bitch X rated music video.",
+ u"age_limit": 18,
}
}
@@ -60,6 +61,8 @@ class SpankwireIE(InfoExtractor):
})
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+ age_limit = self._rta_search(webpage)
+
return {
'id': video_id,
'uploader': video_uploader,
@@ -67,4 +70,5 @@ class SpankwireIE(InfoExtractor):
'thumbnail': thumbnail,
'description': description,
'formats': formats,
+ 'age_limit': age_limit,
}
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index ebc8c1f4f..aea9d9a24 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor):
u"description": u"hot teen Kasia grinding",
u"uploader": u"unknown",
u"title": u"Kasia music video",
+ u"age_limit": 18,
}
}
@@ -60,4 +61,5 @@ class Tube8IE(InfoExtractor):
'ext': extension,
'format': format,
'format_id': format,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index 1265639e8..1fcc518ac 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
u'file': u'2189178.flv',
u'md5': u'07e15fa469ba384c7693fd246905547c',
u'info_dict': {
- u"title": u"Zeichentrick 1"
+ u"title": u"Zeichentrick 1",
+ u"age_limit": 18,
}
}
@@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):
# Get webpage content
webpage = self._download_webpage(url, video_id)
+ age_limit = self._rta_search(webpage)
+
# Get the video title
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
webpage, u'title').strip()
@@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):
'title': video_title,
'ext': 'flv',
'format': 'flv',
- 'player_url': embed_page_url}
+ 'player_url': embed_page_url,
+ 'age_limit': age_limit}
return [info]