diff options
| author | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-10-28 01:50:17 -0400 | 
|---|---|---|
| committer | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-10-28 01:50:17 -0400 | 
| commit | 750e9833b83c6e17a4efa8d5dac5b3cd848f4603 (patch) | |
| tree | e9380854bb2d946aae957507cea63a09adc6d76d | |
| parent | 82f0ac657c0399659863b0bdec3afea2020ca5a9 (diff) | |
Add the missing age_limit tags; add a devscript that does a superficial check for porn sites whose tests lack the age_limit tag
| -rw-r--r-- | devscripts/check-porn.py | 39 | ||||
| -rw-r--r-- | youtube_dl/extractor/keezmovies.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornotube.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/spankwire.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/tube8.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youjizz.py | 8 | 
7 files changed, 59 insertions, 4 deletions
| diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py new file mode 100644 index 000000000..63401fe18 --- /dev/null +++ b/devscripts/check-porn.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +""" +This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check +if we are not 'age_limit' tagging some porn site +""" + +# Allow direct execution +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_testcases +from youtube_dl.utils import compat_urllib_request + +for test in get_testcases(): +    try: +        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() +    except: +        print('\nFail: {0}'.format(test['name'])) +        continue + +    webpage = webpage.decode('utf8', 'replace') + +    if 'porn' in webpage.lower() and ('info_dict' not in test +                                      or 'age_limit' not in test['info_dict'] +                                      or test['info_dict']['age_limit'] != 18): +        print('\nPotential missing age_limit check: {0}'.format(test['name'])) + +    elif 'porn' not in webpage.lower() and ('info_dict' in test and +                                            'age_limit' in test['info_dict'] and +                                            test['info_dict']['age_limit'] == 18): +        print('\nPotential false negative: {0}'.format(test['name'])) + +    else: +        sys.stdout.write('.') +    sys.stdout.flush() + +print() diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 23d5209d9..5e05900da 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -6,7 +6,6 @@ from ..utils import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urllib_parse, -    unescapeHTML,  )  from ..aes import (      aes_decrypt_text @@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor):          u'md5': 
u'6e297b7e789329923fcf83abb67c9289',          u'info_dict': {              u"title": u"Petite Asian Lady Mai Playing In Bathtub", +            u"age_limit": 18,          }      } @@ -48,6 +48,8 @@ class KeezMoviesIE(InfoExtractor):          format = path.split('/')[4].split('_')[:2]          format = "-".join( format ) +        age_limit = self._rta_search(webpage) +          return {              'id': video_id,              'title': video_title, @@ -55,4 +57,5 @@ class KeezMoviesIE(InfoExtractor):              'ext': extension,              'format': format,              'format_id': format, +            'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3dbd2ab69..5e2454f1b 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor):          u'info_dict': {              u"uploader": u"BABES-COM",               u"title": u"Seductive Indian beauty strips down and fingers her pink pussy", +            u"age_limit": 18          }      } @@ -64,4 +65,5 @@ class PornHubIE(InfoExtractor):              'title': video_title,              'thumbnail': thumbnail,              'formats': formats, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 5d770ec28..35dc5a9ff 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):          u'md5': u'374dd6dcedd24234453b295209aa69b6',          u'info_dict': {              u"upload_date": u"20090708",  -            u"title": u"Marilyn-Monroe-Bathing" +            u"title": u"Marilyn-Monroe-Bathing", +            u"age_limit": 18          }      } diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index f0d5009c7..32df0a7fb 100644 --- a/youtube_dl/extractor/spankwire.py +++ 
b/youtube_dl/extractor/spankwire.py @@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor):              u"uploader": u"oreusz",               u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",              u"description": u"Crazy Bitch X rated music video.", +            u"age_limit": 18,          }      } @@ -60,6 +61,8 @@ class SpankwireIE(InfoExtractor):              })          formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) +        age_limit = self._rta_search(webpage) +          return {              'id': video_id,              'uploader': video_uploader, @@ -67,4 +70,5 @@ class SpankwireIE(InfoExtractor):              'thumbnail': thumbnail,              'description': description,              'formats': formats, +            'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index ebc8c1f4f..aea9d9a24 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor):              u"description": u"hot teen Kasia grinding",               u"uploader": u"unknown",               u"title": u"Kasia music video", +            u"age_limit": 18,          }      } @@ -60,4 +61,5 @@ class Tube8IE(InfoExtractor):              'ext': extension,              'format': format,              'format_id': format, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 1265639e8..1fcc518ac 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):          u'file': u'2189178.flv',          u'md5': u'07e15fa469ba384c7693fd246905547c',          u'info_dict': { -            u"title": u"Zeichentrick 1" +            u"title": u"Zeichentrick 1", +            u"age_limit": 18,          }      } @@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):          # Get webpage 
content          webpage = self._download_webpage(url, video_id) +        age_limit = self._rta_search(webpage) +          # Get the video title          video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',              webpage, u'title').strip() @@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):                  'title': video_title,                  'ext': 'flv',                  'format': 'flv', -                'player_url': embed_page_url} +                'player_url': embed_page_url, +                'age_limit': age_limit}          return [info] | 
