diff options
| -rw-r--r-- | youtube_dl/extractor/sexykarma.py | 35 | 
1 file changed, 23 insertions, 12 deletions
| diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py index 4a6345758..c833fc8ee 100644 --- a/youtube_dl/extractor/sexykarma.py +++ b/youtube_dl/extractor/sexykarma.py @@ -12,7 +12,8 @@ from ..utils import (  class SexyKarmaIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' +    IE_DESC = 'Sexy Karma and Watch Indian Porn' +    _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'      _TESTS = [{          'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',          'md5': 'b9798e7d1ef1765116a8f516c8091dbd', @@ -21,11 +22,10 @@ class SexyKarmaIE(InfoExtractor):              'display_id': 'taking-a-quick-pee',              'ext': 'mp4',              'title': 'Taking a quick pee.', -            'description': '',              'thumbnail': 're:^https?://.*\.jpg$',              'uploader': 'wildginger7',              'upload_date': '20141007', -            'duration': 81, +            'duration': 22,              'view_count': int,              'comment_count': int,              'categories': list, @@ -38,7 +38,6 @@ class SexyKarmaIE(InfoExtractor):              'display_id': 'pot-pixie-tribute',              'ext': 'mp4',              'title': 'pot_pixie tribute', -            'description': 'tribute',              'thumbnail': 're:^https?://.*\.jpg$',              'uploader': 'banffite',              'upload_date': '20141013', @@ -47,6 +46,22 @@ class SexyKarmaIE(InfoExtractor):              'comment_count': int,              'categories': list,          } +    }, { +        'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', +        'md5': '9afb80675550406ed9a63ac2819ef69d', +        'info_dict': { +            'id': 'dW2mtctxJfs', +            
'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number', +            'ext': 'mp4', +            'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'Don', +            'upload_date': '20140213', +            'duration': 83, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        }      }]      def _real_extract(self, url): @@ -57,14 +72,11 @@ class SexyKarmaIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          video_url = self._html_search_regex( -            r'<p>Save this video to your computer: </p><p><a href="([^"]+)"', -            webpage, 'url') +            r"url: escape\('([^']+)'\)", webpage, 'url')          title = self._html_search_regex(              r'<h2 class="he2"><span>(.*?)</span>',              webpage, 'title') -        description = self._html_search_meta( -            'description', webpage, 'description', fatal=False, default='')          thumbnail = self._html_search_regex(              r'<span id="container"><img\s+src="([^"]+)"',              webpage, 'thumbnail', fatal=False) @@ -86,16 +98,15 @@ class SexyKarmaIE(InfoExtractor):              r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',              webpage, 'comment count', fatal=False)) -        categories = self._html_search_meta( -            'keywords', webpage, 'categories', -            fatal=False, default='').split(',') +        categories = re.findall( +            r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', +            webpage)          return {              'id': video_id,              'display_id': display_id,              'url': video_url,              'title': title, -            'description': description,              'thumbnail': thumbnail,              'uploader': uploader,              
'upload_date': upload_date, | 
