aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2013-06-23 20:41:54 +0200
committerPhilipp Hagemeister <phihag@phihag.de>2013-06-23 20:41:54 +0200
commit934858ad86f5b628978d3bcdd7edd765d4590840 (patch)
treee42ce0d296826e8649a810e1c41efae4371224b3 /youtube_dl/extractor
parent3c25b9abaee69657db9c75d80a5671c8c4206615 (diff)
Move YahooSearchIE to youtube_dl.extractor.yahoo
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/yahoo.py39
1 files changed, 38 insertions, 1 deletions
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 4f3af17d7..4b3aec9d1 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -1,9 +1,12 @@
import datetime
+import itertools
import json
import re
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
+ compat_urllib_parse,
+
ExtractorError,
)
@@ -74,3 +77,37 @@ class YahooIE(InfoExtractor):
'ext': 'flv',
}
return info_dict
+
+class YahooSearchIE(SearchInfoExtractor):
+    """Information Extractor for Yahoo! Video search queries."""
+
+    _MAX_RESULTS = 1000
+    IE_NAME = u'screen.yahoo:search'
+    _SEARCH_KEY = 'yvsearch'
+
+    def _get_n_results(self, query, n):
+        """Get a specified number of results for a query.
+
+        Downloads successive JSON result pages (the ``b`` offset advances
+        by 30 per page) and collects one url_result entry per result that
+        contains a screen.yahoo.com video link.
+
+        Returns a playlist dict with '_type', 'id' (the query itself) and
+        'entries' (at most n items).
+        """
+
+        entries = []
+        res = {
+            '_type': 'playlist',
+            'id': query,
+            'entries': entries,
+        }
+        for pagenum in itertools.count(0):
+            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
+            webpage = self._download_webpage(result_url, query,
+                                             note='Downloading results page '+str(pagenum+1))
+            info = json.loads(webpage)
+            # NOTE(review): 'm' looks like paging metadata ('last'/'total'
+            # result indices) -- confirm against the service response.
+            m = info[u'm']
+            results = info[u'results']
+
+            for r in results:
+                if len(entries) >= n:
+                    break
+                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
+                if mobj is None:
+                    # Result without a recognizable video link: skip it
+                    # instead of crashing on mobj.group().
+                    continue
+                entries.append(self.url_result('http://' + mobj.group('url'), 'Yahoo'))
+
+            # Stop on: an empty page (nothing more to fetch), enough entries
+            # collected (counted directly, so no extra page is requested when
+            # n is a multiple of 30), or the metadata reporting the last page.
+            if not results or len(entries) >= n or m[u'last'] >= (m[u'total'] - 1):
+                break
+
+        return res