1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
import itertools
import json
import re
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urlparse,
determine_ext,
clean_html,
)
class YahooIE(InfoExtractor):
IE_DESC = u'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_TESTS = [
{
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv',
u'info_dict': {
u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
u'description': u'Julian and Travis watch Julian Smith',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
{
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
u'file': u'103000935.flv',
u'info_dict': {
u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
webpage, u'items', flags=re.MULTILINE)
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
data = compat_urllib_parse.urlencode({
'q': query,
'env': 'prod',
'format': 'json',
})
query_result_json = self._download_webpage(
'http://video.query.yahoo.com/v1/public/yql?' + data,
video_id, u'Downloading video info')
query_result = json.loads(query_result_json)
info = query_result['query']['results']['mediaObj'][0]
meta = info['meta']
formats = []
for s in info['streams']:
format_info = {
'width': s.get('width'),
'height': s.get('height'),
'bitrate': s.get('bitrate'),
}
host = s['host']
path = s['path']
if host.startswith('rtmp'):
format_info.update({
'url': host,
'play_path': path,
'ext': 'flv',
})
else:
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
format_info['ext'] = determine_ext(format_url)
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
info = {
'id': video_id,
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = u'Yahoo screen search'
_MAX_RESULTS = 1000
IE_NAME = u'screen.yahoo:search'
_SEARCH_KEY = 'yvsearch'
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
res = {
'_type': 'playlist',
'id': query,
'entries': []
}
for pagenum in itertools.count(0):
result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
webpage = self._download_webpage(result_url, query,
note='Downloading results page '+str(pagenum+1))
info = json.loads(webpage)
m = info[u'm']
results = info[u'results']
for (i, r) in enumerate(results):
if (pagenum * 30) +i >= n:
break
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
res['entries'].append(e)
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)):
break
return res
|