aboutsummaryrefslogtreecommitdiff
path: root/system/scrapers/video/amazonus.xml
blob: 5a7e3a44eb58e323e480baeef50e9816454da252 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
<?xml version="1.0" encoding="UTF-8"?>
<!-- Initial basic version doing Studio and Thumb believed to have been written by C-Quel -->
<!-- Then updated by John Lockwood to scrape Title, Year, MPAA, Runtime, Rating, Votes, Plot, Actors, Directors -->
<!-- This version 1.1 dated 12/01/09 includes fix by C-Quel for processing results from Amazon to match recent change -->
<!-- Version 1.1 also now supports the Writers field -->
<scraper framework="1.0" date="2009-05-22" content="movies" name="Amazon US" thumb="amazonus.png" language="en">
	<!--
		CreateSearchUrl: wraps the content of $$1 in a url element that points at
		Amazon.com's DVD search (search-alias=dvd) and writes the result to dest 3.
		NOTE(review): the double-escaped ampersand in the output looks deliberate,
		presumably because the output string is itself parsed as XML by the
		framework. Confirm before changing it.
	-->
	<CreateSearchUrl dest="3">
		<RegExp
			dest="3"
			input="$$1"
			output="&lt;url&gt;http://www.amazon.com/s/ref=nb_ss_d_h_?url=search-alias%3Ddvd&amp;amp;field-keywords=\1&lt;/url&gt;">
			<!-- Empty pattern: presumably the framework substitutes a match-everything default, so \1 is the whole input. TODO confirm. -->
			<expression noclean="1"></expression>
		</RegExp>
	</CreateSearchUrl>
	<!--
		GetSearchResults: turns the search page in $$1 into a results document.
		The nested RegExp collects one entity per product link into dest 5; the
		outer RegExp then wraps $$5 in the XML prolog and results element and
		writes it to dest 8.
	-->
	<GetSearchResults dest="8">
		<RegExp
			dest="8"
			input="$$5"
			output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;">
			<!-- For every productTitle anchor: \1 is the href value, \2 is the anchor text. -->
			<RegExp
				dest="5"
				input="$$1"
				output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url&gt;\1&lt;/url&gt;&lt;/entity&gt;">
				<expression noclean="1" clear="yes" repeat="yes">productTitle&quot;&gt;&lt;a href=&quot;([^&quot;]*)&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
			</RegExp>
			<!-- Empty pattern: presumably matches the whole of $$5 so that \1 carries every collected entity. TODO confirm. -->
			<expression noclean="1" clear="yes"></expression>
		</RegExp>
	</GetSearchResults>
	<!--
		GetDetails: builds the details document for one title.
		Every child RegExp below reads the page text from $$1 and writes its XML
		fragment to buffer 5 (the title overwrites it with dest="5", all later
		fields append with dest="5+"). The outer RegExp finally wraps $$5 in a
		details element and stores it in dest 3.
		Buffer 9 is used as scratch space by the two-step extractions (MPAA,
		credits, director, actor). Because clearbuffers="no" is set, each inner
		expression that fills buffer 9 carries clear="yes", so a failed match
		empties the buffer instead of leaving the previous step's content behind
		(otherwise, for example, the Writers links would be emitted again as
		directors on a page that has no Directors line).
	-->
	<GetDetails clearbuffers="no" dest="3">
		<RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
			<!--
				Title: text of the page title up to the first colon, after an optional
				literal "Amazon.com: " prefix. The prefix is an alternation with an empty
				branch (so group 1 always takes part in the match) rather than a character
				class, which would also eat leading letters of the real title such as the
				"A" in "Apollo 13". The title itself is group 2.
			-->
			<RegExp input="$$1" output="&lt;title&gt;\2&lt;/title&gt;" dest="5">
				<expression noclean="2">&lt;title&gt;(Amazon\.com: |)([^:]*)</expression>
			</RegExp>
			<!-- Year: four digits delimited by spaces, brackets or parentheses, before a closing span. -->
			<RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
				<expression trim="1">[ \[\(]([0-9]{4})[ \]\)][^&lt;]*&lt;/span&gt;</expression>
			</RegExp>
			<!-- NOTE(review): "Top 250" looks inherited from another site's scraper; confirm it can match Amazon markup. -->
			<RegExp input="$$1" output="&lt;top250&gt;\1&lt;/top250&gt;" dest="5+">
				<expression>Top 250: #([0-9]*)&lt;/a&gt;</expression>
			</RegExp>

			<!--
				MPAA rating: six probes, one per rating. Each inner RegExp looks, after the
				"Rating:" label, for a "/" followed by the rating token, one arbitrary
				character and an underscore, and copies the token to buffer 9. The outer
				RegExp emits the fixed mpaa value only if buffer 9 holds that token.
				The tokens are written in lower case; presumably matching is
				case-insensitive by default. TODO confirm.
			-->
			<RegExp input="$$9" output="&lt;mpaa&gt;G&lt;/mpaa&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression clear="yes">&lt;b&gt;Rating: &lt;/b&gt;[^_]*/(g)._</expression>
				</RegExp>
				<expression>(g)</expression>
			</RegExp>
			<RegExp input="$$9" output="&lt;mpaa&gt;PG&lt;/mpaa&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression clear="yes">&lt;b&gt;Rating: &lt;/b&gt;[^_]*/(pg)._</expression>
				</RegExp>
				<expression>(pg)</expression>
			</RegExp>
			<RegExp input="$$9" output="&lt;mpaa&gt;PG-13&lt;/mpaa&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression clear="yes">&lt;b&gt;Rating: &lt;/b&gt;[^_]*/(pg-13)._</expression>
				</RegExp>
				<expression>(pg-13)</expression>
			</RegExp>
			<RegExp input="$$9" output="&lt;mpaa&gt;R&lt;/mpaa&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression clear="yes">&lt;b&gt;Rating: &lt;/b&gt;[^_]*/(r)._</expression>
				</RegExp>
				<expression>(r)</expression>
			</RegExp>
			<RegExp input="$$9" output="&lt;mpaa&gt;NC-17&lt;/mpaa&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression clear="yes">&lt;b&gt;Rating: &lt;/b&gt;[^_]*/(nc-17)._</expression>
				</RegExp>
				<expression>(nc-17)</expression>
			</RegExp>
			<RegExp input="$$9" output="&lt;mpaa&gt;UNRATED&lt;/mpaa&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression clear="yes">&lt;b&gt;Rating: &lt;/b&gt;[^_]*/(unrated)._</expression>
				</RegExp>
				<expression>(unrated)</expression>
			</RegExp>

			<!-- Certification: numeric alt text of the image that follows the "Classification:" label. -->
			<RegExp input="$$1" output="&lt;certification&gt;\1&lt;/certification&gt;" dest="5+">
				<expression repeat="yes">Classification:&lt;/b&gt;[^&gt;]*alt=&quot;([0-9]*)&quot;</expression>
			</RegExp>
			<!-- NOTE(review): the h5 "Tagline:" markup looks inherited from another site's scraper; confirm it can match Amazon markup. -->
			<RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
				<expression>&lt;h5&gt;Tagline:&lt;/h5&gt;([^&lt;]*)</expression>
			</RegExp>
			<!-- Runtime: everything from the first digit after "Run Time:" up to the closing list item. -->
			<RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
				<expression trim="1">Run Time:&lt;/b&gt;[^0-9]*([^&lt;]*)&lt;/li&gt;</expression>
			</RegExp>
			<!-- Rating and votes: the two digits of the "stars-N-N" token become N.N; the review count becomes votes. -->
			<RegExp input="$$1" output="&lt;rating&gt;\1.\2&lt;/rating&gt;&lt;votes&gt;\3&lt;/votes&gt;" dest="5+">
				<expression noclean="1">Average Customer Review&lt;/b&gt;[^_]*stars-([0-9])-([0-9])[^)]*&gt;([0-9]*) customer reviews&lt;/a&gt;\)</expression>
			</RegExp>
			<!-- NOTE(review): the "/Sections/Genres/" link pattern looks inherited from another site's scraper; confirm it can match Amazon markup. -->
			<RegExp input="$$1" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
				<expression repeat="yes">&quot;/Sections/Genres/[^/]*/&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
			</RegExp>
			<!-- Studio: text after "Studio:" (the pattern requires exactly two spaces after the closing b tag). -->
			<RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
				<expression>Studio:&lt;/b&gt;  ([^&lt;]*)&lt;/li&gt;</expression>
			</RegExp>
			<!-- Outline and plot from an h5 "Plot Outline:" or "Plot Summary:" heading; group 2 is the text. -->
			<RegExp input="$$1" output="&lt;outline&gt;\2&lt;/outline&gt;&lt;plot&gt;\2&lt;/plot&gt;" dest="5+">
				<expression trim="1">Plot (Outline|Summary):&lt;/h5&gt;([^&lt;]*)</expression>
			</RegExp>
			<!-- Plot from the "Product Description" section: text up to the next tag. -->
			<RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
				<expression trim="1">&lt;b&gt;Product Description&lt;/b&gt;&lt;br /[^&gt;]*&gt;([^&lt;]+)</expression>
			</RegExp>
			<!-- Thumb: takes the original_image URL up to its "AA2n0_.jpg" suffix (group 1) and appends "01.L.jpg". -->
			<RegExp input="$$1" output="&lt;thumb&gt;\101.L.jpg&lt;/thumb&gt;" dest="5+">
				<expression noclean="1">&quot;original_image&quot;, &quot;([^&quot;]*)AA2[0-9]0_\.jpg&quot;</expression>
			</RegExp>

			<!--
				Writers, directors and actors share one two-step shape: the inner RegExp
				copies the run of links on the labelled line (up to the last closing anchor
				before a newline) into buffer 9, then the outer RegExp emits one element per
				anchor text found in buffer 9.
			-->
			<RegExp input="$$9" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression noclean="1" clear="yes">&lt;b&gt;Writers:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
				</RegExp>
				<expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
			</RegExp>

			<RegExp input="$$9" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression noclean="1" clear="yes">&lt;b&gt;Directors:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
				</RegExp>
				<expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
			</RegExp>

			<RegExp input="$$9" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;/actor&gt;" dest="5+">
				<RegExp input="$$1" output="\1" dest="9">
					<expression noclean="1" clear="yes">&lt;b&gt;Actors:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
				</RegExp>
				<expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
			</RegExp>

			<!-- Empty pattern: presumably matches the whole of $$5 so that \1 carries every fragment collected above. TODO confirm. -->
			<expression noclean="1"></expression>
		</RegExp>
	</GetDetails>
</scraper>