aboutsummaryrefslogtreecommitdiff
path: root/system/scrapers/video/amazonuk.xml
diff options
context:
space:
mode:
Diffstat (limited to 'system/scrapers/video/amazonuk.xml')
-rw-r--r--system/scrapers/video/amazonuk.xml154
1 files changed, 154 insertions, 0 deletions
diff --git a/system/scrapers/video/amazonuk.xml b/system/scrapers/video/amazonuk.xml
new file mode 100644
index 0000000000..0bf5626b7e
--- /dev/null
+++ b/system/scrapers/video/amazonuk.xml
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Initial basic version doing Studio and Thumb believed to have been written by C-Quel -->
+<!-- Then updated by John Lockwood to scrape Title, Year, MPAA, Runtime, Rating, Votes, Plot, Actors, Directors -->
+<!-- This version 1.1 dated 12/01/09 includes fix by C-Quel for processing results from Amazon to match recent change -->
+<!-- Version 1.1 also now supports the Writers field and fixes an issue to do with film titles -->
+<scraper framework="1.0" date="2009-05-22" content="movies" name="Amazon UK" thumb="amazonuk.png" language="en">
+ <CreateSearchUrl dest="3">
+ <RegExp input="$$1" output="&lt;url&gt;http://www.amazon.co.uk/s/ref=nb_ss_d_h_?url=search-alias%3Ddvd&amp;amp;field-keywords=\1&lt;/url&gt;" dest="3">
+ <expression noclean="1"></expression>
+ </RegExp>
+ </CreateSearchUrl>
+ <GetSearchResults dest="8">
+ <RegExp input="$$5" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;" dest="8">
+ <RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url&gt;\1&lt;/url&gt;&lt;/entity&gt;" dest="5">
+ <expression repeat="yes" clear="yes" noclean="1">productTitle&quot;&gt;&lt;a href=&quot;([^&quot;]*)&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
+ </RegExp>
+ <expression clear="yes" noclean="1"></expression>
+ </RegExp>
+ </GetSearchResults>
+ <GetDetails clearbuffers="no" dest="3">
+ <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
+ <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5">
+ <expression noclean="1" trim="1">&lt;title&gt;[Amazon.co.uk: ]*([^\:\(]*) </expression>
+ </RegExp>
+
+ <RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
+ <expression trim="1">[ \[\(]([0-9]{4})[ \]\)][^&lt;]*&lt;/span&gt;</expression>
+ </RegExp>
+
+ <RegExp input="$$1" output="&lt;top250&gt;\1&lt;/top250&gt;" dest="5+">
+ <expression>Top 250: #([0-9]*)&lt;/a&gt;</expression>
+ </RegExp>
+
+ <RegExp input="$$9" output="&lt;mpaa&gt;Exempt&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;.*(Exempt)</expression>
+ </RegExp>
+ <expression>(exempt)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;U&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(u)-rating</expression>
+ </RegExp>
+ <expression>(u)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;Uc&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(u[cC])-rating</expression>
+ </RegExp>
+ <expression>(uc)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;12A&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(12[aA])-rating</expression>
+ </RegExp>
+ <expression>(12a)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;12&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(12)-rating</expression>
+ </RegExp>
+ <expression>(12)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;15&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(15)-rating</expression>
+ </RegExp>
+ <expression>(15)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;PG&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(pg)-rating</expression>
+ </RegExp>
+ <expression>(pg)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;18&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(18)-rating</expression>
+ </RegExp>
+ <expression>(18)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;R18&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/([rR]18)-rating</expression>
+ </RegExp>
+ <expression>(R18)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="&lt;mpaa&gt;UNRATED&lt;/mpaa&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(unrated)-rating</expression>
+ </RegExp>
+ <expression>(unrated)</expression>
+ </RegExp>
+
+ <RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
+ <expression>&lt;b&gt;Classification:&lt;/b&gt; ([^(]*) \(</expression>
+ </RegExp>
+
+ <RegExp input="$$1" output="&lt;certification&gt;\1&lt;/certification&gt;" dest="5+">
+ <expression repeat="yes">Classification:&lt;/b&gt;[^&gt;]*alt=&quot;([0-9]*)&quot;</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
+ <expression>&lt;h5&gt;Tagline:&lt;/h5&gt;([^&lt;]*)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
+ <expression trim="1">Run Time:&lt;/b&gt;[^0-9]*([^&lt;]*)&lt;/li&gt;</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;rating&gt;\1.\2&lt;/rating&gt;&lt;votes&gt;\3&lt;/votes&gt;" dest="5+">
+ <expression noclean="1">Average Customer Review&lt;/b&gt;[^_]*stars-([0-9])-([0-9])[^)]*&gt;([0-9]*) customer reviews&lt;/a&gt;\)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
+ <expression repeat="yes">&quot;/Sections/Genres/[^/]*/&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
+ <expression>Studio:&lt;/b&gt; ([^&lt;]*)&lt;/li&gt;</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;outline&gt;\2&lt;/outline&gt;&lt;plot&gt;\2&lt;/plot&gt;" dest="5+">
+ <expression trim="1">Plot (Outline|Summary):&lt;/h5&gt;([^&lt;]*)</expression>
+ </RegExp>
+
+ <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
+ <expression>&lt;b&gt;Amazon.co.uk Review&lt;/b&gt;&lt;br /&gt;\n ([^\n]*)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
+ <expression>&lt;b&gt;Synopsis&lt;/b&gt;&lt;br /&gt;\n ([^\n]*)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="&lt;thumb&gt;\101.L.jpg&lt;/thumb&gt;" dest="5+">
+ <expression noclean="1">&quot;original_image&quot;, &quot;([^&quot;]*)AA2[0-9]0_\.jpg&quot;</expression>
+ </RegExp>
+
+ <RegExp input="$$9" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression noclean="1">&lt;b&gt;Writers:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
+ </RegExp>
+ <expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
+ </RegExp>
+
+ <RegExp input="$$9" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression noclean="1">&lt;b&gt;Directors:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
+ </RegExp>
+ <expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
+ </RegExp>
+
+ <RegExp input="$$9" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;/actor&gt;" dest="5+">
+ <RegExp input="$$1" output="\1" dest="9">
+ <expression noclean="1">&lt;b&gt;Actors:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
+ </RegExp>
+ <expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
+ </RegExp>
+ <expression noclean="1"></expression>
+ </RegExp>
+ </GetDetails>
+</scraper>