diff options
Diffstat (limited to 'system/scrapers/video/amazonuk.xml')
-rw-r--r-- | system/scrapers/video/amazonuk.xml | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/system/scrapers/video/amazonuk.xml b/system/scrapers/video/amazonuk.xml
new file mode 100644
index 0000000000..0bf5626b7e
--- /dev/null
+++ b/system/scrapers/video/amazonuk.xml
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Initial basic version doing Studio and Thumb believed to have been written by C-Quel -->
+<!-- Then updated by John Lockwood to scrape Title, Year, MPAA, Runtime, Rating, Votes, Plot, Actors, Directors -->
+<!-- This version 1.1 dated 12/01/09 includes fix by C-Quel for processing results from Amazon to match recent change -->
+<!-- Version 1.1 also now supports the Writers field and fixes an issue to do with film titles -->
+<scraper framework="1.0" date="2009-05-22" content="movies" name="Amazon UK" thumb="amazonuk.png" language="en">
+  <!-- Search URL: inserts \1 (matched against buffer 1) into the field-keywords parameter of an amazon.co.uk DVD search. -->
+  <CreateSearchUrl dest="3">
+    <RegExp input="$$1" output="&lt;url&gt;http://www.amazon.co.uk/s/ref=nb_ss_d_h_?url=search-alias%3Ddvd&amp;amp;field-keywords=\1&lt;/url&gt;" dest="3">
+      <expression noclean="1"></expression>
+    </RegExp>
+  </CreateSearchUrl>
+  <!-- Search results: every productTitle link in buffer 1 becomes an entity element (title = \2, url = \1) in buffer 5, which is then wrapped in a results element written to buffer 8. -->
+  <GetSearchResults dest="8">
+    <RegExp input="$$5" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;" dest="8">
+      <RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url&gt;\1&lt;/url&gt;&lt;/entity&gt;" dest="5">
+        <expression repeat="yes" clear="yes" noclean="1">productTitle"&gt;&lt;a href="([^"]*)"&gt;([^&lt;]*)&lt;/a&gt;</expression>
+      </RegExp>
+      <expression clear="yes" noclean="1"></expression>
+    </RegExp>
+  </GetSearchResults>
+  <!-- Details: the nested rules collect field elements in buffer 5 (rules with dest="5+" add to it); the outer rule wraps buffer 5 in a details element written to buffer 3. -->
+  <GetDetails clearbuffers="no" dest="3">
+    <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
+      <!-- Title: the Amazon.co.uk prefix is an optional literal group; the former character class [Amazon.co.uk: ]* also swallowed leading title letters drawn from that set. -->
+      <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5">
+        <expression noclean="1" trim="1">&lt;title&gt;(?:Amazon\.co\.uk: )?([^\:\(]*) </expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
+        <expression trim="1">[ \[\(]([0-9]{4})[ \]\)][^&lt;]*&lt;/span&gt;</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;top250&gt;\1&lt;/top250&gt;" dest="5+">
+        <expression>Top 250: #([0-9]*)&lt;/a&gt;</expression>
+      </RegExp>
+      <!-- Classification: each rule copies the rating token from the page into buffer 9, then emits a fixed mpaa value if buffer 9 holds that token. clear="yes" empties buffer 9 first so a token left by an earlier rule (e.g. 12a) cannot satisfy a later test (12). -->
+      <RegExp input="$$9" output="&lt;mpaa&gt;Exempt&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;.*(Exempt)</expression>
+        </RegExp>
+        <expression>(exempt)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;U&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(u)-rating</expression>
+        </RegExp>
+        <expression>(u)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;Uc&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(u[cC])-rating</expression>
+        </RegExp>
+        <expression>(uc)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;12A&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(12[aA])-rating</expression>
+        </RegExp>
+        <expression>(12a)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;12&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(12)-rating</expression>
+        </RegExp>
+        <expression>(12)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;15&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(15)-rating</expression>
+        </RegExp>
+        <expression>(15)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;PG&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(pg)-rating</expression>
+        </RegExp>
+        <expression>(pg)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;18&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(18)-rating</expression>
+        </RegExp>
+        <expression>(18)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;R18&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/([rR]18)-rating</expression>
+        </RegExp>
+        <expression>(R18)</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;mpaa&gt;UNRATED&lt;/mpaa&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes">&lt;b&gt;Classification:&lt;/b&gt;[^_]*/(unrated)-rating</expression>
+        </RegExp>
+        <expression>(unrated)</expression>
+      </RegExp>
+      <!-- Single-pattern fields: each rule below matches the page in buffer 1 once (or repeatedly where repeat="yes") and adds its element to buffer 5. -->
+      <RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
+        <expression>&lt;b&gt;Classification:&lt;/b&gt; ([^(]*) \(</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;certification&gt;\1&lt;/certification&gt;" dest="5+">
+        <expression repeat="yes">Classification:&lt;/b&gt;[^&gt;]*alt="([0-9]*)"</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
+        <expression>&lt;h5&gt;Tagline:&lt;/h5&gt;([^&lt;]*)</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
+        <expression trim="1">Run Time:&lt;/b&gt;[^0-9]*([^&lt;]*)&lt;/li&gt;</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;rating&gt;\1.\2&lt;/rating&gt;&lt;votes&gt;\3&lt;/votes&gt;" dest="5+">
+        <expression noclean="1">Average Customer Review&lt;/b&gt;[^_]*stars-([0-9])-([0-9])[^)]*&gt;([0-9]*) customer reviews&lt;/a&gt;\)</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
+        <expression repeat="yes">"/Sections/Genres/[^/]*/"&gt;([^&lt;]*)&lt;/a&gt;</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
+        <expression>Studio:&lt;/b&gt; ([^&lt;]*)&lt;/li&gt;</expression>
+      </RegExp>
+      <!-- Plot: taken from a Plot Outline/Summary heading (which also fills outline), the Amazon.co.uk Review block and the Synopsis block; each match adds its own plot element. -->
+      <RegExp input="$$1" output="&lt;outline&gt;\2&lt;/outline&gt;&lt;plot&gt;\2&lt;/plot&gt;" dest="5+">
+        <expression trim="1">Plot (Outline|Summary):&lt;/h5&gt;([^&lt;]*)</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
+        <expression>&lt;b&gt;Amazon.co.uk Review&lt;/b&gt;&lt;br /&gt;\n ([^\n]*)</expression>
+      </RegExp>
+      <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
+        <expression>&lt;b&gt;Synopsis&lt;/b&gt;&lt;br /&gt;\n ([^\n]*)</expression>
+      </RegExp>
+      <!-- Thumb: the output is presumably back-reference \1 followed by the literal 01.L.jpg (TODO confirm against the scraper engine). -->
+      <RegExp input="$$1" output="&lt;thumb&gt;\101.L.jpg&lt;/thumb&gt;" dest="5+">
+        <expression noclean="1">"original_image", "([^"]*)AA2[0-9]0_\.jpg"</expression>
+      </RegExp>
+      <!-- People: buffer 9 receives the Writers/Directors/Actors link list and every anchor text in it becomes one element. clear="yes" empties buffer 9 first so a missing list cannot reuse the previous list's names. -->
+      <RegExp input="$$9" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes" noclean="1">&lt;b&gt;Writers:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
+        </RegExp>
+        <expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes" noclean="1">&lt;b&gt;Directors:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
+        </RegExp>
+        <expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
+      </RegExp>
+      <RegExp input="$$9" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;/actor&gt;" dest="5+">
+        <RegExp input="$$1" output="\1" dest="9">
+          <expression clear="yes" noclean="1">&lt;b&gt;Actors:&lt;/b&gt; ([^\n]*&lt;/a&gt;)</expression>
+        </RegExp>
+        <expression noclean="1" repeat="yes">[^&gt;]*&gt;([^&lt;]+)&lt;/a&gt;</expression>
+      </RegExp>
+      <expression noclean="1"></expression>
+    </RegExp>
+  </GetDetails>
+</scraper>