about | summary | refs | log | tree | commit | diff
path: root/addons/metadata.common.imdb.com/imdb.xml
diff options
context:
space:
mode:
author Martijn Kaijser <mcm.kaijser@gmail.com> 2012-11-14 21:54:32 +0100
committer Martijn Kaijser <mcm.kaijser@gmail.com> 2012-11-14 21:54:32 +0100
commit 64d3b8e87d681c91553fa296a65f77bb3786a67e (patch)
tree f3ff630691506a64c1887ed8066e33dd351dcc2c /addons/metadata.common.imdb.com/imdb.xml
parent 37e8e4ecf7fbc78dbb2c7cd338cdd0f7e8a620c9 (diff)
[scrapers] sync master with current scraper repo
Diffstat (limited to 'addons/metadata.common.imdb.com/imdb.xml')
-rw-r--r-- addons/metadata.common.imdb.com/imdb.xml 155
1 files changed, 151 insertions, 4 deletions
diff --git a/addons/metadata.common.imdb.com/imdb.xml b/addons/metadata.common.imdb.com/imdb.xml
index 5633908756..2ae432516c 100644
--- a/addons/metadata.common.imdb.com/imdb.xml
+++ b/addons/metadata.common.imdb.com/imdb.xml
@@ -27,6 +27,20 @@
</RegExp>
</ParseIMDBRating>
+ <GetMetaCriticRatingById dest="5"> <!-- Builds a url request for the title main page, using the id held in $$1, and names ParseMetaCriticRating as the function that handles the response. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseMetaCriticRating&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;" dest="5"> <!-- $$1 is substituted into both the cache file name and the URL path. -->
+ <expression noclean="1" /> <!-- Empty pattern; presumably matches the whole input so the output is always emitted (TODO confirm against the scraper engine). -->
+ </RegExp>
+ </GetMetaCriticRatingById>
+ <ParseMetaCriticRating dest="5"> <!-- Reads the digits that follow the "criticreviews" link in $$1 and returns them as a rating element wrapped in details. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the content that the nested RegExp wrote to buffer 2. -->
+ <RegExp input="$$1" output="&lt;rating&gt;\1.\2&lt;/rating&gt;" dest="2"> <!-- Places a decimal point after the first captured digit, so "85" becomes "8.5". -->
+ <expression>&lt;a href=&quot;criticreviews&quot;&gt;(\d)(\d*)</expression> <!-- NOTE(review): a three-digit score such as "100" would be written as "1.00" and a single digit "7" as "7."; confirm this is acceptable. -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </ParseMetaCriticRating>
+
<GetIMDBPlotById dest="5">
<RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBPlot&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;" dest="5">
<expression noclean="1" />
@@ -35,7 +49,7 @@
<ParseIMDBPlot dest="5">
<RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
<RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="2">
- <expression trim="1">&lt;h2&gt;Storyline&lt;/h2&gt;\n+&lt;p&gt;(.*?)&lt;[^a/]</expression>
+ <expression fixchars="1" trim="1">&lt;h2&gt;Storyline&lt;/h2&gt;\n+&lt;p&gt;(.*?)&lt;[^a/]</expression>
</RegExp>
<expression noclean="1" />
</RegExp>
@@ -51,7 +65,7 @@
<RegExp input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class=&quot;cast_list&quot;&gt;(.*?)&lt;/table&gt;</expression>
</RegExp>
- <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\2_SX512_SY512_\3&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\2_SX1024_SY1024_\3&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
<expression repeat="yes" clear="yes" trim="3,4" noclean="1,2">&lt;noscript&gt;&lt;img \n[^a]*alt=&quot;([^&quot;]*)&quot;[^&quot;]*&quot;[^&quot;]*&quot;[^s]*src=&quot;(?:([^&quot;]*\.)[^&quot;]*(\.jpg))[^&gt;]*.*?ter"&gt;[^&gt;]*&gt;\n\s*(&lt;[^&gt;]*&gt;)?([^&lt;\(]*)?</expression>
</RegExp>
<RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\3&lt;/role&gt;&lt;/actor&gt;" dest="7+">
@@ -131,7 +145,7 @@
<RegExp input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class=&quot;cast&quot;&gt;(.*?)&lt;/table&gt;</expression>
</RegExp>
- <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX512_SY512_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX1024_SY1024_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
<expression repeat="yes" clear="yes" fixchars="3,5" trim="3,5" noclean="1,2">&lt;img src="(?:([^&quot;]*\.)[^&quot;]*(\.jpg))?[^&gt;]*[^&quot;]*&quot;nm&quot;&gt;&lt;a href=&quot;[^&quot;]*[^&gt;]*&gt;([^&lt;]*)&lt;[^&quot;]*&quot;ddd&quot;&gt;([^&lt;]&lt;)?[^&quot;]*&quot;char&quot;&gt;(.*?)&lt;/td&gt;</expression>
</RegExp>
<RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
@@ -205,10 +219,143 @@
<RegExp input="$$1" output="\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2" dest="4">
<expression noclean="1,2">&lt;a name=&quot;poster&quot;.*?src=&quot;(.*?)_S.*?(.jpg)&quot;.*?&lt;/a&gt;</expression>
</RegExp>
- <RegExp input="$$4" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="6">
+ <RegExp input="$$4" output="&lt;thumb aspect=&quot;poster&quot;&gt;\1&lt;/thumb&gt;" dest="6">
<expression noclean="1">(.*?_SX[0-9]+_SY[0-9]+_.jpg)</expression>
</RegExp>
<expression noclean="1" />
</RegExp>
</ParseIMDBThumbs>
+
+ <GetIMDBUSACert dest="5"> <!-- Builds a url request for the title main page, using the id held in $$1, and names ParseIMDBUSACert as the function that handles the response. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBUSACert&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;" dest="5"> <!-- Uses the same cache name ($$1-main.html) as the other main page requests in this file. -->
+ <expression noclean="1" /> <!-- Empty pattern; presumably matches the whole input (TODO confirm against the scraper engine). -->
+ </RegExp>
+ </GetIMDBUSACert>
+ <ParseIMDBUSACert dest="5"> <!-- Captures the text after "Rated " in the contentRating span of $$1 and returns it as an mpaa element. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;mpaa&gt;$INFO[certprefix]\1&lt;/mpaa&gt;&lt;/details&gt;" dest="5"> <!-- The captured text is prefixed with the $INFO[certprefix] value. -->
+ <expression>MPAA&lt;/a&gt;\)&lt;/h4&gt;\n?&lt;span itemprop=&quot;contentRating&quot;&gt;Rated\s([^&lt;]*)</expression> <!-- Capture stops at the next "<", i.e. the end of the span text. -->
+ </RegExp>
+ </ParseIMDBUSACert>
+
+ <GetIMDBCountryCert dest="5"> <!-- Builds a url request for the title "combined" page, using the id held in $$1, and names ParseIMDBCountryCert as the function that handles the response. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBCountryCert&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;" dest="5"> <!-- $$1 is substituted into both the cache file name and the URL path. -->
+ <expression noclean="1" /> <!-- Empty pattern; presumably matches the whole input (TODO confirm against the scraper engine). -->
+ </RegExp>
+ </GetIMDBCountryCert>
+ <ParseIMDBCountryCert dest="5"> <!-- Captures the certificate text that follows "country:" in a link of $$1 and returns it as an mpaa element. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;mpaa&gt;$INFO[certprefix]\1&lt;/mpaa&gt;&lt;/details&gt;" dest="5"> <!-- The captured text is prefixed with the $INFO[certprefix] value. -->
+ <expression>&gt;\s*$INFO[imdbcertcountry]:([^&lt;]+)&lt;/a&gt;</expression> <!-- The $INFO[imdbcertcountry] value is embedded in the pattern itself; presumably it is substituted before matching (TODO confirm). -->
+ </RegExp>
+ </ParseIMDBCountryCert>
+
+ <GetIMDBAKATitlesById dest="5"> <!-- Builds a url request for the title "combined" page, using the id held in $$1, and names ParseIMDBAKATitles as the function that handles the response. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBAKATitles&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;" dest="5"> <!-- Uses the same cache name ($$1-combined.html) as GetIMDBCountryCert. -->
+ <expression noclean="1" /> <!-- Empty pattern; presumably matches the whole input (TODO confirm against the scraper engine). -->
+ </RegExp>
+ </GetIMDBAKATitlesById>
+ <ParseIMDBAKATitles dest="5"> <!-- Chooses a title from the page in $$1 (h1 text, meta title, or an "also known as" entry selected by country) and returns it as a title element inside details. -->
+ <RegExp input="$$2" output="&lt;details&gt;&lt;title&gt;\1&lt;/title&gt;&lt;/details&gt;" dest="5"> <!-- Final step: wraps whatever the nested steps below left in buffer 2. -->
+ <RegExp input="$$1" output="\1" dest="2"> <!-- Starting value for buffer 2: the text of the h1 element. -->
+ <expression fixchars="1">&lt;h1&gt;([^&lt;]*)</expression>
+ </RegExp>
+ <RegExp input="$$10" output="\1" dest="4"> <!-- English title chain: the nested steps build buffer 10, which is then copied to buffer 4. -->
+ <RegExp input="$$1" output="\2" dest="9"> <!-- Buffer 9 receives the title taken from the meta "title" tag, without the optional "IMDb - " prefix and without the trailing year group. -->
+ <expression fixchars="2">&lt;meta name=&quot;title&quot; content=&quot;(IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="\1" dest="10"> <!-- Copies buffer 9 into buffer 10 as the initial candidate. -->
+ <expression />
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "Hong Kong" and "English". Each candidate below goes to buffer 11 and, when non-empty, is copied to buffer 10; presumably a later match replaces an earlier one (TODO confirm). -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+Hong Kong [&lt;em&gt;][^&quot;]+English</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression> <!-- Requires at least one character, so an empty buffer 11 does not match. -->
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "Canada (English" or "Canada (imdb". -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((Canada)\s(&lt;em&gt;)?\((English|imdb))</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "UK" directly before a br tag. -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(UK(&lt;em&gt;)?&lt;br&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "International (English title)" and then a comma or a tag. -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)?(,|&lt;)( |b)</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "International (English title)" and then "(imdb". -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)? (&lt;em&gt;)?\(imdb</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="12"> <!-- Buffer 12 is set when a country link ending in "UK" closes its div. -->
+ <expression fixchars="1" clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(UK&lt;/a&gt;&lt;/div&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$12" output="$$9" dest="10"> <!-- When buffer 12 is non-empty, buffer 10 is reset to the meta title held in buffer 9. -->
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="12"> <!-- Same check as above for a country link ending in "USA". -->
+ <expression clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(USA&lt;/a&gt;&lt;/div&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$12" output="$$9" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "USA" directly before a br tag. -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(USA(&lt;em&gt;)?&lt;br&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "USA (English" or "USA (imdb". -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((English|imdb))</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11"> <!-- Candidate: quoted title followed by "USA (new title". -->
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((new title))</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <expression noclean="1" /> <!-- Pattern of the enclosing RegExp that copies buffer 10 to buffer 4. -->
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2"> <!-- A non-empty buffer 4 replaces the h1 title in buffer 2. -->
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="4"> <!-- Quoted title followed by the $INFO[imdbakatitles] value goes to buffer 4. -->
+ <expression fixchars="1">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+$INFO[imdbakatitles]</expression>
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="4"> <!-- Same match, but additionally requiring "(imdb display" after the $INFO[imdbakatitles] value. -->
+ <expression fixchars="1">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+$INFO[imdbakatitles]\s(&lt;em&gt;)?\((imdb display)</expression>
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="5"> <!-- Writes to buffer 5 when a country link starts with the $INFO[imdbakatitles] value; buffer 5 is also this function's dest. -->
+ <expression>&lt;a href=&quot;/country/[^&gt;]+&gt;($INFO[imdbakatitles])</expression>
+ </RegExp>
+ <RegExp input="$$5" output="$$9" dest="4"> <!-- When buffer 5 contains the $INFO[imdbakatitles] value, buffer 4 is set to the meta title from buffer 9. -->
+ <expression>($INFO[imdbakatitles])</expression>
+ </RegExp>
+ <RegExp input="$INFO[imdbakatitles]" output="$$9" dest="4"> <!-- When the $INFO[imdbakatitles] value contains "Keep Original", buffer 4 is set to the meta title from buffer 9. -->
+ <expression>Keep Original</expression>
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2"> <!-- Last copy of a non-empty buffer 4 into buffer 2 before the outer RegExp wraps it. -->
+ <expression>(.+)</expression>
+ </RegExp>
+ <expression noclean="1" /> <!-- Pattern of the outermost RegExp; empty, presumably matching all of buffer 2 (TODO confirm against the scraper engine). -->
+ </RegExp>
+ </ParseIMDBAKATitles>
+
</scraperfunctions>