about summary refs log tree commit diff
path: root/addons/metadata.common.imdb.com
diff options
context:
space:
mode:
author Martijn Kaijser <mcm.kaijser@gmail.com> 2012-11-14 21:54:32 +0100
committer Martijn Kaijser <mcm.kaijser@gmail.com> 2012-11-14 21:54:32 +0100
commit 64d3b8e87d681c91553fa296a65f77bb3786a67e (patch)
tree f3ff630691506a64c1887ed8066e33dd351dcc2c /addons/metadata.common.imdb.com
parent 37e8e4ecf7fbc78dbb2c7cd338cdd0f7e8a620c9 (diff)
[scrapers] sync master with current scraper repo
Diffstat (limited to 'addons/metadata.common.imdb.com')
-rw-r--r-- addons/metadata.common.imdb.com/addon.xml 2
-rw-r--r-- addons/metadata.common.imdb.com/changelog.txt 19
-rw-r--r-- addons/metadata.common.imdb.com/icon.png bin 0 -> 25064 bytes
-rw-r--r-- addons/metadata.common.imdb.com/imdb.xml 155
4 files changed, 171 insertions, 5 deletions
diff --git a/addons/metadata.common.imdb.com/addon.xml b/addons/metadata.common.imdb.com/addon.xml
index 59a43028d4..f4d66cdb03 100644
--- a/addons/metadata.common.imdb.com/addon.xml
+++ b/addons/metadata.common.imdb.com/addon.xml
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<addon id="metadata.common.imdb.com"
name="IMDB common scraper functions"
- version="2.1.9"
+ version="2.4.0"
provider-name="Team XBMC">
<requires>
<import addon="xbmc.metadata" version="1.0"/>
diff --git a/addons/metadata.common.imdb.com/changelog.txt b/addons/metadata.common.imdb.com/changelog.txt
index a19b89efe5..4e0c80198f 100644
--- a/addons/metadata.common.imdb.com/changelog.txt
+++ b/addons/metadata.common.imdb.com/changelog.txt
@@ -1,3 +1,22 @@
+[B]2.4.0[/B]
+- added: aspect tag to imdb poster
+
+[B]2.3.0[/B]
+- added: option to scrape rating from MetaCritic (via IMDb)
+
+[B]2.2.3[/B]
+- fixed: still to address some html encode issues, now in plots
+
+[B]2.2.2[/B]
+- fixed: still to address html encode issues
+
+[B]2.2.1[/B]
+- fixed: hopefully fixed issue with html encodes
+
+[B]2.2.0[/B]
+- factored out GetIMDBAKATitlesById
+- factored out scraping certifications
+
[B]2.1.9[/B]
- fixed: plot when contains html links (once again)
diff --git a/addons/metadata.common.imdb.com/icon.png b/addons/metadata.common.imdb.com/icon.png
new file mode 100644
index 0000000000..1e45477728
--- /dev/null
+++ b/addons/metadata.common.imdb.com/icon.png
Binary files differ
diff --git a/addons/metadata.common.imdb.com/imdb.xml b/addons/metadata.common.imdb.com/imdb.xml
index 5633908756..2ae432516c 100644
--- a/addons/metadata.common.imdb.com/imdb.xml
+++ b/addons/metadata.common.imdb.com/imdb.xml
@@ -27,6 +27,20 @@
</RegExp>
</ParseIMDBRating>
+ <GetMetaCriticRatingById dest="5">
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseMetaCriticRating&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;" dest="5">
+ <expression noclean="1" />
+ </RegExp>
+ </GetMetaCriticRatingById>
+ <ParseMetaCriticRating dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;rating&gt;\1.\2&lt;/rating&gt;" dest="2">
+ <expression>&lt;a href=&quot;criticreviews&quot;&gt;(\d)(\d*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </ParseMetaCriticRating>
+
<GetIMDBPlotById dest="5">
<RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBPlot&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;" dest="5">
<expression noclean="1" />
@@ -35,7 +49,7 @@
<ParseIMDBPlot dest="5">
<RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
<RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="2">
- <expression trim="1">&lt;h2&gt;Storyline&lt;/h2&gt;\n+&lt;p&gt;(.*?)&lt;[^a/]</expression>
+ <expression fixchars="1" trim="1">&lt;h2&gt;Storyline&lt;/h2&gt;\n+&lt;p&gt;(.*?)&lt;[^a/]</expression>
</RegExp>
<expression noclean="1" />
</RegExp>
@@ -51,7 +65,7 @@
<RegExp input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class=&quot;cast_list&quot;&gt;(.*?)&lt;/table&gt;</expression>
</RegExp>
- <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\2_SX512_SY512_\3&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\2_SX1024_SY1024_\3&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
<expression repeat="yes" clear="yes" trim="3,4" noclean="1,2">&lt;noscript&gt;&lt;img \n[^a]*alt=&quot;([^&quot;]*)&quot;[^&quot;]*&quot;[^&quot;]*&quot;[^s]*src=&quot;(?:([^&quot;]*\.)[^&quot;]*(\.jpg))[^&gt;]*.*?ter"&gt;[^&gt;]*&gt;\n\s*(&lt;[^&gt;]*&gt;)?([^&lt;\(]*)?</expression>
</RegExp>
<RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\3&lt;/role&gt;&lt;/actor&gt;" dest="7+">
@@ -131,7 +145,7 @@
<RegExp input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class=&quot;cast&quot;&gt;(.*?)&lt;/table&gt;</expression>
</RegExp>
- <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX512_SY512_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX1024_SY1024_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
<expression repeat="yes" clear="yes" fixchars="3,5" trim="3,5" noclean="1,2">&lt;img src="(?:([^&quot;]*\.)[^&quot;]*(\.jpg))?[^&gt;]*[^&quot;]*&quot;nm&quot;&gt;&lt;a href=&quot;[^&quot;]*[^&gt;]*&gt;([^&lt;]*)&lt;[^&quot;]*&quot;ddd&quot;&gt;([^&lt;]&lt;)?[^&quot;]*&quot;char&quot;&gt;(.*?)&lt;/td&gt;</expression>
</RegExp>
<RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
@@ -205,10 +219,143 @@
<RegExp input="$$1" output="\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2" dest="4">
<expression noclean="1,2">&lt;a name=&quot;poster&quot;.*?src=&quot;(.*?)_S.*?(.jpg)&quot;.*?&lt;/a&gt;</expression>
</RegExp>
- <RegExp input="$$4" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="6">
+ <RegExp input="$$4" output="&lt;thumb aspect=&quot;poster&quot;&gt;\1&lt;/thumb&gt;" dest="6">
<expression noclean="1">(.*?_SX[0-9]+_SY[0-9]+_.jpg)</expression>
</RegExp>
<expression noclean="1" />
</RegExp>
</ParseIMDBThumbs>
+
+ <GetIMDBUSACert dest="5">
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBUSACert&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;" dest="5">
+ <expression noclean="1" />
+ </RegExp>
+ </GetIMDBUSACert>
+ <ParseIMDBUSACert dest="5">
+ <RegExp input="$$1" output="&lt;details&gt;&lt;mpaa&gt;$INFO[certprefix]\1&lt;/mpaa&gt;&lt;/details&gt;" dest="5">
+ <expression>MPAA&lt;/a&gt;\)&lt;/h4&gt;\n?&lt;span itemprop=&quot;contentRating&quot;&gt;Rated\s([^&lt;]*)</expression>
+ </RegExp>
+ </ParseIMDBUSACert>
+
+ <GetIMDBCountryCert dest="5">
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBCountryCert&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;" dest="5">
+ <expression noclean="1" />
+ </RegExp>
+ </GetIMDBCountryCert>
+ <ParseIMDBCountryCert dest="5">
+ <RegExp input="$$1" output="&lt;details&gt;&lt;mpaa&gt;$INFO[certprefix]\1&lt;/mpaa&gt;&lt;/details&gt;" dest="5">
+ <expression>&gt;\s*$INFO[imdbcertcountry]:([^&lt;]+)&lt;/a&gt;</expression>
+ </RegExp>
+ </ParseIMDBCountryCert>
+
+ <GetIMDBAKATitlesById dest="5">
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBAKATitles&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;" dest="5">
+ <expression noclean="1" />
+ </RegExp>
+ </GetIMDBAKATitlesById>
+ <ParseIMDBAKATitles dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;&lt;title&gt;\1&lt;/title&gt;&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="\1" dest="2">
+ <expression fixchars="1">&lt;h1&gt;([^&lt;]*)</expression>
+ </RegExp>
+ <RegExp input="$$10" output="\1" dest="4">
+ <RegExp input="$$1" output="\2" dest="9">
+ <expression fixchars="2">&lt;meta name=&quot;title&quot; content=&quot;(IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
+ </RegExp>
+ <RegExp input="$$9" output="\1" dest="10">
+ <expression />
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+Hong Kong [&lt;em&gt;][^&quot;]+English</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((Canada)\s(&lt;em&gt;)?\((English|imdb))</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(UK(&lt;em&gt;)?&lt;br&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)?(,|&lt;)( |b)</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)? (&lt;em&gt;)?\(imdb</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="12">
+ <expression fixchars="1" clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(UK&lt;/a&gt;&lt;/div&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$12" output="$$9" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="12">
+ <expression clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(USA&lt;/a&gt;&lt;/div&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$12" output="$$9" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(USA(&lt;em&gt;)?&lt;br&gt;)</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((English|imdb))</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="11">
+ <expression fixchars="1" clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((new title))</expression>
+ </RegExp>
+ <RegExp input="$$11" output="\1" dest="10">
+ <expression>(.+)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="4">
+ <expression fixchars="1">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+$INFO[imdbakatitles]</expression>
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="4">
+ <expression fixchars="1">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+$INFO[imdbakatitles]\s(&lt;em&gt;)?\((imdb display)</expression>
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2">
+ <expression>(.+)</expression>
+ </RegExp>
+ <RegExp input="$$1" output="\1" dest="5">
+ <expression>&lt;a href=&quot;/country/[^&gt;]+&gt;($INFO[imdbakatitles])</expression>
+ </RegExp>
+ <RegExp input="$$5" output="$$9" dest="4">
+ <expression>($INFO[imdbakatitles])</expression>
+ </RegExp>
+ <RegExp input="$INFO[imdbakatitles]" output="$$9" dest="4">
+ <expression>Keep Original</expression>
+ </RegExp>
+ <RegExp input="$$4" output="\1" dest="2">
+ <expression>(.+)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </ParseIMDBAKATitles>
+
</scraperfunctions>