aboutsummaryrefslogtreecommitdiff
path: root/addons/metadata.common.imdb.com
diff options
context:
space:
mode:
authornadasb <nadasb@svn>2010-10-11 19:46:49 +0000
committernadasb <nadasb@svn>2010-10-11 19:46:49 +0000
commit86ac17e549023b61417519b62d872b550da54de0 (patch)
tree97e8db7ea45b371031d597ffd99e370c4493ec43 /addons/metadata.common.imdb.com
parentc5155ccc852779b9c26ee1bd1ef6fa182d69719f (diff)
sync with scrapers git
git-svn-id: https://xbmc.svn.sourceforge.net/svnroot/xbmc/trunk@34690 568bbfeb-2a22-0410-94d2-cc84cf5bfa90
Diffstat (limited to 'addons/metadata.common.imdb.com')
-rw-r--r--addons/metadata.common.imdb.com/addon.xml29
-rw-r--r--addons/metadata.common.imdb.com/changelog.txt20
-rw-r--r--addons/metadata.common.imdb.com/imdb.xml205
3 files changed, 254 insertions, 0 deletions
diff --git a/addons/metadata.common.imdb.com/addon.xml b/addons/metadata.common.imdb.com/addon.xml
new file mode 100644
index 0000000000..4b08fd17a4
--- /dev/null
+++ b/addons/metadata.common.imdb.com/addon.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<addon id="metadata.common.imdb.com"
+ name="IMDB common scraper functions"
+ version="2.0.5"
+ provider-name="Team XBMC"> <!-- Add-on manifest: exposes imdb.xml as a shared scraper-function library. -->
+ <requires>
+ <import addon="xbmc.metadata" version="1.0"/>
+ </requires>
+ <extension point="xbmc.metadata.scraper.library"
+ library="imdb.xml"/> <!-- library names the scraper-function file shipped next to this manifest -->
+ <extension point="xbmc.addon.metadata"> <!-- lang values are ISO 639-1 codes: Korean is "ko" (not "kr"), Swedish is "sv" (not "se") -->
+ <platform>all</platform>
+ <minversion>20000</minversion>
+ <summary lang="en">IMDB Scraper Library</summary>
+ <summary lang="hu">IMDB leolvasó-könyvtár</summary>
+ <summary lang="ko">IMDB 스크래퍼 라이브러리</summary>
+ <summary lang="nl">IMDB Scraper Bibliotheek</summary>
+ <summary lang="pl">Scraper IMDb</summary>
+ <summary lang="pt">Scraper de filmes IMDb</summary>
+ <summary lang="sv">IMDB Skrapbibliotek</summary>
+ <description lang="en">Download Movie information from www.imdb.com</description>
+ <description lang="hu">Film információk letöltése a www.imdb.com webhelyről</description>
+ <description lang="ko">www.imdb.com 에서 영화 정보 다운로드</description>
+ <description lang="nl">Download film informatie van www.imdb.com</description>
+ <description lang="pl">Pobieraj informacje o filmach z www.imdb.com</description>
+ <description lang="pt">Descarregar informação de filmes de www.imdb.com</description>
+ <description lang="sv">Ladda ner filminformation från www.imdb.com</description>
+ </extension>
+</addon>
diff --git a/addons/metadata.common.imdb.com/changelog.txt b/addons/metadata.common.imdb.com/changelog.txt
new file mode 100644
index 0000000000..cd4bd77c6d
--- /dev/null
+++ b/addons/metadata.common.imdb.com/changelog.txt
@@ -0,0 +1,20 @@
+[B]2.0.5[/B]
+- changed: move scraping of IMDb rating out to IMDb scraper library
+
+[B]2.0.4[/B]
+- reverted: purge fixchars from all functions, since it causes conversion failures on Beta2 and Linux
+
+[B]2.0.3[/B]
+- reverted: fixchars removed from cast parsing, leads to Dharma Beta2 hanging, will be added back once next Beta is out
+
+[B]2.0.2[/B]
+- fixed: some more cast won't get scraped under certain circumstances
+
+[B]2.0.1[/B]
+- fixed: cast won't get scraped under certain circumstances
+
+[B]2.0.0[/B]
+- changed: adapted new IMDb layout
+
+[B]1.0.5[/B]
+- fixed: tack /combined onto the end of the url to work around the new layout of IMDb \ No newline at end of file
diff --git a/addons/metadata.common.imdb.com/imdb.xml b/addons/metadata.common.imdb.com/imdb.xml
new file mode 100644
index 0000000000..24a3a4f422
--- /dev/null
+++ b/addons/metadata.common.imdb.com/imdb.xml
@@ -0,0 +1,205 @@
+<scraperfunctions>
+ <GetIMDBRatingById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to ParseIMDBRating. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBRating&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBRatingById>
+ <GetIMDBPlotById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to ParseIMDBPlot. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBPlot&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBPlotById>
+ <GetIMDBCastById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to ParseIMDBCast. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBCast&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBCastById>
+ <GetIMDBScaledCastById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to ParseIMDBScaledCast. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBScaledCast&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBScaledCastById>
+ <GetIMDBDirectorsById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to ParseIMDBDirectors. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBDirectors&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBDirectorsById>
+ <GetIMDBWritersById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to ParseIMDBWriters. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;ParseIMDBWriters&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBWritersById>
+ <GetIMDBFullCastById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/combined (cache name $$1-combined.html) that is handed to ParseIMDBFullCast. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBFullCast&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBFullCastById>
+ <GetIMDBScaledFullCastById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/combined (cache name $$1-combined.html) that is handed to ParseIMDBScaledFullCast. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBScaledFullCast&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBScaledFullCastById>
+ <GetIMDBFullDirectorsById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/combined (cache name $$1-combined.html) that is handed to ParseIMDBFullDirectors. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBFullDirectors&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBFullDirectorsById>
+ <GetIMDBFullWritersById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/combined (cache name $$1-combined.html) that is handed to ParseIMDBFullWriters. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBFullWriters&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBFullWritersById>
+ <GetIMDBThumbsById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/posters (cache name $$1-posters.html) that is handed to ParseIMDBThumbs. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-posters.html&quot; function=&quot;ParseIMDBThumbs&quot;&gt;http://akas.imdb.com/title/$$1/posters&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBThumbsById>
+ <GetIMDBTrailerById dest="5"> <!-- Builds a details/url request for http://akas.imdb.com/title/$$1/ (cache name $$1-main.html) that is handed to GetIMDBTrailerPage, the first hop of the trailer lookup. $$1 is presumably the IMDb title id; confirm against callers. -->
+   <RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-main.html&quot; function=&quot;GetIMDBTrailerPage&quot;&gt;http://akas.imdb.com/title/$$1/&lt;/url&gt;&lt;/details&gt;">
+     <expression noclean="1"/>
+   </RegExp>
+ </GetIMDBTrailerById>
+ <ParseIMDBRating dest="5"> <!-- Pulls the rating number and vote count out of the page text in $$1 and returns them as rating/votes inside a details wrapper. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;\1&lt;/details&gt;">
+     <RegExp dest="2" input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;&lt;votes&gt;\2&lt;/votes&gt;"> <!-- group 1: digits and dots after the rating-rating span; group 2: digits and commas before " votes" -->
+       <expression>&lt;span class="rating-rating"&gt;([0-9.]+).*?&gt;([0-9,]+) votes&lt;/a&gt;</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBRating>
+ <ParseIMDBPlot dest="5"> <!-- Extracts the first paragraph following the "Storyline" h2 heading of the page in $$1 and returns it as plot inside a details wrapper. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;\1&lt;/details&gt;">
+     <RegExp dest="2" input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;"> <!-- captures text up to the next tag; the capture is trimmed (trim="1") -->
+       <expression trim="1">&lt;h2&gt;Storyline&lt;/h2&gt;\n+&lt;p&gt;([^&lt;]+)</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBPlot>
+ <ParseIMDBCast dest="5"> <!-- Converts the cast_list table of the page in $$1 into actor entries (thumb, name, role) wrapped in details. Three steps run in document order: isolate table, build entries, normalise thumbs. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="\1" dest="6">
+ <expression noclean="1">&lt;table class="cast_list"&gt;(.*?)&lt;/table&gt;</expression>
+ </RegExp> <!-- step 1: buffer 6 receives the inner markup of the cast_list table -->
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX512_SY512_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\4&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <expression repeat="yes" clear="yes" trim="3,4" noclean="1,2">&lt;img\n[^\n]*\n[^\n]*\n\s+src="(?:([^"]*\.)[^"]*(\.jpg))?[^&gt;]*[^"]*"[^"]*"&gt;[^&gt;]*&gt;([^&lt;]*).*?&lt;td class="character"&gt;[^&gt;]*&gt;[^&gt;]*&gt;([^&lt;]*)?</expression>
+ </RegExp> <!-- step 2: buffer 7 receives one actor entry per matched row. The thumb is rebuilt as image-URL prefix + _SX512_SY512_ + .jpg; because the .jpg group is optional, a row without a .jpg src yields only the bare _SX512_SY512_ marker. -->
+ <RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
+ <expression repeat="yes" clear="yes" noclean="1,2,3">&lt;actor&gt;&lt;thumb&gt;(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)&lt;/thumb&gt;(.*?)&lt;/actor&gt;</expression>
+ </RegExp> <!-- step 3: re-emits every actor into buffer 2. Group 1 is set only for thumbs starting with http, so a bare _SXn_SYn_ marker becomes an empty thumb. NOTE(review): noclean lists group 3 but the pattern has only two groups. -->
+ <expression noclean="1" />
+ </RegExp>
+ </ParseIMDBCast>
+ <ParseIMDBScaledCast dest="5"> <!-- Same pipeline as the fixed-size cast parser, but the thumb size comes from $INFO[imdbscale] (presumably an add-on setting; confirm) instead of the literal 512. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="\1" dest="6">
+ <expression noclean="1">&lt;table class="cast_list"&gt;(.*?)&lt;/table&gt;</expression>
+ </RegExp> <!-- step 1: buffer 6 receives the inner markup of the cast_list table -->
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\4&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <expression repeat="yes" clear="yes" trim="3,4" noclean="1,2">&lt;img\n[^\n]*\n[^\n]*\n\s+src="(?:([^"]*\.)[^"]*(\.jpg))?[^&gt;]*[^"]*"[^"]*"&gt;[^&gt;]*&gt;([^&lt;]*).*?&lt;td class="character"&gt;[^&gt;]*&gt;[^&gt;]*&gt;([^&lt;]*)?</expression>
+ </RegExp> <!-- step 2: buffer 7 receives one actor entry per matched row; rows without a .jpg src yield only the bare _SXn_SYn_ marker because the image group is optional -->
+ <RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
+ <expression repeat="yes" clear="yes" noclean="1,2,3">&lt;actor&gt;&lt;thumb&gt;(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)&lt;/thumb&gt;(.*?)&lt;/actor&gt;</expression>
+ </RegExp> <!-- step 3: re-emits every actor into buffer 2, keeping the thumb only when it starts with http. The marker alternative requires digits, so it assumes imdbscale expands to a number (TODO confirm). -->
+ <expression noclean="1" />
+ </RegExp>
+ </ParseIMDBScaledCast>
+ <ParseIMDBDirectors dest="5"> <!-- Collects the names linked under the "Director"/"Directors" h4 section of the page in $$1 and returns them as director entries inside details. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;\1&lt;/details&gt;">
+     <RegExp dest="6" input="$$1" output="\1"> <!-- buffer 6: markup between the section heading and the next "div class" -->
+       <expression noclean="1" clear="yes">Director[s]?.*?&lt;/h4&gt;(.*?)&lt;div class</expression>
+     </RegExp>
+     <RegExp dest="2+" input="$$6" output="&lt;director&gt;\1&lt;/director&gt;"> <!-- one director entry per /name link found in buffer 6 -->
+       <expression clear="yes" repeat="yes">&lt;a href="/name[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBDirectors>
+ <ParseIMDBWriters dest="5"> <!-- Collects the names linked under the "Writer"/"Writers" h4 section of the page in $$1 and returns them as credits entries inside details. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;\1&lt;/details&gt;">
+     <RegExp dest="6" input="$$1" output="\1"> <!-- buffer 6: markup between the section heading and the next "div class" -->
+       <expression noclean="1">Writer[s]?.*?&lt;/h4&gt;(.*?)&lt;div class</expression>
+     </RegExp>
+     <RegExp dest="2+" input="$$6" output="&lt;credits&gt;\1&lt;/credits&gt;"> <!-- one credits entry per /name link found in buffer 6 -->
+       <expression repeat="yes">&lt;a href="/name[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBWriters>
+ <ParseIMDBFullCast dest="5"> <!-- Converts the table with class "cast" of the page in $$1 into actor entries (thumb, name, role) wrapped in details. Three steps run in document order: isolate table, build entries, normalise thumbs. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="\1" dest="6">
+ <expression noclean="1">&lt;table class="cast"&gt;(.*?)&lt;/table&gt;</expression>
+ </RegExp> <!-- step 1: buffer 6 receives the inner markup of the cast table -->
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX512_SY512_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <expression repeat="yes" clear="yes" trim="3,5" noclean="1,2">&lt;img src="(?:([^"]*\.)[^"]*(\.jpg))?[^&gt;]*[^"]*"nm"&gt;&lt;a href="[^"]*[^&gt;]*&gt;([^&lt;]*)&lt;[^"]*"ddd"&gt;([^&lt;]&lt;)?[^"]*"char"&gt;(.*?)&lt;/td&gt;</expression>
+ </RegExp> <!-- step 2: buffer 7 receives one actor entry per row. Name is group 3 (link text in the "nm" cell), role is group 5 (content of the "char" cell); group 4 is matched but not used in the output. Rows without a .jpg src yield only the bare _SX512_SY512_ marker. -->
+ <RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
+ <expression repeat="yes" clear="yes" noclean="1,2,3">&lt;actor&gt;&lt;thumb&gt;(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)&lt;/thumb&gt;(.*?)&lt;/actor&gt;</expression>
+ </RegExp> <!-- step 3: re-emits every actor into buffer 2, keeping the thumb only when it starts with http; a bare _SXn_SYn_ marker becomes an empty thumb -->
+ <expression noclean="1" />
+ </RegExp>
+ </ParseIMDBFullCast>
+ <ParseIMDBScaledFullCast dest="5"> <!-- Same pipeline as the fixed-size full-cast parser, but the thumb size comes from $INFO[imdbscale] (presumably an add-on setting; confirm) instead of the literal 512. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="\1" dest="6">
+ <expression noclean="1">&lt;table class="cast"&gt;(.*?)&lt;/table&gt;</expression>
+ </RegExp> <!-- step 1: buffer 6 receives the inner markup of the cast table -->
+ <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
+ <expression repeat="yes" clear="yes" trim="3,5" noclean="1,2">&lt;img src="(?:([^"]*\.)[^"]*(\.jpg))?[^&gt;]*[^"]*"nm"&gt;&lt;a href="[^"]*[^&gt;]*&gt;([^&lt;]*)&lt;[^"]*"ddd"&gt;([^&lt;]&lt;)?[^"]*"char"&gt;(.*?)&lt;/td&gt;</expression>
+ </RegExp> <!-- step 2: buffer 7 receives one actor entry per row; name is group 3, role is group 5, group 4 is matched but unused. Rows without a .jpg src yield only the bare _SXn_SYn_ marker. -->
+ <RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
+ <expression repeat="yes" clear="yes" noclean="1,2,3">&lt;actor&gt;&lt;thumb&gt;(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)&lt;/thumb&gt;(.*?)&lt;/actor&gt;</expression>
+ </RegExp> <!-- step 3: re-emits every actor into buffer 2, keeping the thumb only when it starts with http. The marker alternative requires digits, so it assumes imdbscale expands to a number (TODO confirm). -->
+ <expression noclean="1" />
+ </RegExp>
+ </ParseIMDBScaledFullCast>
+ <ParseIMDBFullDirectors dest="5"> <!-- Collects the names linked under the "Director:"/"Directors:" h5 section of the combined page in $$1 and returns them as director entries inside details. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;\1&lt;/details&gt;">
+     <RegExp dest="6" input="$$1" output="\1" conditional="fullcredits"> <!-- only this extraction step carries the fullcredits condition (presumably an add-on setting; confirm) -->
+       <expression noclean="1" clear="yes">Director[s]?:&lt;/h5&gt;[^&gt;]*&gt;(.*?)&lt;/div</expression>
+     </RegExp>
+     <RegExp dest="2+" input="$$6" output="&lt;director&gt;\1&lt;/director&gt;"> <!-- one director entry per /name/ link found in buffer 6 -->
+       <expression clear="yes" repeat="yes">&lt;a href="/name/[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBFullDirectors>
+ <ParseIMDBFullWriters dest="5"> <!-- Collects the names linked inside the "Writing credits" section of the combined page in $$1 and returns them as credits entries inside details. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;\1&lt;/details&gt;">
+     <RegExp dest="6" input="$$1" output="\1" conditional="fullcredits"> <!-- only this extraction step carries the fullcredits condition (presumably an add-on setting; confirm); the section ends at the next name=" occurrence -->
+       <expression noclean="1">Writing credits(.*?)name="</expression>
+     </RegExp>
+     <RegExp dest="2+" input="$$6" output="&lt;credits&gt;\1&lt;/credits&gt;"> <!-- one credits entry per /name/ link found in buffer 6 -->
+       <expression repeat="yes">&lt;a href="/name/[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBFullWriters>
+ <ParseIMDBThumbs dest="5"> <!-- Finds the poster image on the posters page in $$1, rewrites its size marker to $INFO[imdbscale], and returns it as a thumb inside details. The dot before "jpg" is escaped so only a real .jpg extension matches, as in the cast parsers. -->
+ <RegExp input="$$6" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2" dest="4">
+ <expression noclean="1,2">&lt;a name="poster".*?src="(.*?)_S.*?(\.jpg)".*?&lt;/a&gt;</expression>
+ </RegExp> <!-- buffer 4: image URL up to the first _S, then the rescaled size marker, then the .jpg extension -->
+ <RegExp input="$$4" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="6">
+ <expression noclean="1">(.*?_SX[0-9]+_SY[0-9]+_\.jpg)</expression>
+ </RegExp> <!-- buffer 6: thumb is emitted only if the rebuilt URL has a numeric size marker, which assumes imdbscale expands to digits (TODO confirm) -->
+ <expression noclean="1" />
+ </RegExp>
+ </ParseIMDBThumbs>
+ <GetIMDBTrailerPage dest="5"> <!-- Finds the first /vi{digits}/ video id in the page in $$1 and builds a details/url request for http://akas.imdb.com/video/imdb/vi{id}/player, handed to ParseIMDBTrailer. At least one digit is required so a literal "/vi/" cannot produce an id-less player URL. -->
+ <RegExp input="$$1" output="&lt;details&gt;&lt;url function=&quot;ParseIMDBTrailer&quot;&gt;http://akas.imdb.com/video/imdb/vi\1/player&lt;/url&gt;&lt;/details&gt;" dest="5">
+ <expression>/vi([0-9]+)/</expression>
+ </RegExp>
+ </GetIMDBTrailerPage>
+ <ParseIMDBTrailer dest="5"> <!-- Builds the trailer address from the player page in $$1 as the quoted "file" value, a slash, then the quoted "id" value, and returns it as a urlencoded trailer inside details. -->
+   <RegExp dest="5" input="$$2" output="&lt;details&gt;&lt;trailer urlencoded=&quot;yes&quot;&gt;\1&lt;/trailer&gt;&lt;/details&gt;">
+     <RegExp dest="2" input="$$1" output="\1"> <!-- first: buffer 2 = value following "file" -->
+       <expression noclean="1">"file"[^"]*"([^"]*)</expression>
+     </RegExp>
+     <RegExp dest="2" input="$$1" output="$$2/\1"> <!-- second: buffer 2 = previous buffer 2 + "/" + value following "id"; must stay after the "file" step -->
+       <expression noclean="1">"id"[^"]*"([^"]*)</expression>
+     </RegExp>
+     <expression noclean="1"/>
+   </RegExp>
+ </ParseIMDBTrailer>
+</scraperfunctions>