diff options
author | Martijn Kaijser <mcm.kaijser@gmail.com> | 2012-11-14 21:54:32 +0100 |
---|---|---|
committer | Martijn Kaijser <mcm.kaijser@gmail.com> | 2012-11-14 21:54:32 +0100 |
commit | 64d3b8e87d681c91553fa296a65f77bb3786a67e (patch) | |
tree | f3ff630691506a64c1887ed8066e33dd351dcc2c /addons/metadata.common.imdb.com | |
parent | 37e8e4ecf7fbc78dbb2c7cd338cdd0f7e8a620c9 (diff) |
[scrapers] sync master with current scraper repo
Diffstat (limited to 'addons/metadata.common.imdb.com')
-rw-r--r-- | addons/metadata.common.imdb.com/addon.xml | 2 |
-rw-r--r-- | addons/metadata.common.imdb.com/changelog.txt | 19 |
-rw-r--r-- | addons/metadata.common.imdb.com/icon.png | bin 0 -> 25064 bytes |
-rw-r--r-- | addons/metadata.common.imdb.com/imdb.xml | 155 |
4 files changed, 171 insertions, 5 deletions
diff --git a/addons/metadata.common.imdb.com/addon.xml b/addons/metadata.common.imdb.com/addon.xml index 59a43028d4..f4d66cdb03 100644 --- a/addons/metadata.common.imdb.com/addon.xml +++ b/addons/metadata.common.imdb.com/addon.xml @@ -1,7 +1,7 @@ <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <addon id="metadata.common.imdb.com" name="IMDB common scraper functions" - version="2.1.9" + version="2.4.0" provider-name="Team XBMC"> <requires> <import addon="xbmc.metadata" version="1.0"/> diff --git a/addons/metadata.common.imdb.com/changelog.txt b/addons/metadata.common.imdb.com/changelog.txt index a19b89efe5..4e0c80198f 100644 --- a/addons/metadata.common.imdb.com/changelog.txt +++ b/addons/metadata.common.imdb.com/changelog.txt @@ -1,3 +1,22 @@ +[B]2.4.0[/B] +- added: aspect tag to imdb poster + +[B]2.3.0[/B] +- added: option to scrape rating from MetaCritic (via IMDb) + +[B]2.2.3[/B] +- fixed: still to address some html encode issues, now in plots + +[B]2.2.2[/B] +- fixed: still to address html encode issues + +[B]2.2.1[/B] +- fixed: hopefully fixed issue with html encodes + +[B]2.2.0[/B] +- factored out GetIMDBAKATitlesById +- factored out scraping certifications + [B]2.1.9[/B] - fixed: plot when contains html links (once again) diff --git a/addons/metadata.common.imdb.com/icon.png b/addons/metadata.common.imdb.com/icon.png Binary files differnew file mode 100644 index 0000000000..1e45477728 --- /dev/null +++ b/addons/metadata.common.imdb.com/icon.png diff --git a/addons/metadata.common.imdb.com/imdb.xml b/addons/metadata.common.imdb.com/imdb.xml index 5633908756..2ae432516c 100644 --- a/addons/metadata.common.imdb.com/imdb.xml +++ b/addons/metadata.common.imdb.com/imdb.xml @@ -27,6 +27,20 @@ </RegExp> </ParseIMDBRating> + <GetMetaCriticRatingById dest="5"> + <RegExp input="$$1" output="<details><url cache="$$1-main.html" function="ParseMetaCriticRating">http://akas.imdb.com/title/$$1/</url></details>" dest="5"> + <expression noclean="1" /> + </RegExp> + 
</GetMetaCriticRatingById> + <ParseMetaCriticRating dest="5"> + <RegExp input="$$2" output="<details>\1</details>" dest="5"> + <RegExp input="$$1" output="<rating>\1.\2</rating>" dest="2"> + <expression><a href="criticreviews">(\d)(\d*)</expression> + </RegExp> + <expression noclean="1" /> + </RegExp> + </ParseMetaCriticRating> + <GetIMDBPlotById dest="5"> <RegExp input="$$1" output="<details><url cache="$$1-main.html" function="ParseIMDBPlot">http://akas.imdb.com/title/$$1/</url></details>" dest="5"> <expression noclean="1" /> @@ -35,7 +49,7 @@ <ParseIMDBPlot dest="5"> <RegExp input="$$2" output="<details>\1</details>" dest="5"> <RegExp input="$$1" output="<plot>\1</plot>" dest="2"> - <expression trim="1"><h2>Storyline</h2>\n+<p>(.*?)<[^a/]</expression> + <expression fixchars="1" trim="1"><h2>Storyline</h2>\n+<p>(.*?)<[^a/]</expression> </RegExp> <expression noclean="1" /> </RegExp> @@ -51,7 +65,7 @@ <RegExp input="$$1" output="\1" dest="6"> <expression noclean="1"><table class="cast_list">(.*?)</table></expression> </RegExp> - <RegExp input="$$6" output="<actor><thumb>\2_SX512_SY512_\3</thumb><name>\1</name><role>\5</role></actor>" dest="7"> + <RegExp input="$$6" output="<actor><thumb>\2_SX1024_SY1024_\3</thumb><name>\1</name><role>\5</role></actor>" dest="7"> <expression repeat="yes" clear="yes" trim="3,4" noclean="1,2"><noscript><img \n[^a]*alt="([^"]*)"[^"]*"[^"]*"[^s]*src="(?:([^"]*\.)[^"]*(\.jpg))[^>]*.*?ter">[^>]*>\n\s*(<[^>]*>)?([^<\(]*)?</expression> </RegExp> <RegExp input="$$6" output="<actor><thumb></thumb><name>\1</name><role>\3</role></actor>" dest="7+"> @@ -131,7 +145,7 @@ <RegExp input="$$1" output="\1" dest="6"> <expression noclean="1"><table class="cast">(.*?)</table></expression> </RegExp> - <RegExp input="$$6" output="<actor><thumb>\1_SX512_SY512_\2</thumb><name>\3</name><role>\5</role></actor>" dest="7"> + <RegExp input="$$6" output="<actor><thumb>\1_SX1024_SY1024_\2</thumb><name>\3</name><role>\5</role></actor>" dest="7"> <expression 
repeat="yes" clear="yes" fixchars="3,5" trim="3,5" noclean="1,2"><img src="(?:([^"]*\.)[^"]*(\.jpg))?[^>]*[^"]*"nm"><a href="[^"]*[^>]*>([^<]*)<[^"]*"ddd">([^<]<)?[^"]*"char">(.*?)</td></expression> </RegExp> <RegExp input="$$7" output="<actor><thumb>\1</thumb>\2</actor>" dest="2+"> @@ -205,10 +219,143 @@ <RegExp input="$$1" output="\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2" dest="4"> <expression noclean="1,2"><a name="poster".*?src="(.*?)_S.*?(.jpg)".*?</a></expression> </RegExp> - <RegExp input="$$4" output="<thumb>\1</thumb>" dest="6"> + <RegExp input="$$4" output="<thumb aspect="poster">\1</thumb>" dest="6"> <expression noclean="1">(.*?_SX[0-9]+_SY[0-9]+_.jpg)</expression> </RegExp> <expression noclean="1" /> </RegExp> </ParseIMDBThumbs> + + <GetIMDBUSACert dest="5"> + <RegExp input="$$1" output="<details><url cache="$$1-main.html" function="ParseIMDBUSACert">http://akas.imdb.com/title/$$1/</url></details>" dest="5"> + <expression noclean="1" /> + </RegExp> + </GetIMDBUSACert> + <ParseIMDBUSACert dest="5"> + <RegExp input="$$1" output="<details><mpaa>$INFO[certprefix]\1</mpaa></details>" dest="5"> + <expression>MPAA</a>\)</h4>\n?<span itemprop="contentRating">Rated\s([^<]*)</expression> + </RegExp> + </ParseIMDBUSACert> + + <GetIMDBCountryCert dest="5"> + <RegExp input="$$1" output="<details><url cache="$$1-combined.html" function="ParseIMDBCountryCert">http://akas.imdb.com/title/$$1/combined</url></details>" dest="5"> + <expression noclean="1" /> + </RegExp> + </GetIMDBCountryCert> + <ParseIMDBCountryCert dest="5"> + <RegExp input="$$1" output="<details><mpaa>$INFO[certprefix]\1</mpaa></details>" dest="5"> + <expression>>\s*$INFO[imdbcertcountry]:([^<]+)</a></expression> + </RegExp> + </ParseIMDBCountryCert> + + <GetIMDBAKATitlesById dest="5"> + <RegExp input="$$1" output="<details><url cache="$$1-combined.html" function="ParseIMDBAKATitles">http://akas.imdb.com/title/$$1/combined</url></details>" dest="5"> + <expression noclean="1" /> + </RegExp> + 
</GetIMDBAKATitlesById> + <ParseIMDBAKATitles dest="5"> + <RegExp input="$$2" output="<details><title>\1</title></details>" dest="5"> + <RegExp input="$$1" output="\1" dest="2"> + <expression fixchars="1"><h1>([^<]*)</expression> + </RegExp> + <RegExp input="$$10" output="\1" dest="4"> + <RegExp input="$$1" output="\2" dest="9"> + <expression fixchars="2"><meta name="title" content="(IMDb - )?(?:&#x22;)?([^"]*?)(?:&#x22;)? \([^\(]*?([0-9]{4})\)</expression> + </RegExp> + <RegExp input="$$9" output="\1" dest="10"> + <expression /> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+Hong Kong [<em>][^"]+English</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+((Canada)\s(<em>)?\((English|imdb))</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+(UK(<em>)?<br>)</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^"]+International\s(<em>)?\(English title\)(</em>)?(,|<)( |b)</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^"]+International\s(<em>)?\(English title\)(</em>)? 
(<em>)?\(imdb</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="12"> + <expression fixchars="1" clear="yes"><a href="/country/[^>]+>(UK</a></div>)</expression> + </RegExp> + <RegExp input="$$12" output="$$9" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="12"> + <expression clear="yes"><a href="/country/[^>]+>(USA</a></div>)</expression> + </RegExp> + <RegExp input="$$12" output="$$9" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+(USA(<em>)?<br>)</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+((USA)\s(<em>)?\((English|imdb))</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="11"> + <expression fixchars="1" clear="yes">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+((USA)\s(<em>)?\((new title))</expression> + </RegExp> + <RegExp input="$$11" output="\1" dest="10"> + <expression>(.+)</expression> + </RegExp> + <expression noclean="1" /> + </RegExp> + <RegExp input="$$4" output="\1" dest="2"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="4"> + <expression fixchars="1">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+$INFO[imdbakatitles]</expression> + </RegExp> + <RegExp input="$$4" output="\1" dest="2"> + <expression>(.+)</expression> + </RegExp> + <RegExp input="$$1" output="\1" dest="4"> + <expression fixchars="1">>\s*?"([^<]+)"[^<]+[<em>]?[^"]+$INFO[imdbakatitles]\s(<em>)?\((imdb display)</expression> + </RegExp> + <RegExp input="$$4" output="\1" dest="2"> + <expression>(.+)</expression> + </RegExp> + 
<RegExp input="$$1" output="\1" dest="5"> + <expression><a href="/country/[^>]+>($INFO[imdbakatitles])</expression> + </RegExp> + <RegExp input="$$5" output="$$9" dest="4"> + <expression>($INFO[imdbakatitles])</expression> + </RegExp> + <RegExp input="$INFO[imdbakatitles]" output="$$9" dest="4"> + <expression>Keep Original</expression> + </RegExp> + <RegExp input="$$4" output="\1" dest="2"> + <expression>(.+)</expression> + </RegExp> + <expression noclean="1" /> + </RegExp> + </ParseIMDBAKATitles> + </scraperfunctions> |