aboutsummaryrefslogtreecommitdiff
path: root/addons/de.cinefacts.scraper/cinefacts.xml
blob: 407e28d337045a8a73e7b22d3fb6b46e5a691018 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
<?xml version="1.0" encoding="iso-8859-1" standalone="yes"?>
<scraper framework="1.1" date="2010-02-26" name="Cinefacts.de" content="movies" thumb="cinefacts.png" language="de">
	<NfoUrl dest="3">
		<!-- Scans the .nfo text in $$1 for a known movie link and writes a url/id pair to buffer 3. -->
		<!-- Direct Cinefacts link. Group 1 is the numeric film id, group 2 the title slug.
		     The host/path prefix is intentionally NOT captured: the output template uses
		     \1 and \2 as id and slug (same group layout as GetSearchResults), so capturing
		     the prefix would shift the groups and yield a broken URL and id.
		     clear="yes" empties buffer 3 before matching. -->
		<RegExp input="$$1" output="&lt;url&gt;http://www.cinefacts.de/kino/\1/\2/filmdetails.html&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;" dest="3">
			<expression clear="yes" noclean="1,2">cinefacts.de/kino/([0-9]*)/(.[^\/]*)/filmdetails.html</expression>
		</RegExp>
		<!-- IMDb link of the form imdb.com/title/tt<digits>. Group 2 holds the digits; the
		     IMDb external reviews page is handed to GetByIMDBId, which looks for a Cinefacts link. -->
		<RegExp input="$$1" output="&lt;url cache=&quot;tt\2&quot; function=&quot;GetByIMDBId&quot;&gt;http://www.imdb.com/title/tt\2/externalreviews&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;" dest="3+">
			<expression>(imdb.com/title/tt)([0-9]*)</expression>
		</RegExp>
		<!-- IMDb link of the form imdb.com/Title?<digits>; handled like the rule above. -->
		<RegExp input="$$1" output="&lt;url cache=&quot;tt\2&quot; function=&quot;GetByIMDBId&quot;&gt;http://www.imdb.com/title/tt\2/externalreviews&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;" dest="3+">
			<expression>(imdb.com/)Title\?([0-9]+)</expression>
		</RegExp>
	</NfoUrl>
	<GetByIMDBId dest="3">
		<!-- $$1 is the page requested by the IMDb rules in NfoUrl (IMDb external reviews).
		     Finds an anchor pointing at www.cinefacts.de/kino/... and passes that URL on to
		     GetCinefactsDetailsLink; the id element is filled from $$2. -->
		<RegExp
			input="$$1"
			dest="3+"
			output="&lt;url function=&quot;GetCinefactsDetailsLink&quot;&gt;http://www.cinefacts.de/kino/\1&lt;/url&gt;&lt;id&gt;$$2&lt;/id&gt;">
			<expression noclean="1">&lt;a href=&quot;http://www.cinefacts.de/kino/([^&quot;]*)&quot;</expression>
		</RegExp>
	</GetByIMDBId>
	<GetCinefactsDetailsLink dest="3">
		<!-- $$1 is the Cinefacts page requested by GetByIMDBId. Takes the href of the anchor
		     labelled "Filmdetails" (a site-relative path) and emits it as an absolute
		     www.cinefacts.de URL; the id element is filled from $$2. -->
		<RegExp
			input="$$1"
			dest="3+"
			output="&lt;url&gt;http://www.cinefacts.de\1&lt;/url&gt;&lt;id&gt;$$2&lt;/id&gt;">
			<expression>&lt;a href=&quot;([^&quot;]*)&quot;&gt;Filmdetails&lt;/a&gt;</expression>
		</RegExp>
	</GetCinefactsDetailsLink>
	<CreateSearchUrl dest="3" SearchStringEncoding="iso-8859-1">
		<!-- Builds the Cinefacts search URL in buffer 3. The expression is empty, so \1 is
		     whatever the scraper engine substitutes for a missing pattern
		     (presumably the whole search string in $$1; confirm against the engine docs). -->
		<RegExp
			input="$$1"
			dest="3"
			output="http://www.cinefacts.de/suche/suche.php?name=\1">
			<expression noclean="1"/>
		</RegExp>
	</CreateSearchUrl>
	<GetSearchResults dest="8">
		<!-- Outer rule: wraps the entity list gathered in buffer 5 into a results document (buffer 8). -->
		<RegExp
			input="$$5"
			dest="8"
			output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;">
			<!-- Inner rule: one entity per hit on the search page in $$1.
			     \1 = numeric film id, \2 = title slug, \3 = title attribute of the headline,
			     \4 = text starting at the first digit after the second br (shown in parentheses). -->
			<RegExp
				input="$$1"
				dest="5"
				output="&lt;entity&gt;&lt;title&gt;\3 (\4)&lt;/title&gt;&lt;url cache=&quot;\1.xml&quot; &gt;http://www.cinefacts.de/kino/\1/\2/filmdetails.html&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;">
				<expression repeat="yes">&gt;&lt;a href=&quot;/kino/([0-9]*)/(.[^\/]*)/filmdetails.html&quot;&gt;[^&lt;]*&lt;b title=&quot;([^&quot;]*)&quot; class=&quot;headline&quot;&gt;[^&lt;]+&lt;/b&gt;&lt;/a&gt;&lt;br&gt;[^&lt;]+&lt;br&gt;+[^0-9]+([^&lt;]*)</expression>
			</RegExp>
			<expression noclean="1"/>
		</RegExp>
	</GetSearchResults>
	<GetDetails dest="3">
		<!-- Builds the details document in buffer 3 from the film details page in $$1.
		     The nested rules collect their output fragments in buffer 5; buffers 4, 6 and 7
		     are used as scratch space. $$2 is read as the movie id (checked for an IMDb tt id below). -->
		<RegExp input="$$5" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;details&gt;\1&lt;/details&gt;" dest="3">
			<!--Title-->
			<!-- Text of the first h1 heading. -->
			<RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5+">
				<expression trim="1" noclean="1">&lt;h1&gt;([^&lt;]*)</expression>
			</RegExp>
			<!--Original Title-->
			<!-- Value of the dd that follows the "Originaltitel:" label. -->
			<RegExp input="$$1" output="&lt;originaltitle&gt;\1&lt;/originaltitle&gt;" dest="5+">
				<expression>&lt;dt class=&quot;c1&quot;&gt;Originaltitel:&lt;/dt&gt;[^&lt;]*&lt;dd class=&quot;first&quot;&gt;(.[^&lt;]*)&lt;/dd&gt;</expression>
			</RegExp>
			<!--Genre-->
			<!-- Step 1: copy the page section between "Genre:" and "Deutschlandstart:" to buffer 4. -->
			<RegExp input="$$1" output="\1" dest="4+">
				<expression noclean="1">Genre:([^:]*)Deutschlandstart:</expression>
			</RegExp>
			<!-- Step 2: one genre element per link text found in that section. -->
			<RegExp input="$$4" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
				<expression repeat="yes" noclean="1" trim="1">&gt;*[ A-Za-z]([^&lt;&gt;]*)&lt;/a&gt;</expression>
			</RegExp>
			<!--Director Film-->
			<!-- Step 1: copy the section between "Regie:" and "Buch:" to buffer 7. -->
			<RegExp input="$$1" output="\1" dest="7+">
				<expression noclean="1">Regie:([^:]*)Buch:</expression>
			</RegExp>
			<!-- Step 2: one director element per link text in buffer 7. -->
			<RegExp input="$$7" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
				<expression repeat="yes">&lt;a href=&quot;[^&quot;]*&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
			</RegExp>
			<!--Actors-->
			<!-- Step 1: append the cast table (from "Darsteller:" up to the closing table tag)
			     to buffer 7, which at this point also holds the director section. -->
			<RegExp input="$$1" output="\1" dest="7+">
				<expression noclean="1">Darsteller:&lt;/td&gt;(.*)&lt;/table</expression>
			</RegExp>
			<!-- Step 2: \1 = actor name (link text), \2 = role text following " als". -->
			<RegExp input="$$7" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\2&lt;/role&gt;&lt;/actor&gt;" dest="5+">
				<expression repeat="yes">&gt;([^&lt;&gt;]*)&lt;/a&gt;&lt;/td&gt;+[^&lt;]+&lt;[^&gt;]+&gt; als([ A-Za-z]*)</expression>
			</RegExp>
			<!--Studio-->
			<!-- Text between "Verleih:" and the next full stop. -->
			<RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
				<expression trim="1" noclean="1">Verleih:([^\.]*)\.</expression>
			</RegExp>
			<!--Year-->
			<!-- Digits between a closing anchor and a closing dd. -->
			<RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
				<expression>&lt;/a&gt; ([0-9]*) &lt;/dd&gt;</expression>
			</RegExp>
			<!--MPAA-->
			<!-- Value of the element following the "FSK:" label. -->
			<RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
				<expression>FSK:&lt;/dt&gt;[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
			</RegExp>
			<!--Runtime-->
			<!-- Value following the "L.nge:" label; the dot matches any single character in
			     that position (presumably the umlaut of "Laenge", independent of page encoding). -->
			<RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
				<expression>L.nge:&lt;/dt&gt;[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
			</RegExp>
			<!--Plot-->
			<!-- Text following the "KURZINHALT" heading, up to the next tag. -->
			<RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
				<expression>KURZINHALT&lt;/h2&gt;&lt;/li&gt;[^&gt;]*&gt;*([^&lt;]*)[&lt;/li&gt;]</expression>
			</RegExp>
			<!--Writers-->
			<!-- Step 1: copy the section between "Buch:" and "Musik:" to buffer 6. -->
			<RegExp input="$$1" output="\1" dest="6+">
				<expression noclean="1">Buch:([^:]*)Musik:</expression>
			</RegExp>
			<!-- Step 2: one credits element per link text in buffer 6. -->
			<RegExp input="$$6" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
				<expression repeat="yes">&lt;a href=&quot;[^&quot;]*&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
			</RegExp>
			<!--Poster URL-->
			<!-- Every link to a plakate.html page is chained to GetThumbnailLink. -->
			<RegExp input="$$1" output="&lt;url function=&quot;GetThumbnailLink&quot;&gt;http://www.cinefacts.de/kino/film/\1/\2/plakate.html&lt;/url&gt;" dest="5+">
				<expression repeat="yes">&lt;a href=&quot;/kino/film/([0-9]*)/([^\/]*)/plakate.html&quot;&gt;</expression>
			</RegExp>
			<!--Google or IMDbId-->
			<!-- Buffer 6 is reset (clear="yes") and then set to the IMDb id if $$2 contains one;
			     it stays empty otherwise, which selects the Google fallback below. -->
			<RegExp input="$$2" output="tt\1" dest="6">
				<expression clear="yes">tt([0-9]*)</expression>
			</RegExp>
			<!-- Buffer 7 becomes the Google query: \1 = number after the h1 heading (presumably
			     the year), \2 = first dd class="first" value (presumably the original title).
			     clear="yes" empties buffer 7 first: it still holds the director/cast HTML
			     collected above, which must not end up in the search URL when this pattern
			     does not match. -->
			<RegExp input="$$1" output="&quot;\2&quot;+\1+||+&quot;\2&quot;" dest="7">
				<expression clear="yes" encode="1,2">&lt;h1&gt;[^&lt;]*&lt;/h1&gt;[^0-9]*([0-9]*) &lt;/li&gt;[^:]*:&lt;/dt&gt;[^&lt;]*&lt;dd class=&quot;first&quot;&gt;(.[^&lt;]*)&lt;/dd&gt;</expression>
			</RegExp>
			<!--TMDB Thumbs & Fanart (IMDbId) -->
			<!-- Emitted only when buffer 6 is non-empty (pattern .+). -->
			<RegExp input="$$6" output="&lt;url cache=&quot;tmdb-$$6&quot; function=&quot;GetTMDBFanartById&quot;&gt;http://imdb.com/title/$$6&lt;/url&gt;" dest="5+">
				<expression>.+</expression>
			</RegExp>
			<RegExp input="$$6" output="&lt;url cache=&quot;tmdb-$$6&quot; function=&quot;GetTMDBThumbsById&quot;&gt;http://imdb.com/title/$$6&lt;/url&gt;" dest="5+">
				<expression>.+</expression>
			</RegExp>
			<!--TMDB Thumbs & Fanart (Google) -->
			<!-- Emitted only when buffer 6 is empty (pattern ^$): search Google for the IMDb page using buffer 7. -->
			<RegExp input="$$6" output="&lt;url cache=&quot;google-$$2&quot; function=&quot;GetTMDBFanartById&quot;&gt;http://www.google.com/search?q=site:imdb.com+releaseinfo+$$7&lt;/url&gt;" dest="5+">
				<expression encode="1">^$</expression>
			</RegExp>
			<RegExp input="$$6" output="&lt;url cache=&quot;google-$$2&quot; function=&quot;GetTMDBThumbsById&quot;&gt;http://www.google.com/search?q=site:imdb.com+releaseinfo+$$7&lt;/url&gt;" dest="5+">
				<expression encode="1">^$</expression>
			</RegExp>
			<expression noclean="1"/>
		</RegExp>
	</GetDetails>
	<!-- Poster handling, stage 1: follow the poster overview page to the individual poster pages. -->
	<GetThumbnailLink dest="5">
		<!-- Outer rule: wraps the url elements gathered in buffer 2 into a details element. -->
		<RegExp
			input="$$2"
			dest="5+"
			output="&lt;details&gt;\1&lt;/details&gt;">
			<!-- Inner rule: every /kino/film/... anchor that is followed by an img tag on the
			     page in $$1 is chained to GetThumbnail. -->
			<RegExp
				input="$$1"
				dest="2+"
				output="&lt;url function=&quot;GetThumbnail&quot;&gt;http://www.cinefacts.de/kino/film/\1&lt;/url&gt;">
				<expression repeat="yes" noclean="1">&lt;a href=&quot;/kino/film/([^&quot;]+)&quot;&gt;[^&lt;]*&lt;img</expression>
			</RegExp>
			<expression noclean="1"/>
		</RegExp>
	</GetThumbnailLink>
	<GetThumbnail dest="5">
		<!-- Outer rule: wraps the thumb element gathered in buffer 2 into a details element. -->
		<RegExp
			input="$$2"
			dest="5"
			output="&lt;details&gt;\1&lt;/details&gt;">
			<!-- Inner rule: takes the image whose src is under /kino/plakat/ on the page in $$1
			     and emits it as an absolute www.cinefacts.de thumb URL (no repeat attribute is set). -->
			<RegExp
				input="$$1"
				dest="2+"
				output="&lt;thumb&gt;http://www.cinefacts.de/kino/plakat/\1&lt;/thumb&gt;">
				<expression>src=&quot;/kino/plakat/([^&quot;]*)&quot;</expression>
			</RegExp>
			<expression noclean="1"/>
		</RegExp>
	</GetThumbnail>
</scraper>