XBMC Community Forum  

Go Back   XBMC Community Forum > Development > Scraper Development

Scraper Development Developers forum for meta data scrapers. Scraper developers only!
Not for posting feature requests, bugs, or end-user support requests!

Reply
 
Thread Tools Search this Thread Display Modes
Old 2007-10-29, 20:58   #1
l8tig
Junior Member
 
Join Date: Feb 2007
Posts: 13
l8tig is on a distinguished road
Default repeat function with scraper

Mon scraper allocine.fr fonctionne avec scrap.exe pour récupérer les informations des acteurs, puis relance une fonction sur chaque acteur pour récupérer son visage. Mais sur la Xbox, cette répétition n'est pas prise en compte.
Peut-on lancer plusieurs fois la même fonction avec une adresse différente ?

Code:
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<scraper name="allocine.fr" content="movies" thumb="allocine.jpg">

	<!-- NfoUrl: matches an allocine.fr film-page link in the input buffer ($$1,
	     presumably the .nfo contents) and rebuilds the film page URL from the
	     numeric film id captured as \1. The URL is written to buffer 3, which is
	     this function's dest. -->
	<NfoUrl dest="3">
		<RegExp input="$$1" output="http://www.allocine.fr/film/fichefilm_gen_cfilm=\1.html" dest="3">
			<!-- The dots are escaped so they only match a literal '.', and the id
			     needs at least one digit so that an empty id can never produce a
			     broken URL. Same form as the film-link pattern in GetSearchResults. -->
			<expression noclean="1">allocine\.fr/film/fichefilm_gen_cfilm=([0-9]+)\.html</expression>
		</RegExp>
	</NfoUrl>

	<!-- CreateSearchUrl: builds the allocine.fr search URL around the text held
	     in buffer 1 and stores it in buffer 3, this function's dest. -->
	<CreateSearchUrl dest="3">
		<RegExp
			input="$$1"
			output="http://www.allocine.fr/recherche/?rub=1&amp;page=1&amp;motcle=\1"
			dest="3">
			<!-- No pattern is supplied; \1 is whatever the parser captures for an
			     empty expression (presumably the whole input). -->
			<expression/>
		</RegExp>
	</CreateSearchUrl>

	<!-- GetSearchResults: turns the search result page (buffer 1) into a results
	     document returned through buffer 8. -->
	<GetSearchResults dest="8">
		<!-- Wrapper: puts the XML declaration and the results element around the
		     entity list collected in buffer 5. -->
		<RegExp
			input="$$5"
			output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;"
			dest="8">
			<!-- One entity (title, film page URL, film id) per film link found on
			     the page; each one is added to buffer 5. -->
			<RegExp
				input="$$1"
				output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url&gt;http://www.allocine.fr/film/fichefilm_gen_cfilm=\1.html&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;"
				dest="5+">
				<expression repeat="yes">&lt;h4&gt;&lt;a href=&quot;/film/fichefilm_gen_cfilm=([0-9]+)\.html&quot; class=&quot;link1&quot;&gt;([^;#]*)&lt;/a</expression>
			</RegExp>
			<expression noclean="1"/>
		</RegExp>
	</GetSearchResults>

	<!-- GetDetails: extracts the movie details from the film page held in buffer 1.
	     Each inner RegExp adds one fragment to buffer 5 (dest="5+"); the outer
	     RegExp then wraps buffer 5 in a details element and writes it to buffer 3,
	     which is this function's dest.
	     NOTE(review): clearbuffers="no" presumably leaves buffers 5, 6 and 7 filled
	     for the next function that runs; confirm this is intended, since several
	     follow-up functions in this file also write to buffer 5. -->
	<GetDetails clearbuffers="no" dest="3">
		<RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
			<!-- TITLE: taken from the page title tag. This is the only step that
			     writes buffer 5 with dest="5" instead of dest="5+". -->
			<RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5">
				<expression trim="1" noclean="1">&lt;title&gt;([^&lt;]*)&lt;</expression>
			</RegExp>
			<!-- THUMBNAILS: emits a url element that requests the GetThumbnail
			     function on the film's thumbnail gallery page. -->
     		<RegExp input="$$1" output="&lt;url function=&quot;GetThumbnail&quot;&gt;http://www.allocine.fr/film/galerievignette_gen_cfilm=\1.html&lt;/url&gt;" dest="5+">
        		<expression noclean="1">galerievignette_gen_cfilm=([0-9]*)</expression>
      		</RegExp>
			<!-- STUDIO: text of the element that follows "Distribué par". -->
			<RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
				<expression>Distribué par [^&gt;]*&gt;([^&lt;]*)</expression>
			</RegExp>
			<!-- GENRE: buffer 7 receives the page section after "Genre :". Every
			     link text in it is collected into buffer 6 as " / name", and the
			     last expression drops the leading " / " before emitting genre. -->
			<RegExp input="$$1" output="\1" dest="7">
				<expression noclean="1">Genre :([^:]*):</expression>
			</RegExp>
			<RegExp input="$$6" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
				<RegExp input="$$7" output=" / \1" dest="6">
					<expression repeat="yes" noclean="1">&lt;a href[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt;</expression>
				</RegExp>
				<expression> / (.*)</expression>
			</RegExp>
			<!-- YEAR: value following "Année de production :". -->
			<RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
				<expression>&gt;Année de production : ([^&lt;]*)&lt;</expression>
			</RegExp>
			<!-- DIRECTOR: text of the element that follows "Réalisé par". -->
			<RegExp input="$$1" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
				<expression>Réalisé par &lt;[^&gt;]*&gt;([^&lt;]*)</expression>
			</RegExp>
			<!-- RUNTIME: text after "Durée :" up to the first full stop. -->
			<RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
				<expression>Durée : ([^.]*)</expression>
			</RegExp>
			<!-- RATING / VOTES: buffer 7 is reused here. It first receives the tag
			     attributes after "Note moyenne :" and the first number that follows,
			     joined by '#'. The second step reads the star count from the
			     etoile_N class name and the number after '#' as the vote count. -->
			<RegExp input="$$1" output="\1#\2" dest="7">
				<expression>&lt;h4&gt;Note moyenne : &lt;([^&gt;]*)&gt;[^0-9]*([0-9]*)</expression>
			</RegExp>
			<RegExp input="$$7" output="&lt;rating&gt;\1étoile(s)&lt;/rating&gt;&lt;votes&gt;\2&lt;/votes&gt;" dest="5+">
				<expression>class=&quot;etoile_([0-9]*)&quot;[^#]*#([0-9]*)</expression>
			</RegExp>
			<!-- MPAA: text found five tags after "N° de visa". -->
			<RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
				<expression>N° de visa[^&gt;]*&gt;[^&gt;]*&gt;[^&gt;]*&gt;[^&gt;]*&gt;[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
			</RegExp>
			<!-- TAGLINE: emits a url element that requests the GetTagline function
			     on the film's press review page. -->
			<RegExp input="$$1" output="&lt;url function=&quot;GetTagline&quot;&gt;http://www.allocine.fr/film/revuedepresse_gen_cfilm=\1.html&lt;/url&gt;" dest="5+">
				<expression noclean="1">revuedepresse_gen_cfilm=([0-9]*)</expression>
			</RegExp>
			<!-- CREDITS: emits a url element that requests the GetCredits function
			     on the film's casting page. -->
			<RegExp input="$$1" output="&lt;url function=&quot;GetCredits&quot;&gt;http://www.allocine.fr/film/casting_gen_cfilm=\1.html&lt;/url&gt;" dest="5+">
				<expression>casting_gen_cfilm=([0-9]*)</expression>
			</RegExp>
			<!-- ACTORS: requests the GetActor function on the same casting page URL
			     as the GetCredits step above. -->
			<RegExp input="$$1" output="&lt;url function=&quot;GetActor&quot;&gt;http://www.allocine.fr/film/casting_gen_cfilm=\1.html&lt;/url&gt;" dest="5+">
				<expression>casting_gen_cfilm=([0-9]*)</expression>
			</RegExp>
			<!-- OUTLINE / PLOT: the same captured line of text is emitted as both
			     outline and plot. -->
			<RegExp input="$$1" output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
				<expression>&lt;td valign=&quot;top&quot; style=&quot;padding:10 0 0 0&quot;&gt;&lt;div align=&quot;justify&quot;&gt;&lt;h4&gt;([^\n]*)</expression>
			</RegExp>
			<!-- Wrapper expression: empty, with cleaning disabled for \1 so the
			     fragments built above are passed through as they are. -->
			<expression noclean="1"></expression>
		</RegExp>
	</GetDetails>

	<!-- GetCredits: reads the casting page (buffer 1) and returns, through
	     buffer 5, a details document holding the writer credits and one actor
	     element (name and role) per cast member. Fragments are collected in
	     buffer 3 before being wrapped. -->
	<GetCredits dest="5">
		<RegExp
			input="$$3"
			output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;details&gt;\1&lt;/details&gt;"
			dest="5">
			<!-- Buffer 7: page section between "Scénariste" and "Equipe technique". -->
			<RegExp
				input="$$1"
				output="\1"
				dest="7">
				<expression noclean="1">Scénariste([^(]*)Equipe technique</expression>
			</RegExp>
			<!-- Credits: every link text of that section is joined into buffer 6
			     as " / name"; the leading " / " is dropped when emitting credits. -->
			<RegExp
				input="$$6"
				output="&lt;credits&gt;\1&lt;/credits&gt;"
				dest="3+">
				<RegExp
					input="$$7"
					output=" / \1"
					dest="6">
					<expression repeat="yes" noclean="1">&gt;([^&lt;]*)&lt;/a</expression>
				</RegExp>
				<expression noclean="1"> / (.*)</expression>
			</RegExp>
			<!-- Buffer 6 is reused for the section between the "Acteurs" and
			     "Production" headings. -->
			<RegExp
				input="$$1"
				output="\1"
				dest="6">
				<expression noclean="1">&lt;b&gt;Acteurs&lt;/b&gt;(.*)&lt;b&gt;Production&lt;/b&gt;</expression>
			</RegExp>
			<!-- One actor element per cast row: \1 is the role cell, \3 the text
			     of the person link. -->
			<RegExp
				input="$$6"
				output="&lt;actor&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\1&lt;/role&gt;&lt;/actor&gt;"
				dest="3+">
				<expression repeat="yes" noclean="1,2,3">&lt;h5&gt;([^&lt;]*)&lt;/h5&gt;&lt;/td&gt;[^&lt;]*&lt;[^&gt;]*&gt;&lt;h5&gt;&lt;a href=&quot;/([^&quot;]*)&quot;[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
			</RegExp>
			<expression noclean="1"/>
		</RegExp>
	</GetCredits>

	<!-- GetActor: reads the casting page (buffer 1) and returns, through buffer 5,
	     a details document that contains one GetActorThumb url element and one id
	     element per cast member, built from the person id in each cast row. -->
	<GetActor dest="5">
		<!-- Buffer 2: the cast section, from the "Acteurs" heading up to the
		     last "Production" or "Producteur" that follows it. -->
		<RegExp input="$$1" output="\1" dest="2">
			<expression noclean="1">&lt;b&gt;Acteurs(.*)(Production|Producteur)</expression>
		</RegExp>

		<RegExp input="$$3" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;details&gt;\1&lt;/details&gt;" dest="5">
			<!-- One url/id pair per cast row, collected in buffer 3. The person
			     id needs at least one digit and the dot before "html" is escaped,
			     so a malformed link cannot yield a URL without an id. -->
			<RegExp input="$$2" output="&lt;url function=&quot;GetActorThumb&quot;&gt;http://www.allocine.fr/personne/fichepersonne_gen_cpersonne=\2.html&lt;/url&gt;&lt;id&gt;\2&lt;/id&gt;" dest="3+">
				<expression repeat="yes" noclean="1">&lt;h5&gt;([^&lt;]*)&lt;/h5&gt;&lt;/td&gt;[^&lt;]*&lt;[^&gt;]*&gt;&lt;h5&gt;&lt;a href=&quot;/personne/fichepersonne_gen_cpersonne=([0-9]+)\.html&quot;[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
			</RegExp>
			<!-- noclean="1" keeps the url and id markup built above from being
			     cleaned when it is wrapped, as every other wrapper expression in
			     this scraper already does. -->
			<expression noclean="1">(.*)</expression>
		</RegExp>

	</GetActor>


	<!-- GetActorThumb: reads a person page (buffer 1) and returns, through
	     buffer 5, a details document with a single actor element made of the
	     portrait thumb (buffer 3) followed by the name (buffer 2). -->
	<GetActorThumb dest="5">
		<!-- Buffer 2: name element taken from the page title tag. -->
		<RegExp input="$$1" output="&lt;name&gt;\1&lt;/name&gt;" dest="2">
			<expression>&lt;title&gt;([^&lt;]*)</expression>
		</RegExp>

		<!-- Buffer 3: thumbs element for the 120x160 image found on the page. -->
		<RegExp input="$$1" output="&lt;thumbs&gt;&lt;thumb&gt;\1&lt;/thumb&gt;&lt;/thumbs&gt;" dest="3">
			<expression>src=&quot;([^&quot;]*)&quot; width=&quot;120&quot; height=&quot;160&quot; border=&quot;0&quot;&gt;&lt;br /&gt;</expression>
		</RegExp>

		<!-- The same image pattern is matched again only to decide whether a
		     document is emitted; its capture is not used in the output.
		     The document is written with dest="5" rather than appended, because
		     it is a complete XML document with its own declaration and must not
		     be glued onto anything already held in buffer 5. -->
		<RegExp input="$$1" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;details&gt;&lt;actor&gt;$$3$$2&lt;/actor&gt;&lt;/details&gt;" dest="5">
			<expression>src=&quot;([^&quot;]*)&quot; width=&quot;120&quot; height=&quot;160&quot; border=&quot;0&quot;&gt;&lt;br /&gt;</expression>
		</RegExp>
	</GetActorThumb>

	<!-- GetTagline: reads the press review page (buffer 1) and returns, through
	     buffer 5, a details document whose tagline is the text of the first
	     justified h4 block matched below. -->
	<GetTagline dest="5">
		<!-- Written with dest="5" rather than appended: the output is a complete
		     XML document with its own declaration, and GetDetails is declared
		     with clearbuffers="no", so buffer 5 may not be empty when this runs.
		     The former clean="1" attribute was dropped; this scraper only ever
		     switches cleaning off through noclean, so cleaning of \1 is
		     presumably the default. -->
		<RegExp input="$$1" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;details&gt;&lt;tagline&gt;\1&lt;/tagline&gt;&lt;/details&gt;" dest="5">
			<expression trim="1">&lt;div align=&quot;justify&quot; style=&quot;padding: 5 0 5 0&quot;&gt;&lt;h4&gt;([^&lt;]*)</expression>
		</RegExp>
	</GetTagline>

	<!-- GetThumbnail: reads the thumbnail gallery page (buffer 1) and returns,
	     through buffer 5, a details document with one thumb element per image
	     file listed on the page. -->
	<GetThumbnail dest="5">
		<!-- Buffer 3: base media path, taken from the CRP_PATH script variable
		     with the "/crp/80/80/x/x" part removed. -->
		<RegExp input="$$1" output="\1/medias" dest="3">
			<expression>var CRP_PATH = &quot;([^&quot;]*)/crp/80/80/x/x/medias&quot;</expression>
		</RegExp>
		<!-- Buffer 4: one thumb element per "fichier" entry, prefixed with the
		     base path from buffer 3. -->
		<RegExp input="$$1" output="&lt;thumb&gt;$$3\1&lt;/thumb&gt;" dest="4">
			<expression repeat="yes" noclean="1">&quot;fichier&quot;:&quot;([^&quot;]*)&quot;</expression>
		</RegExp>
		<!-- Wrapper: emitted once (no repeat) and written with dest="5" rather
		     than appended. The output is a complete XML document that does not
		     use \1, so repeating it or gluing it onto existing buffer 5 content
		     could only produce a malformed result. GetDetails is declared with
		     clearbuffers="no", so buffer 5 may not be empty when this runs. -->
		<RegExp input="$$1" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;details&gt;&lt;thumbs&gt;$$4&lt;/thumbs&gt;&lt;/details&gt;" dest="5">
			<expression noclean="1"></expression>
		</RegExp>
	</GetThumbnail>


</scraper>
l8tig is offline   Reply With Quote
Old 2007-10-30, 10:54   #2
bilbon0bud
Junior Member
 
Join Date: Dec 2004
Posts: 21
bilbon0bud is on a distinguished road
Default

My allocine.fr scraper works with scrap.exe to retrieve the actor information, then launches a function again for each actor to download their photo. But on the Xbox this repetition is not taken into account.
Can we launch the same function several times, each with a different URL?
bilbon0bud is offline   Reply With Quote
Reply

Bookmarks


Currently Active Users Viewing This Thread: 1 (0 members and 1 guests)
 
Thread Tools Search this Thread
Search this Thread:

Advanced Search
Display Modes

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump


All times are GMT +2. The time now is 14:17.


Protected by Akismet, We recommend WordPress blogs
Copyright © 2008, XBMC Project