- Timestamp: 10/12/10 19:33:38 (20 months ago)
- Location: trunk/SapphireFrappliance/MetaDataImporting/Scrapers
- Files: 2 modified
  - common/imdb.xml (modified) (5 diffs)
  - imdb.xml (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/SapphireFrappliance/MetaDataImporting/Scrapers/common/imdb.xml
r995 r1333 25 25 <RegExp input="$$2" output="<details>\1</details>" dest="5"> 26 26 <RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6"> 27 <expression noclean="1">> Writer.*?:</h5>(.*?)</div></expression>27 <expression noclean="1">>\s*Writer.*?:\s*</h4>(.*?)</div></expression> 28 28 </RegExp> 29 29 <RegExp conditional="fullcredits" input="$$1" output="\1" dest="6"> … … 31 31 </RegExp> 32 32 <RegExp input="$$6" output="<credits>\1</credits>" dest="2+"> 33 <expression repeat="yes"><a href="/name/[^>]*>([^<]*)<</expression>33 <expression repeat="yes"><a\s*href="/name/[^>]*>([^<]*)<</expression> 34 34 </RegExp> 35 35 <expression noclean="1"/> … … 39 39 <RegExp input="$$2" output="<details>\1</details>" dest="5"> 40 40 <RegExp input="$$1" output="\1" dest="6"> 41 <expression noclean="1"><table class="cast ">(.*?)</table></expression>41 <expression noclean="1"><table class="cast_list">(.*?)</table></expression> 42 42 </RegExp> 43 43 <RegExp input="$$6" output="<actor><thumb>\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2</thumb><name>\3</name><role>\4</role></actor>" dest="7"> 44 <expression clear="yes" repeat="yes" noclean="1,2" trim="3,4"><img src="(?:([^"]*\.)[^"]*(\.jpg))?[^>]*[^"]*"nm"><a href="[^"]*[^>]*>([^<]*)<[^"]*"ddd"> ... 
[^"]*"char">(.*?)</td></expression>44 <expression clear="yes" repeat="yes" noclean="1,2" trim="3,4"><img.*?src="(?:([^"]*\.)[^"]*(\.jpg))?[^>]*[^"]*"name">\s*<a\s*href="[^"]*[^>]*>([^<]*)<[^"]*"ellipsis">\s*...[^"]*"character">(.*?)</td></expression> 45 45 </RegExp> 46 46 <RegExp input="$$7" output="<actor><thumb>\1</thumb>\2</actor>" dest="2+"> … … 53 53 <RegExp input="$$2" output="<details>\1</details>" dest="5"> 54 54 <RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6"> 55 <expression clear="yes" noclean="1">> Director.*?</h5>(.*?)</div></expression>55 <expression clear="yes" noclean="1">>\s*Director.*?</h4>(.*?)</div></expression> 56 56 </RegExp> 57 57 <RegExp conditional="fullcredits" input="$$1" output="\1" dest="6"> … … 59 59 </RegExp> 60 60 <RegExp input="$$6" output="<director>\1</director>" dest="2+"> 61 <expression clear="yes" repeat="yes"><a href="/name/[^>]*>([^<]*)<</expression>61 <expression clear="yes" repeat="yes"><a\s*href="/name/[^>]*>([^<]*)<</expression> 62 62 </RegExp> 63 63 <expression noclean="1"/> -
trunk/SapphireFrappliance/MetaDataImporting/Scrapers/imdb.xml
r1307 r1333 1 1 <?xml version="1.0" encoding="UTF-8"?> 2 <scraper framework="1.1" date="2010- 07-22" name="IMDb.com" content="movies" thumb="imdb.png" language="en">2 <scraper framework="1.1" date="2010-10-12" name="IMDb.com" content="movies" thumb="imdb.png" language="en"> 3 3 <include>common/imdb.xml</include> 4 4 <include>common/tmdb.xml</include> … … 77 77 </RegExp> 78 78 <RegExp input="$$1" output="<title>\1</title>" dest="5+"> 79 <expression trim="1" noclean="1"><h1 >([^<]*)</expression>79 <expression trim="1" noclean="1"><h1[^<]*>\s*([^<]*)</expression> 80 80 </RegExp> 81 81 <RegExp input="$$1" output="<year>\1</year>" dest="5+"> … … 83 83 </RegExp> 84 84 <RegExp input="$$1" output="<top250>\1</top250>" dest="5+"> 85 <expression>Top 250 : #([0-9]*)</a></expression>85 <expression>Top 250 #([0-9]*)</strong></expression> 86 86 </RegExp> 87 87 <RegExp input="$$1" output="<mpaa>\1</mpaa>" dest="5+"> 88 <expression>MPAA</a> :</h5><div class="info-content">(.[^<]*)</expression>88 <expression>MPAA</a>\)</h4>\s*Rated (.[^ ]*)</expression> 89 89 </RegExp> 90 90 <RegExp input="$$1" output="<certification>\1 \3</certification>" dest="5+"> … … 92 92 </RegExp> 93 93 <RegExp input="$$1" output="<tagline>\1</tagline>" dest="5+"> 94 <expression> <h5>Tagline:</h5>\n<div class="info-content">\n([^<]*)</expression>94 <expression>Taglines:</h4>\s*([^<]*)</expression> 95 95 </RegExp> 96 96 <RegExp input="$$1" output="<runtime>\1</runtime>" dest="5+"> 97 <expression trim="1"> <h5>Runtime:</h5>[^0-9]*([^<]*)</expression>97 <expression trim="1">Runtime:</h4>[^0-9]*([^<]*)</expression> 98 98 </RegExp> 99 99 <RegExp input="$$1" output="<rating>\1</rating><votes>\2</votes>" dest="5+"> 100 <expression><b>([0-9.]+) /10</b>[^<]*<a href="ratings" class="tn15more">([0-9,]+) votes</a></expression>100 <expression><b>([0-9.]+)</b><span[^>]*>/10</span></span>[^<]*<a [^<]*>([0-9,]+) votes</a></expression> 101 101 </RegExp> 102 102 <RegExp input="$$1" output="<genre>\1</genre>" dest="5+"> 103 
<expression repeat="yes">"/ Sections/Genres/[^/]*/">([^<]*)</a></expression>103 <expression repeat="yes">"/genre/[^/]*">([^<]*)</a></expression> 104 104 </RegExp> 105 105 <RegExp input="$$1" output="<studio>\1</studio>" dest="5+"> … … 107 107 </RegExp> 108 108 <RegExp input="$$1" output="<outline>\1</outline><plot>\1</plot>" dest="5+"> 109 <expression> Plot:</h5>\n<div class="info-content">\n([^<]*)</expression>109 <expression>Storyline</h2>\n\n<p>([^<]*)</expression> 110 110 </RegExp> 111 111 <RegExp input="$$1" output="<oscars>1</oscars>" dest="5+"> 112 <expression> Awards:</h5>\n<div class="info-content">\nWon Oscar</expression>112 <expression><b>Won Oscar\.</b></expression> 113 113 </RegExp> 114 114 <RegExp input="$$1" output="<oscars>\1</oscars>" dest="5+"> 115 <expression> Awards:</h5>\n<div class="info-content">\nWon ([0-9]*) Oscars</expression>115 <expression><b>Won ([0-9]*) Oscars\.</b></expression> 116 116 </RegExp> 117 117 <RegExp input="$$1" output="<releasedate>\1</releasedate>" dest="5+"> 118 <expression> <h5>Release Date:</h5>\n<div class="info-content">\n([^<]*?)\(</expression>118 <expression>Release Date:</h4>\n([^<]*?)\s*\(</expression> 119 119 </RegExp> 120 120 <RegExp input="$$2" output="<url function="GetIMDBPlot">$$3plotsummary</url>" dest="5+">
