Changeset 1358

Show
Ignore:
Timestamp:
01/19/12 18:55:46 (4 months ago)
Author:
gbooker
Message:

Updated scrapers
Fixes #401

Location:
trunk/SapphireFrappliance/MetaDataImporting/Scrapers
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • trunk/SapphireFrappliance/MetaDataImporting/Scrapers/common/imdb.xml

    r1351 r1358  
    31 31                         </RegExp>
    32 32                         <RegExp input="$$6" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="2+">
    33                                    <expression repeat="yes">&lt;a\s*href=&quot;/name/[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
       33                                 <expression repeat="yes">&lt;a[^&gt;]*href=&quot;/name/[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
    34 34                         </RegExp>
    35 35                         <expression noclean="1"/>
     
    42 42                         </RegExp>
    43 43                         <RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\4&lt;/role&gt;&lt;/actor&gt;" dest="7">
    44                                    <expression clear="yes" repeat="yes" noclean="1,2" trim="3,4">&lt;img.*?src=&quot;(?:([^&quot;]*\.)[^&quot;]*(\.jpg))?[^&gt;]*[^&quot;]*&quot;name&quot;&gt;\s*&lt;a\s*href=&quot;[^&quot;]*[^&gt;]*&gt;([^&lt;]*)&lt;[^&quot;]*&quot;ellipsis&quot;&gt;\s*...[^&quot;]*&quot;character&quot;&gt;(.*?)&lt;/td&gt;</expression>
       44                                 <expression clear="yes" repeat="yes" noclean="1,2" trim="3,4">&lt;img.*?src=&quot;(?:([^&quot;]*\.)[^&quot;]*(\.jpg))?[^&gt;]*[^&quot;]*&quot;name&quot;&gt;\s*&lt;a[^&gt;]*href=&quot;[^&quot;]*[^&gt;]*&gt;([^&lt;]*)&lt;[^&quot;]*&quot;ellipsis&quot;&gt;\s*...[^&quot;]*&quot;character&quot;&gt;(.*?)&lt;/td&gt;</expression>
    45 45                         </RegExp>
    46 46                         <RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
     
    59 59                         </RegExp>
    60 60                         <RegExp input="$$6" output="&lt;director&gt;\1&lt;/director&gt;" dest="2+">
    61                                    <expression clear="yes" repeat="yes">&lt;a\s*href=&quot;/name/[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
       61                                 <expression clear="yes" repeat="yes">&lt;a[^&gt;]*href=&quot;/name/[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
    62 62                         </RegExp>
    63 63                         <expression noclean="1"/>
  • trunk/SapphireFrappliance/MetaDataImporting/Scrapers/imdb.xml

    r1355 r1358  
    1 1 <?xml version="1.0" encoding="UTF-8"?>
    2   <scraper framework="1.1" date="2011-08-15" name="IMDb.com" content="movies" thumb="imdb.png" language="en">
      2 <scraper framework="1.1" date="2012-01-15" name="IMDb.com" content="movies" thumb="imdb.png" language="en">
    3 3         <include>common/imdb.xml</include>
    4 4         <include>common/tmdb.xml</include>
     
    86 86                         </RegExp>
    87 87                         <RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
    88                                    <expression>MPAA&lt;/a&gt;\)&lt;/h4&gt;\s*(Rated .[^ ]*)</expression>
       88                                 <expression>&lt;img[^&gt;]*certificates/us/[^&gt;]*title=&quot;([^&quot;]*)&quot;</expression>
    89 89                         </RegExp>
    90 90                         <RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
  • trunk/SapphireFrappliance/MetaDataImporting/Scrapers/tvrage.xml

    r1354 r1358  
    1 1 <?xml version="1.0" encoding="UTF-8"?>
    2   <scraper framework="1.0" date="2011-08-08" name="TV Rage" content="tvshows" thumb="tvrage.jpg" language="en">
      2 <scraper framework="1.0" date="2012-01-19" name="TV Rage" content="tvshows" thumb="tvrage.jpg" language="en">
    3 3         <NfoUrl dest="3">
    4 4                 <RegExp input="$$1" output="&lt;url&gt;http://www.tvrage.com/\1&lt;/url&gt;&lt;id&gt;\2&lt;/id&gt;"  dest="3">
     
    101 101                         </RegExp>
    102 102                         <RegExp input="$$1" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\2&lt;/role&gt;&lt;/actor&gt;"  dest="5+">
    103                                     <expression repeat="yes">a href=&apos;(?:http://www.tvrage.com)?/person/[^&gt;]*&gt;(.*?)&lt;/a&gt;.*?As&lt;i&gt;\s*(.*?)&lt;/i&gt;</expression>
        103                                 <expression repeat="yes">&gt;a href=&apos;(?:http://www.tvrage.com)?/person/[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt;&lt;/b&gt;&lt;br /&gt;\s*As&lt;i&gt;\s*([^&lt;]*)&lt;/i&gt;</expression>
    104 104                         </RegExp>
    105 105                         <RegExp input="$$2" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">