Changeset 1359 for trunk

Show
Ignore:
Timestamp:
01/20/12 19:30:16 (4 months ago)
Author:
gbooker
Message:

Corrected import of genres, user ratings, runtime, and studios.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/SapphireFrappliance/MetaDataImporting/Scrapers/imdb.xml

    r1358 r1359  
    1 1 <?xml version="1.0" encoding="UTF-8"?> 
    2 <scraper framework="1.1" date="2012-01-15" name="IMDb.com" content="movies" thumb="imdb.png" language="en"> 
     2<scraper framework="1.1" date="2012-01-20" name="IMDb.com" content="movies" thumb="imdb.png" language="en"> 
    3 3        <include>common/imdb.xml</include> 
    4 4        <include>common/tmdb.xml</include> 
     
    92 92                        </RegExp> 
    93 93                        <RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+"> 
    94                                 <expression trim="1">Runtime:&lt;/h4&gt;[^0-9]*([^&lt;]*)</expression> 
     94                                <expression trim="1">Runtime:&lt;/h4&gt;\s*&lt;[^&gt;]*&gt;([0-9]*)</expression> 
    95 95                        </RegExp> 
    96 96                        <RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;&lt;votes&gt;\2&lt;/votes&gt;" dest="5+"> 
    97                                 <expression>&lt;b&gt;([0-9.]+)&lt;/b&gt;&lt;span[^&gt;]*&gt;/10&lt;/span&gt;&lt;/span&gt;[^&lt;]*&lt;a [^&lt;]*&gt;([0-9,]+) votes&lt;/a&gt;</expression> 
     97                                <expression>&lt;strong&gt;&lt;span[^&gt;]*&gt;([0-9.]+)&lt;/span&gt;&lt;/strong&gt;&lt;span[^&gt;]*&gt;/&lt;span[^&gt;]*&gt;10&lt;/span&gt;&lt;/span&gt;[^&lt;]*&lt;a [^&lt;]*&gt;&lt;span[^&gt;]*&gt;([0-9,]+)&lt;/span&gt; users&lt;/a&gt;</expression> 
    98 98                        </RegExp> 
    99 99                        <RegExp input="$$1" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+"> 
    100                                 <expression repeat="yes">&quot;/genre/[^/]*&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression> 
     100                                <expression repeat="yes">&quot;/genre/[^&quot;/]*&quot;\s*&gt;([^&lt;]*)&lt;/a&gt;</expression> 
    101 101                        </RegExp> 
    102 102                        <RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+"> 
    103                                 <expression repeat="yes">&quot;/company/[^/]*/&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression> 
     103                                <expression repeat="yes">&quot;/company/[^/]*/&quot;\s*&gt;([^&lt;]*)&lt;/a&gt;</expression> 
    104 104                        </RegExp> 
    105 105                        <RegExp input="$$1" output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">