| 1 | /* |
|---|
| 2 | * SapphireScraper.m |
|---|
| 3 | * Sapphire |
|---|
| 4 | * |
|---|
| 5 | * Created by Graham Booker on Dec. 19, 2009. |
|---|
| 6 | * Copyright 2009 Sapphire Development Team and/or www.nanopi.net |
|---|
| 7 | * All rights reserved. |
|---|
| 8 | * |
|---|
| 9 | * This program is free software; you can redistribute it and/or modify it under the terms of the GNU |
|---|
| 10 | * General Public License as published by the Free Software Foundation; either version 3 of the License, |
|---|
| 11 | * or (at your option) any later version. |
|---|
| 12 | * |
|---|
| 13 | * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even |
|---|
| 14 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
|---|
| 15 | * Public License for more details. |
|---|
| 16 | * |
|---|
| 17 | * You should have received a copy of the GNU General Public License along with this program; if not, |
|---|
| 18 | * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|---|
| 19 | */ |
|---|
| 20 | |
|---|
| 21 | #import "SapphireScraper.h" |
|---|
| 22 | #include "pcre.h" |
|---|
| 23 | #import "SapphireApplianceController.h" |
|---|
| 24 | |
|---|
/* Private interface: buffer handling, stored regex matches and function evaluation. */
@interface SapphireScraper ()
/* Scraper buffers (0-based slot index) */
- (void)setBuffer:(int)index toString:(NSString *)str;
- (void)clearBuffers;
/* Matches remembered across repetitions of one expression */
- (void)setStoredMatch:(int)index toString:(NSString *)str;
- (void)clearStorchMatches;
/* Settings discovery and evaluation of a named function element */
- (void)parseSettings;
- (NSString *)parseFunction:(NSString *)function;
@end
|---|
| 33 | |
|---|
| 34 | @implementation SapphireScraper |
|---|
| 35 | |
|---|
/* Live scrapers keyed by scraper name; each value is an NSValue wrapping a non-retained pointer. */
static NSMutableDictionary *scrapers = nil;
/* Scraper name -> "<content type>-<path>" string; rebuilt by +allScrapperNames. */
static NSDictionary *scraperPaths = nil;

/* Creates the shared scraper cache once; later invocations find it already present. */
+ (void)initialize
{
    if(scrapers != nil)
        return;
    scrapers = [[NSMutableDictionary alloc] init];
}
|---|
| 44 | |
|---|
/**
 * Reads every XML file in paths and records, per scraper name, the newest definition seen.
 *
 * The root element's "name", "content" and "date" attributes are inspected. scraperPathsDict
 * receives "<content>-<path>" keyed by name and scraperDates receives the date that won; an
 * existing entry is replaced only by a file with a later date.
 *
 * Files that cannot be parsed, or whose root has no name or content attribute, are skipped.
 * A missing or unparsable date is treated as the distant past, so any dated file wins over it.
 */
void checkScrappersInPath(NSArray *paths, NSMutableDictionary *scraperPathsDict, NSMutableDictionary *scraperDates)
{
    NSEnumerator *pathEnum = [paths objectEnumerator];
    NSString *path;
    while((path = [pathEnum nextObject]) != nil)
    {
        NSError *error = nil;
        NSURL *url = [NSURL fileURLWithPath:path];
        NSXMLDocument *doc = [[NSXMLDocument alloc] initWithContentsOfURL:url options:0 error:&error];
        if(!doc)
            continue;
        NSXMLElement *root = [doc rootElement];

        NSString *name = [[root attributeForName:@"name"] stringValue];
        NSString *type = [[root attributeForName:@"content"] stringValue];
        NSString *dateStr = [[root attributeForName:@"date"] stringValue];

        /* A nil key or value would make the dictionary inserts below raise */
        if(name != nil && type != nil)
        {
            NSDate *date = nil;
            if([dateStr length])
                date = [NSDate dateWithNaturalLanguageString:dateStr];
            if(date == nil)
                date = [NSDate distantPast];

            NSDate *existingDate = [scraperDates objectForKey:name];
            if(existingDate == nil || [existingDate compare:date] == NSOrderedAscending)
            {
                [scraperPathsDict setObject:[type stringByAppendingFormat:@"-%@", path] forKey:name];
                [scraperDates setObject:date forKey:name];
            }
        }

        [doc release];
    }
}
|---|
| 73 | |
|---|
/**
 * Rescans the bundle's "scrapers" resource directory and the "scrapers" directory under
 * applicationSupportDir() for XML files, rebuilds the static name -> path table from them
 * and returns the scraper names (the keys of that table).
 */
+ (NSArray *)allScrapperNames
{
    NSBundle *bundle = [NSBundle bundleForClass:[self class]];
    NSMutableDictionary *newestPaths = [[NSMutableDictionary alloc] init];
    NSMutableDictionary *newestDates = [[NSMutableDictionary alloc] init];

    /* First pass: scrapers shipped as bundle resources */
    checkScrappersInPath([bundle pathsForResourcesOfType:@"xml" inDirectory:@"scrapers"], newestPaths, newestDates);

    /* Second pass: every *.xml directly inside the application-support scraper directory */
    NSString *userDir = [applicationSupportDir() stringByAppendingPathComponent:@"scrapers"];
    NSMutableArray *userPaths = [NSMutableArray array];
    NSEnumerator *entryEnum = [[[NSFileManager defaultManager] directoryContentsAtPath:userDir] objectEnumerator];
    NSString *entry;
    while((entry = [entryEnum nextObject]) != nil)
    {
        if(![[entry pathExtension] isEqualToString:@"xml"])
            continue;
        [userPaths addObject:[userDir stringByAppendingPathComponent:entry]];
    }
    checkScrappersInPath(userPaths, newestPaths, newestDates);
    [newestDates release];

    /* Swap in an immutable snapshot of the freshly built table */
    [scraperPaths release];
    scraperPaths = [newestPaths copy];
    [newestPaths release];
    return [scraperPaths allKeys];
}
|---|
| 101 | |
|---|
/**
 * Returns the scraper registered under the given name, creating and caching it on first use.
 *
 * The cache stores non-retained pointers (entries are removed again in -dealloc), so the
 * returned object is always retained and autoreleased for the caller, whether it came from
 * the cache or was just created.
 *
 * @param filename The scraper name as reported by +allScrapperNames.
 * @return The scraper, or nil when the name is unknown, its content type is neither
 *         "tvshows" nor "movies", or the scraper file cannot be loaded.
 */
+ (SapphireScraper *)scrapperWithName:(NSString *)filename
{
    if(!scraperPaths)
        [SapphireScraper allScrapperNames];

    NSValue *value = [scrapers objectForKey:filename];
    if(value != nil)
    {
        /* Keep the cached instance alive for the caller even if its other owner lets go */
        SapphireScraper *cached = [value nonretainedObjectValue];
        return [[cached retain] autorelease];
    }

    NSString *entry = [scraperPaths objectForKey:filename];
    if(entry == nil)
        return nil;

    /* Entries have the form "<content type>-<path>" */
    NSRange dash = [entry rangeOfString:@"-"];
    if(dash.location == NSNotFound)
        return nil;
    NSString *type = [entry substringToIndex:dash.location];
    NSString *path = [entry substringFromIndex:dash.location + 1];

    NSError *error = nil;
    SapphireScraper *scraper = nil;
    if([type isEqualToString:@"tvshows"])
        scraper = [[SapphireTVShowScraper alloc] initWithPath:path error:&error];
    else if([type isEqualToString:@"movies"])
        scraper = [[SapphireMovieScraper alloc] initWithPath:path error:&error];

    if(!scraper)
        return nil;

    /* A nil key would raise; a scraper without a name simply is not cached */
    NSString *cacheKey = [scraper name];
    if(cacheKey != nil)
        [scrapers setObject:[NSValue valueWithNonretainedObject:scraper] forKey:cacheKey];
    return [scraper autorelease];
}
|---|
| 135 | |
|---|
/**
 * Loads a scraper definition from the XML file at path.
 *
 * Top-level <include> elements are resolved relative to the directory of path: the children
 * of each included document's root are moved into this scraper's root and the <include>
 * element is removed. Included files may themselves contain <include> elements; each distinct
 * file is merged at most once, so a file that includes itself (directly or through a cycle)
 * cannot keep the loop running forever. Includes that cannot be read are dropped silently.
 * Afterwards the settings dictionary is created and populated via -parseSettings.
 *
 * @param path  Path of the scraper XML file.
 * @param error Receives the XML loading error for the main file, if any.
 * @return The initialized scraper, or nil when the file has no readable root element.
 */
- (id)initWithPath:(NSString *)path error:(NSError * *)error
{
    self = [super init];
    if (self != nil) {
        NSURL *url = [NSURL fileURLWithPath:path];
        NSXMLDocument *doc = [[NSXMLDocument alloc] initWithContentsOfURL:url options:0 error:error];
        root = [[doc rootElement] retain];
        [doc release];
        if(root == nil)
        {
            [self autorelease];
            return nil;
        }

        NSString *myDir = [path stringByDeletingLastPathComponent];
        /* Standardized paths of every file merged so far, the main file included */
        NSMutableSet *mergedPaths = [NSMutableSet set];
        NSString *mainKey = [path stringByStandardizingPath];
        if(mainKey != nil)
            [mergedPaths addObject:mainKey];

        NSArray *includes;
        while([(includes = [root elementsForName:@"include"]) count])
        {
            NSXMLElement *include;
            NSEnumerator *includeEnum = [includes objectEnumerator];
            while((include = [includeEnum nextObject]) != nil)
            {
                NSString *includePath = [myDir stringByAppendingPathComponent:[include stringValue]];
                NSString *includeKey = [includePath stringByStandardizingPath];
                /* The directive is always removed, otherwise the outer loop could never end */
                [include detach];
                if(includeKey == nil || [mergedPaths containsObject:includeKey])
                    continue;
                [mergedPaths addObject:includeKey];

                NSXMLDocument *includeDoc = [[NSXMLDocument alloc] initWithContentsOfURL:[NSURL fileURLWithPath:includePath] options:0 error:nil];
                if(includeDoc)
                {
                    NSArray *children = [[includeDoc rootElement] children];
                    NSXMLElement *child;
                    NSEnumerator *childEnum = [children objectEnumerator];
                    while((child = [childEnum nextObject]) != nil)
                    {
                        /* A node must leave its old parent before it can be added to a new one */
                        [child detach];
                        [root addChild:child];
                    }
                }
                [includeDoc release];
            }
        }

        settings = [[NSMutableDictionary alloc] init];
        [self parseSettings];
    }
    return self;
}
|---|
| 181 | |
|---|
/**
 * Removes this instance from the shared scraper cache and releases everything it owns.
 *
 * The cache entry is removed only when it really refers to this object: an instance whose
 * initialization failed may have no name at all (a nil key would raise), or may share its
 * name with a different, still-live scraper whose entry must stay in place.
 */
- (void) dealloc
{
    NSString *cacheKey = [self name];
    if(cacheKey != nil && [[scrapers objectForKey:cacheKey] nonretainedObjectValue] == self)
        [scrapers removeObjectForKey:cacheKey];
    [root release];
    [settings release];
    [settingsXML release];
    [self clearBuffers];
    [self clearStorchMatches];
    [super dealloc];
}
|---|
| 192 | |
|---|
/* Value of the root element's "name" attribute, or nil when the attribute is absent. */
- (NSString *)name
{
    NSXMLNode *attribute = [root attributeForName:@"name"];
    return [attribute stringValue];
}
|---|
| 197 | |
|---|
/* Value of the root element's "content" attribute (compared against "movies" / "tvshows" elsewhere in this file). */
- (NSString *)contentType
{
    NSXMLNode *attribute = [root attributeForName:@"content"];
    return [attribute stringValue];
}
|---|
| 202 | |
|---|
/*
 * Value of the root element's "thumb" attribute, or nil when the attribute is absent.
 * (Previously this read the "content" attribute and so returned the same value as -contentType.)
 */
- (NSString *)thumbUrl
{
    return [[root attributeForName:@"thumb"] stringValue];
}
|---|
| 207 | |
|---|
/*
 * Returns the value of the root element's "thumb" attribute.
 * NOTE(review): reading "thumb" for an encoding looks like a copy-paste slip; the attribute
 * that actually carries the encoding is not visible in this file - confirm against the
 * scraper XML format before changing it.
 */
- (NSString *)serverEncoding
{
    NSXMLNode *attribute = [root attributeForName:@"thumb"];
    return [attribute stringValue];
}
|---|
| 212 | |
|---|
/* The retained result of evaluating the "GetSettings" function in -parseSettings; may be nil. */
- (NSString *)settingsXML
{
    return self->settingsXML;
}
|---|
| 217 | |
|---|
/* The mutable settings dictionary itself (not a copy); keys are setting ids filled in by -parseSetting:. */
- (NSMutableDictionary *)settings
{
    return self->settings;
}
|---|
| 222 | |
|---|
/* Clears all buffers, loads urlContent into the first buffer and evaluates "GetSearchResults". */
- (NSString *)searchResultsForURLContent:(NSString *)urlContent
{
    [self clearBuffers];
    [self setBuffer:0 toString:urlContent];

    NSString *results = [self parseFunction:@"GetSearchResults"];
    return results;
}
|---|
| 229 | |
|---|
/* Clears all buffers, loads nfoContent into the first buffer and evaluates "NfoUrl". */
- (NSString *)searchResultsForNfoContent:(NSString *)nfoContent
{
    [self clearBuffers];
    [self setBuffer:0 toString:nfoContent];

    NSString *results = [self parseFunction:@"NfoUrl"];
    return results;
}
|---|
| 236 | |
|---|
/**
 * Evaluates the named function after loading the variadic arguments into the buffers.
 *
 * The arguments following function must be NSString objects terminated by nil; the first goes
 * into buffer 0, the second into buffer 1, and so on. Existing buffers are not cleared first.
 * At most SCRAPER_BUFFER_COUNT arguments are stored; any further ones are ignored.
 *
 * @return The result of -parseFunction: for function.
 */
- (NSString *)functionResultWithArguments:(NSString *)function, ...
{
    va_list argList;
    va_start(argList, function);
    NSString *argument;
    int index = 0;
    /* The bound is tested first so va_arg is never read past what can be stored */
    while(index < SCRAPER_BUFFER_COUNT && (argument = va_arg(argList, id)) != nil)
    {
        [self setBuffer:index toString:argument];
        index++;
    }
    va_end(argList);
    return [self parseFunction:function];
}
|---|
| 250 | |
|---|
/**
 * Stores str (retained) in the buffer slot index, releasing whatever the slot held before.
 *
 * @param index 0-based slot; values outside 0 ..< SCRAPER_BUFFER_COUNT are ignored because
 *              callers derive the index from attributes in the scraper XML.
 * @param str   New content, or nil to empty the slot.
 */
- (void)setBuffer:(int)index toString:(NSString *)str
{
    if(index < 0 || index >= SCRAPER_BUFFER_COUNT)
        return;

    /* Retain the new value before releasing the old one: they may be the same object */
    NSString *previous = scraperBuffers[index];
    scraperBuffers[index] = [str retain];
    [previous release];
}
|---|
| 256 | |
|---|
/* Releases the content of every scraper buffer and leaves all SCRAPER_BUFFER_COUNT slots nil. */
- (void)clearBuffers
{
    int slot = 0;
    while(slot < SCRAPER_BUFFER_COUNT)
    {
        NSString *old = scraperBuffers[slot];
        scraperBuffers[slot] = nil;
        [old release];
        slot++;
    }
}
|---|
| 266 | |
|---|
/**
 * Remembers str (retained) as the stored match for capture group index, releasing the
 * previously stored value.
 *
 * @param index Capture group number; values outside 0 ..< SCRAPER_MATCH_COUNT are ignored.
 * @param str   The matched text, or nil to forget the stored match.
 */
- (void)setStoredMatch:(int)index toString:(NSString *)str
{
    if(index < 0 || index >= SCRAPER_MATCH_COUNT)
        return;

    /* Retain the new value before releasing the old one: they may be the same object */
    NSString *previous = storedMatches[index];
    storedMatches[index] = [str retain];
    [previous release];
}
|---|
| 272 | |
|---|
/* Releases every stored match and leaves all SCRAPER_MATCH_COUNT slots nil. */
- (void)clearStorchMatches
{
    int slot = 0;
    while(slot < SCRAPER_MATCH_COUNT)
    {
        NSString *old = storedMatches[slot];
        storedMatches[slot] = nil;
        [old release];
        slot++;
    }
}
|---|
| 282 | |
|---|
/**
 * Records the default value of one <setting> element in the settings dictionary.
 *
 * Separators (type "sep") and elements lacking an id or a type are ignored. A "bool" setting
 * is stored as an NSNumber that is YES only when the default is exactly "true"; "text" and
 * "labelenum" settings store the default string (empty when absent). Other types store nothing.
 */
- (void)parseSetting:(NSXMLElement *)setting
{
    NSString *kind = [[setting attributeForName:@"type"] stringValue];
    if([kind isEqualToString:@"sep"])
        return;

    NSString *key = [[setting attributeForName:@"id"] stringValue];
    if([key length] == 0 || [kind length] == 0)
        return;

    NSString *initial = [[setting attributeForName:@"default"] stringValue];
    if([initial length] == 0)
        initial = @"";

    id stored = nil;
    if([kind isEqualToString:@"bool"])
        stored = [NSNumber numberWithBool:[initial isEqualToString:@"true"]];
    else if([kind isEqualToString:@"text"] || [kind isEqualToString:@"labelenum"])
        stored = initial;

    if(stored != nil)
        [settings setObject:stored forKey:key];
}
|---|
| 308 | |
|---|
/**
 * Evaluates the "GetSettings" function, keeps its (retained) result as settingsXML and, when
 * that result is non-empty, parses it as XML and feeds each <setting> child of its root to
 * -parseSetting:.
 */
- (void)parseSettings
{
    settingsXML = [[self parseFunction:@"GetSettings"] retain];
    if([settingsXML length] == 0)
        return;

    NSXMLDocument *settingsDoc = [[NSXMLDocument alloc] initWithXMLString:settingsXML options:0 error:nil];
    NSArray *descriptions = [[settingsDoc rootElement] elementsForName:@"setting"];

    NSEnumerator *descriptionEnum = [descriptions objectEnumerator];
    NSXMLElement *description;
    while((description = [descriptionEnum nextObject]) != nil)
        [self parseSetting:description];

    [settingsDoc release];
}
|---|
| 327 | |
|---|
/**
 * Returns str without leading and trailing newline, carriage return, U+0085, space and tab
 * characters. A nil, empty or all-whitespace input yields the empty string.
 */
NSString *trimmedString(NSString *str)
{
    if(str == nil)
        return @"";

    NSString *members = [NSString stringWithFormat:@"\n\r%C \t", 0x85];
    NSCharacterSet *whitespace = [NSCharacterSet characterSetWithCharactersInString:members];
    return [str stringByTrimmingCharactersInSet:whitespace];
}
|---|
| 345 | |
|---|
/**
 * Strips markup from str: everything from a '<' up to and including the next '>' is dropped,
 * except that a tag starting with "<br" (followed by nothing, a space or a '/') becomes a
 * newline. XML entities in the remaining text are decoded and re-encoded, and the result is
 * passed through trimmedString().
 */
NSString *cleanedString(NSString *str)
{
    NSMutableString *text = [[NSMutableString alloc] init];
    NSScanner *scanner = [NSScanner scannerWithString:str];
    /* An empty skip set: whitespace is content here and must not be swallowed */
    [scanner setCharactersToBeSkipped:[NSCharacterSet characterSetWithCharactersInString:@""]];

    for(;;)
    {
        if([scanner isAtEnd])
            break;

        /* Plain text in front of the next tag */
        NSString *chunk = nil;
        [scanner scanUpToString:@"<" intoString:&chunk];
        if(chunk != nil)
            [text appendString:chunk];

        /* The tag itself, without its closing '>' */
        NSString *tag = nil;
        [scanner scanUpToString:@">" intoString:&tag];
        BOOL isLineBreak = NO;
        if([tag hasPrefix:@"<br"])
        {
            if([tag length] == 3)
                isLineBreak = YES;
            else
            {
                unichar next = [tag characterAtIndex:3];
                isLineBreak = (next == ' ' || next == '/');
            }
        }
        if(isLineBreak)
            [text appendString:@"\n"];
        [scanner scanString:@">" intoString:nil];
    }

    /* TV Rage emits a bare '&' where HTML requires '&amp;'. Decoding every entity and then
       re-encoding the result normalizes both forms. */
    NSString *decoded = (NSString *)CFXMLCreateStringByUnescapingEntities(NULL, (CFStringRef)text, NULL);
    NSString *reencoded = (NSString *)CFXMLCreateStringByEscapingEntities(NULL, (CFStringRef)decoded, NULL);
    [text release];
    [decoded release];
    return trimmedString([reencoded autorelease]);
}
|---|
| 371 | |
|---|
/**
 * Fills values[0 ..< SCRAPER_MATCH_COUNT] from a comma separated list attribute.
 *
 * Every entry starts out as defaultValue; each number listed in the attribute flips the entry
 * at that index to !defaultValue. Numbers that are not in 1 ..< SCRAPER_MATCH_COUNT are ignored
 * (the array has SCRAPER_MATCH_COUNT entries, so SCRAPER_MATCH_COUNT itself is out of range).
 *
 * @param element       Element carrying the attribute; may be nil.
 * @param attributeName Name of the list attribute, e.g. "noclean" or "trim".
 * @param defaultValue  Value for indexes that are not listed.
 * @param values        Output array with room for SCRAPER_MATCH_COUNT BOOLs.
 */
void bufferBooleanAttributeWithDefault(NSXMLElement *element, NSString *attributeName, BOOL defaultValue, BOOL *values)
{
    int i;
    for(i=0; i<SCRAPER_MATCH_COUNT; i++)
        values[i] = defaultValue;

    NSString *attr = [[element attributeForName:attributeName] stringValue];
    if(attr)
    {
        NSArray *valueStrings = [attr componentsSeparatedByString:@","];
        int count = [valueStrings count];
        for(i=0; i<count; i++)
        {
            int index = [[valueStrings objectAtIndex:i] intValue];
            if(index > 0 && index < SCRAPER_MATCH_COUNT)
                values[index] = !defaultValue;
        }
    }
}
|---|
| 394 | |
|---|
/**
 * Reads a yes/no attribute. The result differs from defaultValue only when the attribute is
 * present and spells out the opposite choice exactly: "no" for a YES default, "yes" for a NO
 * default. Any other text, or a missing attribute or element, yields defaultValue.
 */
BOOL booleanAttributeWithDefault(NSXMLElement *element, NSString *attributeName, BOOL defaultValue)
{
    NSString *text = [[element attributeForName:attributeName] stringValue];
    NSString *oppositeWord = defaultValue ? @"no" : @"yes";

    if(text != nil && [text isEqualToString:oppositeWord])
        return !defaultValue;
    return defaultValue;
}
|---|
| 412 | |
|---|
/**
 * Returns the integer value of the named attribute. defaultValue is returned when the
 * attribute is missing or its text parses to 0 (so an explicit "0" also yields the default).
 */
int integerAttributeWithDefault(NSXMLElement *element, NSString *attributeName, int defaultValue)
{
    int parsed = [[[element attributeForName:attributeName] stringValue] intValue];
    return (parsed != 0) ? parsed : defaultValue;
}
|---|
| 424 | |
|---|
/**
 * Expands buffer and setting references in input.
 *
 * "$$N" is replaced by the content of buffer N (1-based; an empty or out-of-range buffer gives
 * the empty string, and a "$$" not followed by a valid number is removed). "$INFO[id]" is
 * replaced by the value of setting id, or removed when there is no usable value or no
 * closing bracket.
 *
 * Each pass moves forward through the string and never rescans text it has just inserted, so
 * buffer or setting content that itself contains "$$N" / "$INFO[" cannot cause endless
 * expansion. Buffer content inserted by the first pass is still subject to the "$INFO[" pass.
 *
 * @param input Template string; nil is returned unchanged as nil.
 * @return A new autoreleased string with all references expanded.
 */
- (NSString *)substituteBuffersIntoInput:(NSString *)input
{
    /* Messaging nil reports a range at location 0, which the loops below would chase forever */
    if(input == nil)
        return nil;

    NSMutableString *mutStr = [input mutableCopy];

    NSRange range;
    NSRange search = NSMakeRange(0, [mutStr length]);
    while((range = [mutStr rangeOfString:@"$$" options:0 range:search]).location != NSNotFound)
    {
        int index = [[mutStr substringFromIndex:range.location + 2] intValue];
        NSString *replacement = @"";
        if(index > 0 && index <= SCRAPER_BUFFER_COUNT)
        {
            /* Also consume the one or two digits that named the buffer */
            if(index > 9)
                range.length += 2;
            else
                range.length ++;

            if(scraperBuffers[index - 1] != nil)
                replacement = scraperBuffers[index - 1];
        }
        [mutStr replaceCharactersInRange:range withString:replacement];
        search.location = range.location + [replacement length];
        search.length = [mutStr length] - search.location;
    }

    search = NSMakeRange(0, [mutStr length]);
    while((range = [mutStr rangeOfString:@"$INFO[" options:0 range:search]).location != NSNotFound)
    {
        int offset = range.location + 6;
        NSRange endRange = [mutStr rangeOfString:@"]" options:0 range:NSMakeRange(offset, [mutStr length] - offset)];
        NSString *replacement = @"";
        if(endRange.location != NSNotFound)
        {
            range.length = endRange.location - range.location + 1;
            NSString *setting = [mutStr substringWithRange:NSMakeRange(offset, endRange.location - offset)];
            id value = [settings objectForKey:setting];
            if([value isKindOfClass:[NSString class]])
                replacement = value;
            else if([value isKindOfClass:[NSNumber class]])
                /* -parseSetting: stores "bool" settings as NSNumber and reads "true" as YES;
                   NOTE(review): confirm "true"/"false" is the spelling scrapers expect here */
                replacement = [value boolValue] ? @"true" : @"false";
        }
        [mutStr replaceCharactersInRange:range withString:replacement];
        search.location = range.location + [replacement length];
        search.length = [mutStr length] - search.location;
    }

    NSString *ret = [NSString stringWithString:mutStr];
    [mutStr release];
    return ret;
}
|---|
| 472 | |
|---|
/**
 * Builds the output for one regex match by expanding back-references in output.
 *
 * "\N" is replaced by the text of capture group N. "\$N" does the same and also remembers the
 * text as the stored match for N; when group N did not take part in this match, the stored
 * text from an earlier match is used instead. Captured text is passed through cleanedString()
 * or trimmedString() according to the clean / trim flags of the group. A backslash that does
 * not introduce a usable reference is removed.
 *
 * @param output     Template string; nil yields the empty string.
 * @param input      UTF-8 bytes the regex was run against.
 * @param matches    Offset pairs as filled in by pcre_exec.
 * @param matchCount Number of valid pairs in matches.
 * @return A new autoreleased string.
 */
- (NSString *)replacementStrForOutput:(NSString *)output inputStr:(const char *)input matches:(int *)matches count:(int)matchCount
{
    /* Messaging nil reports a range at location 0, which the loop below would chase forever */
    if(output == nil)
        return @"";

    NSMutableString *mutStr = [output mutableCopy];

    NSRange range = NSMakeRange(0, [mutStr length]);
    while((range = [mutStr rangeOfString:@"\\" options:0 range:range]).location != NSNotFound)
    {
        /* The backslash may be the very last character */
        BOOL storedMatch = (range.location + 1 < [mutStr length] && [mutStr characterAtIndex:range.location + 1] == '$');
        int index = [[mutStr substringFromIndex:range.location + 1 + storedMatch] intValue];

        /* matched: the group has an offset pair; storable: the group has a stored-match slot */
        BOOL matched = (index > 0 && index < matchCount);
        BOOL storable = (storedMatch && index > 0 && index < SCRAPER_MATCH_COUNT);
        if(matched || storable)
            range.length++;
        range.length += storedMatch;

        NSString *replacement = nil;
        if(matched && matches[index<<1] != -1)
        {
            int start = matches[index<<1];
            int end = matches[(index<<1) + 1];
            /* nil when the byte range is not valid UTF-8 on its own */
            replacement = [[[NSString alloc] initWithBytes:input+start length:end-start encoding:NSUTF8StringEncoding] autorelease];
            if(replacement != nil)
            {
                if(storedMatch)
                    [self setStoredMatch:index toString:replacement];
                if(index < SCRAPER_MATCH_COUNT)
                {
                    if(clean[index])
                        replacement = cleanedString(replacement);
                    else if(trim[index])
                        replacement = trimmedString(replacement);
                }
            }
        }
        else if(storable)
            replacement = storedMatches[index];

        /* replaceCharactersInRange:withString: raises for a nil string */
        if(replacement == nil)
            replacement = @"";
        [mutStr replaceCharactersInRange:range withString:replacement];
        range.location += [replacement length];
        range.length = [mutStr length] - range.location;
    }

    NSString *ret = [NSString stringWithString:mutStr];
    [mutStr release];
    return ret;
}
|---|
| 512 | |
|---|
/**
 * Runs the <expression> of a RegExp element against input and stores the generated text.
 *
 * The element's "output" attribute is the template for each match; the first <expression>
 * child supplies the pattern (default "(.*)") and the attributes "clear", "repeat", "noclean",
 * "trim" and "compare". With "repeat" the pattern is applied again after each match. With
 * "compare" set to a buffer number, a match is kept only if its lowercased output contains
 * that buffer's content. The collected text replaces, or with append is added to, buffer dest;
 * an empty result leaves the buffer untouched. A pattern that fails to compile does nothing.
 *
 * @param element The RegExp element.
 * @param input   Text to match against; nil is treated as the empty string.
 * @param dest    1-based destination buffer; values outside 1 ... SCRAPER_BUFFER_COUNT are ignored.
 * @param append  YES to append to the existing buffer content instead of replacing it.
 */
- (void)parseExpression:(NSXMLElement *)element withInput:(NSString *)input intoDest:(int)dest andAppend:(BOOL)append
{
    /* dest comes from the scraper XML and is used as an array index below */
    if(dest < 1 || dest > SCRAPER_BUFFER_COUNT)
        return;

    /* input may be the very buffer that "clear" releases below; keep it alive */
    input = [[input retain] autorelease];
    if(input == nil)
        input = @"";

    NSString *output = [self substituteBuffersIntoInput:[[element attributeForName:@"output"] stringValue]];
    NSArray *expressions = [element elementsForName:@"expression"];
    NSString *expression = nil;
    NSXMLElement *expressionElement = nil;
    if([expressions count])
    {
        expressionElement = [expressions objectAtIndex:0];
        if([expressionElement childCount])
            expression = [[expressionElement childAtIndex:0] stringValue];
    }
    if(![expression length])
        expression = @"(.*)";

    const char *errMsg = NULL;
    int errOffset = 0;
    pcre *reg = pcre_compile([expression UTF8String], PCRE_DOTALL, &errMsg, &errOffset, NULL);
    if(!reg)
        return;

    //TODO: the "optional" attribute is not implemented

    if(booleanAttributeWithDefault(expressionElement, @"clear", NO))
        [self setBuffer:dest-1 toString:nil];

    BOOL repeat = booleanAttributeWithDefault(expressionElement, @"repeat", NO);

    bufferBooleanAttributeWithDefault(expressionElement, @"noclean", YES, clean);

    bufferBooleanAttributeWithDefault(expressionElement, @"trim", NO, trim);

    /* Neither the attribute nor the buffer it names changes while matching */
    int compare = integerAttributeWithDefault(expressionElement, @"compare", -1);
    NSString *searchStr = nil;
    if(compare > 0 && compare <= SCRAPER_BUFFER_COUNT)
        searchStr = scraperBuffers[compare - 1];

    NSMutableString *result = [@"" mutableCopy];
    int match[30];
    int offset = 0;
    const char *inputStr = [input UTF8String];
    int inputLen = strlen(inputStr);
    int matchCount = 0;
    [self clearStorchMatches];
    while(offset <= inputLen && (matchCount = pcre_exec(reg, NULL, inputStr, inputLen, offset, 0, match, 30)) >= 0)
    {
        /* 0 means the offset vector was too small: the 10 pairs that fit were filled in */
        if(matchCount == 0)
            matchCount = 10;

        NSString *replacementString = [self replacementStrForOutput:output inputStr:inputStr matches:match count:matchCount];
        BOOL addToResult = YES;
        if([searchStr length] && [[replacementString lowercaseString] rangeOfString:searchStr].location == NSNotFound)
            addToResult = NO;
        if(addToResult && replacementString != nil)
            [result appendString:replacementString];
        if(!repeat)
            break;
        /* An empty match would be found again at the same offset forever; step past it */
        offset = (match[1] > match[0]) ? match[1] : match[1] + 1;
    }

    pcre_free(reg);

    NSString *combined = result;
    if(append)
    {
        NSString *orig = scraperBuffers[dest - 1];
        if(orig != nil)
            combined = [orig stringByAppendingString:combined];
    }
    if([combined length])
        [self setBuffer:dest-1 toString:combined];
    [result release];
}
|---|
| 584 | |
|---|
/**
 * Evaluates a conditional against the settings dictionary: the boolValue of the setting named
 * by condition, inverted when condition starts with '!'. condition must not be empty.
 */
- (BOOL)checkCondition:(NSString *)condition
{
    BOOL negate = ([condition characterAtIndex:0] == '!');
    NSString *key = negate ? [condition substringFromIndex:1] : condition;

    BOOL enabled = [[settings objectForKey:key] boolValue];
    if(negate)
        return !enabled;
    return enabled;
}
|---|
| 601 | |
|---|
/**
 * Evaluates one RegExp element.
 *
 * Nested RegExp children are evaluated first. Then, unless the element's "conditional"
 * attribute evaluates to NO, its expression is run against the "input" attribute (after
 * buffer substitution) or, when there is none, against buffer 1. The "dest" attribute names
 * the destination buffer (default 1); a '+' after the number requests appending.
 *
 * @return The destination buffer number parsed from "dest" (1 when absent).
 */
- (int)parseElement:(NSXMLElement *)element
{
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    NSArray *regexChildren = [element elementsForName:@"RegExp"];
    int count = [regexChildren count], i;
    for(i=0; i<count; i++)
        [self parseElement:[regexChildren objectAtIndex:i]];

    int result = 1;
    NSString *value = [[element attributeForName:@"dest"] stringValue];
    if(value != nil)
        result = [value intValue];
    /* Accept the '+' anywhere after the first character so two-digit destinations ("12+") work */
    NSRange plus = [value rangeOfString:@"+"];
    BOOL append = (value != nil && plus.location != NSNotFound && plus.location > 0);

    NSString *conditional = [[element attributeForName:@"conditional"] stringValue];
    if(![conditional length] || [self checkCondition:conditional])
    {
        NSString *input = [[element attributeForName:@"input"] stringValue];
        if(input)
            input = [self substituteBuffersIntoInput:input];
        else
            /* The buffer may be replaced while the expression runs; keep this value alive */
            input = [[scraperBuffers[0] retain] autorelease];

        [self parseExpression:element withInput:input intoDest:result andAppend:append];
    }

    /* Single exit, so the pool is drained on the skipped-conditional path as well */
    [pool drain];

    return result;
}
|---|
| 640 | |
|---|
/**
 * Evaluates the first child of the root element named function.
 *
 * Each RegExp child of that element is run in order; the content of the buffer named by the
 * element's "dest" attribute (default 1) is the result. Unless the element carries
 * clearbuffers="no", all buffers are cleared afterwards.
 *
 * @return The autoreleased result, or nil when the function does not exist, the destination
 *         buffer is empty, or "dest" is outside 1 ... SCRAPER_BUFFER_COUNT.
 */
- (NSString *)parseFunction:(NSString *)function
{
    NSArray *elements = [root elementsForName:function];
    if(![elements count])
        return nil;

    NSXMLElement *functionElement = [elements objectAtIndex:0];
    elements = [functionElement elementsForName:@"RegExp"];
    int count = [elements count], i;
    for(i=0; i<count; i++)
    {
        [self parseElement:[elements objectAtIndex:i]];
    }

    /* dest comes from the scraper XML; never index the buffer array with an unchecked value */
    int dest = integerAttributeWithDefault(functionElement, @"dest", 1);
    NSString *ret = nil;
    if(dest >= 1 && dest <= SCRAPER_BUFFER_COUNT)
        ret = [[scraperBuffers[dest - 1] retain] autorelease];
    if(booleanAttributeWithDefault(functionElement, @"clearbuffers", YES))
        [self clearBuffers];

    return ret;
}
|---|
| 661 | |
|---|
| 662 | @end |
|---|
| 663 | |
|---|
@implementation SapphireMovieScraper

/* Loads the scraper at path and rejects it (returning nil) unless its content type is "movies". */
- (id)initWithPath:(NSString *)path error:(NSError * *)error
{
    self = [super initWithPath:path error:error];
    if(self == nil)
        return nil;

    if([[self contentType] isEqualToString:@"movies"])
        return self;

    [self autorelease];
    return nil;
}

/* Evaluates "CreateSearchUrl" with the percent-escaped movie name in buffer 1 and the percent-escaped year in buffer 2. */
- (NSString *)searchURLForMovieName:(NSString *)movieName year:(NSString *)year
{
    NSString *escapedName = [movieName stringByAddingPercentEscapesUsingEncoding:NSUTF8StringEncoding];
    NSString *escapedYear = [year stringByAddingPercentEscapesUsingEncoding:NSUTF8StringEncoding];

    [self clearBuffers];
    [self setBuffer:0 toString:escapedName];
    [self setBuffer:1 toString:escapedYear];
    return [self parseFunction:@"CreateSearchUrl"];
}

/* Evaluates "GetDetails" with the page content, the movie id and the URL in buffers 1 to 3. */
- (NSString *)movieDetailsForURLContent:(NSString *)urlContent movieID:(NSString *)movieID atURL:(NSString *)url
{
    [self clearBuffers];
    [self setBuffer:0 toString:urlContent];
    [self setBuffer:1 toString:movieID];
    [self setBuffer:2 toString:url];

    NSString *details = [self parseFunction:@"GetDetails"];
    return details;
}

@end
|---|
| 697 | |
|---|
@implementation SapphireTVShowScraper

/* Loads the scraper at path and rejects it (returning nil) unless its content type is "tvshows". */
- (id)initWithPath:(NSString *)path error:(NSError * *)error
{
    self = [super initWithPath:path error:error];
    if(self == nil)
        return nil;

    if([[self contentType] isEqualToString:@"tvshows"])
        return self;

    [self autorelease];
    return nil;
}

/* Evaluates "CreateSearchUrl" with the percent-escaped show name in buffer 1. */
- (NSString *)searchURLForShowName:(NSString *)showName
{
    NSString *escapedName = [showName stringByAddingPercentEscapesUsingEncoding:NSUTF8StringEncoding];

    [self clearBuffers];
    [self setBuffer:0 toString:escapedName];
    return [self parseFunction:@"CreateSearchUrl"];
}

/* Evaluates "GetDetails" with the page content, the show id and the URL in buffers 1 to 3. */
- (NSString *)showDetailsForURLContent:(NSString *)urlContent showID:(NSString *)showID atURL:(NSString *)url
{
    [self clearBuffers];
    [self setBuffer:0 toString:urlContent];
    [self setBuffer:1 toString:showID];
    [self setBuffer:2 toString:url];

    NSString *details = [self parseFunction:@"GetDetails"];
    return details;
}

/* Evaluates "GetEpisodeList" with the page content in buffer 1 and the URL in buffer 2. */
- (NSString *)episodeListForURLContent:(NSString *)urlContent atURL:(NSString *)url
{
    [self clearBuffers];
    [self setBuffer:0 toString:urlContent];
    [self setBuffer:1 toString:url];

    NSString *episodes = [self parseFunction:@"GetEpisodeList"];
    return episodes;
}

/* Evaluates "GetEpisodeDetails" with the page content, the episode id and the URL in buffers 1 to 3. */
- (NSString *)episodeDetailsForURLContent:(NSString *)urlContent episodeID:(NSString *)epID atURL:(NSString *)url
{
    [self clearBuffers];
    [self setBuffer:0 toString:urlContent];
    [self setBuffer:1 toString:epID];
    [self setBuffer:2 toString:url];

    NSString *details = [self parseFunction:@"GetEpisodeDetails"];
    return details;
}

@end
|---|
| 747 | |
|---|
/* String value of the last child element named childName, or nil when there is no such child. */
NSString *stringValueOfChild(NSXMLElement *element, NSString *childName)
{
    NSXMLElement *last = [[element elementsForName:childName] lastObject];
    if(last == nil)
        return nil;
    return [last stringValue];
}
|---|
| 756 | |
|---|
/* intValue of the last child element named childName wrapped in an NSNumber, or nil when there is no such child. */
NSNumber *intValueOfChild(NSXMLElement *element, NSString *childName)
{
    NSXMLElement *last = [[element elementsForName:childName] lastObject];
    if(last == nil)
        return nil;

    int parsed = [[last stringValue] intValue];
    return [NSNumber numberWithInt:parsed];
}
|---|
| 766 | |
|---|
/* Date parsed with +dateWithNaturalLanguageString: from the last child element named childName, or nil when there is no such child. */
NSDate *dateValueOfChild(NSXMLElement *element, NSString *childName)
{
    NSXMLElement *last = [[element elementsForName:childName] lastObject];
    if(last == nil)
        return nil;

    NSString *text = [last stringValue];
    return [NSDate dateWithNaturalLanguageString:text];
}
|---|
| 776 | |
|---|
/* String values of all child elements named childName, in document order, or nil when there are none. */
NSArray *arrayStringValueOfChild(NSXMLElement *element, NSString *childName)
{
    NSArray *matches = [element elementsForName:childName];
    return ([matches count] > 0) ? [matches valueForKey:@"stringValue"] : nil;
}
|---|
| 785 | |
|---|
/* String values of the objects produced by running xpath through -objectsForXQuery:error: on element, or nil when the query yields nothing (errors are not reported). */
NSArray *arrayStringValueOfXPath(NSXMLElement *element, NSString *xpath)
{
    NSError *error = nil;
    NSArray *matches = [element objectsForXQuery:xpath error:&error];
    return ([matches count] > 0) ? [matches valueForKey:@"stringValue"] : nil;
}
|---|