/*
 * SapphireScraper.m
 * Sapphire
 *
 * Created by Graham Booker on Dec. 19, 2009.
 * Copyright 2009 Sapphire Development Team and/or www.nanopi.net
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation; either version 3 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 * Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this program; if not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#import "SapphireScraper.h"
#include "pcre.h"

/* Private helpers used only inside this file. */
@interface SapphireScraper ()
- (void)parseSettings;
- (void)setBuffer:(int)index toString:(NSString *)str;
- (void)clearBuffers;
- (void)setStoredMatch:(int)index toString:(NSString *)str;
- (void)clearStorchMatches;
- (NSString *)parseFunction:(NSString *)function;
@end

@implementation SapphireScraper

/* Cache of live scrapers: scraper name -> NSValue wrapping a non-retained scraper pointer. */
static NSMutableDictionary *scrapers = nil;
/* Scraper name -> "<content type>-<file path>", rebuilt by +allScrapperNames. */
static NSDictionary *scraperPaths = nil;

/* Creates the scraper cache the first time the class is initialized. */
+ (void)initialize
{
    if(!scrapers)
        scrapers = [[NSMutableDictionary alloc] init];
}

/*
 * Scans the "scrapers" directory of this class's bundle for .xml files, records the name,
 * content type and path of each one that parses, and returns the list of scraper names.
 */
+ (NSArray *)allScrapperNames
{
    NSBundle *selfBundle = [NSBundle bundleForClass:[self class]];
    NSArray *paths = [selfBundle pathsForResourcesOfType:@"xml" inDirectory:@"scrapers"];
    NSMutableDictionary *scraperPathsDict = [[NSMutableDictionary alloc] init];
    NSEnumerator *pathEnum = [paths objectEnumerator];
    NSString *path;
    while((path = [pathEnum nextObject]) != nil)
    {
        NSError *error;
        NSURL *url = [NSURL fileURLWithPath:path];
        NSXMLDocument *doc = [[NSXMLDocument alloc] initWithContentsOfURL:url options:0 error:&error];
        /* Files that cannot be parsed are skipped silently. */
        if(!doc)
            continue;
        NSXMLElement *root = [doc rootElement];
        NSString *name = [[root
attributeForName:@"name"] stringValue];
        NSString *type = [[root attributeForName:@"content"] stringValue];
        /* -setObject:forKey: raises when the key or the object is nil, so a file whose root element
           lacks a "name" or "content" attribute is skipped instead of aborting the whole scan. */
        if(name != nil && type != nil)
            [scraperPathsDict setObject:[type stringByAppendingFormat:@"-%@", path] forKey:name];
        [doc release];
    }
    [scraperPaths release];
    scraperPaths = [scraperPathsDict copy];
    [scraperPathsDict release];
    return [scraperPaths allKeys];
}

/*
 * Returns the scraper registered under the given name, creating it on first use.
 *
 * Live scrapers are cached through non-retained pointers, so the caller owns the only strong
 * reference (the returned object is autoreleased). Returns nil when the name is unknown, when the
 * content type is neither "tvshows" nor "movies", or when the scraper file cannot be loaded.
 */
+ (SapphireScraper *)scrapperWithName:(NSString *)filename
{
    if(!scraperPaths)
        [SapphireScraper allScrapperNames];

    NSValue *value = [scrapers objectForKey:filename];
    if(value == nil)
    {
        NSString *path = [scraperPaths objectForKey:filename];
        if(path == nil)
            return nil;

        /* Stored entries have the form "<content type>-<file path>"; split at the first dash.
           The range is kept as an NSRange so NSNotFound is never truncated into an int. */
        NSRange separator = [path rangeOfString:@"-"];
        if(separator.location == NSNotFound)
            return nil;
        NSString *type = [path substringToIndex:separator.location];
        path = [path substringFromIndex:separator.location + 1];

        NSError *error = nil;
        SapphireScraper *scraper;
        if([type isEqualToString:@"tvshows"])
            scraper = [[SapphireTVShowScraper alloc] initWithPath:path error:&error];
        else if([type isEqualToString:@"movies"])
            scraper = [[SapphireMovieScraper alloc] initWithPath:path error:&error];
        else
            scraper = nil;
        if(!scraper)
            return nil;

        value = [NSValue valueWithNonretainedObject:scraper];
        [scrapers setObject:value forKey:[scraper name]];
        [scraper autorelease];
    }
    return [value nonretainedObjectValue];
}

/*
 * Loads the scraper definition at `path`.
 *
 * The root element of the XML file is retained; every <include> child is then replaced by the
 * children of the root element of the file it names (resolved relative to the directory of
 * `path`), repeating until no <include> elements remain. Returns nil if the file has no root
 * element; `error` is handed straight to NSXMLDocument.
 */
- (id)initWithPath:(NSString *)path error:(NSError * *)error
{
    self = [super init];
    if (self != nil)
    {
        NSURL *url = [NSURL fileURLWithPath:path];
        NSXMLDocument *doc = [[NSXMLDocument alloc] initWithContentsOfURL:url options:0 error:error];
        root = [[doc rootElement] retain];
        [doc release];
        if(root == nil)
        {
            [self autorelease];
            return nil;
        }

        NSArray *includes;
        /* Included files may themselves contain <include> elements, hence the outer loop. */
        while([(includes = [root elementsForName:@"include"]) count])
        {
            NSXMLElement *include;
            NSEnumerator *includeEnum = [includes objectEnumerator];
            NSString *myDir = [path stringByDeletingLastPathComponent];
            while((include = [includeEnum nextObject]) != nil)
            {
                NSString *includePath = [myDir stringByAppendingPathComponent:[include stringValue]];
                NSXMLDocument *includeDoc = [[NSXMLDocument alloc]
initWithContentsOfURL:[NSURL fileURLWithPath:includePath] options:0 error:nil];
                if(includeDoc)
                {
                    /* Move every child of the included file's root element into our own root. */
                    NSArray *children = [[includeDoc rootElement] children];
                    NSXMLElement *child;
                    NSEnumerator *childEnum = [children objectEnumerator];
                    while((child = [childEnum nextObject]) != nil)
                    {
                        [child detach];
                        [root addChild:child];
                    }
                }
                /* The <include> element is removed even when its file could not be read, which is
                   what lets the enclosing loop terminate. */
                [include detach];
                [includeDoc release];
            }
        }

        settings = [[NSMutableDictionary alloc] init];
        [self parseSettings];
    }
    return self;
}

/*
 * Removes this scraper from the class-wide cache and releases everything it owns.
 */
- (void) dealloc
{
    /* -removeObjectForKey: raises on a nil key, and the name is nil whenever init failed before a
       root element was loaded. Also make sure the cached pointer really is this instance, so that
       a scraper created directly through -initWithPath:error: cannot evict a different live one
       that happens to share its name. */
    NSString *myName = [self name];
    if(myName != nil)
    {
        NSValue *cached = [scrapers objectForKey:myName];
        if([cached nonretainedObjectValue] == (id)self)
            [scrapers removeObjectForKey:myName];
    }
    [root release];
    [settings release];
    [settingsXML release];
    [self clearBuffers];
    [self clearStorchMatches];
    [super dealloc];
}

/* Value of the root element's "name" attribute. */
- (NSString *)name
{
    return [[root attributeForName:@"name"] stringValue];
}

/* Value of the root element's "content" attribute ("tvshows" or "movies" for usable scrapers). */
- (NSString *)contentType
{
    return [[root attributeForName:@"content"] stringValue];
}

/* Value of the root element's "thumb" attribute (previously this returned "content" by mistake). */
- (NSString *)thumbUrl
{
    return [[root attributeForName:@"thumb"] stringValue];
}

/* NOTE(review): this reads the "thumb" attribute, which looks wrong for an encoding accessor.
   The intended attribute name is not visible in this file -- confirm against the scraper XML
   format before changing it. */
- (NSString *)serverEncoding
{
    return [[root attributeForName:@"thumb"] stringValue];
}

/* Returns the settingsXML instance variable. */
- (NSString *)settingsXML
{
    return settingsXML;
}

/* Returns the mutable settings dictionary created in -initWithPath:error:. */
- (NSMutableDictionary *)settings
{
    return settings;
}

/* Runs the scraper's "GetSearchResults" function with `urlContent` in the first buffer. */
- (NSString *)searchResultsForURLContent:(NSString *)urlContent
{
    [self clearBuffers];
    [self setBuffer:0 toString:urlContent];
    return [self parseFunction:@"GetSearchResults"];
}

/* Runs the scraper's "NfoUrl" function with `nfoContent` in the first buffer. */
- (NSString *)searchResultsForNfoContent:(NSString *)nfoContent
{
    [self clearBuffers];
    [self setBuffer:0 toString:nfoContent];
    return [self parseFunction:@"NfoUrl"];
}

/*
 * Runs the named scraper function after loading the nil-terminated list of string arguments
 * into consecutive buffers, starting with the first one.
 */
- (NSString *)functionResultWithArguments:(NSString *)function, ...
{
    va_list argList;
    va_start(argList, function);
    NSString *argument;
    int index = 0;
    /* The variadic list must end with nil; each argument fills the next buffer in order. */
    while((argument = va_arg(argList, id)) != nil)
    {
        [self setBuffer:index toString:argument];
        index++;
    }
    /* Every va_start must be paired with a va_end before the function returns. */
    va_end(argList);
    return [self parseFunction:function];
}

/*
 * Stores `str` (which may be nil) in buffer slot `index`, releasing the previous content.
 * NOTE(review): no upper bound is enforced here because the size of scraperBuffers is declared
 * outside this file -- confirm callers never exceed it.
 */
- (void)setBuffer:(int)index toString:(NSString *)str
{
    /* A negative slot would write before the start of the array. */
    if(index < 0)
        return;
    /* Retain the new value before releasing the old one so that storing the string a slot
       already holds cannot deallocate it in between. */
    NSString *previous = scraperBuffers[index];
    scraperBuffers[index] = [str retain];
    [previous release];
}

/* NOTE(review): the next line is damaged in this copy of the file -- the loop header of
   -clearBuffers runs straight into what appears to be the tail of a whitespace-trimming helper.
   It is reproduced unchanged; restore it from version control. */
- (void)clearBuffers { int i; for(i=0; ioffset; i--) if(![whitespace characterIsMember:[str characterAtIndex:i]]) break; if(offset > i) return @""; return [str substringWithRange:NSMakeRange(offset, i+1-offset)]; }

/*
 * Copies the text of `str` that lies outside "<...>" tags into a new string, then (see the rest
 * of the function) normalises XML entities and trims the result.
 */
NSString *cleanedString(NSString *str)
{
    NSMutableString *mutStr = [[NSMutableString alloc] init];
    NSScanner *scanner = [NSScanner scannerWithString:str];
    /* An empty skip set stops the scanner from silently dropping whitespace. */
    [scanner setCharactersToBeSkipped:[NSCharacterSet characterSetWithCharactersInString:@""]];
    while(![scanner isAtEnd])
    {
        NSString *append = nil;
        [scanner scanUpToString:@"<" intoString:&append];
        if(append)
            [mutStr appendString:append];
        NSString *tag = nil;
        [scanner scanUpToString:@">" intoString:&tag];
        /* NOTE(review): the statement below is damaged in this copy of the file; reproduced unchanged. */
        if([tag hasPrefix:@"" intoString:nil]; }
    /*TV Rage doesn't understand that an & needs to be & in the HTML, not just '&', so we have to work around yet another instance of their stupidity. 
Decoding entities and then re-encoding them seems to be the safest way to do this*/
    NSString *decoded = (NSString *)CFXMLCreateStringByUnescapingEntities(NULL, (CFStringRef)mutStr, NULL);
    NSString *reencoded = (NSString *)CFXMLCreateStringByEscapingEntities(NULL, (CFStringRef)decoded, NULL);
    /* Both CF "Create" calls return owned objects. */
    [decoded release];
    [mutStr release];
    [reencoded autorelease];
    return trimmedString(reencoded);
}

/* NOTE(review): the body of this function is damaged in this copy of the file (text is missing
   after "for(i=0; i"); it is reproduced unchanged. The surviving tail sets values[index] to
   !defaultValue for an index accepted by "index > 0 && index <= SCRAPER_MATCH_COUNT". */
void bufferBooleanAttributeWithDefault(NSXMLElement *element, NSString *attributeName, BOOL defaultValue, BOOL *values) { int i; if(defaultValue) for(i=0; i 0 && index <= SCRAPER_MATCH_COUNT) values[index] = !defaultValue; } } }

/*
 * Reads a yes/no attribute. The result is !defaultValue only when the attribute is present and
 * equals the opposite word exactly ("no" for a YES default, "yes" for a NO default); any other
 * value, a missing attribute or a nil element yields defaultValue.
 */
BOOL booleanAttributeWithDefault(NSXMLElement *element, NSString *attributeName, BOOL defaultValue)
{
    NSString *attr = [[element attributeForName:attributeName] stringValue];
    if(attr)
    {
        NSString *checkValue;
        if(defaultValue)
            checkValue = @"no";
        else
            checkValue = @"yes";
        if(![attr isEqualToString:checkValue])
            return defaultValue;
        else
            return !defaultValue;
    }
    return defaultValue;
}

/*
 * Reads an integer attribute. A missing attribute, or one whose -intValue is 0, yields
 * defaultValue.
 */
int integerAttributeWithDefault(NSXMLElement *element, NSString *attributeName, int defaultValue)
{
    NSString *attr = [[element attributeForName:attributeName] stringValue];
    if(attr)
    {
        int ret = [attr intValue];
        if(ret)
            return ret;
    }
    return defaultValue;
}

/*
 * Expands "$$N" (content of buffer N, 1-based) and "$INFO[key]" (value stored in the settings
 * dictionary under key) inside `input`. Unknown or empty references expand to the empty string.
 * Returns an autoreleased string; a nil input yields the empty string.
 */
- (NSString *)substituteBuffersIntoInput:(NSString *)input
{
    /* With a nil string every message below returns zero, including a zeroed NSRange whose
       location is never NSNotFound, so the loops would spin forever. */
    if(input == nil)
        return @"";

    NSMutableString *mutStr = [input mutableCopy];
    NSRange range;
    /* Resume each search after the text just inserted: buffers hold scraped content, and
       rescanning it could expand "$$N" sequences it happens to contain without ever finishing. */
    NSRange searchRange = NSMakeRange(0, [mutStr length]);
    while((range = [mutStr rangeOfString:@"$$" options:0 range:searchRange]).location != NSNotFound)
    {
        int index = [[mutStr substringFromIndex:range.location + 2] intValue];
        NSString *replacement;
        if(index > 0 && index <= SCRAPER_BUFFER_COUNT)
        {
            /* Swallow the one- or two-digit buffer number along with the "$$". */
            if(index > 9)
                range.length += 2;
            else
                range.length ++;
            replacement = scraperBuffers[index - 1];
            if(replacement == nil)
                replacement = @"";
        }
        else
            replacement = @"";
        [mutStr replaceCharactersInRange:range withString:replacement];
        searchRange.location = range.location + [replacement length];
        searchRange.length = [mutStr length] - searchRange.location;
    }
    while((range = [mutStr rangeOfString:@"$INFO["]).location != NSNotFound)
    {
        /* 6 is the length of "$INFO[". */
        int offset = range.location + 6;
        NSRange endRange = [mutStr rangeOfString:@"]" options:0
range:NSMakeRange(offset, [mutStr length] - offset)];
        NSString *replacement;
        if(endRange.location != NSNotFound)
        {
            /* Cover everything from "$INFO[" through the closing "]"; the key is what lies
               between them (total length minus the 7 characters of "$INFO[" and "]"). */
            range.length = endRange.location - range.location + 1;
            NSString *setting = [mutStr substringWithRange:NSMakeRange(offset, range.length - 7)];
            replacement = [settings objectForKey:setting];
            if(![replacement length])
                replacement = @"";
        }
        else
        {
            /* No closing bracket: only the "$INFO[" marker itself is removed. */
            replacement = @"";
        }
        [mutStr replaceCharactersInRange:range withString:replacement];
    }
    NSString *ret = [NSString stringWithString:mutStr];
    [mutStr release];
    return ret;
}

/*
 * Builds the output text for one regular-expression match.
 *
 * In `output`, "\N" is replaced by capture group N of the match described by `matches`
 * (start/end byte offsets into `input`, two ints per group) and `matchCount` (number of groups
 * available, group 0 included). "\$N" does the same but also remembers the captured text, and
 * falls back to the remembered text when group N did not take part in this match. Captured text
 * is passed through cleanedString() or trimmedString() according to the clean/trim flags for
 * that group. A backslash that is not followed by a usable group number is removed.
 *
 * Returns an autoreleased string; a nil `output` yields the empty string.
 */
- (NSString *)replacementStrForOutput:(NSString *)output inputStr:(const char *)input matches:(int *)matches count:(int)matchCount
{
    /* A nil template would make every range query below return a zeroed range, whose location
       is never NSNotFound, and the loop would never end. */
    if(output == nil)
        return @"";

    NSMutableString *mutStr = [output mutableCopy];
    NSRange range = NSMakeRange(0, [mutStr length]);
    while((range = [mutStr rangeOfString:@"\\" options:0 range:range]).location != NSNotFound)
    {
        /* A backslash may be the last character; never look past the end of the string. */
        unsigned long next = range.location + 1;
        BOOL storedMatch = (next < [mutStr length] && [mutStr characterAtIndex:next] == '$');
        int index = [[mutStr substringFromIndex:next + storedMatch] intValue];

        /* inMatch: group N exists in this match. inStore: N may be looked up among the stored
           matches. NOTE(review): the SCRAPER_MATCH_COUNT bound mirrors the check used by
           bufferBooleanAttributeWithDefault(); the real size of storedMatches is declared
           elsewhere -- confirm. */
        BOOL inMatch = (index > 0 && index < matchCount);
        BOOL inStore = (storedMatch && index > 0 && index <= SCRAPER_MATCH_COUNT);
        NSString *replacement = nil;

        /* Swallow the group number with the backslash so it does not leak into the result,
           including for "\$N" when group N lies beyond the groups of this match. */
        if(inMatch || inStore)
            range.length += (index > 9) ? 2 : 1;
        range.length += storedMatch;

        /* Only offsets that belong to this match are read; any other group is treated as
           "did not participate" (-1) rather than indexing outside the offsets array. */
        int start = -1;
        int end = -1;
        if(index == 0 || inMatch)
        {
            start = matches[index<<1];
            end = matches[(index<<1) + 1];
        }

        if(range.length > 1 && start != -1 && end >= start)
        {
            replacement = [[[NSString alloc] initWithBytes:input+start length:end-start encoding:NSUTF8StringEncoding] autorelease];
            if(storedMatch)
                [self setStoredMatch:index toString:replacement];
            /* The bytes can fail to decode because the pattern is matched byte-wise; leave
               `replacement` nil here and let the fallback below turn it into @"". */
            if(replacement != nil)
            {
                if(clean[index])
                    replacement = cleanedString(replacement);
                else if(trim[index])
                    replacement = trimmedString(replacement);
            }
        }
        else if(inStore)
            replacement = storedMatches[index];

        /* -replaceCharactersInRange:withString: raises on nil. */
        if(replacement == nil)
            replacement = @"";

        [mutStr replaceCharactersInRange:range withString:replacement];
        /* Continue after the inserted text so that backslashes inside it are left alone. */
        range.location += [replacement length];
        range.length = [mutStr length] - range.location;
    }
    NSString *ret = [NSString stringWithString:mutStr];
    [mutStr release];
    return ret;
}

/*
 * Applies the <expression> child of a RegExp element to `input` and stores the text built from
 * the element's "output" attribute in buffer `dest` (1-based), appending to the existing
 * content when `append` is set.
 */
- (void)parseExpression:(NSXMLElement *)element withInput:(NSString *)input intoDest:(int)dest andAppend:(BOOL)append
{
    NSString
*output = [self substituteBuffersIntoInput:[[element attributeForName:@"output"] stringValue]];

    /* Buffer numbers are 1-based; anything below 1 would index before the start of scraperBuffers. */
    if(dest < 1)
        return;
    /* -UTF8String on nil yields NULL, which strlen() cannot take; treat missing input as empty. */
    if(input == nil)
        input = @"";

    NSArray *expressions = [element elementsForName:@"expression"];
    NSString *expression = nil;
    NSXMLElement *expressionElement = nil;
    if([expressions count])
    {
        expressionElement = [expressions objectAtIndex:0];
        expression = [[expressionElement childAtIndex:0] stringValue];
    }
    /* A missing or empty expression captures the whole input. */
    if(![expression length])
        expression = @"(.*)";

    const char *errMsg = NULL;
    int errOffset = 0;
    pcre *reg = pcre_compile([expression UTF8String], PCRE_DOTALL, &errMsg, &errOffset, NULL);
    /* An expression that does not compile leaves the destination buffer untouched. */
    if(!reg)
        return;
    //AAA optional, compare;
    if(booleanAttributeWithDefault(expressionElement, @"clear", NO))
        [self setBuffer:dest-1 toString:nil];
    BOOL repeat = booleanAttributeWithDefault(expressionElement, @"repeat", NO);
    bufferBooleanAttributeWithDefault(expressionElement, @"noclean", YES, clean);
    bufferBooleanAttributeWithDefault(expressionElement, @"trim", NO, trim);
    /* The attribute cannot change while matching, so read it once instead of per match. */
    int compare = integerAttributeWithDefault(expressionElement, @"compare", -1);

    NSMutableString *result = [@"" mutableCopy];
    int match[30];
    int offset = 0;
    const char *inputStr = [input UTF8String];
    int inputLen = (int)strlen(inputStr);
    int matchCount = 0;
    int i;
    [self clearStorchMatches];
    for(;;)
    {
        /* pcre_exec() leaves entries for groups the pattern does not have untouched; start
           from -1 ("unset") so stale stack contents are never mistaken for offsets. */
        for(i = 0; i < 30; i++)
            match[i] = -1;
        matchCount = pcre_exec(reg, NULL, inputStr, inputLen, offset, 0, match, 30);
        if(matchCount < 0)
            break;
        /* 0 means the pattern has more groups than the vector holds; the first ten pairs
           (two thirds of the vector) are still valid. */
        if(matchCount == 0)
            matchCount = 10;

        BOOL addToResult = YES;
        NSString *replacementString = [self replacementStrForOutput:output inputStr:inputStr matches:match count:matchCount];
        if(compare != -1)
        {
            /* With "compare" set, a match is kept only if its text contains the content of
               the referenced buffer (an empty or missing buffer keeps everything). */
            NSString *searchStr = nil;
            if(compare > 0 && compare <= 20)
                searchStr = scraperBuffers[compare -1];
            if([searchStr length] && [[replacementString lowercaseString] rangeOfString:searchStr].location == NSNotFound)
                addToResult = NO;
        }
        if(addToResult)
            [result appendString:replacementString];
        if(!repeat)
            break;

        /* Continue after this match. An empty match ends where it starts, so step one byte
           further; otherwise the same empty match would be found again forever. */
        int nextOffset = match[1];
        if(match[1] == match[0])
            nextOffset++;
        if(nextOffset <= offset || nextOffset > inputLen)
            break;
        offset = nextOffset;
    }
    pcre_free(reg);

    NSString *final = result;
    if(append)
    {
        NSString *orig = scraperBuffers[dest - 1];
        if(orig != nil)
            final = [orig stringByAppendingString:final];
    }
    /* An empty result leaves the destination buffer as it is. */
    if([final length])
        [self setBuffer:dest-1
toString:final];
    [result release];
}

/*
 * Evaluates a "conditional" attribute: looks `condition` up in the settings dictionary and
 * returns its -boolValue. A leading '!' is stripped and inverts the result.
 * The caller must pass a non-empty string, because the first character is read unconditionally.
 */
- (BOOL)checkCondition:(NSString *)condition
{
    BOOL inverse = NO;
    if([condition characterAtIndex:0] == '!')
    {
        inverse = YES;
        condition = [condition substringFromIndex:1];
    }
    id value = [settings objectForKey:condition];
    BOOL ret = [value boolValue];
    if(inverse)
        ret = !ret;
    return ret;
}

/*
 * Processes one RegExp element inside its own autorelease pool and returns the destination
 * buffer number (`result`).
 * NOTE(review): part of this method is damaged in this copy of the file (text is missing after
 * "for(i=0; i", including the declarations of `result` and `append`); the damaged line is
 * reproduced unchanged.
 */
- (int)parseElement:(NSXMLElement *)element
{
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    NSArray *regexChildren = nil;
    NSString *value = nil;
    NSString *conditional = nil;
    regexChildren = [element elementsForName:@"RegExp"];
    int count = [regexChildren count];
    if(count) { int i; for(i=0; i 1 && [value characterAtIndex:1] == '+') append = YES;
    conditional = [[element attributeForName:@"conditional"] stringValue];
    if([conditional length] && ![self checkCondition:conditional])
    {
        /* The pool created above must be drained on this path too; previously this early
           return left it undrained. */
        [pool drain];
        return result;
    }
    /* Without an "input" attribute the expression runs against the first buffer. */
    NSString *input = [[element attributeForName:@"input"] stringValue];
    if(input)
        input = [self substituteBuffersIntoInput:input];
    else
        input = scraperBuffers[0];
    [self parseExpression:element withInput:input intoDest:result andAppend:append];
    [pool drain];
    return result;
}

/*
 * Runs the scraper function named `function`: uses the first child of the root element with
 * that name and walks its RegExp children. Returns nil when the root has no such element.
 * NOTE(review): the remainder of this method lies beyond the visible end of this chunk.
 */
- (NSString *)parseFunction:(NSString *)function
{
    NSArray *elements = [root elementsForName:function];
    if(![elements count])
        return nil;
    NSXMLElement *functionElement = [elements objectAtIndex:0];
    elements = [functionElement elementsForName:@"RegExp"];
    int count = [elements count], i;
    for(i=0; i