- Timestamp:
- 02/18/2010 12:28:57 PM (2 years ago)
- Files:
-
- 1 modified
Legend:
- Unmodified
- Added
- Removed
-
trunk/SapphireFrappliance/MetaDataImporting/SapphireScraper.m
r1133 r1141 315 315 NSString *cleanedString(NSString *str) 316 316 { 317 NSMutableString *mutStr = [[NSMutableString alloc] init]; 318 NSScanner *scanner = [NSScanner scannerWithString:str]; 319 [scanner setCharactersToBeSkipped:[NSCharacterSet characterSetWithCharactersInString:@""]]; 320 while(![scanner isAtEnd]) 321 { 322 NSString *append = nil; 323 [scanner scanUpToString:@"<" intoString:&append]; 324 if(append) 325 [mutStr appendString:append]; 326 NSString *tag = nil; 327 [scanner scanUpToString:@">" intoString:&tag]; 328 if([tag hasPrefix:@"<br"] && ([tag length] == 3 || [tag characterAtIndex:3] == ' ' || [tag characterAtIndex:3] == '/')) 329 [mutStr appendString:@"\n"]; 330 [scanner scanString:@">" intoString:nil]; 331 } 317 332 /*TV Rage doesn't understand that an & needs to be &amp; in the HTML, not just '&', so we have to work around yet another instance of their stupidity. Decoding entities and then re-encoding them seems to be the safest way to do this*/ 318 NSString *decoded = (NSString *)CFXMLCreateStringByUnescapingEntities(NULL, (CFStringRef) str, NULL); 333 NSString *decoded = (NSString *)CFXMLCreateStringByUnescapingEntities(NULL, (CFStringRef)mutStr, NULL); 319 334 NSString *reencoded = (NSString *)CFXMLCreateStringByEscapingEntities(NULL, (CFStringRef)decoded, NULL); 320 335 [decoded release]; 321 NSXMLDocument *doc = [[NSXMLDocument alloc] initWithXMLString:reencoded options:NSXMLDocumentTidyHTML error:nil]; 322 if(doc) 323 { 324 str = (NSString *)CFXMLCreateStringByEscapingEntities(NULL, (CFStringRef)[doc stringValue], NULL); 325 [str autorelease]; 326 [doc release]; 327 } 328 [reencoded release]; 329 return trimmedString(str); 336 [mutStr release]; 337 [reencoded autorelease]; 338 return trimmedString(reencoded); 330 339 } 331 340
