iOS - 小说阅读器分章节,支持正则分章节和按字数分章节

最近做了一个WIFI传书本地阅读功能,有所收获在这里记录下吧。

 

用户下载的书籍分为两种:一种是有章节格式的,比如“第一章”、“001章”等,这种可以直接用正则来分章节;还有绝大多数书籍是没有这种格式的,这种如果整本书直接解析的话,对 CPU 的压力比较大,可能会卡死、闪退,所以手动分章节还是很有必要的。这种情况下我们按每两千字左右来分。

 

话不多说,开始吧。

 

1、WIFI传书把书传到APP沙盒里,这里我们采用的是 GCDWebServer ,很方便,这里就不做陈述了。

2、将沙盒里面的 .txt 文件转成 文本 ,这里的坑点也不少,我们专门写了一个 NSStringEncoding 解码的算法来转文字,可以解析多种编码方式的文本,这种算法只能适配iOS11及以上系统,其他系统只能采用系统UTF-8方法来解析,限制较多。

/**
 Reads a text file and converts its bytes to a string.

 @param url    File-system path of the text file (it is handed to
               +[NSData dataWithContentsOfFile:options:error:], so it is a path, not a URL string).
 @param result Callback that receives the decoded text. It is invoked synchronously.
 */
- (void)encodeWithURL:(NSString *)url result:(void (^)(NSString *content))result;                    

/**
 Reads the text file at `url` and passes its decoded contents to `result`.

 On iOS 11 and later the project's encoding-detecting decoder (`mc_autoString`) is tried first.
 When it yields nothing, or on older systems, the bytes are decoded as UTF-8 and
 Windows ("\r\n") and classic Mac ("\r") line endings are normalized to "\n".

 @param url    File-system path of the text file. An empty or nil path yields an empty string.
 @param result Callback invoked synchronously with the decoded text. It always receives a
               non-nil string: an unreadable file or undecodable bytes produce @"".
               A nil callback makes the call a no-op.
 */
- (void)encodeWithURL:(NSString *)url result:(void (^)(NSString *content))result {
    // Invoking a nil block crashes, and without a callback there is nobody to deliver the text to.
    if (result == nil) {
        return;
    }
    if (url.length == 0) {
        result(@"");
        return;
    }

    // A failed read leaves `data` nil; every message below then returns nil and we fall
    // through to the empty-string result.
    NSData *data = [NSData dataWithContentsOfFile:url options:NSDataReadingMappedIfSafe error:nil];

    NSString *content = nil;
    if (@available(iOS 11.0, *)) {
        content = data.mc_autoString;
    }

    if (content.length == 0) {
        // UTF-8 fallback, shared by the "auto-detection produced nothing" and "pre-iOS 11" paths.
        // "\r\n" must be replaced before the lone "\r", otherwise it would become two newlines.
        NSString *txt = data.utf8String;
        txt = [txt stringByReplacingOccurrencesOfString:@"\r\n" withString:@"\n"];
        content = [txt stringByReplacingOccurrencesOfString:@"\r" withString:@"\n"];
    }

    // Match the empty-path case above: callers get @"" rather than nil when decoding fails.
    result(content ?: @"");
}

 

3、文本拿到之后开始分章节吧

 

 // Splits the text of a book into chapters.
 //
 // content: the full text of the book.
 // result:  callback invoked synchronously with an array of dictionaries, each holding the
 //          keys @"title" and @"content". Chapter headings found by a regular expression are
 //          used when at least 100 of them match; otherwise the text is cut at line
 //          boundaries into pieces of roughly 2000 characters.
- (void)separateChapterContent:(NSString *)content result:(void (^)(NSArray *chapterArr))result;       

/**
 Splits the text of a book into chapters and passes them to `result`.

 Two strategies are used:
 1. Heading based: when the heading pattern (e.g. "第一章 …", "第001回 …") matches at least
    100 times, each match starts a chapter whose title is the matched heading line. Any text
    before the first heading becomes a leading chapter titled "序章".
 2. Length based: otherwise the text is cut at line boundaries into pieces of at least 2000
    UTF-16 code units, titled "第1章", "第2章", … A single line is never split.

 @param content The full text of the book. nil is treated like an empty string.
 @param result  Callback invoked synchronously with an array of dictionaries, each holding
                the keys @"title" and @"content" (both NSString). A nil callback makes the
                call a no-op.
 */
- (void)separateChapterContent:(NSString *)content result:(void (^)(NSArray *chapterArr))result {
    // Fewer heading matches than this are assumed to be incidental hits inside the body text.
    static const NSUInteger kMinimumHeadingCount = 100;
    // Target chapter size, in UTF-16 code units, for books without usable headings.
    static const NSUInteger kFallbackChapterLength = 2000;

    // Invoking a nil block crashes, and without a callback there is nobody to deliver to.
    if (result == nil) {
        return;
    }
    // Guard against nil so the regex engine and the substring calls always get a real string.
    NSString *text = content ?: @"";
    NSMutableArray *chapters = [[NSMutableArray alloc] init];

    NSString *pattern = @"第[0-9一二三四五六七八九十百千]*[章回].*";
    // The pattern is a compile-time constant; should it ever fail to compile, `reg` is nil,
    // no matches are reported and the length-based fallback below is used.
    NSRegularExpression *reg = [NSRegularExpression regularExpressionWithPattern:pattern
                                                                         options:NSRegularExpressionCaseInsensitive
                                                                           error:nil];
    NSArray<NSTextCheckingResult *> *matches = [reg matchesInString:text
                                                            options:NSMatchingReportCompletion
                                                              range:NSMakeRange(0, text.length)];

    if (matches.count >= kMinimumHeadingCount) {
        // Everything before the first heading is the preface.
        NSUInteger firstHeading = matches.firstObject.range.location;
        [chapters addObject:@{@"title": @"序章",
                              @"content": [text substringToIndex:firstHeading]}];

        // Each chapter runs from its own heading up to the next heading (or the end of the text).
        for (NSUInteger idx = 0; idx < matches.count; idx++) {
            NSRange heading = matches[idx].range;
            NSUInteger end = (idx + 1 < matches.count) ? matches[idx + 1].range.location : text.length;
            NSRange body = NSMakeRange(heading.location, end - heading.location);
            [chapters addObject:@{@"title": [text substringWithRange:heading],
                                  @"content": [text substringWithRange:body]}];
        }
    } else {
        // No usable headings: accumulate whole lines until a chunk reaches the target length.
        NSArray<NSString *> *lines = [text componentsSeparatedByString:@"\n"];
        NSUInteger lastIndex = lines.count - 1;   // componentsSeparatedByString: never returns an empty array
        NSUInteger chunkStart = 0;                // offset of the current chunk inside `text`
        NSUInteger chunkLength = 0;               // length accumulated for the current chunk

        for (NSUInteger i = 0; i < lines.count; i++) {
            chunkLength += lines[i].length;
            if (i != lastIndex) {
                chunkLength += 1;                 // the "\n" removed by the split
            }

            BOOL chunkIsFull = chunkLength >= kFallbackChapterLength;
            // Flush the remainder on the last line, but only when there is something left;
            // otherwise a text ending in "\n" right after a flush would gain an empty chapter.
            BOOL isRemainder = (i == lastIndex) && chunkLength > 0;
            if (chunkIsFull || isRemainder) {
                NSString *title = [NSString stringWithFormat:@"第%lu章", (unsigned long)(chapters.count + 1)];
                NSString *body = [text substringWithRange:NSMakeRange(chunkStart, chunkLength)];
                [chapters addObject:@{@"title": title, @"content": body}];
                chunkStart += chunkLength;
                chunkLength = 0;
            }
        }

        // An empty book still yields one (empty) chapter, as it always has.
        if (chapters.count == 0) {
            [chapters addObject:@{@"title": [NSString stringWithFormat:@"第%lu章", (unsigned long)1],
                                  @"content": @""}];
        }
    }

    result(chapters);
}

 

4、章节分好之后就存入本地数据库,然后传入阅读器解析阅读吧。

 

注:补充下文本转文字的算法吧

 

第一种,如果文本是以utf-8来编码的

NSData+Addition.h

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/**
 Convenience helpers on NSData for text decoding, Base64, JSON parsing and hashing.
 */
@interface NSData (Addition)

#pragma mark - Encode and Decode

/**
 Decodes the receiver's bytes as UTF-8 text.

 @return The decoded string, @"" when the receiver is empty, or nil when the bytes
         cannot be decoded as UTF-8.
 */
- (nullable NSString *)utf8String;

/**
 Encodes the receiver's bytes as Base64 text.

 @return The Base64 string, @"" when the receiver is empty, or nil when the
         working buffer cannot be allocated.
 */
- (nullable NSString *)base64EncodedString;

/**
 Decodes Base64 text into data.

 @param base64EncodedString The encoded string.
 @return The decoded bytes, or nil when the string cannot be represented as ASCII.
 */
+ (nullable NSData *)dataWithBase64EncodedString:(NSString *)base64EncodedString;

/**
 Parses the receiver as JSON.

 @param error On failure, receives the error reported by NSJSONSerialization.
 @return The parsed object, or nil when parsing fails.
 */
- (nullable id)jsonValueDecodedWithError:(NSError **)error;

#pragma mark - Hash

/**
 Computes the MD5 digest of the receiver's bytes.

 @return The digest as a 32-character lowercase hexadecimal string.
 */
- (NSString *)md5String;

@end

NS_ASSUME_NONNULL_END

NSData+Addition.m

#import "NSData+Addition.h"
#include <CommonCrypto/CommonCrypto.h>

@implementation NSData (Addition)

#pragma mark - Encode and Decode

// The standard Base64 alphabet, indexed by 6-bit value.
static const char base64EncodingTable[64]
= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Reverse lookup, indexed by byte value: entries >= 0 are the 6-bit value of a Base64
// character, -1 marks whitespace and -2 marks any other character that is not part of
// the alphabet.
static const short base64DecodingTable[256] = {
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2,  -1,  -1, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62,  -2,  -2, -2, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2,  -2,  -2, -2, -2,
    -2, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,  11,  12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2,  -2,  -2, -2, -2,
    -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,  37,  38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,  -2,  -2, -2, -2
};

/**
 Decodes the receiver's bytes as UTF-8 text.

 @return The decoded string, @"" when the receiver is empty, or nil when the bytes
         are not valid UTF-8.
 */
- (NSString *)utf8String {
    if (self.length > 0) {
        return [[NSString alloc] initWithData:self encoding:NSUTF8StringEncoding];
    }
    return @"";
}

/**
 Encodes the receiver's bytes as padded Base64 text without line breaks.

 @return The Base64 string, @"" when the receiver is empty, or nil when the
         output buffer cannot be allocated.
 */
- (NSString *)base64EncodedString {
    NSUInteger length = self.length;
    if (length == 0) {
        return @"";
    }

    // Every group of up to 3 input bytes becomes exactly 4 output characters.
    NSUInteger outputLength = ((length + 2) / 3) * 4;
    uint8_t *output = malloc(outputLength);
    if (output == NULL) {
        return nil;
    }

    const uint8_t *input = self.bytes;
    for (NSUInteger i = 0; i < length; i += 3) {
        // Pack up to three bytes into one 24-bit group; bytes past the end stay zero.
        uint32_t value = 0;
        for (NSUInteger j = i; j < i + 3; j++) {
            value <<= 8;
            if (j < length) {
                value |= input[j];
            }
        }

        NSUInteger index = (i / 3) * 4;
        output[index + 0] = base64EncodingTable[(value >> 18) & 0x3F];
        output[index + 1] = base64EncodingTable[(value >> 12) & 0x3F];
        // Positions that would encode only missing input bytes are emitted as '=' padding.
        output[index + 2] = ((i + 1) < length) ? base64EncodingTable[(value >> 6) & 0x3F] : '=';
        output[index + 3] = ((i + 2) < length) ? base64EncodingTable[value & 0x3F] : '=';
    }

    NSString *base64 = [[NSString alloc] initWithBytes:output
                                                length:outputLength
                                              encoding:NSASCIIStringEncoding];
    free(output);
    return base64;
}

/**
 Decodes Base64 text into data. Trailing '=' padding is optional.

 @param base64EncodedString The encoded string.
 @return The decoded bytes (empty data for an empty string), or nil when the input is not
         valid Base64: it cannot be represented as ASCII, it contains a character outside
         the Base64 alphabet (whitespace included), or its unpadded length leaves a single
         dangling character.
 */
+ (NSData *)dataWithBase64EncodedString:(NSString *)base64EncodedString {
    const char *string = [base64EncodedString cStringUsingEncoding:NSASCIIStringEncoding];
    if (string == NULL) {
        return nil;
    }

    // For a string that converts to ASCII, the C string has one byte per UTF-16 unit.
    NSUInteger length = base64EncodedString.length;
    while (length > 0 && string[length - 1] == '=') {
        length--;
    }

    // A single leftover character carries only 6 bits and cannot form a byte. Decoding it
    // anyway would write one byte past the end of the output buffer.
    if (length % 4 == 1) {
        return nil;
    }
    // Reject anything outside the alphabet up front instead of decoding it to garbage.
    for (NSUInteger k = 0; k < length; k++) {
        if (base64DecodingTable[(unsigned char)string[k]] < 0) {
            return nil;
        }
    }

    NSUInteger outputLength = length * 3 / 4;
    NSMutableData *data = [NSMutableData dataWithLength:outputLength];
    if (data == nil) {
        return nil;
    }
    if (length == 0) {
        return data;
    }

    uint8_t *output = data.mutableBytes;
    NSUInteger inputPoint = 0;
    NSUInteger outputPoint = 0;
    while (inputPoint < length) {
        // The length check above guarantees every group has at least two characters;
        // a missing third or fourth character contributes zero bits.
        uint32_t v0 = (uint32_t)base64DecodingTable[(unsigned char)string[inputPoint++]];
        uint32_t v1 = (uint32_t)base64DecodingTable[(unsigned char)string[inputPoint++]];
        uint32_t v2 = inputPoint < length
            ? (uint32_t)base64DecodingTable[(unsigned char)string[inputPoint++]] : 0;
        uint32_t v3 = inputPoint < length
            ? (uint32_t)base64DecodingTable[(unsigned char)string[inputPoint++]] : 0;

        output[outputPoint++] = (uint8_t)((v0 << 2) | (v1 >> 4));
        if (outputPoint < outputLength) {
            output[outputPoint++] = (uint8_t)(((v1 & 0xF) << 4) | (v2 >> 2));
        }
        if (outputPoint < outputLength) {
            output[outputPoint++] = (uint8_t)(((v2 & 0x3) << 6) | v3);
        }
    }

    return data;
}

/**
 Parses the receiver as JSON using NSJSONSerialization with no options.

 @param error On failure, receives the error reported by NSJSONSerialization.
 @return The parsed object, or nil when parsing fails.
 */
- (nullable id)jsonValueDecodedWithError:(NSError **)error {
    return [NSJSONSerialization JSONObjectWithData:self options:kNilOptions error:error];
}

#pragma mark - Hash

/**
 Computes the MD5 digest of the receiver's bytes.

 @return The digest as a 32-character lowercase hexadecimal string.
 */
- (NSString *)md5String {
    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5(self.bytes, (CC_LONG)self.length, digest);

    NSMutableString *hex = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
    for (NSUInteger i = 0; i < CC_MD5_DIGEST_LENGTH; i++) {
        [hex appendFormat:@"%02x", digest[i]];
    }
    return [hex copy];
}

@end

 

第二种算法先不发了……

 

iOS - 小说阅读器分章节,支持正则分章节和按字数分章节

上一篇:TCP连接与HTTP请求


下一篇:Mybaits 源码解析 (五)----- 面试源码系列:Mapper接口底层原理(为什么Mapper不用写实现类就能访问到数据库?)