https://github.com/xiaojiong/scanfile
演示站点: http://www.weigongkai.com/ 7G数据 2s完成扫描
package scanfile

/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// IndexStr returns the offset (relative to haystack) of the first occurrence
// of needle at or after haystack+begin, or -1 when there is none.
int IndexStr(char *haystack, char *needle, unsigned int begin)
{
	char *p = strstr(haystack + begin, needle);
	if (p)
		return p - haystack;
	return -1;
}

// IndexChar returns the offset of the first occurrence of c at or after
// haystack+begin, or -1 when there is none. The offset is relative to
// haystack+begin, NOT to haystack.
int IndexChar(char *haystack, char c, unsigned int begin)
{
	char *p = haystack = haystack + begin;
	while (*p != '\0') {
		if (*p == c)
			return p - haystack;
		++p;
	}
	return -1;
}

// LastIndexChar scans backwards from offset begin (inclusive) and returns the
// offset (relative to haystack) of the nearest occurrence of c, or -1 when
// there is none. begin == 0 means "start from the last character".
int LastIndexChar(char *haystack, char c, unsigned int begin)
{
	size_t len = strlen(haystack);
	if (len == 0)
		return -1; // nothing to scan; also avoids len - 1 wrapping around
	if (begin > 0) {
		if (begin > len)
			return -1;
	} else {
		begin = (unsigned int)(len - 1);
	}
	haystack += begin;
	while (1) {
		if (*haystack == c)
			return (int)begin;
		if (begin == 0)
			return -1;
		--haystack;
		--begin;
	}
	return -1;
}
*/
import "C"

import "unsafe"

// strScan returns every line of *str that contains *key, in order of
// appearance. A line is the text between two line feeds (or the start/end of
// *str); the line feeds themselves are not included in the result.
//
// counter is consulted before each line is recorded: once counter.IsMax()
// reports true the scan stops, and counter.Add() is called once per recorded
// line.
//
// Both strings are copied into C memory for the search, so anything after an
// embedded NUL byte in *str or *key is not seen by the C helpers.
func strScan(str *string, key *string, counter *Counter) []string {
	cStr := C.CString(*str)
	defer C.free(unsafe.Pointer(cStr))
	cKey := C.CString(*key)
	defer C.free(unsafe.Pointer(cKey))

	var res []string
	begin := 0
	for {
		index := int(C.IndexStr(cStr, cKey, C.uint(begin)))
		if index == -1 {
			break
		}

		// Start of the matching line: one past the previous line feed.
		startIndex := 0
		if index > 0 {
			if pos := int(C.LastIndexChar(cStr, '\n', C.uint(index))); pos != -1 {
				startIndex = pos + 1
			}
		}

		// End of the matching line: the next line feed. IndexChar reports
		// offsets relative to index, hence the addition.
		endIndex := len(*str)
		if pos := int(C.IndexChar(cStr, '\n', C.uint(index))); pos != -1 {
			endIndex = pos + index
		}

		// The match begins on a line feed itself (empty key, or a key that
		// starts with a line feed). There is no enclosing line to report and
		// slicing would panic, so step past it and keep searching.
		if startIndex > endIndex {
			begin = index + 1
			continue
		}

		// Resume after this line so each line is reported at most once.
		begin = endIndex

		if counter.IsMax() {
			break
		}
		res = append(res, (*str)[startIndex:endIndex])
		counter.Add()

		if begin == len(*str) {
			break
		}
	}
	return res
}
package scanfile

import (
	"io"
	"os"
	"sync"
)

var (
	// LineFeed is the byte that marks the end of a line of text.
	LineFeed = byte('\n')

	// BufSize is the size in bytes of the buffer used for each file read.
	BufSize = 1024 * 1024
)

// Scan searches every file in files for *searchStr concurrently (one
// goroutine per file) and returns the combined result as produced by
// ScanResult.ToJson.
func Scan(files []string, searchStr *string) string {
	var result ScanResult

	// Counter shared by all files.
	// NOTE(review): 10 is presumably the maximum number of matches to
	// collect; confirm against InitCounter.
	counter := InitCounter(10)

	// Channel on which each ScanFile call delivers its result.
	out := make(chan *FileRes, 10)

	for _, name := range files {
		go ScanFile(name, searchStr, counter, out)
	}
	// Exactly one result arrives per file.
	for range files {
		result.AddFileRes(<-out)
	}

	result.AddCounter(counter)
	return result.ToJson()
}

// ScanFile searches a single file for *searchStr and sends the collected
// result on out exactly once.
//
// The file is read in chunks by a background reader; three workers consume
// the chunks and scan them with strScan. Scanning stops early once
// counter.IsMax() reports true.
func ScanFile(fileName string, searchStr *string, counter *Counter, out chan *FileRes) {
	fileContentChan := fileRead(fileName, counter)
	fileRes := InitFileRes(fileName)

	const workers = 3

	// The WaitGroup ensures all workers have finished before the result is
	// published, so no goroutine outlives this call.
	var wg sync.WaitGroup
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for text := range fileContentChan {
				if counter.IsMax() {
					// Drain whatever is still queued so the reader is never
					// left blocked on a send.
					clearFileContentChan(fileContentChan)
					return
				}
				// NOTE(review): fileRes.Add is called from several
				// goroutines; assumes FileRes.Add is safe for concurrent
				// use - confirm.
				for _, line := range strScan(text, searchStr, counter) {
					fileRes.Add(line)
				}
			}
		}()
	}
	wg.Wait()

	out <- fileRes
}

// clearFileContentChan discards everything sent on c until c is closed.
func clearFileContentChan(c chan *string) {
	for range c {
	}
}

// fileRead starts a goroutine that reads fileName in chunks of at most
// BufSize bytes and sends each chunk on the returned channel, which is closed
// when the file is exhausted or counter.IsMax() reports true.
//
// Each chunk ends on a line feed whenever the bytes read contain one, so
// lines are not split across chunks. When a read contains no line feed at all
// (an unterminated final line, or a line longer than BufSize) the bytes read
// are sent as-is so that the reader always makes progress.
//
// NOTE(review): failures to open or read the file panic inside the reader
// goroutine, which terminates the program; there is currently no channel for
// reporting errors to the caller.
func fileRead(fileName string, counter *Counter) chan *string {
	fileContentChan := make(chan *string, 5)

	go func() {
		defer close(fileContentChan)

		fh, err := os.Open(fileName)
		if err != nil {
			panic(err)
		}
		defer fh.Close()

		buf := make([]byte, BufSize)
		var start int64 // file offset of the next unread byte
		for !counter.IsMax() {
			// ReadAt reads at an explicit offset, so no Seek is needed to
			// re-read the partial line left over from the previous chunk.
			n, err := fh.ReadAt(buf, start)
			if err != nil && err != io.EOF {
				panic(err)
			}
			if n == 0 {
				break
			}

			// Only the first n bytes are valid; the rest of buf may hold
			// stale data from an earlier, longer read.
			end := lastByteIndex(buf[:n], LineFeed) + 1
			if end == 0 {
				// No line feed in this chunk: emit all of it rather than
				// re-reading the same bytes forever.
				end = n
			}

			content := string(buf[:end])
			start += int64(end)
			fileContentChan <- &content
		}
	}()

	return fileContentChan
}

// lastByteIndex returns the index of the last occurrence of sep in s, or -1
// if sep is not present.
func lastByteIndex(s []byte, sep byte) int {
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == sep {
			return i
		}
	}
	return -1
}