// TF-IDF based multiple-choice answering tool.
//
// For every question file the program
//   1. builds a vocabulary from the article text (everything before the first
//      token starting with '*'), ignoring stop words,
//   2. counts in how many corpus documents each vocabulary word occurs,
//   3. computes a TF-IDF weight per word and writes the table to a report file,
//   4. boosts the weight of words in the first and last paragraph
//      (paragraphs are delimited by '#' tokens),
//   5. scores the four options A.-D. by summing the weights of their words and
//      compares the best option with the answer stored in the question file.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <algorithm>
#include <iostream>

#ifdef _WIN32
#include <windows.h>
#endif

using namespace std;

#define N 1517   // number of corpus documents
#define textN 6  // number of question files
#ifdef ERROR     // <windows.h> may already define ERROR
#undef ERROR
#endif
#define ERROR 1
#define OK 0

const int WORD_LENGTH = 30;      // maximum length of one word, including the terminator
const int MAX_STOP_WORDS = 900;  // capacity of the stop-word table
const int MAX_WORDS = 1000;      // capacity of the per-article vocabulary
const int OPTION_COUNT = 4;      // options A. to D.

char temp[WORD_LENGTH];  // scratch buffer holding the token currently being processed

typedef struct Node
{
    char word[WORD_LENGTH] = { '\0' };
    int time = 0;       // occurrences in the article
    int textnum = 0;    // number of corpus documents containing the word
    double weight = 0;  // TF-IDF weight
} wordNode, wordLink;

char Libword[MAX_STOP_WORDS][WORD_LENGTH] = { 0 };  // stop-word table
int wordleng = 0;                                    // number of entries used in Libword
wordNode sumWord[MAX_WORDS];                         // vocabulary of the current article
int wordNum = 0;                                     // number of distinct words in sumWord
int sumWordNum = 0;                                  // total number of tokens in the article
double score[OPTION_COUNT] = { 0 };                  // score of each option

// Reads the next whitespace-delimited token into `temp`.
// The field width keeps over-long tokens from overflowing the buffer; the
// remainder of such a token is returned by the following call.
static bool readToken(FILE *fp)
{
    static_assert(WORD_LENGTH == 30, "keep the %29s width in sync with WORD_LENGTH");
    return fscanf(fp, "%29s", temp) == 1;
}

// Converts ASCII upper-case letters to lower case in place; other bytes are untouched.
static void lowerAscii(char *word)
{
    for (size_t i = 0; word[i] != '\0'; i++) {
        if (word[i] >= 'A' && word[i] <= 'Z')
            word[i] = static_cast<char>(word[i] - 'A' + 'a');
    }
}

// Returns the index of `word` among the first `count` vocabulary entries, or -1.
static int findWord(const char *word, int count)
{
    for (int i = 0; i < count; i++) {
        if (strcmp(sumWord[i].word, word) == 0)
            return i;
    }
    return -1;
}

// Opens `path` for reading; reports the failure and terminates the program if
// the file cannot be opened.
static FILE *openOrExit(const char *path)
{
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        printf("%s文件读取失败!", path);
        system("pause");
        exit(1);
    }
    return fp;
}

// Clears all per-article state so that one question cannot influence the next.
static void resetArticleState()
{
    std::fill(sumWord, sumWord + MAX_WORDS, wordNode());
    wordNum = 0;
    sumWordNum = 0;
    std::fill(score, score + OPTION_COUNT, 0.0);
}

// Loads the stop-word list, one word per line, appending to `Libword`.
//
// name:   path of the stop-word file.
// memory: kept for interface compatibility; entries are always stored in the
//         global `Libword`, which is what wordCmpStop() consults.
// Returns the number of stop words now stored (also left in `wordleng`).
// Blank lines are skipped, lines that do not fit into an entry are dropped,
// and loading stops silently once the table is full. Terminates the program
// if the file cannot be opened.
int DoLibStop(const char *name, char memory[][WORD_LENGTH])
{
    (void)memory;
    FILE *cp = openOrExit(name);

    char line[WORD_LENGTH];
    int len = 0;
    bool tooLong = false;
    for (;;) {
        const int ch = fgetc(cp);  // int, so EOF is distinguishable from data bytes
        if (ch != EOF && ch != '\n' && ch != '\r') {
            if (len < WORD_LENGTH - 1)
                line[len++] = static_cast<char>(ch);
            else
                tooLong = true;
            continue;
        }

        // End of line (CR, LF or CRLF) or end of file: commit the pending entry.
        if (len > 0 && !tooLong && wordleng < MAX_STOP_WORDS) {
            line[len] = '\0';
            strcpy(Libword[wordleng], line);
            wordleng++;
        }
        len = 0;
        tooLong = false;
        if (ch == EOF)
            break;
    }

    fclose(cp);
    return wordleng;
}

// Removes every byte that occurs in the special-character set (punctuation and
// digits) from `word`, in place.
void wordDelSpe(char word[])
{
    const char *specialChar = ",.;:'“”?!><+=|*&^%$#@\"[](){}0123456789";
    size_t out = 0;
    for (size_t in = 0; word[in] != '\0'; in++) {
        if (strchr(specialChar, word[in]) == NULL)
            word[out++] = word[in];
    }
    word[out] = '\0';
}

// Lower-cases `word` in place and reports whether it is a stop word.
// Callers rely on the lower-casing side effect.
bool wordCmpStop(char *word)
{
    lowerAscii(word);
    for (int i = 0; i < wordleng; i++) {
        if (strcmp(word, Libword[i]) == 0)
            return true;
    }
    return false;
}

// Records one occurrence of `word` in the vocabulary.
//
// wordnum is the current number of vocabulary entries and is incremented when
// a new word is added. The total token count is incremented in every case.
// When the vocabulary is full, new words are counted but not stored.
void wordSearch(char *word, int &wordnum)
{
    sumWordNum += 1;

    const int found = findWord(word, wordnum);
    if (found >= 0) {
        sumWord[found].time++;
        return;
    }
    if (wordnum < 0 || wordnum >= MAX_WORDS)
        return;

    wordNode &slot = sumWord[wordnum];
    slot = wordNode();
    strncpy(slot.word, word, WORD_LENGTH - 1);
    slot.word[WORD_LENGTH - 1] = '\0';
    slot.time = 1;
    wordnum++;
}

// Builds the vocabulary from the article part of a question file, i.e. all
// tokens before the first token that starts with '*'.
void doArticle(const char *file0)
{
    FILE *file = openOrExit(file0);
    while (readToken(file)) {
        if (temp[0] == '*')  // the question section starts here
            break;
        wordDelSpe(temp);
        // Stop words, and tokens that consisted only of punctuation/digits,
        // count towards the article length but do not enter the vocabulary.
        if (temp[0] == '\0' || wordCmpStop(temp)) {
            sumWordNum += 1;
            continue;
        }
        wordSearch(temp, wordNum);
    }
    fclose(file);
}

// Copies node2 into node1.
void copyNode(wordNode &node1, const wordNode &node2)
{
    strcpy(node1.word, node2.word);
    node1.time = node2.time;
    node1.textnum = node2.textnum;
    node1.weight = node2.weight;
}

// Sorts the vocabulary by descending weight; words of equal weight keep their
// relative order.
void sortWord()
{
    std::stable_sort(sumWord, sumWord + wordNum,
                     [](const wordNode &a, const wordNode &b) { return a.weight > b.weight; });
}

// For every vocabulary word, counts the number of corpus documents that
// contain it (stored in `textnum`). Documents that cannot be opened are
// reported and skipped.
void fileCount(char file[N][50])
{
    const int known = wordNum < MAX_WORDS ? wordNum : MAX_WORDS;
    for (int i = 0; i < N; i++) {
        FILE *f = fopen(file[i], "r");
        if (!f) {
            printf("%s文件读取失败!", file[i]);
            continue;
        }

        bool seen[MAX_WORDS] = { false };  // each document counts once per word
        while (readToken(f)) {
            wordDelSpe(temp);
            lowerAscii(temp);  // vocabulary words are stored lower-cased
            const int j = findWord(temp, known);
            if (j >= 0 && !seen[j]) {
                seen[j] = true;
                sumWord[j].textnum++;
            }
        }
        fclose(f);
    }
}

// Computes weight = (time / sumWordNum) * log(N / (textnum + 1)) for the first
// `wordNum` entries of `sumWord`. Does nothing when no tokens were counted.
void calWeight(wordNode *sumWord, int wordNum)
{
    if (sumWordNum <= 0)
        return;
    for (int i = 0; i < wordNum; i++) {
        const double tf = sumWord[i].time * 1.0 / sumWordNum;
        const double idf = log((N * 1.0) / (sumWord[i].textnum + 1));
        sumWord[i].weight = tf * idf;
    }
}

// Returns the number of decimal digits of n, capped at 4.
int numWei(int n)
{
    if (n / 10 == 0)
        return 1;
    else if (n / 100 == 0)
        return 2;
    else if (n / 1000 == 0)
        return 3;
    else
        return 4;
}

// Fills names[0..n-1] with "<prefix><index>.txt", index starting at 1.
static void makeNumberedNames(char names[][50], int n, const char *prefix)
{
    for (int i = 0; i < n; i++)
        snprintf(names[i], sizeof(names[i]), "%s%d.txt", prefix, i + 1);
}

// Generates the corpus document paths.
void fileNameMake(char file[][50], int n)
{
    makeNumberedNames(file, n, "fileLib\\\\txt");
}

// Generates the report file paths.
void saveNameMake(char savefile[][50], int n)
{
    makeNumberedNames(savefile, n, "savefile\\\\save");
}

// Generates the question file paths.
void textNameMake(char textfile[][50], int n)
{
    makeNumberedNames(textfile, n, "textfile\\\\text");
}

// Scores the options of one question file and reads its reference answer.
//
// The section between the first and the second '*' token holds the question.
// The tokens "A.", "B.", "C." and "D." each start the next option; the weight
// of every vocabulary word that follows is added to that option's score.
// answers[k] receives the second character of the token following the second
// '*' token ('\0' if there is none); answerNum is incremented.
void scoreArticle(const char *file0, int k, char *answers, int &answerNum)
{
    FILE *file = openOrExit(file0);

    // Skip the article itself.
    while (readToken(file) && temp[0] != '*') {
    }

    int option = -1;  // no option seen yet
    while (readToken(file)) {
        if (temp[0] == '*')
            break;
        if (!strcmp(temp, "A.") || !strcmp(temp, "B.") || !strcmp(temp, "C.") || !strcmp(temp, "D.")) {
            option++;
            continue;
        }
        // Question stem before "A.", or text after a surplus option marker:
        // there is no score slot for it.
        if (option < 0 || option >= OPTION_COUNT)
            continue;

        wordDelSpe(temp);
        lowerAscii(temp);
        const int idx = findWord(temp, wordNum);
        if (idx >= 0)
            score[option] += sumWord[idx].weight;
    }

    answers[k] = readToken(file) ? temp[1] : '\0';
    answerNum++;
    fclose(file);
}

// Strips and lower-cases `token`, then multiplies the weight of the matching
// vocabulary word (if any) by 1.5.
static void boostWeight(char *token)
{
    wordDelSpe(token);
    lowerAscii(token);
    const int idx = findWord(token, wordNum);
    if (idx >= 0)
        sumWord[idx].weight *= 1.5;
}

// Raises the weight of words by position: every occurrence in the first
// paragraph (up to a token ending in '#') and in the last paragraph (after the
// next token starting with '#', up to a token starting with '*') multiplies
// the word's weight by 1.5.
void doArticleLocal(const char *file0)
{
    FILE *file = openOrExit(file0);

    // First paragraph.
    while (readToken(file)) {
        if (temp[strlen(temp) - 1] == '#')
            break;
        boostWeight(temp);
    }

    // Skip the middle of the article.
    while (readToken(file) && temp[0] != '#') {
    }

    // Last paragraph.
    while (readToken(file)) {
        if (temp[0] == '*')
            break;
        boostWeight(temp);
    }

    fclose(file);
}

// Processes one question end to end.
//
// file0:      question file.
// file:       corpus document paths (N entries).
// savefile:   report file receiving the word table.
// id:         zero-based question index; also the slot used in `answers`.
// answers:    receives the reference answer of this question.
// answerNum:  incremented once.
// correctNum: incremented when the computed answer matches the reference.
void doArticleAll(const char *file0, char file[][50], const char *savefile, int id, char *answers, int &answerNum, int &correctNum)
{
    resetArticleState();          // vocabulary and scores belong to one question only

    doArticle(file0);             // build the vocabulary
    fileCount(file);              // document frequencies
    calWeight(sumWord, wordNum);  // TF-IDF weights
    sortWord();                   // descending by weight

    // The report shows the weights before the positional adjustment.
    bool saved = false;
    FILE *p = fopen(savefile, "w");
    if (p != NULL) {
        fprintf(p, " word \t词频\t文章数\t权重\n");
        fprintf(p, "本文共%d个词,%d个不重复词\n", sumWordNum, wordNum);
        for (int i = 0; i < wordNum; i++) {
            fprintf(p, "%-16s\t%d\t%d\t%f\n", sumWord[i].word, sumWord[i].time, sumWord[i].textnum, sumWord[i].weight);
        }
        fclose(p);
        saved = true;
    } else {
        printf("%s文件写入失败!\n", savefile);
    }

    doArticleLocal(file0);  // positional weight adjustment
    scoreArticle(file0, id, answers, answerNum);

    if (saved)
        std::cout << "第" << id + 1 << "题结果成功输出到文件:" << savefile << endl;
    std::cout << "成功得到结果:" << endl;
    for (int h = 0; h < OPTION_COUNT; h++) {
        cout << score[h] << endl;
    }

    int best = 0;
    for (int k = 1; k < OPTION_COUNT; k++) {
        if (score[k] > score[best])
            best = k;
    }
    const char ans = static_cast<char>('A' + best);

    cout << "答案是:" << ans;
    if (answers[id] == ans) {
        cout << " 正确" << endl;
        correctNum++;
    } else {
        cout << " 错误 (正确答案为:" << answers[id] << ")" << endl;
    }
    cout << endl << "-----------------------------" << endl;
}

// Answers all question files and prints the accuracy and the elapsed time.
int main(int n, char *arg[])
{
    (void)n;
    (void)arg;

    char answers[textN] = { '\0' };
    int answerNum = 0;
    int correctNum = 0;
    char textfile[textN][50] = { '\0' };  // question files
    char savefile[textN][50] = { '\0' };  // report files
    static char file[N][50];              // corpus documents
    const char *LibStop = "stopLib\\stop.txt";  // stop-word list

    const clock_t start0 = clock();

    fileNameMake(file, N);
    textNameMake(textfile, textN);
    saveNameMake(savefile, textN);
    cout << endl;

    wordleng = DoLibStop(LibStop, Libword);

    for (int k = 0; k < textN; k++)
        doArticleAll(textfile[k], file, savefile[k], k, answers, answerNum, correctNum);

    const double corretPersent = correctNum * 100.0 / textN;  // accuracy in percent
    printf("共%d篇文章,正确率为%.2f%%\n", textN, corretPersent);

    const clock_t finish0 = clock();
    const double sftime0 = (double)(finish0 - start0) / CLOCKS_PER_SEC;
    std::cout << endl << "共用时间:" << sftime0 << "秒." << endl;
    system("pause");
    return 0;
}