/*
 * 统计文件中各单词出现的频率(Hash表实现)
 * Count how often each word occurs in a file (hash-table implementation).
 */

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>

/*
 * Number of buckets in the word-frequency hash table (hash indices are
 * reduced modulo MAX). main() also reuses it as the size/limit of the
 * buffer that holds one input word.
 */
#define MAX 131

/*
 * One entry of the word-frequency table.
 *
 * The same record type is used both for the slots of the bucket array and
 * for the heap-allocated nodes chained behind a slot when several different
 * words hash to the same bucket.
 */
typedef struct HashTable HashTable;

struct HashTable
{
	char word[50];      /* the word, stored as a NUL-terminated string */
	int count;          /* occurrences seen so far; 0 marks an unused array slot */
	HashTable *next;    /* next entry sharing this bucket, or NULL at the end */
};

/*
 * The global word-frequency table: MAX bucket slots. As a file-scope object
 * it starts out zero-initialised; main() additionally resets it through
 * InitHash() before use.
 */
HashTable hash[MAX];

/*
 * HashIndex - map a NUL-terminated word to a bucket number.
 *
 * Adds up the byte values of str and reduces the total modulo MAX.
 *
 * Every byte is read as unsigned char and accumulated in an unsigned total.
 * With plain char (signed on many platforms) bytes above 0x7F would be
 * negative, the sum could go negative, and "sum % MAX" would then produce a
 * negative array index.
 *
 * str:    word to hash; must not be NULL.
 * return: bucket index in the range 0 .. MAX-1.
 */
int HashIndex(char *str)
{
	const unsigned char *p=(const unsigned char *)str;
	unsigned int sum=0;

	for(;*p;p++)
		sum+=*p;
	return (int)(sum%MAX);
}

/*
 * InitHash - put the first n entries of a table into the "empty" state.
 *
 * Each of those entries gets an occurrence count of 0 and a NULL chain
 * link. The word buffer of an entry is not touched.
 *
 * hash: table to reset (this parameter hides the file-level array of the
 *       same name inside the function).
 * n:    number of leading entries to reset; nothing happens when n <= 0.
 */
void InitHash(HashTable hash[],int n)
{
	HashTable *slot=hash;
	int left;

	for(left=n;left>0;left--,slot++)
	{
		slot->count=0;
		slot->next=NULL;
	}
}

/*
 * CTableNewNode - allocate a chain node that holds `word` with a count of 1.
 *
 * word must already fit into the node's word buffer (CTable clamps it
 * before calling). On allocation failure a message is printed and the
 * program exits, so the returned pointer is never NULL.
 */
static HashTable *CTableNewNode(const char *word)
{
	HashTable *node=malloc(sizeof *node);

	if(!node)
	{
		printf("No enough memory!\n");
		exit(-1);
	}
	node->count=1;
	node->next=NULL;
	strcpy(node->word,word);
	return node;
}

/*
 * CTable - record one occurrence of the word str in the global table.
 *
 * The bucket is chosen with HashIndex(). The first word that lands in a
 * bucket is stored in the array slot itself; further distinct words are
 * kept in heap nodes linked behind the slot, with the newest node placed
 * directly after the slot. If the word is already present, only its count
 * is incremented.
 *
 * Words longer than the entry's word buffer are clamped to the buffer size
 * minus one before hashing and comparing. Without that, copying the word
 * would write past the end of the buffer. A consequence is that two long
 * words sharing that prefix are counted as the same word.
 *
 * str: NUL-terminated word; a NULL pointer is ignored.
 */
void CTable(char *str)
{
	char key[sizeof hash[0].word];
	size_t len;
	HashTable *slot,*node;

	if(str==NULL)
		return;

	/* Build the (possibly shortened) key that is actually stored. */
	len=strlen(str);
	if(len>=sizeof key)
		len=sizeof key-1;
	memcpy(key,str,len);
	key[len]='\0';

	slot=&hash[HashIndex(key)];

	/* Unused bucket: the word takes the array slot itself. */
	if(slot->count==0)
	{
		strcpy(slot->word,key);
		slot->count=1;
		return;
	}

	/* The slot already holds this very word. */
	if(strcmp(slot->word,key)==0)
	{
		slot->count++;
		return;
	}

	/* Otherwise look for the word among the chained nodes. */
	for(node=slot->next;node!=NULL;node=node->next)
	{
		if(strcmp(node->word,key)==0)
		{
			node->count++;
			return;
		}
	}

	/* Not seen before: link a new node right behind the slot. */
	node=CTableNewNode(key);
	node->next=slot->next;
	slot->next=node;
}

/*
 * OutPut - write every stored word with its count to fp and release the
 * chained nodes.
 *
 * For each of the MAX buckets the array slot is printed first (only when
 * its count is non-zero), followed by the nodes chained behind it, one
 * "count word" line each. Every chained node is freed right after it has
 * been printed.
 *
 * After the nodes of a bucket are freed, the slot's link is reset to NULL.
 * Otherwise the table would keep pointers to freed memory, and a second
 * call (or any later lookup) would read or free them again.
 *
 * hash: table to dump; the loop always visits MAX entries, so the table
 *       must have at least that many.
 * fp:   open output stream.
 */
void OutPut(HashTable hash[],FILE *fp)
{
	int i;
	HashTable *node,*next;

	for(i=0;i<MAX;i++)
	{
		if(hash[i].count!=0)
			fprintf(fp,"%4d %s\n",hash[i].count,hash[i].word);

		for(node=hash[i].next;node!=NULL;node=next)
		{
			fprintf(fp,"%4d %s\n",node->count,node->word);
			next=node->next;   /* fetch the link before the node is released */
			free(node);
		}
		hash[i].next=NULL;
	}
}

/*
 * GetWord - read the next token from fp into word.
 *
 * Leading whitespace is skipped. What follows decides the token:
 *   - end of input:      word becomes the empty string;
 *   - a letter:          the token is that letter plus the letters/digits
 *                        that follow it, as far as they fit;
 *   - anything else:     the token is that single character.
 *
 * The first non-alphanumeric character after a word is read and discarded.
 * If a word stops because the buffer is full, nothing further is read, so
 * the rest of the word is returned by the next call.
 *
 * fp:   input stream.
 * lim:  capacity of word in bytes, including the terminating NUL; at most
 *       lim-1 characters are stored. With lim == 1 only the terminator is
 *       written and nothing is read; with lim <= 0 nothing is done.
 * word: destination buffer, always NUL-terminated when lim >= 1.
 */
void GetWord(FILE *fp,int lim,char word[])
{
	int c;       /* stays an int so EOF is distinct from every byte value */
	int len=0;

	if(lim<2)
	{
		if(lim==1)
			word[0]='\0';
		return;
	}

	do
		c=getc(fp);
	while(c!=EOF&&isspace(c));

	if(c==EOF)
	{
		word[0]='\0';
		return;
	}

	word[len++]=(char)c;
	if(isalpha(c))
	{
		/* Keep one byte in reserve for the terminator. */
		while(len<lim-1)
		{
			c=getc(fp);
			if(c==EOF||!isalnum(c))
				break;
			word[len++]=(char)c;
		}
	}
	word[len]='\0';
}

/*
 * main - count the words of one text file and write the table to another.
 *
 * Usage: program <input file> <output file>
 *
 * Tokens are read from the input with GetWord(); every token that starts
 * with a letter is entered into the global table with CTable() and added
 * to the running total. Afterwards OutPut() writes the per-word counts to
 * the output file and the total is printed on standard output.
 *
 * Returns 0 on success; on a usage error or an open failure the program
 * exits with status -1, and a failed close of the output file returns -1.
 */
int main(int argc,char *argv[])
{
	FILE *in,*out;
	/* One byte larger than the limit handed to GetWord, so a terminator
	   still fits even if GetWord stores a full `lim` characters. */
	char w[MAX+1];
	unsigned int allwords=0;

	/* Without both file names argv[1]/argv[2] would be NULL or missing. */
	if(argc<3)
	{
		printf("Usage: %s <input file> <output file>\n",argc>0?argv[0]:"wordcount");
		exit(-1);
	}

	InitHash(hash,MAX);
	in=fopen(argv[1],"r");
	if(in==NULL)
	{
		printf("Can not open the file!\n");
		exit(-1);
	}
	out=fopen(argv[2],"w");
	if(out==NULL)
	{
		printf("Can not open the file!\n");
		fclose(in);
		exit(-1);
	}

	/* Also stop on a read error: feof() alone never becomes true then and
	   the loop would spin forever. */
	while(!feof(in)&&!ferror(in))
	{
		GetWord(in,MAX,w);
		/* <ctype.h> functions need an unsigned char value. */
		if(isalpha((unsigned char)w[0]))
		{
			CTable(w);
			allwords++;
		}
	}

	OutPut(hash,out);
	printf("The all words are: %u\n",allwords);   /* %u matches unsigned int */
	fclose(in);
	/* Closing flushes buffered output, so a failure here means lost data. */
	if(fclose(out)!=0)
	{
		printf("Can not write the file!\n");
		return -1;
	}
	return 0;
}



/*
 * 统计文件中各单词出现的频率(Hash表实现)
 *
 * 上一篇: EntityFramework之领域驱动设计实践【从DataTable到EntityObject】
 * 下一篇: 统计文件中各单词出现的频率(二叉排序树实现)
 */