之前在大学学习数据结构的时候,学过数组和链表。数组的优点就是可以直接定位,速度快,但是缺点就是插入删除,效率就慢了很多。链表的可以快速的插入删除,但是不能直接定位,需要遍历才可以。他们使用在不同的场景下面。
有时候,又想快速的插入,又想快速的定位怎么办?这就需要把这两者优点糅合起来,哈希表就这么产生了。
哈希表的原理:一个动态数组,保存着元素的地址。通过一个公式,把元素的key(唯一),算出一个数值index。数组index位置就保存这个元素的指针。
这样存数据以及取数据,都可以通过这个公式,得到同一个位置。这个公式的目的,就是将key和数组的位置映射起来,当然没有任何的一个公式能够保证算出来的key和数组位置是一一对应的,也就是说,多个key算出来的index结果是同一个,这就是哈希冲突,稍后具体怎么解决它。
整个哈希表,最重要的就是这个映射公式,不但能够将字符串变成数值,而且能够平均分布。最常用的就是Times33算法
1
2
3
4
5
6
7
8
9
10
11
12
13
|
inline
UINT
CMyMap::HashKey( LPCTSTR
key) const
{ UINT
nHash = 5381;
while
(*key)
nHash = (nHash << 5) + nHash + *key++; return
nHash;
} |
5381是一个经验值,php就是使用这个值。
下面就是我写的代码。
1 #pragma once 2 #include "Bucket.h" 3 class HashTable 4 { 5 public: 6 HashTable(void); 7 ~HashTable(void); 8 static unsigned long hash_inline(const char* arKey,unsigned int nKeyLength); 9 bool add(const char* arKey,void* value); 10 void* getData(const char* arKey); 11 bool remove(const char* arKey); 12 bool resize(unsigned int nSize); 13 unsigned int nTableSize; 14 unsigned int nTableMask; 15 unsigned int nNumOfElements; 16 Bucket *pBucketCursor; 17 Bucket **arBuckets; 18 Bucket *pListHead; 19 Bucket *pListTail; 20 private: 21 void init(); 22 }; 23 #include "stdafx.h" 24 #include "HashTable.h" 25 #include <iostream> 26 #include <string.h> 27 using namespace std; 28 typedef unsigned long ulong; 29 typedef unsigned int uint; 30 31 HashTable::HashTable(void) 32 { 33 init(); 34 } 35 36 HashTable::~HashTable(void) 37 { 38 delete [] arBuckets; 39 } 40 41 ulong HashTable::hash_inline( const char* arKey,uint nKeyLength ) 42 { 43 register ulong hash=5381; 44 // for (; nKeyLength >= 8; nKeyLength -= 8) 45 // { 46 // const char *arKeyTemp=arKey; 47 // for(int i=0;i!=8;++i){ 48 // hash = ((hash << 5) + hash) + *arKey++; 49 // if(*arKey){ 50 // arKey=arKeyTemp; 51 // } 52 // } 53 // 54 // } 55 // switch (nKeyLength) 56 // { 57 // case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 58 // case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 59 // case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 60 // case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 61 // case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 62 // case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 63 // case 1: hash = ((hash << 5) + hash) + *arKey++; break; 64 // case 0: break; 65 // } 66 while (*arKey) 67 { 68 hash=(hash<<5)+hash+*arKey++; 69 } 70 return hash; 71 } 72 73 void HashTable::init() 74 { 75 nTableSize=8; 76 nTableMask=nTableSize-1; 77 arBuckets=new Bucket*[nTableSize](); 78 nNumOfElements=0; 79 } 80 81 bool HashTable::add(const char* arKey,void* value ) 82 { 83 ulong h=hash_inline(arKey,nTableSize); 84 uint nIndex=nTableMask&h; 85 Bucket *pbucket=arBuckets[nIndex]; 86 if (pbucket==NULL) 87 { 88 pbucket=new Bucket(arKey,value,h); 89 arBuckets[nIndex]=pbucket; 90 ++nNumOfElements; 91 if(nNumOfElements==1) 92 { 93 pListHead=pbucket; 94 pListTail=pbucket; 95 }else{ 96 pbucket->pListPre=pListTail; 97 pListTail->pListNext=pbucket; 98 pListTail=pbucket; 99 } 100 //cout<<"add key "<<pbucket->arKey<<endl; 101 }else{ 102 if(strcmp(pbucket->arKey,arKey)==0){ 103 //cout<<"key "<<arKey<<" is existed"<<endl; 104 return false; 105 }else{ 106 Bucket *pNewBucket=new Bucket(arKey,value,h); 107 while (pbucket->pNext!=NULL) 108 { 109 pbucket=pbucket->pNext; 110 } 111 pbucket->pNext=pNewBucket; 112 pNewBucket->pPre=pbucket; 113 pNewBucket->pListPre=pListTail; 114 pListTail->pListNext=pNewBucket; 115 pListTail=pNewBucket; 116 //cout<<"key "<<pbucket->arKey<<" next key is "<<pNewBucket->arKey<<endl; 117 ++nNumOfElements; 118 } 119 } 120 if(nNumOfElements>=nTableSize){ 121 resize(nTableSize*2); 122 } 123 return true; 124 } 125 126 bool HashTable::resize( unsigned int nSize ) 127 { 128 Bucket **arPBucketTemp=new Bucket*[nSize](); 129 Bucket *pListHeadTemp=NULL; 130 Bucket *pListTailTemp=NULL; 131 nTableSize=nSize; 132 nTableMask=nTableSize-1; 133 Bucket *pBucketCursorTemp=pListHead; 134 uint nNumOfElementsTemp=0; 135 //cout<<"--------------rehash-----------------"<<endl; 136 //cout<<"resize size:"<<nSize<<endl; 137 while (pBucketCursorTemp!=NULL) 138 { 139 /*//cout<<"resize pBucket key:"<<pBucketCursorTemp->arKey<<endl;*/ 140 Bucket *pbucket=pBucketCursorTemp; 141 pBucketCursorTemp=pBucketCursorTemp->pListNext; 142 ulong h=hash_inline(pbucket->arKey,nTableSize); 143 pbucket->nKeyLength=nTableSize; 144 pbucket->h=h; 145 uint nIndex=h&nTableMask; 146 Bucket *pbucketindex=arPBucketTemp[nIndex]; 147 if (pbucketindex==NULL) 148 { 149 arPBucketTemp[nIndex]=pbucket; 150 ++nNumOfElementsTemp; 151 if(nNumOfElementsTemp==1) 152 { 153 pListHeadTemp=pbucket; 154 pListTailTemp=pbucket; 155 pbucket->pListPre=NULL; 156 pbucket->pListNext=NULL; 157 pbucket->pNext=NULL; 158 pbucket->pPre=NULL; 159 }else{ 160 pbucket->pListPre=pListTailTemp; 161 pbucket->pListNext=NULL; 162 pbucket->pPre=NULL; 163 pbucket->pNext=NULL; 164 pListTailTemp->pListNext=pbucket; 165 pListTailTemp=pbucket; 166 } 167 }else{ 168 if(strcmp(pbucket->arKey,pbucketindex->arKey)==0){ 169 //cout<<"key "<<pbucket->arKey<<" is existed"<<endl; 170 return false; 171 }else{ 172 while (pbucketindex->pNext!=NULL) 173 { 174 pbucketindex=pbucketindex->pNext; 175 } 176 pbucketindex->pNext=pbucket; 177 pbucket->pPre=pbucketindex; 178 pbucket->pNext=NULL; 179 pbucket->pListPre=pListTailTemp; 180 pbucket->pListNext=NULL; 181 pListTailTemp->pListNext=pbucket; 182 pListTailTemp=pbucket; 183 /*//cout<<"key "<<pbucketindex->arKey<<" next key is "<<pbucketindex->pNext->arKey<<endl;*/ 184 ++nNumOfElementsTemp; 185 } 186 } 187 188 } 189 delete [] arBuckets; 190 arBuckets=arPBucketTemp; 191 pListTail=pListTailTemp; 192 pListHead=pListHeadTemp; 193 nNumOfElements=nNumOfElementsTemp; 194 //cout<<"--------------rehash end-----------------"<<endl; 195 return false; 196 } 197 198 void* HashTable::getData( const char* arKey ) 199 { 200 ulong h=hash_inline(arKey,nTableSize); 201 ulong nIndex=h&nTableMask; 202 Bucket *pbucket=arBuckets[nIndex]; 203 if(pbucket==NULL){ 204 return NULL; 205 }else{ 206 while (pbucket!=NULL) 207 { 208 if(strcmp(pbucket->arKey,arKey)==0){ 209 return pbucket->pData; 210 }else{ 211 pbucket=pbucket->pNext; 212 } 213 } 214 return NULL; 215 } 216 } 217 218 bool HashTable::remove( const char* arKey ) 219 { 220 ulong h=hash_inline(arKey,nTableSize); 221 ulong nIndex=h&nTableMask; 222 Bucket *pbucket=arBuckets[nIndex]; 223 if(pbucket==NULL){ 224 return false; 225 }else{ 226 while (pbucket!=NULL) 227 { 228 if(strcmp(pbucket->arKey,arKey)==0){ 229 if(pbucket==pListHead){ 230 pListHead=pbucket->pListNext; 231 if(pListHead!=NULL){ 232 pbucket->pListNext->pListPre=NULL; 233 } 234 } 235 if(pbucket==pListTail){ 236 pListTail=pbucket->pListPre; 237 if(pListTail!=NULL){ 238 pListTail->pListNext=NULL; 239 } 240 } 241 if(pbucket->pListPre!=NULL){ 242 pbucket->pListPre->pListNext=pbucket->pListNext; 243 } 244 if(pbucket->pListNext!=NULL){ 245 pbucket->pListNext->pListPre=pbucket->pListPre; 246 } 247 if(pbucket->pPre!=NULL){ 248 pbucket->pPre->pNext=pbucket->pNext; 249 }else{ 250 arBuckets[nIndex]=pbucket->pNext; 251 } 252 if(pbucket->pNext!=NULL){ 253 pbucket->pNext->pPre=pbucket->pPre; 254 } 255 --nNumOfElements; 256 delete pbucket; 257 return true; 258 }else{ 259 pbucket=pbucket->pNext; 260 } 261 } 262 return false; 263 } 264 }
1 #pragma once 2 class Bucket 3 { 4 public: 5 Bucket(const char* arKey,void *value,unsigned long h); 6 ~Bucket(void); 7 unsigned long h; 8 unsigned int nKeyLength; 9 void *pData; 10 const char *arKey; 11 Bucket *pNext; 12 Bucket *pPre; 13 Bucket *pListNext; 14 Bucket *pListPre; 15 }; 16 #include "stdafx.h" 17 #include "Bucket.h" 18 19 20 Bucket::Bucket(const char* _arKey,void *value,unsigned long h):arKey(_arKey) 21 { 22 this->pData=value; 23 this->h=h; 24 this->pNext=NULL; 25 this->pPre=NULL; 26 this->pListNext=NULL; 27 this->pListPre=NULL; 28 } 29 30 31 Bucket::~Bucket(void) 32 { 33 }
1 // hashtables.cpp : 定义控制台应用程序的入口点。 2 // 3 4 #include "stdafx.h" 5 #include <iostream> 6 #include "HashTable.h" 7 using namespace std; 8 9 int _tmain(int argc, _TCHAR* argv[]) 10 { 11 HashTable ht; 12 ht.add("name","max"); 13 ht.add("name","max"); 14 ht.add("name2","max2"); 15 ht.add("name3","max3"); 16 ht.add("name4","max4"); 17 ht.add("name5","max5"); 18 ht.add("name6","max6"); 19 ht.add("name7","max7"); 20 ht.add("name8","max8"); 21 ht.add("name9","max9"); 22 ht.add("name10","max10"); 23 ht.add("name11","max11"); 24 ht.add("name12","max12"); 25 ht.add("name13","max13"); 26 ht.add("name14","max14"); 27 ht.add("name15","max15"); 28 ht.add("name16","max16"); 29 ht.add("name17","max17"); 30 ht.add("name18","max18"); 31 ht.add("name19","max19"); 32 ht.add("name20","max20"); 33 ht.add("name21","max21"); 34 ht.add("name22","max22"); 35 ht.add("name23","max23"); 36 ht.add("name24","max24"); 37 ht.add("name25","max25"); 38 ht.add("name26","max26"); 39 ht.add("name27","max27"); 40 ht.add("name28","max28"); 41 ht.add("name29","max29"); 42 cout<<"------------------------------"<<endl; 43 Bucket *bucket=ht.pListHead; 44 while(bucket!=NULL) 45 { 46 cout<<"bucket key:"<<bucket->arKey<<";bucket value:"<<(char*)bucket->pData<<";"<<endl; 47 bucket=bucket->pListNext; 48 } 49 cout<<"------------------------------"<<endl; 50 bucket=ht.pListTail; 51 while(bucket!=NULL) 52 { 53 cout<<"bucket key:"<<bucket->arKey<<";bucket value:"<<(char*)bucket->pData<<";"<<endl; 54 bucket=bucket->pListPre; 55 } 56 57 cout<<"------------------------------"<<endl; 58 for(int i=0;i!=32;++i){ 59 Bucket *pbucket=ht.arBuckets[i]; 60 while(pbucket!=NULL){ 61 cout<<"bucket key:"<<pbucket->arKey<<";index:"<<i<<endl; 62 pbucket=pbucket->pNext; 63 } 64 } 65 cout<<"------------------------------"<<endl; 66 cout<<"bucket key "<<"name getData:"<<(char*)ht.getData("name")<<endl; 67 cout<<"bucket key "<<"name1 getData:"<<ht.getData("name1")<<endl; 68 cout<<"bucket key "<<"name5 getData:"<<(char*)ht.getData("name5")<<endl; 69 cout<<"bucket key "<<"name10 getData:"<<(char*)ht.getData("name10")<<endl; 70 cout<<"bucket key "<<"name15 getData:"<<(char*)ht.getData("name15")<<endl; 71 cout<<"bucket key "<<"name20 getData:"<<(char*)ht.getData("name20")<<endl; 72 cout<<"bucket key "<<"name29 getData:"<<(char*)ht.getData("name29")<<endl; 73 cout<<"------------------------------"<<endl; 74 cout<<"bucket key "<<"name delete:"<<ht.remove("name")<<endl; 75 cout<<"bucket key "<<"name1 delete:"<<ht.remove("name1")<<endl; 76 cout<<"bucket key "<<"name15 delete:"<<ht.remove("name15")<<endl; 77 cout<<"bucket key "<<"name20 delete:"<<ht.remove("name20")<<endl; 78 cout<<"bucket key "<<"name10 delete:"<<ht.remove("name10")<<endl; 79 cout<<"bucket key "<<"name5 delete:"<<ht.remove("name5")<<endl; 80 cout<<"bucket key "<<"name29 delete:"<<ht.remove("name29")<<endl; 81 cout<<"------------------------------"<<endl; 82 bucket=ht.pListHead; 83 while(bucket!=NULL) 84 { 85 cout<<"bucket key:"<<bucket->arKey<<";bucket value:"<<(char*)bucket->pData<<";"<<endl; 86 bucket=bucket->pListNext; 87 } 88 cout<<"------------------------------"<<endl; 89 for(int i=0;i!=32;++i){ 90 Bucket *pbucket=ht.arBuckets[i]; 91 while(pbucket!=NULL){ 92 cout<<"bucket key:"<<pbucket->arKey<<";index:"<<i<<endl; 93 pbucket=pbucket->pNext; 94 } 95 } 96 cout<<"elemetes num "<<ht.nNumOfElements<<endl; 97 cout<<"------------------------------"<<endl; 98 int num=10000500; 99 for(int i=0;i!=num;++i){ 100 char *key=new char[10](); 101 for(int j=0;j!=9;++j){ 102 int c=rand()%26+97; 103 key[j]=c; 104 } 105 key[9]=0; 106 ht.add(key,key); 107 } 108 cout<<"------------------------------"<<endl; 109 // ht.add("name30","max30"); 110 // ht.add("name31","max31"); 111 // ht.add("name32","max32"); 112 // ht.add("name33","max33"); 113 // ht.add("name34","max34"); 114 // ht.add("name35","max35"); 115 // ht.add("name36","max3"); 116 // for(int i=0;i!=32;++i){ 117 // Bucket *pbucket=ht.arBuckets[i]; 118 // while(pbucket!=NULL){ 119 // cout<<"bucket key:"<<pbucket->arKey<<";index:"<<i<<endl; 120 // pbucket=pbucket->pNext; 121 // } 122 // } 123 // cout<<"elemetes num "<<ht.nNumOfElements<<endl; 124 // cout<<"------------------------------"<<endl; 125 // num=5000; 126 // for(int i=0;i!=num;++i){ 127 // char *key=new char[10](); 128 // for(int j=0;j!=9;++j){ 129 // int c=rand()%26+97; 130 // key[j]=c; 131 // } 132 // key[9]=0; 133 // cout<<"main key:"<<key<<endl; 134 // ht.add(key,key); 135 // cout<<"main end key:"<<key<<endl; 136 // } 137 // cout<<"elemetes num "<<ht.nNumOfElements<<endl; 138 // cout<<"------------------------------"<<endl; 139 // return 0; 140 }