hashtable的桶子buckets与节点nodes
hash table表格内的元素称为桶子,表格内的每个单元涵盖的不只是个节点,可能是一桶节点。
bucket维护的linked list不是STL的list或forward_list,而是自行维护的hashtable node,至于bucket聚合体则以vector完成,以便有动态扩充的能力。
hashtable的迭代器必须维系整个bucket vector的关系,并记录当前节点,++前进时如果当前节点在list尾端,则需要跳至下一个bucket。
hashtable迭代器没有后退功能,也没有定义逆向迭代器。
hashtable的模板参数较多,_Val节点实值类型,_Key节点键值类型,_HashFcn hash function的函数类型,_ExtractKey从节点中取出键值函数或仿函数,_EqualKey判断键值是否相同函数或仿函数,_Alloc内存配置器。
虽然开链法不要求表格大小必须为质数,但hashtable仍然以质数来设计大小,并且将28个质数计算好,以备随时访问,同时提供一个函数,用来查询这28个质数中最接近某数并大于某数的质数。
namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
using std::size_t;
using std::ptrdiff_t;
using std::forward_iterator_tag;
using std::input_iterator_tag;
using std::_Construct;
using std::_Destroy;
using std::distance;
using std::vector;
using std::pair;
using std::__iterator_category;
template<class _Val>
struct _Hashtable_node //hashtable节点设计
{
_Hashtable_node* _M_next;
_Val _M_val;
};
template<class _Val, class _Key, class _HashFcn, class _ExtractKey,
class _EqualKey, class _Alloc = std::allocator<_Val> >
class hashtable;
template<class _Val, class _Key, class _HashFcn,
class _ExtractKey, class _EqualKey, class _Alloc>
struct _Hashtable_iterator;
//以下省略const迭代器
template<class _Val, class _Key, class _HashFcn,
class _ExtractKey, class _EqualKey, class _Alloc>
struct _Hashtable_iterator //hashtable的迭代器设计
{
typedef hashtable<_Val, _Key, _HashFcn, _ExtractKey, _EqualKey, _Alloc>
_Hashtable;
typedef _Hashtable_iterator<_Val, _Key, _HashFcn,
_ExtractKey, _EqualKey, _Alloc>
iterator;
typedef _Hashtable_const_iterator<_Val, _Key, _HashFcn,
_ExtractKey, _EqualKey, _Alloc>
const_iterator;
typedef _Hashtable_node<_Val> _Node;
typedef forward_iterator_tag iterator_category;
typedef _Val value_type;
typedef ptrdiff_t difference_type;
typedef size_t size_type;
typedef _Val& reference;
typedef _Val* pointer;
_Node* _M_cur; //迭代器所指节点
_Hashtable* _M_ht; //保持对容器的连接,可能需要从一个bucket跳到另一个bucket
_Hashtable_iterator(_Node* __n, _Hashtable* __tab)
: _M_cur(__n), _M_ht(__tab) { }
_Hashtable_iterator() { }
reference
operator*() const
{ return _M_cur->_M_val; }
pointer
operator->() const
{ return &(operator*()); }
//前进操作,如果当前节点是list的尾端,跳至下一个bucket
iterator&
operator++()
{
const _Node* __old = _M_cur;
_M_cur = _M_cur->_M_next;
if (!_M_cur) //_M_cur不存在则需要跳至下一个bucket
{
size_type __bucket = _M_ht->_M_bkt_num(__old->_M_val); //根据元素值定位下一个bucket
while (!_M_cur && ++__bucket < _M_ht->_M_buckets.size())
_M_cur = _M_ht->_M_buckets[__bucket];
}
return *this;
}
iterator
operator++(int)
{
iterator __tmp = *this;
++*this; //调用operator++()
return __tmp;
}
bool
operator==(const iterator& __it) const
{ return _M_cur == __it._M_cur; }
bool
operator!=(const iterator& __it) const
{ return _M_cur != __it._M_cur; }
};
// Note: assumes long is at least 32 bits.
enum { _S_num_primes = 29 };
template<typename _PrimeType>
struct _Hashtable_prime_list
{
static const _PrimeType __stl_prime_list[_S_num_primes];
static const _PrimeType*
_S_get_prime_list();
};
template<typename _PrimeType> const _PrimeType //28个质数
_Hashtable_prime_list<_PrimeType>::__stl_prime_list[_S_num_primes] =
{
5ul, 53ul, 97ul, 193ul, 389ul,
769ul, 1543ul, 3079ul, 6151ul, 12289ul,
24593ul, 49157ul, 98317ul, 196613ul, 393241ul,
786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul,
25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul,
805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
};
template<class _PrimeType> inline const _PrimeType*
_Hashtable_prime_list<_PrimeType>::_S_get_prime_list()
{
return __stl_prime_list;
}
//找出28个质数中,最接近并大于n的质数
inline unsigned long
__stl_next_prime(unsigned long __n)
{
const unsigned long* __first = _Hashtable_prime_list<unsigned long>::_S_get_prime_list();
const unsigned long* __last = __first + (int)_S_num_primes;
const unsigned long* pos = std::lower_bound(__first, __last, __n); //lower_bound是泛型算法,需要先排序
return pos == __last ? *(__last - 1) : *pos;
}
template<class _Val, class _Key, class _HashFcn,
class _ExtractKey, class _EqualKey, class _Alloc>
class hashtable //hashtable的数据结构
{
public:
typedef _Key key_type;
typedef _Val value_type;
typedef _HashFcn hasher;
typedef _EqualKey key_equal;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
hasher
hash_funct() const
{ return _M_hash; }
key_equal
key_eq() const
{ return _M_equals; }
private:
typedef _Hashtable_node<_Val> _Node;
public:
typedef typename _Alloc::template rebind<value_type>::other allocator_type;
allocator_type
get_allocator() const
{ return _M_node_allocator; }
private:
typedef typename _Alloc::template rebind<_Node>::other _Node_Alloc;
typedef typename _Alloc::template rebind<_Node*>::other _Nodeptr_Alloc;
typedef vector<_Node*, _Nodeptr_Alloc> _Vector_type; //以vector集合
_Node_Alloc _M_node_allocator;
_Node*
_M_get_node() //节点配置
{ return _M_node_allocator.allocate(1); }
void
_M_put_node(_Node* __p) //节点释放
{ _M_node_allocator.deallocate(__p, 1); }
private:
hasher _M_hash; //hash仿函数
key_equal _M_equals; //hash仿函数
_ExtractKey _M_get_key; //hash仿函数
_Vector_type _M_buckets;
size_type _M_num_elements;
public:
//以下省略const版本
typedef _Hashtable_iterator<_Val, _Key, _HashFcn, _ExtractKey,
_EqualKey, _Alloc>
iterator;
friend struct
_Hashtable_iterator<_Val, _Key, _HashFcn, _ExtractKey, _EqualKey, _Alloc>;
public:
hashtable(size_type __n, const _HashFcn& __hf,
const _EqualKey& __eql, const _ExtractKey& __ext,
const allocator_type& __a = allocator_type())
: _M_node_allocator(__a), _M_hash(__hf), _M_equals(__eql),
_M_get_key(__ext), _M_buckets(__a), _M_num_elements(0)
{ _M_initialize_buckets(__n); }
hashtable(size_type __n, const _HashFcn& __hf,
const _EqualKey& __eql,
const allocator_type& __a = allocator_type())
: _M_node_allocator(__a), _M_hash(__hf), _M_equals(__eql),
_M_get_key(_ExtractKey()), _M_buckets(__a), _M_num_elements(0)
{ _M_initialize_buckets(__n); }
hashtable(const hashtable& __ht)
: _M_node_allocator(__ht.get_allocator()), _M_hash(__ht._M_hash),
_M_equals(__ht._M_equals), _M_get_key(__ht._M_get_key),
_M_buckets(__ht.get_allocator()), _M_num_elements(0)
{ _M_copy_from(__ht); }
hashtable&
operator= (const hashtable& __ht)
{
if (&__ht != this)
{
clear();
_M_hash = __ht._M_hash;
_M_equals = __ht._M_equals;
_M_get_key = __ht._M_get_key;
_M_copy_from(__ht);
}
return *this;
}
~hashtable()
{ clear(); }
size_type
size() const
{ return _M_num_elements; }
size_type
max_size() const
{ return size_type(-1); }
bool
empty() const
{ return size() == 0; }
void
swap(hashtable& __ht)
{
std::swap(_M_hash, __ht._M_hash);
std::swap(_M_equals, __ht._M_equals);
std::swap(_M_get_key, __ht._M_get_key);
_M_buckets.swap(__ht._M_buckets);
std::swap(_M_num_elements, __ht._M_num_elements);
}
//以下begin、end省略const版本
iterator
begin()
{
for (size_type __n = 0; __n < _M_buckets.size(); ++__n)
if (_M_buckets[__n])
return iterator(_M_buckets[__n], this);
return end();
}
iterator
end()
{ return iterator(0, this); }
template<class _Vl, class _Ky, class _HF, class _Ex, class _Eq,
class _Al>
friend bool
operator==(const hashtable<_Vl, _Ky, _HF, _Ex, _Eq, _Al>&,
const hashtable<_Vl, _Ky, _HF, _Ex, _Eq, _Al>&);
public:
size_type
bucket_count() const
{ return _M_buckets.size(); }
size_type
max_bucket_count() const
{ return _Hashtable_prime_list<unsigned long>::
_S_get_prime_list()[(int)_S_num_primes - 1];
}
size_type
elems_in_bucket(size_type __bucket) const
{
size_type __result = 0;
for (_Node* __n = _M_buckets[__bucket]; __n; __n = __n->_M_next)
__result += 1;
return __result;
}
pair<iterator, bool>
insert_unique(const value_type& __obj)
{
resize(_M_num_elements + 1);
return insert_unique_noresize(__obj);
}
iterator
insert_equal(const value_type& __obj)
{
resize(_M_num_elements + 1);
return insert_equal_noresize(__obj);
}
pair<iterator, bool>
insert_unique_noresize(const value_type& __obj)
{
const size_type __n = _M_bkt_num(__obj);
_Node* __first = _M_buckets[__n];
for (_Node* __cur = __first; __cur; __cur = __cur->_M_next)
if (_M_equals(_M_get_key(__cur->_M_val), _M_get_key(__obj)))
return pair<iterator, bool>(iterator(__cur, this), false);
_Node* __tmp = _M_new_node(__obj);
__tmp->_M_next = __first;
_M_buckets[__n] = __tmp;
++_M_num_elements;
return pair<iterator, bool>(iterator(__tmp, this), true);
}
iterator
insert_equal_noresize(const value_type& __obj)
{
const size_type __n = _M_bkt_num(__obj);
_Node* __first = _M_buckets[__n];
for (_Node* __cur = __first; __cur; __cur = __cur->_M_next)
if (_M_equals(_M_get_key(__cur->_M_val), _M_get_key(__obj)))
{
_Node* __tmp = _M_new_node(__obj);
__tmp->_M_next = __cur->_M_next;
__cur->_M_next = __tmp;
++_M_num_elements;
return iterator(__tmp, this);
}
_Node* __tmp = _M_new_node(__obj);
__tmp->_M_next = __first;
_M_buckets[__n] = __tmp;
++_M_num_elements;
return iterator(__tmp, this);
}
template<class _InputIterator>
void
insert_unique(_InputIterator __f, _InputIterator __l)
{ insert_unique(__f, __l, __iterator_category(__f)); }
template<class _InputIterator>
void
insert_equal(_InputIterator __f, _InputIterator __l)
{ insert_equal(__f, __l, __iterator_category(__f)); }
template<class _InputIterator>
void
insert_unique(_InputIterator __f, _InputIterator __l,
input_iterator_tag)
{
for ( ; __f != __l; ++__f)
insert_unique(*__f);
}
template<class _InputIterator>
void
insert_equal(_InputIterator __f, _InputIterator __l,
input_iterator_tag)
{
for ( ; __f != __l; ++__f)
insert_equal(*__f);
}
template<class _ForwardIterator>
void
insert_unique(_ForwardIterator __f, _ForwardIterator __l,
forward_iterator_tag)
{
size_type __n = distance(__f, __l);
resize(_M_num_elements + __n);
for ( ; __n > 0; --__n, ++__f)
insert_unique_noresize(*__f);
}
template<class _ForwardIterator>
void
insert_equal(_ForwardIterator __f, _ForwardIterator __l,
forward_iterator_tag)
{
size_type __n = distance(__f, __l);
resize(_M_num_elements + __n);
for ( ; __n > 0; --__n, ++__f)
insert_equal_noresize(*__f);
}
reference
find_or_insert(const value_type& __obj)
{
resize(_M_num_elements + 1);
size_type __n = _M_bkt_num(__obj);
_Node* __first = _M_buckets[__n];
for (_Node* __cur = __first; __cur; __cur = __cur->_M_next)
if (_M_equals(_M_get_key(__cur->_M_val), _M_get_key(__obj)))
return __cur->_M_val;
_Node* __tmp = _M_new_node(__obj);
__tmp->_M_next = __first;
_M_buckets[__n] = __tmp;
++_M_num_elements;
return __tmp->_M_val;
}
//以下find省略const版本
iterator
find(const key_type& __key)
{
size_type __n = _M_bkt_num_key(__key);
_Node* __first;
for (__first = _M_buckets[__n];
__first && !_M_equals(_M_get_key(__first->_M_val), __key);
__first = __first->_M_next)
{ }
return iterator(__first, this);
}
size_type
count(const key_type& __key) const
{
const size_type __n = _M_bkt_num_key(__key);
size_type __result = 0;
for (const _Node* __cur = _M_buckets[__n]; __cur;
__cur = __cur->_M_next)
if (_M_equals(_M_get_key(__cur->_M_val), __key))
++__result;
return __result;
}
//以下equal_range、erase省略const版本
pair<iterator, iterator>
equal_range(const key_type& __key)
{
typedef pair<iterator, iterator> _Pii;
const size_type __n = _M_bkt_num_key(__key);
for (_Node* __first = _M_buckets[__n]; __first;
__first = __first->_M_next)
if (_M_equals(_M_get_key(__first->_M_val), __key))
{
for (_Node* __cur = __first->_M_next; __cur;
__cur = __cur->_M_next)
if (!_M_equals(_M_get_key(__cur->_M_val), __key))
return _Pii(iterator(__first, this), iterator(__cur, this));
for (size_type __m = __n + 1; __m < _M_buckets.size(); ++__m)
if (_M_buckets[__m])
return _Pii(iterator(__first, this),
iterator(_M_buckets[__m], this));
return _Pii(iterator(__first, this), end());
}
return _Pii(end(), end());
}
size_type
erase(const key_type& __key)
{
const size_type __n = _M_bkt_num_key(__key);
_Node* __first = _M_buckets[__n];
_Node* __saved_slot = 0;
size_type __erased = 0;
if (__first)
{
_Node* __cur = __first;
_Node* __next = __cur->_M_next;
while (__next)
{
if (_M_equals(_M_get_key(__next->_M_val), __key))
{
if (&_M_get_key(__next->_M_val) != &__key)
{
__cur->_M_next = __next->_M_next;
_M_delete_node(__next);
__next = __cur->_M_next;
++__erased;
--_M_num_elements;
}
else
{
__saved_slot = __cur;
__cur = __next;
__next = __cur->_M_next;
}
}
else
{
__cur = __next;
__next = __cur->_M_next;
}
}
bool __delete_first = _M_equals(_M_get_key(__first->_M_val), __key);
if (__saved_slot)
{
__next = __saved_slot->_M_next;
__saved_slot->_M_next = __next->_M_next;
_M_delete_node(__next);
++__erased;
--_M_num_elements;
}
if (__delete_first)
{
_M_buckets[__n] = __first->_M_next;
_M_delete_node(__first);
++__erased;
--_M_num_elements;
}
}
return __erased;
}
void
erase(const iterator& __it)
{
_Node* __p = __it._M_cur;
if (__p)
{
const size_type __n = _M_bkt_num(__p->_M_val);
_Node* __cur = _M_buckets[__n];
if (__cur == __p)
{
_M_buckets[__n] = __cur->_M_next;
_M_delete_node(__cur);
--_M_num_elements;
}
else
{
_Node* __next = __cur->_M_next;
while (__next)
{
if (__next == __p)
{
__cur->_M_next = __next->_M_next;
_M_delete_node(__next);
--_M_num_elements;
break;
}
else
{
__cur = __next;
__next = __cur->_M_next;
}
}
}
}
}
void
erase(iterator __first, iterator __last)
{
size_type __f_bucket = __first._M_cur ? _M_bkt_num(__first._M_cur->_M_val)
: _M_buckets.size();
size_type __l_bucket = __last._M_cur ? _M_bkt_num(__last._M_cur->_M_val)
: _M_buckets.size();
if (__first._M_cur == __last._M_cur)
return;
else if (__f_bucket == __l_bucket)
_M_erase_bucket(__f_bucket, __first._M_cur, __last._M_cur);
else
{
_M_erase_bucket(__f_bucket, __first._M_cur, 0);
for (size_type __n = __f_bucket + 1; __n < __l_bucket; ++__n)
_M_erase_bucket(__n, 0);
if (__l_bucket != _M_buckets.size())
_M_erase_bucket(__l_bucket, __last._M_cur);
}
}
void
resize(size_type __num_elements_hint)
{
const size_type __old_n = _M_buckets.size();
if (__num_elements_hint > __old_n)
{
const size_type __n = _M_next_size(__num_elements_hint);
if (__n > __old_n)
{
_Vector_type __tmp(__n, (_Node*)(0), _M_buckets.get_allocator());
__try
{
for (size_type __bucket = 0; __bucket < __old_n; ++__bucket)
{
_Node* __first = _M_buckets[__bucket];
while (__first)
{
size_type __new_bucket = _M_bkt_num(__first->_M_val,
__n);
_M_buckets[__bucket] = __first->_M_next;
__first->_M_next = __tmp[__new_bucket];
__tmp[__new_bucket] = __first;
__first = _M_buckets[__bucket];
}
}
_M_buckets.swap(__tmp);
}
__catch(...)
{
for (size_type __bucket = 0; __bucket < __tmp.size();
++__bucket)
{
while (__tmp[__bucket])
{
_Node* __next = __tmp[__bucket]->_M_next;
_M_delete_node(__tmp[__bucket]);
__tmp[__bucket] = __next;
}
}
__throw_exception_again;
}
}
}
}
void
clear()
{
if (_M_num_elements == 0)
return;
for (size_type __i = 0; __i < _M_buckets.size(); ++__i)
{
_Node* __cur = _M_buckets[__i];
while (__cur != 0)
{
_Node* __next = __cur->_M_next;
_M_delete_node(__cur);
__cur = __next;
}
_M_buckets[__i] = 0;
}
_M_num_elements = 0;
}
private:
size_type
_M_next_size(size_type __n) const
{ return __stl_next_prime(__n); }
void
_M_initialize_buckets(size_type __n)
{
const size_type __n_buckets = _M_next_size(__n);
_M_buckets.reserve(__n_buckets);
_M_buckets.insert(_M_buckets.end(), __n_buckets, (_Node*) 0);
_M_num_elements = 0;
}
size_type
_M_bkt_num_key(const key_type& __key) const
{ return _M_bkt_num_key(__key, _M_buckets.size()); }
size_type
_M_bkt_num(const value_type& __obj) const
{ return _M_bkt_num_key(_M_get_key(__obj)); }
size_type
_M_bkt_num_key(const key_type& __key, size_t __n) const
{ return _M_hash(__key) % __n; }
size_type
_M_bkt_num(const value_type& __obj, size_t __n) const
{ return _M_bkt_num_key(_M_get_key(__obj), __n); }
_Node*
_M_new_node(const value_type& __obj)
{
_Node* __n = _M_get_node();
__n->_M_next = 0;
__try
{
this->get_allocator().construct(&__n->_M_val, __obj);
return __n;
}
__catch(...)
{
_M_put_node(__n);
__throw_exception_again;
}
}
void
_M_delete_node(_Node* __n)
{
this->get_allocator().destroy(&__n->_M_val);
_M_put_node(__n);
}
void
_M_erase_bucket(const size_type __n, _Node* __first, _Node* __last)
{
_Node* __cur = _M_buckets[__n];
if (__cur == __first)
_M_erase_bucket(__n, __last);
else
{
_Node* __next;
for (__next = __cur->_M_next;
__next != __first;
__cur = __next, __next = __cur->_M_next)
;
while (__next != __last)
{
__cur->_M_next = __next->_M_next;
_M_delete_node(__next);
__next = __cur->_M_next;
--_M_num_elements;
}
}
}
void
_M_erase_bucket(const size_type __n, _Node* __last)
{
_Node* __cur = _M_buckets[__n];
while (__cur != __last)
{
_Node* __next = __cur->_M_next;
_M_delete_node(__cur);
__cur = __next;
_M_buckets[__n] = __cur;
--_M_num_elements;
}
}
void
_M_copy_from(const hashtable& __ht)
{
_M_buckets.clear();
_M_buckets.reserve(__ht._M_buckets.size());
_M_buckets.insert(_M_buckets.end(), __ht._M_buckets.size(), (_Node*) 0);
__try
{
for (size_type __i = 0; __i < __ht._M_buckets.size(); ++__i) {
const _Node* __cur = __ht._M_buckets[__i];
if (__cur)
{
_Node* __local_copy = _M_new_node(__cur->_M_val);
_M_buckets[__i] = __local_copy;
for (_Node* __next = __cur->_M_next;
__next;
__cur = __next, __next = __cur->_M_next)
{
__local_copy->_M_next = _M_new_node(__next->_M_val);
__local_copy = __local_copy->_M_next;
}
}
}
_M_num_elements = __ht._M_num_elements;
}
__catch(...)
{
clear();
__throw_exception_again;
}
}
};
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
bool
operator==(const hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>& __ht1,
const hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>& __ht2)
{
typedef typename hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::_Node _Node;
if (__ht1._M_buckets.size() != __ht2._M_buckets.size())
return false;
for (size_t __n = 0; __n < __ht1._M_buckets.size(); ++__n)
{
_Node* __cur1 = __ht1._M_buckets[__n];
_Node* __cur2 = __ht2._M_buckets[__n];
// Check same length of lists
for (; __cur1 && __cur2;
__cur1 = __cur1->_M_next, __cur2 = __cur2->_M_next)
{ }
if (__cur1 || __cur2)
return false;
// Now check one's elements are in the other
for (__cur1 = __ht1._M_buckets[__n] ; __cur1;
__cur1 = __cur1->_M_next)
{
bool _found__cur1 = false;
for (__cur2 = __ht2._M_buckets[__n];
__cur2; __cur2 = __cur2->_M_next)
{
if (__cur1->_M_val == __cur2->_M_val)
{
_found__cur1 = true;
break;
}
}
if (!_found__cur1)
return false;
}
}
return true;
}
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
inline bool
operator!=(const hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>& __ht1,
const hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>& __ht2)
{ return !(__ht1 == __ht2); }
template<class _Val, class _Key, class _HF, class _Extract, class _EqKey,
class _All>
inline void
swap(hashtable<_Val, _Key, _HF, _Extract, _EqKey, _All>& __ht1,
hashtable<_Val, _Key, _HF, _Extract, _EqKey, _All>& __ht2)
{ __ht1.swap(__ht2); }
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace