数据结构
type hmap struct {
count int //键值对的个数
flags uint8 //状态值
B uint8 //桶的幂数,桶的数量为2^B
noverflow uint16 //溢出桶的数量
hash0 uint32 // hash seed 哈希种子
buckets unsafe.Pointer // 哈希表数组,底层为[]bmap的首地址
oldbuckets unsafe.Pointer // 旧哈希表数组,底层为[]bmap的首地址
nevacuate uintptr // 当前迁移进度
extra *mapextra // 溢出桶
}
溢出桶的管理
type mapextra struct {
overflow *[]*bmap
oldoverflow *[]*bmap
nextOverflow *bmap //下一个空闲溢出桶
}
键值对存放在bmap上,bmap的数据结构
key值和value值是分别存放在一个长度为8的数组中,这样就不会因为内存填充的原因浪费内存
type bmap struct {
tophash [bucketCnt]uint8 //key高8位哈希值
keys [bucketCnt]key //存放key,直接操作内存控制,结构体没有
values [bucketCnt]value //存放vaule,直接操作内存控制,结构体没有
overflow *bmap //下一个溢出桶
}
以上是map核心数据结构,可以看成map将k-v键值,和对应的高8位哈希值对存放在bmap中,当产生哈希冲突时采用的拉链法解决哈希冲突,在bmap上链入下一个桶,这个桶称溢出桶的概念
初始化
//t map的类型,存放K和V的类型,这样就是可以确定bmap是存放多少字节了,计算方法是16+8*K+8*V
//就是(哈希高8位+K的内存大小+V的内存大小)*8+下一个溢出桶的地址
//hint是用户初始化的大小
func makemap(t *maptype, hint int, h *hmap) *hmap {
mem, overflow := math.MulUintptr(uintptr(hint), t.bucket.size)
if overflow || mem > maxAlloc {
hint = 0
}
// initialize Hmap
if h == nil {
//初始化hmap,赋0值
h = new(hmap)
}
//获取哈希种子
h.hash0 = fastrand()
// Find the size parameter B which will hold the requested # of elements.
// For hint < 0 overLoadFactor returns false since hint < bucketCnt.
B := uint8(0)
//计算桶的长度
for overLoadFactor(hint, B) {
B++
}
h.B = B
// allocate initial hash table
// if B == 0, the buckets field is allocated lazily later (in mapassign)
// If hint is large zeroing this memory could take a while.
if h.B != 0 {
var nextOverflow *bmap
//计算buckets的地址和空闲溢出桶的首地址
h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)
if nextOverflow != nil {
h.extra = new(mapextra)
h.extra.nextOverflow = nextOverflow
}
}
return h
}
计算长度
这也是计算是否需要扩容的条件
func overLoadFactor(count int, B uint8) bool {
//如果当前元素大于8个并且count>6.5*(2^B)则需要扩容,则B++
return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}
这里的疑问
- 是如果count<8则怎么办,这里是直接返回的
// 实际是返回1<<b ,即2^B
func bucketShift(b uint8) uintptr {
// 用位运算就是为了解决溢出问题为0造成死循环问题b & (sys.PtrSize*8 - 1)
return uintptr(1) << (b & (sys.PtrSize*8 - 1))
}
开启初始化桶
计算了桶的长度B后,如果B!=0开始初始化桶
//t map的类型,存放K和V的类型,这样就是可以确定bmap是存放多少字节了,计算方法是16+8*K+8*V
//就是(哈希高8位+K的内存大小+V的内存大小)*8+下一个溢出桶的地址
//b是用户初始化的大小
func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) {
//计算桶的长度2^b
base := bucketShift(b)
nbuckets := base
//如果b的大小大于等于4
if b >= 4 {
//创建空闲溢出桶,追加溢出桶
nbuckets += bucketShift(b - 4)
//计算溢出桶+原本buckets的大小
sz := t.bucket.size * nbuckets
//重新分配规格内存,以GoLang的规则分配内存
up := roundupsize(sz)
if up != sz {
//计算总桶数(空闲溢出桶+2^B)
nbuckets = up / t.bucket.size
}
}
if dirtyalloc == nil {
//分配内存空间,调用malloc函数,返回桶数组的地址
buckets = newarray(t.bucket, int(nbuckets))
} else {
// dirtyalloc was previously generated by
// the above newarray(t.bucket, int(nbuckets))
// but may not be empty.
buckets = dirtyalloc
size := t.bucket.size * nbuckets
if t.bucket.ptrdata != 0 {
memclrHasPointers(buckets, size)
} else {
memclrNoHeapPointers(buckets, size)
}
}
//如果有溢出桶
if base != nbuckets {
//获取第一个溢出桶的地址
nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize)))
//获取最后一个溢出桶的地址
last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize)))
//设置最后一个溢出桶中的下一个桶的*(bmap)设为第一个buckets的首地址,这样设置目的是表示没有空闲溢出了
last.setoverflow(t, (*bmap)(buckets))
}
//返回buckets的地址,和溢出桶的地址
return buckets, nextOverflow
}
重新计算内存大小
// Returns size of the memory block that mallocgc will allocate if you ask for the size.
func roundupsize(size uintptr) uintptr {
if size < _MaxSmallSize {
//通过查表法找比size大的内存
if size <= smallSizeMax-8 {
return uintptr(class_to_size[size_to_class8[divRoundUp(size, smallSizeDiv)]])
} else {
return uintptr(class_to_size[size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]])
}
}
if size+_PageSize < size {
return size
}
//
return alignUp(size, _PageSize)
}
//返回(n+2^(13)-1)低13位摸0的的值
func alignUp(n, a uintptr) uintptr {
return (n + a - 1) &^ (a - 1)
}
计算p+x的地址
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(p) + x)
}
设置bmap中overflow的值
func (b *bmap) setoverflow(t *maptype, ovf *bmap) {
//获取b.overflow的地址,其内容的类型是*bmap,则其地址类型为*(*bmap),在赋值为ovf
//相当于b.overflow=ovf
*(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf
}
初始化map的代码已经分析完了
map的读取
以string为key为主
func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr))
}
if h == nil || h.count == 0 {
//如果为空值或元素个数为0时则返回0值
return unsafe.Pointer(&zeroVal[0])
}
if h.flags&hashWriting != 0 {
//不支持并发写读
throw("concurrent map read and map write")
}
//获取字符串类型
key := stringStructOf(&ky)
if h.B == 0 {
// One-bucket table.
//如果只有一个桶
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
//如果key的长度小于32则直接字符串匹配或比较地址值
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
break
}
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
}
}
return unsafe.Pointer(&zeroVal[0])
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
//keymaybe为潜在值为一样的下标
for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
//跳过长度不一样的key
if b.tophash[i] == emptyRest {
break
}
continue
}
if k.str == key.str {
//如果地址一样则直接返回该bmap中k对应v的地址值
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
}
// check first 4 bytes
if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
continue
}
// check last 4 bytes
if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
continue
}
if keymaybe != bucketCnt {
//如果找到两个潜在字符串key则在dohash处理
// Two keys are potential matches. Use hash to distinguish them.
goto dohash
}
//如果是第一个,并且前4个字节一样并且后4个字节也一样,则是潜在相同key
keymaybe = i
}
if keymaybe != bucketCnt {
//找到潜在相同key
k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize))
//比较两个字符串是否相同
if memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize))
}
}
//找不到则返回0值
return unsafe.Pointer(&zeroVal[0])
}
dohash:
//获取key的哈希值
hash := t.hasher(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
//获取索引掩码
m := bucketMask(h.B)
//计算该key对应bucket中bmap的地址
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
//如果此时正在扩容
if !h.sameSizeGrow() {
// There used to be half as many buckets; mask down one more power of two. //如果不是等量扩容则旧桶数组长度是原来的一半
m >>= 1
}
//计算旧桶数组的地址
oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
if !evacuated(oldb) {
//如果没有迁移
b = oldb
}
}
//获取高位哈希值
top := tophash(hash)
for ; b != nil; b = b.overflow(t) {
//一直遍历当前索引的桶,直到后面没有溢出桶
for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
//在当前桶内遍历
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
//如果匹配成功则返回对应value的地址值
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
}
}
}
return unsafe.Pointer(&zeroVal[0])
}