之前我们已经了解了GMP的基础知识,对G、M、P各自的职责和分工都有了大致的认识。本篇文章主要介绍一个goroutine是怎么被创建出来的。
从一个简单的例子开始
// TestNewGoroutine starts a single goroutine with a go statement so that the
// compiler output for that statement can be inspected in the listing that
// follows this example.
// NOTE(review): nothing here waits for the goroutine, so sayHello is not
// guaranteed to run before the test returns; that is fine for reading the
// generated assembly, but not for observing the output.
func TestNewGoroutine(t *testing.T) {
// The go statement under study: the listing shows it compiled to
// CALL runtime.newproc.
go func() {
sayHello()
}()
}
// sayHello prints a fixed greeting using the println builtin.
func sayHello() {
println("hello gmp")
}
使用 `go tool compile -N -l -S new_goroutine_test.go` 编译该文件(-N 禁用优化,-l 禁用内联,-S 输出汇编),
得到如下汇编代码:
"".TestNewGoroutine STEXT size=71 args=0x8 locals=0x18
// TEXT directive: frame size $24, argument size 8 (matches locals=0x18 and
// args=0x8 in the symbol header line).
0x0000 00000 (new_goroutine_test.go:5) TEXT "".TestNewGoroutine(SB), ABIInternal, $24-8
// Prologue check: load a pointer from TLS into CX and compare SP with the
// word at 16(CX). If SP is below-or-same (JLS), jump to offset 64, which calls
// runtime.morestack_noctxt and then jumps back to offset 0 to retry.
// NOTE(review): presumably CX holds the current g and 16(CX) is its stack
// guard, confirm against the runtime's g struct layout.
0x0000 00000 (new_goroutine_test.go:5) MOVQ (TLS), CX
0x0009 00009 (new_goroutine_test.go:5) CMPQ SP, 16(CX)
0x000d 00013 (new_goroutine_test.go:5) PCDATA $0, $-2
0x000d 00013 (new_goroutine_test.go:5) JLS 64
0x000f 00015 (new_goroutine_test.go:5) PCDATA $0, $-1
// Allocate the 24-byte frame, save the caller's BP at 16(SP) and point BP at
// that slot.
0x000f 00015 (new_goroutine_test.go:5) SUBQ $24, SP
0x0013 00019 (new_goroutine_test.go:5) MOVQ BP, 16(SP)
0x0018 00024 (new_goroutine_test.go:5) LEAQ 16(SP), BP
0x001d 00029 (new_goroutine_test.go:5) FUNCDATA $0, gclocals·2a5305abe05176240e61b8620e19a815(SB)
0x001d 00029 (new_goroutine_test.go:5) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
// Set up the two stack arguments of newproc(siz int32, fn *funcval):
// 0(SP) = siz = 0 (the closure takes no arguments) and
// 8(SP) = fn = address of TestNewGoroutine.func1·f.
0x001d 00029 (new_goroutine_test.go:6) MOVL $0, (SP)
0x0024 00036 (new_goroutine_test.go:6) LEAQ "".TestNewGoroutine.func1·f(SB), AX
0x002b 00043 (new_goroutine_test.go:6) MOVQ AX, 8(SP)
0x0030 00048 (new_goroutine_test.go:6) PCDATA $1, $0
0x0030 00048 (new_goroutine_test.go:6) CALL runtime.newproc(SB) // corresponds to the go statement on source line 6
// Epilogue: restore BP, release the 24-byte frame and return.
0x0035 00053 (new_goroutine_test.go:9) MOVQ 16(SP), BP
0x003a 00058 (new_goroutine_test.go:9) ADDQ $24, SP
0x003e 00062 (new_goroutine_test.go:9) RET
0x003f 00063 (new_goroutine_test.go:9) NOP
0x003f 00063 (new_goroutine_test.go:5) PCDATA $1, $-1
0x003f 00063 (new_goroutine_test.go:5) PCDATA $0, $-2
0x003f 00063 (new_goroutine_test.go:5) NOP
// Target of the JLS above (offset 64): call morestack, then restart the
// function from offset 0.
0x0040 00064 (new_goroutine_test.go:5) CALL runtime.morestack_noctxt(SB)
0x0045 00069 (new_goroutine_test.go:5) PCDATA $0, $-1
0x0045 00069 (new_goroutine_test.go:5) JMP 0
从以上汇编代码可以看到,用go关键字创建goroutine时,编译器生成的是对runtime.newproc的调用(CALL runtime.newproc(SB)),所以我们来看runtime.newproc函数。
// newproc is the function the compiler calls for a go statement (see the
// CALL runtime.newproc in the listing above). It creates a new g that will run
// fn and puts it on the current P's local run queue.
//
// siz is the size in bytes of the arguments to pass to fn; fn is the function
// value to run. The arguments themselves are read from the caller's stack
// starting at the address right after &fn.
func newproc(siz int32, fn *funcval) {
// Address one pointer-size past &fn, i.e. skip the function pointer to reach
// the first argument for fn.
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
// The g that is executing the go statement.
gp := getg()
// Return address in the caller (the instruction after the CALL to newproc);
// newproc1 stores it as newg.gopc.
pc := getcallerpc()
// Run the creation logic via systemstack. Note that this is a switch onto the
// system (g0) stack, not an OS system call; the original annotation gives the
// larger g0 stack as the reason.
systemstack(func() {
// Build the new g; this is where the goroutine is actually created.
newg := newproc1(fn, argp, siz, gp, pc)
// The P attached to the M we are running on.
_p_ := getg().m.p.ptr()
// Put the new g on that P's local run queue.
// NOTE(review): the `true` argument presumably requests the "run next" slot,
// confirm against runqput.
runqput(_p_, newg, true)
// NOTE(review): mainStarted presumably reports that the main goroutine has
// started, confirm at its definition.
if mainStarted {
// NOTE(review): presumably tries to get another P running so the new g can be
// picked up, confirm against wakep.
wakep()
}
})
}
newproc通过systemstack把执行切换到当前M的g0栈(系统栈)上来真正创建goroutine。注意systemstack只是一次栈切换,并不是系统调用;之所以要切换,是因为g0栈比普通goroutine的初始栈大,适合运行这类运行时代码。接下来看看newproc1函数。
// newproc1 obtains a g (recycled via gfget or freshly allocated via malg),
// copies narg bytes of arguments from argp onto its stack, initializes its
// saved scheduling context so that it will start executing fn and "return"
// into goexit, assigns it a goroutine id, and returns it in state _Grunnable.
//
// fn is the function to run, argp points at its arguments, narg is their size
// in bytes, callergp is the g that executed the go statement and callerpc is
// the address of that go statement's call site. The caller is responsible for
// putting the returned g on a run queue (newproc does this with runqput).
func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
// newproc calls this through systemstack, so this is expected to be g0.
_g_ := getg()
if fn == nil {
_g_.m.throwing = -1 // do not dump full stacks
throw("go of nil func value")
}
// Pin the current M. As the upstream comment on the next line says, this
// disables preemption because the P is held in a local variable below; it is
// not a mutex.
acquirem() // disable preemption because it can be holding p in a local var
siz := narg
// Round siz up to the next multiple of 8 (&^ is Go's bit-clear operator).
siz = (siz + 7) &^ 7
// We could allocate a larger initial stack if necessary.
// Not worth it: this is almost always an error.
// 4*sizeof(uintreg): extra space added below
// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
// Reject argument blocks too large to fit in a minimum-size stack.
if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
throw("newproc: function arguments too large for new goroutine")
}
_p_ := _g_.m.p.ptr()
// Try to reuse a free g cached on this P. The original annotation notes that
// finished g's are returned to the P's gFree list so they can be recycled.
newg := gfget(_p_)
if newg == nil { // nothing to recycle: only now is a new g really allocated
// Allocate a g with a _StackMin-sized stack (2 KB per the original note).
newg = malg(_StackMin)
// Atomically move the fresh g from _Gidle to _Gdead.
casgstatus(newg, _Gidle, _Gdead)
allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
}
if newg.stack.hi == 0 {
throw("newproc1: newg missing stack")
}
if readgstatus(newg) != _Gdead {
throw("newproc1: new g is not Gdead")
}
// Compute the initial stack pointer: reserve room below stack.hi for the
// arguments plus some slack, aligned to sys.SpAlign.
totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
totalSize += -totalSize & (sys.SpAlign - 1) // align to spAlign
sp := newg.stack.hi - totalSize
spArg := sp
if usesLR {
// caller's LR
*(*uintptr)(unsafe.Pointer(sp)) = 0
prepGoExitFrame(sp)
spArg += sys.MinFrameSize
}
if narg > 0 {
// Copy the arguments onto the new goroutine's stack.
memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
// This is a stack-to-stack copy. If write barriers
// are enabled and the source stack is grey (the
// destination is always black), then perform a
// barrier copy. We do this *after* the memmove
// because the destination stack may have garbage on
// it.
if writeBarrier.needed && !_g_.m.curg.gcscandone {
f := findfunc(fn.fn)
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap.nbit > 0 {
// We're in the prologue, so it's always stack map index 0.
bv := stackmapdata(stkmap, 0)
bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
}
}
}
// Zero the saved scheduling context: a recycled g (from gfget) may still carry
// stale values from its previous use.
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
// Initial stack top.
newg.sched.sp = sp
newg.stktopsp = sp
// Seed pc with goexit+PCQuantum. gostartcallfn below pushes this value onto the
// new stack as the return address, so when fn returns, execution continues
// inside goexit, which calls goexit1 to clean the goroutine up.
newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
/*
The goexit routine looks like this:
TEXT runtime·goexit(SB),NOSPLIT,$0-0
BYTE $0x90 // NOP
CALL runtime·goexit1(SB) // does not return (this is the second instruction)
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
*/
newg.sched.g = guintptr(unsafe.Pointer(newg))
// Push the current sched.pc (goexit+PCQuantum) as the return address and point
// sched.pc at fn, making it look as if goexit had just called fn.
gostartcallfn(&newg.sched, fn)
// PC of the go statement that created this goroutine.
newg.gopc = callerpc
// Record the chain of goroutines that led to this one.
newg.ancestors = saveAncestors(callergp)
newg.startpc = fn.fn
if _g_.m.curg != nil {
newg.labels = _g_.m.curg.labels
}
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
// The g is fully initialized: mark it runnable.
casgstatus(newg, _Gdead, _Grunnable)
// Assign a goroutine id from this P's cached range; when the cache is
// exhausted, reserve a new batch of _GoidCacheBatch ids from sched.goidgen.
if _p_.goidcache == _p_.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
_p_.goidcache -= _GoidCacheBatch - 1
_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
}
newg.goid = int64(_p_.goidcache)
_p_.goidcache++
if raceenabled {
newg.racectx = racegostart(callerpc)
}
if trace.enabled {
traceGoCreate(newg, newg.startpc)
}
// Undo the acquirem above.
releasem(_g_.m)
return newg
}
其中,gostartcallfn函数很是巧妙:它的主要作用是把goexit+1这条指令的地址作为返回地址压入新goroutine的栈,这样当该goroutine的函数执行结束返回时,就会自动进入goexit进行资源回收。我们看看它是怎么做到的。
// gostartcallfn rewrites gobuf so that it looks as if it had just executed a
// call to the function value fv and then immediately saved its state: the
// goroutine's execution context is left "in the middle of calling fn".
//
// A nil fv is replaced by the entry point of nilfunc; fv itself (nil or not)
// is always forwarded to gostartcall as the context pointer.
func gostartcallfn(gobuf *gobuf, fv *funcval) {
	if fv == nil {
		// No function value: use nilfunc's entry address instead.
		gostartcall(gobuf, unsafe.Pointer(funcPC(nilfunc)), unsafe.Pointer(fv))
		return
	}
	// Normal case: start at the code address stored in the function value.
	gostartcall(gobuf, unsafe.Pointer(fv.fn), unsafe.Pointer(fv))
}
// adjust Gobuf as if it executed a call to fn with context ctxt
// and then did an immediate gosave.
//
// Concretely: the current buf.pc is pushed onto the stack at buf.sp as the
// return address, buf.sp is lowered accordingly, buf.pc is pointed at fn and
// buf.ctxt is set to ctxt.
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
sp := buf.sp
// When registers are wider than pointers, first push one extra zeroed
// pointer-size word.
if sys.RegSize > sys.PtrSize {
sp -= sys.PtrSize
*(*uintptr)(unsafe.Pointer(sp)) = 0
}
// Make room for one pointer-size slot holding the return address (an address,
// not a return value).
sp -= sys.PtrSize
// Store the old buf.pc in that slot. newproc1 sets it to goexit+PCQuantum
// before calling gostartcallfn, so that address ends up on top of the new
// goroutine's stack.
*(*uintptr)(unsafe.Pointer(sp)) = buf.pc
// Publish the adjusted stack top.
buf.sp = sp
// Execution will begin at fn's entry address.
buf.pc = uintptr(fn)
buf.ctxt = ctxt
}
这样一个goroutine就被创建好了,接下来就是等待某个M来执行它,M的调度逻辑将在之后的文章中展开。
为了让大家更好的理解整个流程,这里我画了一个整体流程图。