go gmp --- goroutine创建源码分析

之前我们已经了解了GMP的基础知识,对G、M、P各自的职责和分工都有了大致的认识。本篇文章主要介绍一个goroutine是怎么被创建出来的。

从一个简单的例子开始

// TestNewGoroutine starts a single goroutine with the go statement and returns
// without waiting for it. It exists so that the compiled form of the go
// statement can be inspected.
func TestNewGoroutine(t *testing.T) {
	go func() { // the go statement under study
		sayHello()
	}()
}

// sayHello writes a fixed greeting using the builtin println.
func sayHello() {
	const greeting = "hello gmp"
	println(greeting)
}

使用 go tool compile -N -l -S new_goroutine_test.go 编译该文件(-N 禁用优化,-l 禁用内联,-S 输出汇编),得到如下汇编代码:

"".TestNewGoroutine STEXT size=71 args=0x8 locals=0x18
        0x0000 00000 (new_goroutine_test.go:5)  TEXT    "".TestNewGoroutine(SB), ABIInternal, $24-8
        0x0000 00000 (new_goroutine_test.go:5)  MOVQ    (TLS), CX // load from thread-local storage (presumably the current g - confirm)
        0x0009 00009 (new_goroutine_test.go:5)  CMPQ    SP, 16(CX) // compare SP with the word at 16(CX) (presumably the stack guard - confirm)
        0x000d 00013 (new_goroutine_test.go:5)  PCDATA  $0, $-2
        0x000d 00013 (new_goroutine_test.go:5)  JLS     64 // if lower or same, jump to offset 64, the morestack call below
        0x000f 00015 (new_goroutine_test.go:5)  PCDATA  $0, $-1
        0x000f 00015 (new_goroutine_test.go:5)  SUBQ    $24, SP // allocate the 24-byte frame
        0x0013 00019 (new_goroutine_test.go:5)  MOVQ    BP, 16(SP)
        0x0018 00024 (new_goroutine_test.go:5)  LEAQ    16(SP), BP
        0x001d 00029 (new_goroutine_test.go:5)  FUNCDATA        $0, gclocals·2a5305abe05176240e61b8620e19a815(SB)
        0x001d 00029 (new_goroutine_test.go:5)  FUNCDATA        $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
        0x001d 00029 (new_goroutine_test.go:6)  MOVL    $0, (SP) // first stack slot for the call below: the value 0
        0x0024 00036 (new_goroutine_test.go:6)  LEAQ    "".TestNewGoroutine.func1·f(SB), AX
        0x002b 00043 (new_goroutine_test.go:6)  MOVQ    AX, 8(SP) // second stack slot: address of the closure's function value
        0x0030 00048 (new_goroutine_test.go:6)  PCDATA  $1, $0
        0x0030 00048 (new_goroutine_test.go:6)  CALL    runtime.newproc(SB) // this call is what the go keyword on source line 6 compiles to
        0x0035 00053 (new_goroutine_test.go:9)  MOVQ    16(SP), BP
        0x003a 00058 (new_goroutine_test.go:9)  ADDQ    $24, SP
        0x003e 00062 (new_goroutine_test.go:9)  RET
        0x003f 00063 (new_goroutine_test.go:9)  NOP
        0x003f 00063 (new_goroutine_test.go:5)  PCDATA  $1, $-1
        0x003f 00063 (new_goroutine_test.go:5)  PCDATA  $0, $-2
        0x003f 00063 (new_goroutine_test.go:5)  NOP
        0x0040 00064 (new_goroutine_test.go:5)  CALL    runtime.morestack_noctxt(SB) // reached from the JLS above
        0x0045 00069 (new_goroutine_test.go:5)  PCDATA  $0, $-1
        0x0045 00069 (new_goroutine_test.go:5)  JMP     0 // restart the function from offset 0

从以上汇编代码可以看到,go关键字被编译成了对runtime.newproc的调用:调用前先把0和闭包函数值的地址依次放到栈上的(SP)和8(SP),它们分别对应newproc的两个参数siz和fn。所以我们来到runtime.newproc函数。

// newproc is the function the compiled go statement calls. siz is the size in
// bytes of the arguments and fn is the function value to run; the arguments
// are read from the memory immediately after the fn slot.
func newproc(siz int32, fn *funcval) {
	argp := add(unsafe.Pointer(&fn), sys.PtrSize)// step over the fn pointer slot to get the address of the first argument
	gp := getg()// the current g, i.e. the goroutine executing the go statement
	pc := getcallerpc()// caller's pc: where the caller resumes after this call
	// Run the closure through systemstack, which per the original note switches
	// to the larger g0 stack.
	// NOTE(review): the original note called this a "system call"; nothing in
	// this block issues one, it appears to be a stack switch only - confirm
	// against the runtime documentation.
	systemstack(func() {
		newg := newproc1(fn, argp, siz, gp, pc)// does the real work of building the new g

		_p_ := getg().m.p.ptr()// the P attached to the current M
		runqput(_p_, newg, true)// put newg on that P's local run queue

		if mainStarted {// NOTE(review): original note said "has M0 started"; presumably this tracks the main goroutine instead - confirm
			wakep()// presumably gets another P running to pick up work - confirm
		}
	})
}

需要注意,systemstack并不是系统调用,它只是把执行切换到当前M的g0栈(即系统栈,空间比普通goroutine的栈大)上去运行传入的函数。真正创建goroutine的工作由newproc1在g0栈上完成,接下来看看newproc1函数。

// newproc1 builds a g in state _Grunnable that will start running fn and
// returns it. argp points at narg bytes of arguments, which are copied onto
// the new g's stack. callergp is the goroutine that executed the go statement
// and callerpc is the pc recorded as the creation site. The returned g is not
// queued here.
func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
	_g_ := getg()// g0 per the original note

	if fn == nil {
		_g_.m.throwing = -1 // do not dump full stacks
		throw("go of nil func value")
	}
	// Pin the current M until the matching releasem at the end.
	// NOTE(review): the original note described this as locking m against
	// concurrent modification; the upstream comment on the next line gives the
	// actual reason, which is disabling preemption.
	acquirem() // disable preemption because it can be holding p in a local var
	siz := narg
	siz = (siz + 7) &^ 7 // round up to a multiple of 8; &^ is AND-NOT (bit clear), like C's "& ~"

	// We could allocate a larger initial stack if necessary.
	// Not worth it: this is almost always an error.
	// 4*sizeof(uintreg): extra space added below
	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
	if siz >= _StackMin-4*sys.RegSize-sys.RegSize {// reject argument blocks too large for a _StackMin-sized stack
		throw("newproc: function arguments too large for new goroutine")
	}

	_p_ := _g_.m.p.ptr()
	newg := gfget(_p_)// try to reuse a g from this P's free list; per the original note, finished g's are recycled there
	if newg == nil { // nothing to reuse, so allocate a fresh g
		newg = malg(_StackMin)// new g with a _StackMin-sized stack (2 KB per the original note)
		casgstatus(newg, _Gidle, _Gdead)// CAS the status from _Gidle to _Gdead
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	// Compute the initial stack pointer, leaving room below stack.hi for the
	// arguments plus the extra space described above.
	totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
	totalSize += -totalSize & (sys.SpAlign - 1)                  // align to spAlign
	sp := newg.stack.hi - totalSize
	spArg := sp
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
		spArg += sys.MinFrameSize
	}
	if narg > 0 {
		// Copy the arguments onto the new goroutine's stack.
		memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
		// This is a stack-to-stack copy. If write barriers
		// are enabled and the source stack is grey (the
		// destination is always black), then perform a
		// barrier copy. We do this *after* the memmove
		// because the destination stack may have garbage on
		// it.
		if writeBarrier.needed && !_g_.m.curg.gcscandone {
			f := findfunc(fn.fn)
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			if stkmap.nbit > 0 {
				// We're in the prologue, so it's always stack map index 0.
				bv := stackmapdata(stkmap, 0)
				bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
			}
		}
	}

	// Zero the saved execution context: the g may have been reused from the
	// free list and still hold stale values.
	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp// top of the new stack
	newg.stktopsp = sp
	// pc is set to goexit+PCQuantum for now; gostartcallfn below pushes this
	// value as the return address, so that fn returns into goexit.
	newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
	/*
	goexit is defined as follows:
	TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return; this is the second instruction
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
	*/
	newg.sched.g = guintptr(unsafe.Pointer(newg))
	gostartcallfn(&newg.sched, fn)// pushes the saved pc (goexit) as a return address and points pc at fn, as if goexit had called fn
	newg.gopc = callerpc// pc of the code that executed the go statement
	newg.ancestors = saveAncestors(callergp)// record the chain of creating goroutines
	newg.startpc = fn.fn
	if _g_.m.curg != nil {
		newg.labels = _g_.m.curg.labels
	}
	if isSystemGoroutine(newg, false) {
		atomic.Xadd(&sched.ngsys, +1)
	}
	casgstatus(newg, _Gdead, _Grunnable)// mark the new g runnable

	// Assign a goroutine id from the P-local cache, refilling the cache in
	// batches of _GoidCacheBatch when it runs out.
	if _p_.goidcache == _p_.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
		_p_.goidcache -= _GoidCacheBatch - 1
		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
	}
	newg.goid = int64(_p_.goidcache)
	_p_.goidcache++
	if raceenabled {
		newg.racectx = racegostart(callerpc)
	}
	if trace.enabled {
		traceGoCreate(newg, newg.startpc)
	}
	releasem(_g_.m)// undo the acquirem above

	return newg
}

其中,gostartcallfn函数很是巧妙,它主要的作用就是把goexit+1这条指令的地址当作返回地址压入新goroutine的栈,同时把pc指向fn的入口。这样当fn执行完返回时,就会“返回”到goexit中,由goexit完成资源回收。我们看看它是怎么做到的。

// adjust Gobuf as if it executed a call to fn
// and then did an immediate gosave.
// In other words, it sets up the g's saved execution context (Gobuf) so that
// the g looks as though it is in the middle of calling fv.
func gostartcallfn(gobuf *gobuf, fv *funcval) {
	var fn unsafe.Pointer// entry address of the function to run
	if fv != nil {
		fn = unsafe.Pointer(fv.fn)
	} else {
		// No function value supplied: fall back to the entry of nilfunc.
		fn = unsafe.Pointer(funcPC(nilfunc))
	}
	// fv itself is passed along as the context pointer.
	gostartcall(gobuf, fn, unsafe.Pointer(fv))
}
// adjust Gobuf as if it executed a call to fn with context ctxt
// and then did an immediate gosave.
// The pc currently saved in buf is pushed onto the stack as the return
// address, then buf.pc is redirected to fn.
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
	sp := buf.sp
	if sys.RegSize > sys.PtrSize {
		// Registers are wider than pointers: push one extra zeroed word first.
		sp -= sys.PtrSize
		*(*uintptr)(unsafe.Pointer(sp)) = 0
	}
	sp -= sys.PtrSize// make room for the return address (the original note said "return value")
	*(*uintptr)(unsafe.Pointer(sp)) = buf.pc// push the old buf.pc (the address of goexit+1 when called from newproc1) onto the goroutine's stack
	buf.sp = sp// saved stack pointer now points at the pushed return address
	buf.pc = uintptr(fn)// saved program counter now points at fn's entry
	buf.ctxt = ctxt
}

这样一个goroutine就被创建好了,接下来就是等待某个m来执行它了,m调度的逻辑将在之后展开。
为了让大家更好的理解整个流程,这里我画了一个整体流程图。
(图:go gmp --- goroutine创建源码分析 整体流程图)

上一篇:Go语言核心36讲(Go语言进阶技术十)--学习笔记


下一篇:ObjectOutputStream 追加写入读取错误 - 自己的实现方案