深入理解TCP协议及其源代码

一、TCP关闭连接过程

TCP连接的关闭主要是经历了四次挥手过程。

四次挥手过程:

  1. 主机A发送FIN后,进入终止等待状态,服务器B收到主机A连接的释放报文,就立即给主机A发送ACK。然后服务器B就进入了close-wait状态。

  2. 待服务器B自己的数据也发送完毕后,服务器B发送FIN通知主机A关闭连接,随后服务器B进入最后确认(LAST-ACK)状态。

  3. 主机A收到服务器B的FIN请求后,会发送一个ACK告诉服务器B已收到,然后主机A进入TIME-WAIT状态,等待2MSL后关闭;服务器B收到这个ACK后即关闭连接。

FIN —— 该报文段的发送方已经结束向对方发送数据。

为什么建立连接只需要三次握手,而关闭连接却需要四次挥手?这是因为服务端在LISTEN状态下,收到建立连接请求的SYN报文后,可以把ACK和SYN放在一个报文里发送给客户端。而关闭连接时,当收到对方的FIN报文时,仅仅表示对方不再发送数据了,但是还能接收数据;己方是否现在关闭发送数据通道,需要由上层应用来决定,因此,己方的ACK和FIN一般都会分开发送。

一句话总结:是否关闭发送通道是由上层应用决定的,TCP不能擅自将FIN和ACK合并在一起发送。



深入理解TCP协议及其源代码

 

二、close操作源代码分析(客户端)

应用程序调用close函数关闭TCP连接时,内核将调用tcp_close函数。

/*
 * Close a socket.
 *
 * sk:      socket being closed
 * timeout: zero for an ordinary close -- unread receive data is thrown
 *          away, any partial packet is pushed out, and tcp_close_state()
 *          decides whether a FIN has to be sent; non-zero moves the
 *          socket straight to TCP_CLOSE without sending anything.
 */
static void tcp_close(struct sock *sk, int timeout)
{
	struct sk_buff *unread;

	/*
	 * We need to grab some memory, and put together a FIN,
	 * and then put it into the queue to be sent.
	 */
	sk->inuse = 1;

	/* Special case: a listening socket closes its pending connections. */
	if (sk->state == TCP_LISTEN)
	{
		tcp_set_state(sk, TCP_CLOSE);
		tcp_close_pending(sk);
		release_sock(sk);
		return;
	}

	sk->keepopen = 1;
	sk->shutdown = SHUTDOWN_MASK;

	if (!sk->dead)
		sk->state_change(sk);

	if (timeout)
	{
		/*
		 * Timeout is not the same thing - however the code likes
		 * to send both the same way (sigh).
		 */
		tcp_set_state(sk, TCP_CLOSE);	/* Dead */
	}
	else
	{
		/*
		 * Flush the receive buffers. This is done only on the
		 * descriptor close, not on protocol-sourced closes, because
		 * the reader process may not have drained the data yet.
		 */
		for (;;)
		{
			unread = skb_dequeue(&sk->receive_queue);
			if (unread == NULL)
				break;
			kfree_skb(unread, FREE_READ);
		}

		/* Get rid of any half-completed packet by sending it. */
		if (sk->partial)
			tcp_send_partial(sk);

		/* Move to the next state; send a FIN only if it asks for one. */
		if (tcp_close_state(sk, 1) == 1)
			tcp_send_fin(sk);
	}

	release_sock(sk);
}

  从代码可以看出,该函数调用了tcp_close_state函数。对于已建立连接(TCP_ESTABLISHED)的socket,tcp_close_state会把状态修改为TCP_FIN_WAIT1,并返回1表示需要发送fin包。

/*
 * Move the socket to the state that follows a local close, based on its
 * current state, and tell the caller whether a FIN has to be sent.
 *
 * sk:   socket being closed
 * dead: when non-zero and the resulting state is TCP_FIN_WAIT2, the
 *       socket timer is re-added or armed with TCP_FIN_TIMEOUT
 *
 * Returns 1 if the caller must send a FIN, 0 otherwise.
 */
static int tcp_close_state(struct sock *sk, int dead)
{
	int need_fin = 0;
	int next_state;

	switch (sk->state)
	{
		case TCP_SYN_RECV:
		case TCP_ESTABLISHED:
			/* Closedown begin: send our FIN and wait in FIN_WAIT1. */
			next_state = TCP_FIN_WAIT1;
			need_fin = 1;
			break;

		case TCP_CLOSE_WAIT:
			/*
			 * They have FIN'd us. We send our FIN and
			 * wait only for the ACK.
			 */
			next_state = TCP_LAST_ACK;
			need_fin = 1;
			break;

		case TCP_FIN_WAIT1:
		case TCP_FIN_WAIT2:
		case TCP_CLOSING:
			/* Already closing, or FIN sent: no change. */
			next_state = sk->state;
			break;

		default:
			/*
			 * TCP_SYN_SENT (no SYN back, no FIN needed), TCP_CLOSE,
			 * TCP_LISTEN and every state not listed above go
			 * straight to closed without a FIN.
			 */
			next_state = TCP_CLOSE;
			break;
	}

	tcp_set_state(sk, next_state);

	if (!dead || next_state != TCP_FIN_WAIT2)
		return need_fin;

	/*
	 *	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 */
	if (del_timer(&sk->timer))
		add_timer(&sk->timer);	/* a timer was pending: put it back */
	else
		reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);

	return need_fin;
}

  close函数调用tcp_send_fin进行第一次挥手,发送一个fin包,如果连接还有数据未发送,fin包要排队等待。

/*
 * Build a FIN segment for sk. If the write queue is empty the segment is
 * handed to queue_xmit() at once; otherwise it is appended to the tail of
 * the write queue so it goes out after the data already waiting there.
 */
static void tcp_send_fin(struct sock *sk)
{
	struct proto *prot =(struct proto *)sk->prot;
	struct tcphdr *th =(struct tcphdr *)&sk->dummy_th;
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct device *dev=NULL;
	int tmp;
		
	release_sock(sk); /* in case the malloc sleeps. */
	// Allocate a write buffer for the segment, then mark the socket in use again
	buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
	sk->inuse = 1;

	if (buff == NULL)
	{
		/* This is a disaster if it occurs */
		/* NOTE(review): the message has no trailing newline, unlike the other printk below. */
		printk("tcp_send_fin: Impossible malloc failure");
		return;
	}

	/*
	 *	Administrivia
	 */
	 
	buff->sk = sk;
	// Bytes used so far: one TCP header, whose contents are filled in below
	buff->len = sizeof(*t1);
	buff->localroute = sk->localroute;
	// Start of the writable area of the buffer
	t1 =(struct tcphdr *) buff->data;

	/*
	 *	Put in the IP header and routing stuff. 
	 */
	// build_header() returns the number of bytes it wrote, or a negative value on failure
	tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
			   IPPROTO_TCP, sk->opt,
			   sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
	if (tmp < 0) 
	{
		int t;
  		/*
  		 *	Finish anyway, treat this as a send that got lost. 
  		 *	(Not good).
  		 */
  		 
		// Free the buffer, still advance write_seq past the FIN, and make sure a timer is running
	  	buff->free = 1;
		prot->wfree(sk,buff->mem_addr, buff->mem_len);
		sk->write_seq++;
		t=del_timer(&sk->timer);
		if(t)
			add_timer(&sk->timer);
		else
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
		return;
	}
	
	/*
	 *	We ought to check if the end of the queue is a buffer and
	 *	if so simply add the fin to that buffer, not send it ahead.
	 */
	// Skip past the lower-layer headers just written; t1 now points at the TCP header slot
	t1 =(struct tcphdr *)((char *)t1 +tmp);
	// Account for the header bytes written by build_header()
	buff->len += tmp;
	buff->dev = dev;
	// Start from the socket's template TCP header
	memcpy(t1, th, sizeof(*t1));
	// Sequence number of the FIN
	// NOTE(review): ntohl/ntohs are used here in the host-to-network direction -- presumably equivalent to htonl/htons; confirm
	t1->seq = ntohl(sk->write_seq);
	// Advance write_seq past the FIN and record the new value in the buffer
	sk->write_seq++;
	buff->h.seq = sk->write_seq;
	// The segment also carries an ACK
	t1->ack = 1;
	// Acknowledgement number taken from sk->acked_seq
	t1->ack_seq = ntohl(sk->acked_seq);
	// Advertise the window chosen by tcp_select_window() and remember it in sk->window
	t1->window = ntohs(sk->window=tcp_select_window(sk));
	// Mark the segment as a FIN, not a RST
	t1->fin = 1;
	t1->rst = 0;
	// TCP header length in 4-byte units
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);

	/*
	 * If there is data in the write queue, the fin must be appended to
	 * the write queue.
 	 */
	// Data is still waiting to be sent
 	if (skb_peek(&sk->write_queue) != NULL) 
 	{
  		buff->free = 0;
		if (buff->next != NULL) 
		{
			printk("tcp_send_fin: next != NULL\n");
			skb_unlink(buff);
		}
		// Queue the FIN at the tail so the earlier data goes out first
		skb_queue_tail(&sk->write_queue, buff);
  	} 
  	else 
  	{		// Send immediately; sent_seq is brought up to write_seq
        	sk->sent_seq = sk->write_seq;
		sk->prot->queue_xmit(sk, dev, buff, 0);
		// Restart the write timer with the current rto
		reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}

  第二次挥手的代码在tcp_ack函数中,代码如下。本端收到对端对fin包的ack后,把状态改成TCP_FIN_WAIT2;此时对端的状态是close_wait,即等待对端的应用层关闭连接后再发送fin包。

/*
 * Excerpt from tcp_ack(): handling of an incoming ACK while the socket is
 * in FIN_WAIT1. Once the ACK covers everything up to write_seq, the send
 * side is marked shut down and the socket moves to FIN_WAIT2.
 */
if (sk->state == TCP_FIN_WAIT1) 
	{

		if (!sk->dead) 
			sk->state_change(sk);
		// The peer has acknowledged up to write_seq, so everything we queued (FIN included) has been received
		if (sk->rcv_ack_seq == sk->write_seq) 
		{
			// NOTE(review): meaning of bit 0 of flag is defined in the part of tcp_ack() not shown here
			flag |= 1;
			// Mark the sending direction as shut down
			sk->shutdown |= SEND_SHUTDOWN;
			tcp_set_state(sk, TCP_FIN_WAIT2);
		}
	}	

  第三次挥手是本端收到对端发送的fin包,处理函数是tcp_fin。从代码可知,在TCP_FIN_WAIT2状态下收到fin包时,会把状态置为TCP_TIME_WAIT。

/*
 * Process a received FIN.
 *
 * Records in sk->fin_seq the value seq + segment length + SYN flag + FIN
 * flag, notifies the socket owner unless the socket is dead, and then
 * advances the connection state according to the state the FIN arrived in.
 *
 * skb: buffer holding the received segment
 * sk:  socket the segment belongs to
 * th:  TCP header of the segment
 *
 * Always returns 0.
 */
static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
	sk->fin_seq = th->seq + skb->len + th->syn + th->fin;

	if (!sk->dead)
	{
		sk->state_change(sk);
		sock_wake_async(sk->socket, 1);
	}

	switch (sk->state)
	{
		case TCP_CLOSE_WAIT:
		case TCP_CLOSING:
			/* Received a retransmission of the FIN, do nothing. */
		case TCP_CLOSE:
			/* Already in CLOSE. */
			break;

		case TCP_SYN_RECV:
		case TCP_SYN_SENT:
		case TCP_ESTABLISHED:
			/*
			 * Move to CLOSE_WAIT; tcp_data() already handled
			 * sending the ack.
			 */
			tcp_set_state(sk, TCP_CLOSE_WAIT);
			if (th->rst)
				sk->shutdown = SHUTDOWN_MASK;
			break;

		case TCP_FIN_WAIT1:
			/*
			 * Simultaneous close: we must ack the received FIN
			 * and enter the CLOSING state.
			 *
			 * This causes a WRITE timeout, which will either
			 * move on to TIME_WAIT when we timeout, or resend
			 * the FIN properly (maybe we get rid of that annoying
			 * FIN lost hang). The TIME_WRITE code is already
			 * correct for handling this timeout.
			 */
			if (sk->ip_xmit_timeout != TIME_WRITE)
				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
			tcp_set_state(sk, TCP_CLOSING);
			break;

		case TCP_FIN_WAIT2:
			/* Received a FIN -- send ACK and enter TIME_WAIT. */
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			sk->shutdown |= SHUTDOWN_MASK;
			tcp_set_state(sk, TCP_TIME_WAIT);
			break;

		case TCP_TIME_WAIT:
			/*
			 * Received a retransmission of the FIN,
			 * restart the TIME_WAIT timer.
			 */
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			break;

		default:
			/* Any other state: go to LAST_ACK and start the timers. */
			tcp_set_state(sk, TCP_LAST_ACK);
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			break;
	}

	return 0;
}

  
第四次挥手即发送一个ack并且启动2msl的定时器。

上一篇:2021.12.4


下一篇:Redis系列(二)-Hredis客户端设计及开源