一、TCP关闭连接过程
TCP连接的关闭主要是经历了四次挥手过程。
四次挥手过程:
- 主机A发送FIN后,进入终止等待(FIN-WAIT-1)状态;服务器B收到主机A的连接释放报文后,立即向主机A发送ACK,然后服务器B进入close-wait状态。
- 随后服务器B再发送FIN通知主机A关闭连接,服务器B进入最后确认(LAST-ACK)状态。
- 主机A收到服务器B的FIN请求后,会发送一个ACK告诉服务器B已收到,于是客户端和服务器都关闭了连接。
FIN —— 该报文段的发送方已经结束向对方发送数据。
这是因为服务端在LISTEN状态下,收到建立连接请求的SYN报文后,把ACK和SYN放在一个报文里发送给客户端。而关闭连接时,当收到对方的FIN报文时,仅仅表示对方不再发送数据了但是还能接收数据,己方是否现在关闭发送数据通道,需要上层应用来决定,因此,己方ACK和FIN一般都会分开发送。
一句话总结:是否关闭通道,是上层应用决定的,TCP无权将FIN和ACK一同发送。
二、close操作源代码分析(客户端)
在tcp关闭连接调用close函数时 ,将调用tcp_close函数
// 关闭一个socket static void tcp_close(struct sock *sk, int timeout) { /* * We need to grab some memory, and put together a FIN, * and then put it into the queue to be sent. */ sk->inuse = 1; // 监听型的socket要关闭建立的连接 if(sk->state == TCP_LISTEN) { /* Special case */ tcp_set_state(sk, TCP_CLOSE); // 关闭已经建立的连接 tcp_close_pending(sk); release_sock(sk); return; } sk->keepopen = 1; sk->shutdown = SHUTDOWN_MASK; if (!sk->dead) sk->state_change(sk); if (timeout == 0) { struct sk_buff *skb; /* * We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the * reader process may not have drained the data yet! */ // 销毁未处理的数据 while((skb=skb_dequeue(&sk->receive_queue))!=NULL) kfree_skb(skb, FREE_READ); /* * Get rid off any half-completed packets. */ // 有小数据包则发送 if (sk->partial) tcp_send_partial(sk); } /* * Timeout is not the same thing - however the code likes * to send both the same way (sigh). */ if(timeout) { tcp_set_state(sk, TCP_CLOSE); /* Dead */ } else { // 要不要发送fin包 if(tcp_close_state(sk,1)==1) { tcp_send_fin(sk); } } release_sock(sk); }
从代码可以看出该函数调用tcp_close_state函数。当连接处于ESTABLISHED(或SYN_RECV)状态时,tcp_close_state函数会把socket的状态修改为TCP_FIN_WAIT1。
// 根据socket的当前状态修改下一个状态 static int tcp_close_state(struct sock *sk, int dead) { // 默认状态是关闭 int ns=TCP_CLOSE; // 默认不需要发送fin包 int send_fin=0; switch(sk->state) { // 还没有建立连接,直接转为关闭状态 case TCP_SYN_SENT: /* No SYN back, no FIN needed */ break; // 收到syn包并且发出了ack,对方也可能已经收到了ack把状态置为已建立,所以需要发送fin包,并把状态置为fin_wait1 case TCP_SYN_RECV: case TCP_ESTABLISHED: /* Closedown begin */ ns=TCP_FIN_WAIT1; send_fin=1; break; // 本端已关闭,即已经发送了fin包,不需要再发送了,状态不变 case TCP_FIN_WAIT1: /* Already closing, or FIN sent: no change */ case TCP_FIN_WAIT2: case TCP_CLOSING: ns=sk->state; break; // 直接置为关闭状态 case TCP_CLOSE: case TCP_LISTEN: break; // 对端已关闭,现在是本端准备关闭,需要发送fin包,然后进入last_ack状态 case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and wait only for the ACK */ ns=TCP_LAST_ACK; send_fin=1; } tcp_set_state(sk,ns); /* * This is a (useful) BSD violating of the RFC. There is a * problem with TCP as specified in that the other end could * keep a socket open forever with no application left this end. * We use a 3 minute timeout (about the same as BSD) then kill * our end. If they send after that then tough - BUT: long enough * that we won't make the old 4*rto = almost no time - whoops * reset mistake. */ // 如果是本端已经关闭,在等待对端关闭的状态,则设置一个定时器,如果超时还没有收到对端的fin包则强行关闭 if(dead && ns==TCP_FIN_WAIT2) { int timer_active=del_timer(&sk->timer); if(timer_active) add_timer(&sk->timer); else reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT); } return send_fin; }
close函数调用tcp_send_fin进行第一次挥手,发送一个fin包,如果连接还有数据未发送,fin包要排队等待。
static void tcp_send_fin(struct sock *sk) { struct proto *prot =(struct proto *)sk->prot; struct tcphdr *th =(struct tcphdr *)&sk->dummy_th; struct tcphdr *t1; struct sk_buff *buff; struct device *dev=NULL; int tmp; release_sock(sk); /* in case the malloc sleeps. */ // 分配一个用于写的skb buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); sk->inuse = 1; if (buff == NULL) { /* This is a disaster if it occurs */ printk("tcp_send_fin: Impossible malloc failure"); return; } /* * Administrivia */ buff->sk = sk; // 当前已用的大小,一个tcp头,内容在下面赋值 buff->len = sizeof(*t1); buff->localroute = sk->localroute; // 指向可写的地址 t1 =(struct tcphdr *) buff->data; /* * Put in the IP header and routing stuff. */ // 构建IP头、MAC头 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, IPPROTO_TCP, sk->opt, sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); if (tmp < 0) { int t; /* * Finish anyway, treat this as a send that got lost. * (Not good). */ buff->free = 1; prot->wfree(sk,buff->mem_addr, buff->mem_len); sk->write_seq++; t=del_timer(&sk->timer); if(t) add_timer(&sk->timer); else reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); return; } /* * We ought to check if the end of the queue is a buffer and * if so simply add the fin to that buffer, not send it ahead. */ // 指向下一个可以写的地址 t1 =(struct tcphdr *)((char *)t1 +tmp); // 更新已使用的大小 buff->len += tmp; buff->dev = dev; // 写入tcp头的内容 memcpy(t1, th, sizeof(*t1)); // 序列号 t1->seq = ntohl(sk->write_seq); // 更新序列号 sk->write_seq++; buff->h.seq = sk->write_seq; // 也是一个ack包 t1->ack = 1; // 期待收到对端的下一个字节的序列号 t1->ack_seq = ntohl(sk->acked_seq); // 当前接收窗口的大小 t1->window = ntohs(sk->window=tcp_select_window(sk)); // 是个fin包 t1->fin = 1; t1->rst = 0; // tcp头长度 t1->doff = sizeof(*t1)/4; tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); /* * If there is data in the write queue, the fin must be appended to * the write queue. 
*/ // 还有数据没有发出去 if (skb_peek(&sk->write_queue) != NULL) { buff->free = 0; if (buff->next != NULL) { printk("tcp_send_fin: next != NULL\n"); skb_unlink(buff); } // 放到写队列末尾,等到前面的数据先发出去 skb_queue_tail(&sk->write_queue, buff); } else { // 立刻发出去,更新下一个数据包发送时第一个字节的序列号 sk->sent_seq = sk->write_seq; sk->prot->queue_xmit(sk, dev, buff, 0); // 重置定时器为rto reset_xmit_timer(sk, TIME_WRITE, sk->rto); } }
第二次挥手的代码在tcp_ack函数中,代码如下。最后把状态改成TCP_FIN_WAIT2,此时对端的状态是close_wait,即对端在等待其上层应用关闭连接后再发送fin包。
if (sk->state == TCP_FIN_WAIT1) { if (!sk->dead) sk->state_change(sk); // 对端已经收到本端的数据的序列号等于下一个应用层数据的序列号,说明本端的数据发送完毕 if (sk->rcv_ack_seq == sk->write_seq) { flag |= 1; // 写端关闭 sk->shutdown |= SEND_SHUTDOWN; tcp_set_state(sk, TCP_FIN_WAIT2); } }
第三次挥手是本端收到对端发送的fin包,处理函数在tcp_fin里。从代码可知,处于TCP_FIN_WAIT2状态时收到fin包,则把状态置为TCP_TIME_WAIT。
static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) { sk->fin_seq = th->seq + skb->len + th->syn + th->fin; if (!sk->dead) { sk->state_change(sk); sock_wake_async(sk->socket, 1); } switch(sk->state) { case TCP_SYN_RECV: case TCP_SYN_SENT: case TCP_ESTABLISHED: /* * move to CLOSE_WAIT, tcp_data() already handled * sending the ack. */ tcp_set_state(sk,TCP_CLOSE_WAIT); if (th->rst) sk->shutdown = SHUTDOWN_MASK; break; case TCP_CLOSE_WAIT: case TCP_CLOSING: /* * received a retransmission of the FIN, do * nothing. */ break; case TCP_TIME_WAIT: /* * received a retransmission of the FIN, * restart the TIME_WAIT timer. */ reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); return(0); case TCP_FIN_WAIT1: /* * This case occurs when a simultaneous close * happens, we must ack the received FIN and * enter the CLOSING state. * * This causes a WRITE timeout, which will either * move on to TIME_WAIT when we timeout, or resend * the FIN properly (maybe we get rid of that annoying * FIN lost hang). The TIME_WRITE code is already correct * for handling this timeout. */ if(sk->ip_xmit_timeout != TIME_WRITE) reset_xmit_timer(sk, TIME_WRITE, sk->rto); tcp_set_state(sk,TCP_CLOSING); break; case TCP_FIN_WAIT2: /* * received a FIN -- send ACK and enter TIME_WAIT */ reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); sk->shutdown|=SHUTDOWN_MASK; tcp_set_state(sk,TCP_TIME_WAIT); break; case TCP_CLOSE: /* * already in CLOSE */ break; default: tcp_set_state(sk,TCP_LAST_ACK); /* Start the timers. */ reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); return(0); } return(0); }
第四次挥手即发送一个ack并且启动2msl的定时器。