TCP输入 之 tcp_v4_rcv

tcp_v4_rcv函数为TCP的总入口,数据包从IP层传递上来,进入该函数;其协议操作函数结构如下所示,其中handler即为IP层向TCP传递数据包的回调函数,设置为tcp_v4_rcv;

1 static struct net_protocol tcp_protocol = {    /* TCP's protocol operations structure */
2     .early_demux    =    tcp_v4_early_demux,
3     .early_demux_handler =  tcp_v4_early_demux,
4     .handler    =    tcp_v4_rcv,    /* callback the IP layer uses to pass packets up to TCP */
5     .err_handler    =    tcp_v4_err,
6     .no_policy    =    1,
7     .netns_ok    =    1,
8     .icmp_strict_tag_validation = 1,
9 };

 

在IP层处理本地数据包时,会获取到上述结构的实例,并且调用实例的handler回调,也就是调用了tcp_v4_rcv;

 1 static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 2 {    /* NOTE: abridged excerpt - the declarations of ipprot/protocol and the resubmit label are not shown */
 3     /* Look up the protocol handler structure by protocol number */
 4     ipprot = rcu_dereference(inet_protos[protocol]);
 5     if (ipprot) {
 6         int ret;
 7 
 8         /* Receive function of the upper-layer protocol (tcp_v4_rcv for TCP) */
 9         ret = ipprot->handler(skb);
10         if (ret < 0) {    /* negative return: -ret becomes the protocol and delivery is resubmitted */
11             protocol = -ret;
12             goto resubmit;
13         }
14         __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
15     } 
16 }

 

tcp_v4_rcv函数主要做以下几个工作:(1) 设置TCP_CB (2) 查找控制块 (3) 根据控制块状态做不同处理,包括TCP_TIME_WAIT状态处理,TCP_NEW_SYN_RECV状态处理,TCP_LISTEN状态处理 (4) 接收TCP段;

  1 int tcp_v4_rcv(struct sk_buff *skb)
  2 {
  3     struct net *net = dev_net(skb->dev);
  4     const struct iphdr *iph;
  5     const struct tcphdr *th;
  6     bool refcounted;
  7     struct sock *sk;
  8     int ret;
  9 
 10     /* Drop packets that are not addressed to this host */
 11     if (skb->pkt_type != PACKET_HOST)
 12         goto discard_it;
 13 
 14     /* Count it even if it's bad */
 15     __TCP_INC_STATS(net, TCP_MIB_INSEGS);
 16 
 17     /* Check that the fixed-size TCP header is available, copying it in from the fragments if needed; drop on failure */
 18     if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 19         goto discard_it;
 20 
 21     /* Fetch the TCP header */
 22     th = (const struct tcphdr *)skb->data;
 23 
 24     /* doff (header length in 32-bit words) is smaller than the fixed TCP header */
 25     if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
 26         goto bad_packet;
 27     
 28     /* Same check for the full header length given by doff (doff * 4 bytes) */
 29     if (!pskb_may_pull(skb, th->doff * 4))
 30         goto discard_it;
 31 
 32     /* An explanation is required here, I think.
 33      * Packet length and doff are validated by header prediction,
 34      * provided case of th->doff==0 is eliminated.
 35      * So, we defer the checks. */
 36 
 37     if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
 38         goto csum_error;
 39 
 40     /* Fetch the TCP header again, after the second pull above */
 41     th = (const struct tcphdr *)skb->data;
 42     /* Fetch the IP header */
 43     iph = ip_hdr(skb);
 44     /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
 45      * barrier() makes sure compiler wont play fool^Waliasing games.
 46      */
 47     /* Move the IPCB into TCP_SKB_CB(skb)->header.h4 */
 48     memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
 49         sizeof(struct inet_skb_parm));
 50     barrier();
 51 
 52     /* Starting sequence number, converted to host byte order */
 53     TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 54     /* Ending sequence number: SYN and FIN each count as 1, plus the length after the TCP header */
 55     TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 56                     skb->len - th->doff * 4);
 57     /* Acknowledgment number, converted to host byte order */
 58     TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 59     /* Flags byte: the 14th byte of the TCP header */
 60     TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
 61     TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 62     /* DS (service) field of the IP header */
 63     TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
 64     TCP_SKB_CB(skb)->sacked     = 0;
 65 
 66 lookup:
 67     /* Look up the socket (control block) for this segment */
 68     sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
 69                    th->dest, &refcounted);
 70     if (!sk)
 71         goto no_tcp_socket;
 72 
 73 process:
 74 
 75     /* TIME_WAIT sockets are handled separately at do_time_wait */
 76     if (sk->sk_state == TCP_TIME_WAIT)
 77         goto do_time_wait;
 78 
 79     /* Handling of the TCP_NEW_SYN_RECV state */
 80     if (sk->sk_state == TCP_NEW_SYN_RECV) {
 81         struct request_sock *req = inet_reqsk(sk);
 82         struct sock *nsk;
 83 
 84         /* Continue with the listener socket recorded in the request */
 85         sk = req->rsk_listener;
 86         if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
 87             sk_drops_add(sk, skb);
 88             reqsk_put(req);
 89             goto discard_it;
 90         }
 91 
 92         /* The listener is no longer in LISTEN state */
 93         if (unlikely(sk->sk_state != TCP_LISTEN)) {
 94             /* Remove the request from the connection queue and release it */
 95             inet_csk_reqsk_queue_drop_and_put(sk, req);
 96 
 97             /* Look the socket up again based on the skb */
 98             goto lookup;
 99         }
100         /* We own a reference on the listener, increase it again
101          * as we might lose it too soon.
102          */
103         sock_hold(sk);
104         refcounted = true;
105 
106         /* Process the ACK of the three-way handshake; returns the new socket on success */
107         nsk = tcp_check_req(sk, skb, req, false);
108 
109         /* Failure: release the request and drop the segment */
110         if (!nsk) {
111             reqsk_put(req);
112             goto discard_and_relse;
113         }
114 
115         /* No new socket was created: fall through to the processing below */
116         if (nsk == sk) {
117             reqsk_put(req);
118         } 
119         /* A new socket was created: initialize it, etc. */
120         else if (tcp_child_process(sk, nsk, skb)) {
121             /* On failure send a RST */
122             tcp_v4_send_reset(nsk, skb);
123             goto discard_and_relse;
124         } else {
125             sock_put(sk);
126             return 0;
127         }
128     }
129 
130     /* States other than TIME_WAIT and TCP_NEW_SYN_RECV */
131 
132     /* TTL is lower than the socket's min_ttl */
133     if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
134         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
135         goto discard_and_relse;
136     }
137 
138     if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
139         goto discard_and_relse;
140 
141     if (tcp_v4_inbound_md5_hash(sk, skb))
142         goto discard_and_relse;
143 
144     /* Reset the nf members of the skb */
145     nf_reset(skb);
146 
147     /* TCP filter: drop the segment if tcp_filter() returns non-zero */
148     if (tcp_filter(sk, skb))
149         goto discard_and_relse;
150 
151     /* Fetch the TCP and IP headers again */
152     th = (const struct tcphdr *)skb->data;
153     iph = ip_hdr(skb);
154 
155     /* Clear the device pointer */
156     skb->dev = NULL;
157 
158     /* LISTEN state handling */
159     if (sk->sk_state == TCP_LISTEN) {
160         ret = tcp_v4_do_rcv(sk, skb);
161         goto put_and_return;
162     }
163 
164     /* States other than TIME_WAIT, TCP_NEW_SYN_RECV and LISTEN */
165 
166     /* Record the incoming CPU on the socket */
167     sk_incoming_cpu_update(sk);
168 
169     bh_lock_sock_nested(sk);
170 
171     /* Segment statistics */
172     tcp_segs_in(tcp_sk(sk), skb);
173     ret = 0;
174 
175     /* The socket is not locked (owned) by the user */
176     if (!sock_owned_by_user(sk)) {
177         /* The skb could not be added to the prequeue */
178         if (!tcp_prequeue(sk, skb))
179             /* Process it in tcp_v4_do_rcv */
180             ret = tcp_v4_do_rcv(sk, skb);
181     } 
182     /* The socket is locked by the user: add the skb to the backlog, drop it if that fails */
183     else if (tcp_add_backlog(sk, skb)) {
184         goto discard_and_relse;
185     }
186     bh_unlock_sock(sk);
187 
188 put_and_return:
189     /* Drop the socket reference if one is held */
190     if (refcounted)
191         sock_put(sk);
192 
193     return ret;
194 
195 no_tcp_socket:
196     if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
197         goto discard_it;
198 
199     if (tcp_checksum_complete(skb)) {
200 csum_error:
201         __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
202 bad_packet:
203         __TCP_INC_STATS(net, TCP_MIB_INERRS);
204     } else {
205         /* No socket and the checksum is fine: send a RST */
206         tcp_v4_send_reset(NULL, skb);
207     }
208 
209 discard_it:
210     /* Discard frame. */
211     kfree_skb(skb);
212     return 0;
213 
214 discard_and_relse:
215     sk_drops_add(sk, skb);
216     if (refcounted)
217         sock_put(sk);
218     goto discard_it;
219 
220 do_time_wait:
221     if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
222         inet_twsk_put(inet_twsk(sk));
223         goto discard_it;
224     }
225 
226     /* Checksum error */
227     if (tcp_checksum_complete(skb)) {
228         inet_twsk_put(inet_twsk(sk));
229         goto csum_error;
230     }
231 
232     /* Process the incoming packet in TIME_WAIT state */
233     switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
234 
235     /* A SYN was received */
236     case TCP_TW_SYN: {
237         /* Look up a listening socket */
238         struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
239                             &tcp_hashinfo, skb,
240                             __tcp_hdrlen(th),
241                             iph->saddr, th->source,
242                             iph->daddr, th->dest,
243                             inet_iif(skb));
244 
245         /* Found one */
246         if (sk2) {
247             /* Remove the tw socket */
248             inet_twsk_deschedule_put(inet_twsk(sk));
249             /* Continue with the listening socket */
250             sk = sk2;
251             refcounted = false;
252 
253             /* Process the segment as a new request */
254             goto process;
255         }
256         /* Fall through to ACK */
257     }
258 
259     /* Send an ACK */
260     case TCP_TW_ACK:
261         tcp_v4_timewait_ack(sk, skb);
262         break;
263     /* Send a RST */
264     case TCP_TW_RST:
265         tcp_v4_send_reset(sk, skb);
266         /* Remove the tw socket */
267         inet_twsk_deschedule_put(inet_twsk(sk));
268         goto discard_it;
269     /* Success */
270     case TCP_TW_SUCCESS:;
271     }
272     goto discard_it;
273 }

 

上一篇:Socket与系统调用深度分析


下一篇:socket模拟网站的服务器和浏览器