网络协议栈源码分析

本文基于 Linux 1.2.13 版本的内核源码对网络协议栈进行分析。在分析代码的过程中,同时深入阅读了 Linux 网络驱动代码和《TCP/IP详解》,把对整体网络概念的理解与对内核网络模块代码的阅读结合起来、齐头并进,梳理出了如下的代码调用流程。该流程从内核网络模块初始化开始,一直到插口层(socket 层)如何调用到内核的处理过程,针对流程和主要的数据结构进行分析展示:

start_kernel

        sock_init

          proto_init

                  inet_proto_init

/*
 * Operations table for the AF_INET protocol family.
 *
 * The table is handed to sock_register() (keyed by inet_proto_ops.family)
 * and stored in pops[]; sock_socket() later fetches it from pops[] and
 * installs it as sock->ops, so socket-layer calls are dispatched through
 * these entries.
 *
 * The initializer is positional. The member names are not visible in this
 * excerpt; presumably each entry fills the like-named slot of
 * struct proto_ops -- TODO confirm against the struct declaration.
 */
static struct proto_ops inet_proto_ops = {

        AF_INET,                /* family; read back as inet_proto_ops.family when registering */

        inet_create,            /* reached via sock->ops->create(sock, protocol) in sock_socket */

        inet_dup,

        inet_release,

        inet_bind,

        inet_connect,

        inet_socketpair,

        inet_accept,

        inet_getname,

        inet_read,              /* called from sock_read; continues to sk->prot->read */

        inet_write,

        inet_select,

        inet_ioctl,

        inet_listen,

        inet_send,

        inet_recv,

        inet_sendto,

        inet_recvfrom,          /* called from sock_recvfrom; the trace shown continues to udp_recvfrom */

        inet_shutdown,

        inet_setsockopt,

        inet_getsockopt,

        inet_fcntl,

};

                        sock_register(inet_proto_ops.family, &inet_proto_ops);

                              pops[i] = ops; //pops指向inet_proto_ops结构体     

                              pops[i]->family = family;

                        tcp_prot.sock_array

                        udp_prot.sock_array

                        raw_prot.sock_array

                        tcp_prot.inuse = 0;

                        tcp_prot.highestinuse = 0;

                        udp_prot.inuse = 0;

                        udp_prot.highestinuse = 0;

                        raw_prot.inuse = 0;

                        raw_prot.highestinuse = 0;

                        inet_add_protocol(p);

                              inet_protos[hash] = prot;

                        arp_init();

                        ip_init();

          dev_init

                  ethif_probe

                        ne_probe(dev)

                              ne_probe1(dev, base_addr);

                                    request_irq (dev->irq, ei_interrupt, 0, wordlength==2 ? "ne2000":"ne1000");

                                          ei_receive(dev);

                                                ei_block_input(dev, pkt_len, (char *) skb->data,current_offset + sizeof(rx_frame));

                                                netif_rx(skb);

                                                      mark_bh(NET_BH);

                                    ethdev_init(dev);

                                          dev->hard_start_xmit = &ei_start_xmit;

                                          dev->get_stats      = get_stats;

                                          dev->hard_header        = eth_header;

                                   dev->rebuild_header = eth_rebuild_header;

                            dev->type_trans = eth_type_trans;

                                   dev->type               = ARPHRD_ETHER;

                            dev->hard_header_len = ETH_HLEN;

                            dev->mtu                = 1500; /* eth_mtu */

                            dev->addr_len   = ETH_ALEN;

         

                            /* New-style flags. */

                            dev->flags              = IFF_BROADCAST|IFF_MULTICAST;

                            dev->family             = AF_INET;

                            dev->pa_addr    = 0;

                            dev->pa_brdaddr = 0;

                            dev->pa_mask    = 0;

                            dev->pa_alen    = sizeof(unsigned long);

                                   ei_status.reset_8390 = &ne_reset_8390;

                                ei_status.block_input = &ne_block_input;

                                ei_status.block_output = &ne_block_output;

            bh_base[NET_BH].routine= net_bh;

            enable_bh(NET_BH);

net_bh

      dev_transmit

      //ptype_base->func = ip_rcv

      pt_prev->func(skb, skb->dev, pt_prev);

            ip_rcv(skb, skb->dev, pt_prev);

                  ip_fw_chk

                  ip_forward(skb, dev, is_frag);

                  ip_acct_cnt(iph,dev, ip_acct_chain);

                  ip_defrag(iph,skb,dev);//处理分片数据包

                        ip_find

                        ip_create(skb, iph, dev)

                             ipqueue//IP fragment queue     全局队列

                        ip_frag_create(offset, end, skb, ptr);

                        ip_done(qp) //检测分片包是否接收完成

                        ip_glue(qp); //将分片的数据包组装起来

                raw_rcv //原始套接字

                  ipprot->handler //网络层向传输层传输数据包

                     tcp_rcv

                     udp_rcv

                          ip_chk_addr(daddr);

                             udp_check(uh, len, saddr, daddr)

                             udp_deliver(sk,uh,skb,dev, saddr, daddr, len);

                                   sock_queue_rcv_skb(sk,skb);  //将数据包挂接到sk变量表示的套接字的接收队列中

                                    //将数据包插入到接收队列尾部,并唤醒可能因等待读取数据而进入睡眠的进程来读取数据包;此处 sk->data_ready = def_callback2

                                         //def_callback2(sk,skb->len);

                                    sk->data_ready(sk,skb->len);  //Now tell the user we may have some data.

                               release_sock(sk);

                                         sk->prot->rcv

                     icmp_rcv

      dev_transmit

sys_socketcall

      sock_socket

            ops = pops[i];

            sock = sock_alloc();

                  inode = get_empty_inode(); //申请一个文件节点

                  sock = &inode->u.socket_i;

            sock->type = type;

            sock->ops = ops;

            sock->ops->create(sock, protocol)

                  inet_create(sock, protocol)

                        sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);

           sk->num = 0;   

           sk->reuse = 0;

           switch(sock->type)

             case SOCK_STREAM:

                     case SOCK_SEQPACKET:

                                   protocol = IPPROTO_TCP;

                 sk->no_check = TCP_NO_CHECK;

                                  prot = &tcp_prot;

             case SOCK_DGRAM:

                protocol = IPPROTO_UDP;

                sk->no_check = UDP_NO_CHECK;

                prot=&udp_prot;

             case SOCK_RAW:

                sk->reuse = 1;

                sk->no_check = 0;

                prot=&raw_prot;

                sk->num = protocol;

             case SOCK_PACKET:

                sk->reuse = 1;

                sk->no_check = 0;

                prot=&packet_prot;

                sk->num = protocol;

              sk->socket = sock;

           #ifdef CONFIG_TCP_NAGLE_OFF

           sk->nonagle = 1;

           #else

           sk->nonagle = 0;

           #endif

           sk->type = sock->type;

                 sk->stamp.tv_sec=0;

           sk->protocol = protocol;

           sk->wmem_alloc = 0;

           sk->rmem_alloc = 0;

           sk->sndbuf = SK_WMEM_MAX;

           sk->rcvbuf = SK_RMEM_MAX;

           sk->pair = NULL;

           sk->opt = NULL;

           sk->write_seq = 0;

           sk->acked_seq = 0;

           sk->copied_seq = 0;

           sk->fin_seq = 0;

           sk->urg_seq = 0;

           sk->urg_data = 0;

           sk->proc = 0;

           sk->rtt = 0;                            /*TCP_WRITE_TIME << 3;*/

           sk->rto = TCP_TIMEOUT_INIT;             /*TCP_WRITE_TIME*/

           sk->mdev = 0;

           sk->backoff = 0;

           sk->packets_out = 0;

           sk->cong_window = 1; /* start with only sending one packet at a time. */

           sk->cong_count = 0;

           sk->ssthresh = 0;

           sk->max_window = 0;    

           sk->urginline = 0;

           sk->intr = 0;

           sk->linger = 0;

           sk->destroy = 0;

           sk->priority = 1;

           sk->shutdown = 0;

           sk->keepopen = 0;

           sk->zapped = 0;

           sk->done = 0;

           sk->ack_backlog = 0;

           sk->window = 0;

           sk->bytes_rcv = 0;

           sk->state = TCP_CLOSE;

           sk->dead = 0;

           sk->ack_timed = 0;

           sk->partial = NULL;

           sk->user_mss = 0;

           sk->debug = 0;

                        sk->max_unacked = 2048;

           sk->max_ack_backlog = 0;

           sk->inuse = 0;

           sk->delay_acks = 0;

           skb_queue_head_init(&sk->write_queue);

           skb_queue_head_init(&sk->receive_queue);

           sk->mtu = 576;

           sk->prot = prot;

           sk->sleep = sock->wait;

           sk->daddr = 0;

           sk->saddr = 0 /* ip_my_addr() */;

           sk->err = 0;

           sk->next = NULL;

           sk->pair = NULL;

           sk->send_tail = NULL;

           sk->send_head = NULL;

           sk->timeout = 0;

           sk->broadcast = 0;

           sk->localroute = 0;

           init_timer(&sk->timer);

           init_timer(&sk->retransmit_timer);

           sk->timer.data = (unsigned long)sk;

           sk->timer.function = &net_timer;

                     sk->timer.expires = 10;

             add_timer(&sk->timer);

           skb_queue_head_init(&sk->back_log);

           sk->blog = 0;

           sock->data =(void *) sk;

           sk->dummy_th.doff = sizeof(sk->dummy_th)/4;

           sk->dummy_th.res1=0;

           sk->dummy_th.res2=0;

           sk->dummy_th.res2=0;

           sk->dummy_th.urg_ptr = 0;

           sk->dummy_th.fin = 0;

           sk->dummy_th.syn = 0;

           sk->dummy_th.rst = 0;

           sk->dummy_th.psh = 0;

           sk->dummy_th.ack = 0;

           sk->dummy_th.urg = 0;

           sk->dummy_th.dest = 0;

           sk->ip_tos=0;

           sk->ip_ttl=64;

           sk->ip_mc_loop=1;

           sk->ip_mc_ttl=1;

           *sk->ip_mc_name=0;

           sk->ip_mc_list=NULL;

           sk->state_change = def_callback1;

           sk->data_ready = def_callback2;

                     wake_up_interruptible(sk->sleep);

             sock_wake_async(sk->socket, 1);

           sk->write_space = def_callback3;

           sk->error_report = def_callback1;

                        //Add a socket into the socket tables by number.

          put_sock(sk->num, sk);

          sk->dummy_th.source = ntohs(sk->num);

           err = sk->prot->init(sk);

            fd = get_fd(SOCK_INODE(sock)); //fd = get_fd(sock->inode);

                  file = get_empty_filp();

           

/*
 * File-operations table for files that back a socket.
 *
 * Installed as file->f_op on the struct file obtained while setting up the
 * socket's descriptor, so file-level calls on that descriptor are routed to
 * the sock_* handlers below (e.g. read -> sock_read).
 *
 * The initializer is positional; NULL marks an operation with no handler
 * supplied by this table.
 */
static struct file_operations socket_file_ops = {

        sock_lseek,

        sock_read,              /* entry point of the read path: sock_read -> inet_read */

        sock_write,

        sock_readdir,

        sock_select,

        sock_ioctl,

        NULL,                   /* mmap */

        NULL,                   /* no special open code... */

        sock_close,

        NULL,                   /* no fsync */

        sock_fasync

};

     

        file->f_op = &socket_file_ops;

        file->f_mode = 3;

        file->f_flags = O_RDWR;

        file->f_count = 1;

        file->f_inode = inode;

        inode->i_count++;

        file->f_pos = 0;

recvfrom

      sys_socketcall

            sock_recvfrom

                  inet_recvfrom

                        udp_recvfrom

                             skb_recv_datagram

                                   interruptible_sleep_on(sk->sleep);  //阻塞进程进入睡眠

                               skb_copy_datagram(skb,sizeof(struct udphdr),to,copied);

                               skb_free_datagram(skb);

sendto

      sys_socketcall

            sock_sendto

                  udp_sendto(sk, &sin, from, len, flags);

                        udp_send(sk, &sin, from, len, flags);

                             //ip_queue_xmit(sk, dev, skb, 1);

                             sk->prot->queue_xmit(sk, dev, skb, 1);  //queue_xmit = ip_queue_xmit

                                   ip_fw_chk

                                   ip_loopback(dev,skb);

                                         ip_queue_xmit(NULL, dev, newskb, 1);

                                   dev_queue_xmit(skb, dev, SOPRI_NORMAL)

                                   //ei_start_xmit(skb, dev)

                                   dev->hard_start_xmit(skb, dev)  //dev->hard_start_xmit = &ei_start_xmit;

                                   ei_start_xmit(skb, dev)

read

      sock_read

            inet_read

                  sk->prot->read //udp_read

                        udp_read

                             udp_recvfrom

结合 Linux 1.2.13 的源码来阅读上面的函数调用流程,可以帮助大家对网络协议栈形成比较清晰、通透的认识。

上一篇:android下socket编程问题:服务器关闭时,客户端发送请求的异常处理


下一篇:Measuring App launch time