本网络协议栈源码分析基于 Linux 1.2.13 版本的内核源码。在分析代码的过程中，同时深入阅读了《Linux网络驱动程序》和《TCP/IP详解》，将整体网络概念的理解与内核网络模块代码的阅读齐头并进，梳理出如下的代码调用流程。该流程从内核网络模块初始化开始，到插口层（socket 层）如何调用内核的处理过程为止，针对流程和主要的数据结构进行分析展示：
start_kernel
sock_init
proto_init
inet_proto_init
/*
 * Operations table for the AF_INET address family (Linux 1.2.13,
 * net/inet/af_inet.c).  Registered via sock_register(); the BSD socket
 * layer dispatches each socket system call through the matching slot.
 * Fields are positional initializers matching struct proto_ops.
 */
static struct proto_ops inet_proto_ops = {
AF_INET,           /* family: address family this table serves */
inet_create,       /* create: called from sock_socket() */
inet_dup,          /* dup */
inet_release,      /* release: close-time teardown */
inet_bind,         /* bind */
inet_connect,      /* connect */
inet_socketpair,   /* socketpair */
inet_accept,       /* accept */
inet_getname,      /* getname */
inet_read,         /* read */
inet_write,        /* write */
inet_select,       /* select */
inet_ioctl,        /* ioctl */
inet_listen,       /* listen */
inet_send,         /* send */
inet_recv,         /* recv */
inet_sendto,       /* sendto */
inet_recvfrom,     /* recvfrom */
inet_shutdown,     /* shutdown */
inet_setsockopt,   /* setsockopt */
inet_getsockopt,   /* getsockopt */
inet_fcntl,        /* fcntl */
};
sock_register(inet_proto_ops.family, &inet_proto_ops);
pops[i] = ops; //pops指向inet_proto_ops结构体
pops[i]->family = family;
tcp_prot.sock_array
udp_prot.sock_array
raw_prot.sock_array
tcp_prot.inuse = 0;
tcp_prot.highestinuse = 0;
udp_prot.inuse = 0;
udp_prot.highestinuse = 0;
raw_prot.inuse = 0;
raw_prot.highestinuse = 0;
inet_add_protocol(p);
inet_protos[hash] = prot;
arp_init();
ip_init();
dev_init
ethif_probe
ne_probe(dev)
ne_probe1(dev, base_addr);
request_irq (dev->irq, ei_interrupt, 0, wordlength==2 ? "ne2000":"ne1000");
ei_receive(dev);
ei_block_input(dev, pkt_len, (char *) skb->data,current_offset + sizeof(rx_frame));
netif_rx(skb);
mark_bh(NET_BH);
ethdev_init(dev);
dev->hard_start_xmit = &ei_start_xmit;
dev->get_stats = get_stats;
dev->hard_header = eth_header;
dev->rebuild_header = eth_rebuild_header;
dev->type_trans = eth_type_trans;
dev->type = ARPHRD_ETHER;
dev->hard_header_len = ETH_HLEN;
dev->mtu = 1500; /* eth_mtu */
dev->addr_len = ETH_ALEN;
/* New-style flags. */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
dev->family = AF_INET;
dev->pa_addr = 0;
dev->pa_brdaddr = 0;
dev->pa_mask = 0;
dev->pa_alen = sizeof(unsigned long);
ei_status.reset_8390 = &ne_reset_8390;
ei_status.block_input = &ne_block_input;
ei_status.block_output = &ne_block_output;
bh_base[NET_BH].routine= net_bh;
enable_bh(NET_BH);
net_bh
dev_transmit
//ptype_base->func = ip_rcv
pt_prev->func(skb, skb->dev, pt_prev);
ip_rcv(skb, skb->dev, pt_prev);
ip_fw_chk
ip_forward(skb, dev, is_frag);
ip_acct_cnt(iph,dev, ip_acct_chain);
ip_defrag(iph,skb,dev);//处理分片数据包
ip_find
ip_create(skb, iph, dev)
ipqueue//IP fragment queue 全局队列
ip_frag_create(offset, end, skb, ptr);
ip_done(qp) //检测分片包是否接收完成
ip_glue(qp); //将分片的数据包组装起来
raw_rcv //原始套接字
ipprot->handler //网络层向传输层传输数据包
tcp_rcv
udp_rcv
ip_chk_addr(daddr);
udp_check(uh, len, saddr, daddr)
udp_deliver(sk,uh,skb,dev, saddr, daddr, len);
sock_queue_rcv_skb(sk, skb); //将数据包挂接到sk变量表示的套接字的接收队列中
//将数据包插入到接收队列尾部,并通知可能等待读取数据被置于睡眠的进程进行数据包的读取 sk->data_ready = def_callback2
//def_callback2(sk,skb->len);
sk->data_ready(sk,skb->len); //Now tell the user we may have some data.
release_sock(sk);
sk->prot->icv
icmp_rcv
dev_transmit
sys_socketcall
sock_socket
ops = pops[i];
sock = sock_alloc();
inode = get_empty_inode(); //申请一个文件节点
sock = &inode->u.socket_i;
sock->type = type;
sock->ops = ops;
sock->ops->create(sock, protocol)
inet_create(sock, protocol)
sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
sk->num = 0;
sk->reuse = 0;
switch(sock->type)
case SOCK_STREAM:
case SOCK_SEQPACKET:
protocol = IPPROTO_TCP;
sk->no_check = TCP_NO_CHECK;
prot = &tcp_prot;
case SOCK_DGRAM:
protocol = IPPROTO_UDP;
sk->no_check = UDP_NO_CHECK;
prot=&udp_prot;
case SOCK_RAW:
sk->reuse = 1;
sk->no_check = 0;
prot=&raw_prot;
sk->num = protocol;
case SOCK_PACKET:
sk->reuse = 1;
sk->no_check = 0;
prot=&packet_prot;
sk->num = protocol;
sk->socket = sock;
sk->nonagle = 1;
sk->nonagle = 0;
sk->type = sock->type;
sk->stamp.tv_sec=0;
sk->protocol = protocol;
sk->wmem_alloc = 0;
sk->rmem_alloc = 0;
sk->sndbuf = SK_WMEM_MAX;
sk->rcvbuf = SK_RMEM_MAX;
sk->pair = NULL;
sk->opt = NULL;
sk->write_seq = 0;
sk->acked_seq = 0;
sk->copied_seq = 0;
sk->fin_seq = 0;
sk->urg_seq = 0;
sk->urg_data = 0;
sk->proc = 0;
sk->rtt = 0; /*TCP_WRITE_TIME << 3;*/
sk->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/
sk->mdev = 0;
sk->backoff = 0;
sk->packets_out = 0;
sk->cong_window = 1; /* start with only sending one packet at a time. */
sk->cong_count = 0;
sk->ssthresh = 0;
sk->max_window = 0;
sk->urginline = 0;
sk->intr = 0;
sk->linger = 0;
sk->destroy = 0;
sk->priority = 1;
sk->shutdown = 0;
sk->keepopen = 0;
sk->zapped = 0;
sk->done = 0;
sk->ack_backlog = 0;
sk->window = 0;
sk->bytes_rcv = 0;
sk->state = TCP_CLOSE;
sk->dead = 0;
sk->ack_timed = 0;
sk->partial = NULL;
sk->user_mss = 0;
sk->debug = 0;
sk->max_unacked = 2048;
sk->max_ack_backlog = 0;
sk->inuse = 0;
sk->delay_acks = 0;
skb_queue_head_init(&sk->write_queue);
skb_queue_head_init(&sk->receive_queue);
sk->mtu = 576;
sk->prot = prot;
sk->sleep = sock->wait;
sk->daddr = 0;
sk->saddr = 0 /* ip_my_addr() */;
sk->err = 0;
sk->next = NULL;
sk->pair = NULL;
sk->send_tail = NULL;
sk->send_head = NULL;
sk->timeout = 0;
sk->broadcast = 0;
sk->localroute = 0;
init_timer(&sk->timer);
init_timer(&sk->retransmit_timer);
sk->timer.data = (unsigned long)sk;
sk->timer.function = &net_timer;
sk->timer.expires = 10;
add_timer(&sk->timer);
skb_queue_head_init(&sk->back_log);
sk->blog = 0;
sock->data =(void *) sk;
sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
sk->dummy_th.res1=0;
sk->dummy_th.res2=0;
sk->dummy_th.urg_ptr = 0;
sk->dummy_th.fin = 0;
sk->dummy_th.syn = 0;
sk->dummy_th.rst = 0;
sk->dummy_th.psh = 0;
sk->dummy_th.ack = 0;
sk->dummy_th.urg = 0;
sk->dummy_th.dest = 0;
sk->ip_tos=0;
sk->ip_ttl=64;
sk->ip_mc_loop=1;
sk->ip_mc_ttl=1;
*sk->ip_mc_name=0;
sk->ip_mc_list=NULL;
sk->state_change = def_callback1;
sk->data_ready = def_callback2;
wake_up_interruptible(sk->sleep);
sock_wake_async(sk->socket, 1);
sk->write_space = def_callback3;
sk->error_report = def_callback1;
//Add a socket into the socket tables by number.
put_sock(sk->num, sk);
sk->dummy_th.source = ntohs(sk->num);
err = sk->prot->init(sk);
fd = get_fd(SOCK_INODE(sock)); //fd = get_fd(sock->inode);
file = get_empty_filp();
/*
 * VFS file operations for socket inodes (Linux 1.2.13, net/socket.c).
 * Installed on the socket's struct file in sock_socket(), so that
 * read()/write()/select()/ioctl()/close() on a socket fd reach the
 * socket layer.  Fields are positional initializers matching
 * struct file_operations.
 */
static struct file_operations socket_file_ops = {
sock_lseek,        /* lseek (returns an error for sockets) */
sock_read,         /* read */
sock_write,        /* write */
sock_readdir,      /* readdir (not meaningful for sockets) */
sock_select,       /* select */
sock_ioctl,        /* ioctl */
NULL, /* mmap */
NULL, /* no special open code... */
sock_close,        /* release/close */
NULL, /* no fsync */
sock_fasync        /* asynchronous notification setup */
};
file->f_op = &socket_file_ops;
file->f_mode = 3;
file->f_flags = O_RDWR;
file->f_count = 1;
file->f_inode = inode;
inode->i_count++;
file->f_pos = 0;
recvfrom
sys_socketcall
sock_recvfrom
inet_recvfrom
udp_recvfrom
skb_recv_datagram
interruptible_sleep_on(sk->sleep); //阻塞进程进入睡眠
skb_copy_datagram(skb,sizeof(struct udphdr),to,copied);
skb_free_datagram(skb);
sendto
sys_socketcall
sock_sendto
udp_sendto(sk, &sin, from, len, flags);
udp_send(sk, &sin, from, len, flags);
//ip_queue_xmit(sk, dev, skb, 1);
sk->prot->queue_xmit(sk, dev, skb, 1); //queue_xmit = ip_queue_xmit
ip_fw_chk
ip_loopback(dev,skb);
ip_queue_xmit(NULL, dev, newskb, 1);
dev_queue_xmit(skb, dev, SOPRI_NORMAL)
//ei_start_xmit(skb, dev)
dev->hard_start_xmit(skb, dev) //dev->hard_start_xmit = &ei_start_xmit;
ei_start_xmit(skb, dev)
read
sock_read
inet_read
sk->prot->read //udp_read
udp_read
udp_recvfrom
结合 Linux 1.2.13 的源码阅读上面的函数调用流程，可以帮助大家对网络协议栈形成一个比较清晰、通透的整体认知。