内核版本:3.4.39
IPv6的分片流程和IPv4基本一致,这一点内核源码作者也说了。流程比较简单,分片的时候判断是否满足快速分片,满足的话直接一个接一个加上分片扩展选项发送出去,不满足的话就只能走慢速分片通道了,这时候需要重新分配每一个skb,然后从原始SKB报文那里复制数据发送出去。
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
int hroom, troom;
__be32 frag_id = 0;
int ptr, offset = 0, err=0;
u8 *prevhdr, nexthdr = 0;
struct net *net = dev_net(skb_dst(skb)->dev);
//获取不能分片的扩展选项长度,不是每个扩展选项都能够分片的
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
//获取mtu大小
mtu = ip6_skb_dst_mtu(skb);
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
//检查下是否允许分片
if (!skb->local_df && skb->len > mtu) {
skb->dev = skb_dst(skb)->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
//获取mtu大小
if (np && np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
//hlen表示每个分片报文都必须携带的扩展选项头
mtu -= hlen + sizeof(struct frag_hdr);
//判断是否存在分片队列,存在的话说明应用层可以已经帮忙分片了,这时候只需要检查
//分片的长度是否合法,合法的话则可以使用快速分片。
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
struct sk_buff *frag2;
//如果第一个报文长度大于MTU或者长度不是8字节的整数倍
//克隆的报文也不能使用快速分片。
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
skb_cloned(skb))
goto slow_path;
//检查分片是否满足快速分片要求
skb_walk_frags(skb, frag) {
/* Correct geometry. */
//首先是长度不能大于MTU
//此外除了最后一个报文其它报文长度必须是8字节整数倍
//当然首部空间不够的话也不行
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
skb_headroom(frag) < hlen)
goto slow_path_clean;
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path_clean;
BUG_ON(frag->sk);
if (skb->sk) {
frag->sk = skb->sk;
frag->destructor = sock_wfree;
}
//报文要各自为战了,所以从第一个报文那个分出来。
//这么做是因为发送的时候需要将报文所占大小还给系统。
//每个报文返还自己的。
skb->truesize -= frag->truesize;
}
err = 0;
offset = 0;
//将分片从skb链表上挂载到frag上。
frag = skb_shinfo(skb)->frag_list;
skb_frag_list_init(skb);
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!tmp_hdr) {
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
return -ENOMEM;
}
//追加一个分片头
__skb_pull(skb, hlen);
fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
__skb_push(skb, hlen);
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
//选择一个报文ID标识
ipv6_select_ident(fh, rt);
//设置分片扩展选项
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
frag_id = fh->identification;
//重新调整长度
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
ipv6_hdr(skb)->payload_len = htons(first_len -
sizeof(struct ipv6hdr));
//增加路由统计计数
dst_hold(&rt->dst);
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
//处理其它分片报文
if (frag) {
frag->ip_summed = CHECKSUM_NONE;
skb_reset_transport_header(frag);
//添加分片扩展选项
fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
__skb_push(frag, hlen);
skb_reset_network_header(frag);
memcpy(skb_network_header(frag), tmp_hdr,
hlen);
//调整选项头
offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(offset);
if (frag->next != NULL)
fh->frag_off |= htons(IP6_MF);
fh->identification = frag_id;
ipv6_hdr(frag)->payload_len =
htons(frag->len -
sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
//先将上一个报文发送出去
//分片报文按照处理的顺序发送出去
err = output(skb);
if(!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
skb = frag;
frag = skb->next;
skb->next = NULL;
}
kfree(tmp_hdr);
//分片成功则返回
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
dst_release(&rt->dst);
return 0;
}
//走到这里说明发送那里出现了错误,功亏一篑,剩下的报文原地解散回家去吧
while (frag) {
skb = frag->next;
kfree_skb(frag);
frag = skb;
}
//失败的统计计数
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
//释放占用的路由指针
dst_release(&rt->dst);
return err;
slow_path_clean:
skb_walk_frags(skb, frag2) {
if (frag2 == frag)
break;
frag2->sk = NULL;
frag2->destructor = NULL;
skb->truesize += frag2->truesize;
}
}
slow_path:
//慢速通道,需要重新分配skb
//left表示分片报文数据部分总长度
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
/*
* Fragment the datagram.
*/
*prevhdr = NEXTHDR_FRAGMENT;
//预留链路层头部长度
hroom = LL_RESERVED_SPACE(rt->dst.dev);
troom = rt->dst.dev->needed_tailroom;
/*
* Keep copying data until we run out.
*/
while(left > 0) {
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
//分片长度最大时mtu
if (len > mtu)
len = mtu;
/* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
//除了最后一个分片报文,其它报文长度必须是8字节整数倍
if (len < left) {
len &= ~7;
}
/*
* Allocate buffer.
*/
//分配一个SKB,果然是慢速,快速已经分配好了直接发送就可以了。
if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
hroom + troom, GFP_ATOMIC)) == NULL) {
NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
goto fail;
}
/*
* Set up data on packet
*/
//重新分配数据
ip6_copy_metadata(frag, skb);
skb_reserve(frag, hroom);
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
skb_reset_network_header(frag);
fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
frag->transport_header = (frag->network_header + hlen +
sizeof(struct frag_hdr));
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
if (skb->sk)
skb_set_owner_w(frag, skb->sk);
/*
* Copy the packet header into the new buffer.
*/
skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
/*
* Build fragment header.
*/
//配置分片选项头
fh->nexthdr = nexthdr;
fh->reserved = 0;
if (!frag_id) {
ipv6_select_ident(fh, rt);
frag_id = fh->identification;
} else
fh->identification = frag_id;
/*
* Copy a block of the IP datagram.
*/
//复制其它数据
if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
BUG();
left -= len;
//设置分片选项头
fh->frag_off = htons(offset);
if (left > 0)
fh->frag_off |= htons(IP6_MF);
ipv6_hdr(frag)->payload_len = htons(frag->len -
sizeof(struct ipv6hdr));
ptr += len;
offset += len;
/*
* Put this fragment into the sending queue.
*/
//发送报文
err = output(frag);
if (err)
goto fail;
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGCREATES);
}
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGOKS);
kfree_skb(skb);
return err;
fail:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return err;
}