linux bridge - mac & vlan forward
https://www.jianshu.com/p/cd1b713b798d
这篇文档主要介绍一下bridge的vlan功能如何使用和生效。
如果bridge要支持vlan filter,需要满足如下条件
a. 打开kernel编译选项:CONFIG_BRIDGE_VLAN_FILTERING
b. 使能网桥的vlan filter功能,比如使能网桥br1的vlan filter功能: echo 1 > /sys/class/net/br1/bridge/vlan_filtering
使能vlan filter后,只能通过bridge命令查看端口的vlan和fdb转发表
root@node2:~# bridge vlan
port vlan ids
br1 1 PVID Egress Untagged
vetha 1 PVID Egress Untagged
vethx 1 PVID Egress Untagged
root@node2:~# bridge fdb list br br1
33:33:00:00:00:01 dev br1 self permanent
66:e6:6f:a8:d4:97 dev vetha master br1 permanent
66:e6:6f:a8:d4:97 dev vetha vlan 10 master br1 permanent
66:e6:6f:a8:d4:97 dev vetha vlan 1 master br1 permanent
33:33:00:00:00:01 dev vetha self permanent
01:00:5e:00:00:01 dev vetha self permanent
12:27:96:8c:f4:58 dev vethx vlan 10 master br1 permanent
12:27:96:8c:f4:58 dev vethx master br1 permanent
12:27:96:8c:f4:58 dev vethx vlan 1 master br1 permanent
33:33:00:00:00:01 dev vethx self permanent
01:00:5e:00:00:01 dev vethx self permanent
给网桥和端口添加vlan的区别
#给端口添加vlan时,可指定master或者不指定,kernel 端会取出vetha的master设备(即网桥),
#调用网桥的 ndo_bridge_setlink 给端口添加vlan
bridge vlan add vid 10 dev vetha untagged pvid master
bridge vlan add vid 10 dev vetha untagged pvid
#给网桥添加vlan时,必须指定self,kernel端会调用网桥的ndo_bridge_setlink给网桥添加vlan
bridge vlan add vid 13 dev br1 untagged pvid self
关于两个参数: untagged pvid
untagged: 如果指定了此参数,则报文从此端口发出时,vlan会被剥掉。如果不指定,则报文会携带vlan发出去。
pvid: 如果指定了此参数,则该vlan成为此端口的pvid,端口收到不带vlan的报文时,会给报文打上pvid。如果不指定,则端口沿用已有的pvid(默认是vlan 1;注意:对当前pvid所在的vlan重新add且不带pvid参数,会清除端口的pvid)。如果端口上没有pvid,则收到不带vlan的报文时,报文会被drop掉。
接收报文处理
如果vlan filter功能没使能,则始终允许报文通过。
如果vlan filter功能使能了,需要根据报文是否携带vlan进行不同处理:
如果报文带vlan,则判断此vlan是否在vlan_bitmap中,如果存在,则返回true,如果不存在,则返回false,表示不允许此报文通过。
如果报文不带vlan,将pvid赋给skb(如果pvid也不存在,则drop此报文),然后判断此vlan是否在vlan_bitmap中,如果存在,则返回true,如果不存在,则返回false,表示不允许此报文通过。
发送报文处理
使能vlan filter功能后,报文在网桥内部转发过程中始终携带vlan,
如果要转发出端口时,会判断出端口是否允许此vlan的报文通过。
如果允许报文从此端口发出去,再根据untagged判断是否需要将vlan去掉。
实践部分
#创建网桥br1
brctl addbr br1
#使用网桥的vlan filter功能
echo 1 > /sys/class/net/br1/bridge/vlan_filtering
#添加两个namespace
ip netns add test1
ip netns add test2
#创建一对veth端口: vetha和vethb
ip link add vetha type veth peer vethb
#将vethb添加到ns test1,并设置ip 1.1.1.10
ip link set dev vethb netns test1
ip netns exec test1 ip link set dev vethb up
ip netns exec test1 ip address add dev vethb 1.1.1.10/24
#将vetha添加到bridge br1
ip link set dev vetha up
brctl addif br1 vetha
#再创建一对veth端口: vethx和vethy
ip link add vethx type veth peer vethy
#将vethy添加到ns test2,并设置ip 1.1.1.11
ip link set dev vethy netns test2
ip netns exec test2 ip link set dev vethy up
ip netns exec test2 ip address add dev vethy 1.1.1.11/24
#将vethx添加到bridge br1
ip link set dev vethx up
brctl addif br1 vethx
场景1 默认情况下,端口和网桥都有一个默认vlan 1,并且是pvid和untagged模式,两端可以互相ping通
root@node2:~# bridge vlan
port vlan ids
br1 1 Egress Untagged
vetha 1 PVID Egress Untagged
vethx 1 PVID Egress Untagged
root@node2:~# ip netns exec test1 ping 1.1.1.11
PING 1.1.1.11 (1.1.1.11) 56(84) bytes of data.
64 bytes from 1.1.1.11: icmp_seq=1 ttl=64 time=0.142 ms
^C
--- 1.1.1.11 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.142/0.142/0.142/0.000 ms
场景2 去掉端口vetha的pvid和untagged参数,因为vetha端口没有了pvid,所以端口vetha收到报文后就被drop掉了
bridge vlan add vid 1 dev vetha
root@node2:~# bridge vlan
port vlan ids
br1 1 Egress Untagged
vetha 1
vethx 1 PVID Egress Untagged
root@node2:~# ip netns exec test1 ping 1.1.1.11
PING 1.1.1.11 (1.1.1.11) 56(84) bytes of data.
^C
--- 1.1.1.11 ping statistics ---
8 packets transmitted, 0 received, 100% packet loss, time 7151ms
场景3 只去掉端口 vethx 的 untagged 参数,报文从vethx发出去时,报文还携带vlan
root@node2:~# bridge vlan add vid 1 dev vetha pvid untagged
root@node2:~# bridge vlan add vid 1 dev vethx pvid
root@node2:~# bridge vlan
port vlan ids
br1 1 Egress Untagged
vetha 1 PVID Egress Untagged
vethx 1 PVID
root@node2:~# ip netns exec test1 ping 1.1.1.11 -c1
PING 1.1.1.11 (1.1.1.11) 56(84) bytes of data.
^C
--- 1.1.1.11 ping statistics ---
1 packets transmitted, 0 received, 100% packet loss, time 0ms
#在test2 ns抓包,可看到报文携带vlan 1
root@node2:~# ip netns exec test2 tcpdump -vne -i vethy
tcpdump: listening on vethy, link-type EN10MB (Ethernet), capture size 262144 bytes
^C21:00:11.558978 4e:b4:a4:4e:a7:96 > ff:ff:ff:ff:ff:ff, ethertype 802.1Q (0x8100), length 46: vlan 1, p 0, ethertype ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 1.1.1.11 tell 1.1.1.10, length 28
21:00:12.568679 4e:b4:a4:4e:a7:96 > ff:ff:ff:ff:ff:ff, ethertype 802.1Q (0x8100), length 46: vlan 1, p 0, ethertype ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 1.1.1.11 tell 1.1.1.10, length 28
21:00:13.592686 4e:b4:a4:4e:a7:96 > ff:ff:ff:ff:ff:ff, ethertype 802.1Q (0x8100), length 46: vlan 1, p 0, ethertype ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 1.1.1.11 tell 1.1.1.10, length 28
场景4 将默认vlan 1删除,创建新的vlan 10
bridge vlan del vid 1 dev vetha
bridge vlan del vid 1 dev vethx
bridge vlan add vid 10 dev vetha pvid untagged
bridge vlan add vid 10 dev vethx pvid untagged
root@node2:~# bridge vlan
port vlan ids
br1 1 Egress Untagged
vetha 10 PVID Egress Untagged
vethx 10 PVID Egress Untagged
//互相ping是可以通的,可以通过删除vethx 的Untagged标签,验证不通的情况
root@node2:~# ip netns exec test1 ping 1.1.1.11
PING 1.1.1.11 (1.1.1.11) 56(84) bytes of data.
64 bytes from 1.1.1.11: icmp_seq=1 ttl=64 time=0.139 ms
^C
--- 1.1.1.11 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.139/0.139/0.139/0.000 ms
vlan相关代码初始化
创建网桥设备时,会调用 br_dev_init->br_vlan_init,设置 vlan_proto 和默认pvid,并将pvid和网桥mac添加到fdb中。
/*
 * br_vlan_init - set up the VLAN defaults of a bridge device.
 * @br: the bridge being initialised.
 *
 * Sets the bridge's VLAN protocol to 802.1Q, records VLAN 1 as the
 * default PVID, and then adds VLAN 1 to the bridge itself with both the
 * PVID and UNTAGGED flags set.
 *
 * Returns the result of br_vlan_add().
 */
int br_vlan_init(struct net_bridge *br)
{
br->vlan_proto = htons(ETH_P_8021Q);
br->default_pvid = 1;
/* The literal 1 is the same VLAN id that was just stored in default_pvid. */
return br_vlan_add(br, 1, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED);
}
将接口添加到网桥上时,会调用 br_add_if->nbp_vlan_init,将网桥的pvid和接口mac地址添加到fdb中。
/*
 * nbp_vlan_init - give a bridge port its initial VLAN membership.
 * @p: the bridge port being initialised.
 *
 * When the owning bridge (p->br) has a non-zero default_pvid, that VLAN is
 * added to the port with both the PVID and UNTAGGED flags set and the
 * result of nbp_vlan_add() is returned. When default_pvid is 0 nothing is
 * added and 0 is returned.
 */
int nbp_vlan_init(struct net_bridge_port *p)
{
/* A default_pvid of 0 skips the add entirely. */
return p->br->default_pvid ?
nbp_vlan_add(p, p->br->default_pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED) : 0;
}
添加vlan流程
通过bridge命令给端口添加vlan时
bridge vlan add vid 1 dev vetha pvid
命令行端代码
static int vlan_modify(int cmd, int argc, char **argv)
struct {
struct nlmsghdr n;
struct ifinfomsg ifm;
char buf[1024];
} req = {
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.n.nlmsg_flags = NLM_F_REQUEST,
.n.nlmsg_type = cmd,
.ifm.ifi_family = PF_BRIDGE,
};
#如果指定了 self
flags |= BRIDGE_FLAGS_SELF;
#如果指定了 master
flags |= BRIDGE_FLAGS_MASTER;
#如果指定了vlan范围
vid = atoi(*argv);
vid_end = atoi(p);
vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
#如果指定了 pvid
vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
#如果指定了 untagged
vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
afspec = addattr_nest(&req.n, sizeof(req), IFLA_AF_SPEC);
if (flags)
addattr16(&req.n, sizeof(req), IFLA_BRIDGE_FLAGS, flags);
add_vlan_info_range(&req.n, sizeof(req), vid, vid_end, vinfo.flags);
kernel端代码流程
static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
//根据 ifi_index 获取 dev
dev = __dev_get_by_index(net, ifm->ifi_index);
//获取 IFLA_AF_SPEC 属性
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
//获取 flags
if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
have_flags = true;
flags = nla_get_u16(attr);
break;
//如果flags为空或者flags包含标志BRIDGE_FLAGS_MASTER,则使用网桥设备的ndo_bridge_setlink
//这个流程主要是给端口添加vlan
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) {
err = -EOPNOTSUPP;
goto out;
}
//调用 ndo_bridge_setlink,对于网桥来说,就是 br_setlink
err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
if (err)
goto out;
flags &= ~BRIDGE_FLAGS_MASTER;
}
//如果flags指定了BRIDGE_FLAGS_SELF,则使用dev本身的ndo_bridge_setlink,
//但是支持 ndo_bridge_setlink 的dev比较少,从代码看,只有bridge和ixgbe支持。
//这个flag主要是为了给网桥添加vlan
if ((flags & BRIDGE_FLAGS_SELF)) {
if (!dev->netdev_ops->ndo_bridge_setlink)
err = -EOPNOTSUPP;
else
err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
if (!err)
flags &= ~BRIDGE_FLAGS_SELF;
}
int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (afspec) {
br_afspec((struct net_bridge *)netdev_priv(dev), p, afspec, RTM_SETLINK);
switch (cmd) {
case RTM_SETLINK:
//在端口上添加vlan
if (p) {
err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
if (err)
break;
//如果指定了master,也要将vlan添加到网桥上(但是从iproute2代码看,没有设置BRIDGE_VLAN_INFO_MASTER)
if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
err = br_vlan_add(p->br, vinfo->vid,
vinfo->flags);
} else
//在网桥上添加vlan
err = br_vlan_add(br, vinfo->vid, vinfo->flags)