5 虚拟网桥
Linux下的Bridge也是一种虚拟设备,这多少和vlan有点相似,它依赖于一个或多个从设备。与VLAN不同的是,它不是虚拟出和从设备同一层次的镜像设备,而是虚拟出一个高一层次的设备,并把从设备虚拟化为端口port,且同时处理各个从设备的数据收发及转发,再加上netfilter框架的一些东西,使得它的实现相比vlan复杂得多。
linux下配置网桥的命令:
brctl addbr br0 /* 创建虚拟网桥br0 */
brctl addif br0 eth0 /* 把物理设备eth0虚拟成网桥的端口 */
brctl addif br0 eth1 /* 把物理设备eth1虚拟成网桥的端口 */
ifconfig br0 192.168.1.1 /* 配置虚拟网桥设备的IP */
创建虚拟网桥
brctl addbr br0
/*
 * User-space brctl side: ask the kernel to create a bridge device.
 *
 * @brname: name of the bridge to create, e.g. "br0".
 *
 * Returns 0 on success, or the (positive) errno value left by the failed
 * SIOCBRADDBR ioctl.
 */
int br_add_bridge(const char *brname)
{
	if (ioctl(br_socket_fd, SIOCBRADDBR, brname) < 0)
		return errno;

	return 0;
}
内核初始化时,注册了回调函数br_ioctl_deviceless_stub(),用户空间的ioctl最终调用到该函数。
/*
 * Hook through which sock_ioctl() dispatches the bridge add/delete ioctls.
 * br_init() installs br_ioctl_deviceless_stub() here via brioctl_set().
 */
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
/*
 * Bridge initialisation (abridged excerpt): publish the bridge ioctl
 * handler so that socket ioctls can reach it.
 *
 * Returns 0.
 */
static int __init br_init(void)
{
	/* Sets br_ioctl_hook = br_ioctl_deviceless_stub. */
	brioctl_set(br_ioctl_deviceless_stub);

	return 0;
}
/*
 * ioctl entry point for socket files (abridged excerpt): only the handling
 * of the bridge add/delete commands is shown.
 *
 * NOTE(review): as excerpted, `net` is handed to the hook without ever being
 * assigned, and `err` is never returned. The statements that derive `net`
 * from the socket and return `err` are presumably elided - confirm against
 * the full source before reusing this code.
 */
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct socket *sock;
struct sock *sk;
void __user *argp = (void __user *)arg;
int pid, err;
struct net *net;
switch (cmd) {
case SIOCBRADDBR:
case SIOCBRDELBR:
if (!br_ioctl_hook)
request_module("bridge"); /* no handler registered yet: load the module */
if (br_ioctl_hook)
err = br_ioctl_hook(net, cmd, argp); /* br_ioctl_deviceless_stub() */
}
}
/* File operations for socket files (excerpt): unlocked_ioctl is served by sock_ioctl(). */
static const struct file_operations socket_file_ops = {
.unlocked_ioctl = sock_ioctl,
};
br_ioctl_deviceless_stub()根据用户空间的命令,执行相应的内核操作。如果是增加一个虚拟网桥,则调用br_add_bridge()。
/*
 * Handle the bridge ioctls that do not target an existing bridge device.
 *
 * @net:  network namespace of the caller.
 * @cmd:  ioctl command; only SIOCBRADDBR and SIOCBRDELBR are handled.
 * @uarg: user-space pointer to the bridge name (IFNAMSIZ bytes are read).
 *
 * Returns -EOPNOTSUPP for any other command, -EPERM when the caller lacks
 * CAP_NET_ADMIN in the namespace's user namespace, -EFAULT when the name
 * cannot be copied in, otherwise the result of br_add_bridge() or
 * br_del_bridge().
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	char ifname[IFNAMSIZ];

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(ifname, uarg, IFNAMSIZ))
		return -EFAULT;

	/* The user buffer is not trusted to be NUL-terminated. */
	ifname[IFNAMSIZ - 1] = 0;

	return (cmd == SIOCBRADDBR) ? br_add_bridge(net, ifname)
				    : br_del_bridge(net, ifname);
}
br_add_bridge()完成网桥设备的动态创建、初始化,并添加到系统中。参数@name就是用户空间命令brctl addbr br0的网桥名称“br0”。
int br_add_bridge(struct net *net, const char *name)
{
struct net_device *dev;
int res;
dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
br_dev_setup);
dev_net_set(dev, net);
dev->rtnl_link_ops = &br_link_ops;
res = register_netdev(dev);
return res;
}
/*
 * Convenience wrapper around alloc_netdev_mqs() that passes 1 for each of
 * its two trailing arguments (presumably the TX and RX queue counts -
 * confirm against the alloc_netdev_mqs() prototype).
 */
#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)
br_dev_setup()初始化net_bridge与net_device结构体,以及虚拟网桥的组播、生成树协议的参数。
/*
 * Setup callback handed to alloc_netdev() for a new bridge device.
 * Initialises the net_device fields and the struct net_bridge private data,
 * including the spanning-tree and multicast defaults.
 */
void br_dev_setup(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev); /* the net_bridge private area is
allocated together with the net_device, contiguously and right after it, so
its start address can be derived from dev */
eth_hw_addr_random(dev); /* set dev->dev_addr to a random, locally administered unicast MAC */
ether_setup(dev); /* generic Ethernet defaults: MTU, header length, broadcast
address, etc.  It also sets
dev->priv_flags = IFF_XMIT_DST_RELEASE |
IFF_XMIT_DST_RELEASE_PERM |
IFF_TX_SKB_SHARING
which the plain assignment to dev->priv_flags further down replaces. */
dev->netdev_ops = &br_netdev_ops; /* attach the bridge driver operations */
dev->destructor = br_dev_free;
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; /* plain assignment, not |= */
dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
br->dev = dev; /* back-pointer from struct net_bridge to its struct net_device */
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
/* bridge priority bytes: 0x80, 0x00 */
br->bridge_id.prio[0] = 0x80;
br->bridge_id.prio[1] = 0x00;
/* group address = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00} */
ether_addr_copy(br->group_addr, eth_reserved_addr_base);
/* spanning-tree parameters; the timer values are multiples of HZ */
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
br->designated_root = br->bridge_id;
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
br->ageing_time = BR_DEFAULT_AGEING_TIME;
br_netfilter_rtable_init(br); /* netfilter */
br_stp_timer_init(br); /* set up the spanning-tree timer callbacks */
br_multicast_init(br); /* multicast parameters and multicast timer callbacks */
}
br_dev_init()初始化vid1,创建本机mac地址转发表。
/*
 * Initialisation of the bridge device (abridged excerpt): sets up the
 * bridge's VLAN state through br_vlan_init().
 *
 * Returns the result of br_vlan_init().
 */
static int br_dev_init(struct net_device *dev)
{
	struct net_bridge *br = netdev_priv(dev);

	return br_vlan_init(br);
}
int br_vlan_init(struct net_bridge *br)
{
br->vlan_proto = htons(ETH_P_8021Q); /* vlan协议标识符0x8100 */
br->default_pvid = 1; /* 默认pvid = 1 */
return br_vlan_add(br, 1,
BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED);
}
/*
 * Add VLAN @vid with @flags to the bridge device itself (abridged excerpt).
 *
 * @br:    bridge the VLAN applies to.
 * @vid:   VLAN id to add.
 * @flags: BRIDGE_VLAN_INFO_* flags forwarded to __vlan_add().
 *
 * Returns 0 on success, -ENOMEM when the VLAN bookkeeping structure cannot
 * be allocated, or the error from __vlan_add().
 *
 * NOTE(review): on success this excerpt does not show `pv` being stored
 * anywhere; the full source presumably publishes it on the bridge - confirm.
 */
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
{
	struct net_port_vlans *pv;
	int err;

	pv = kzalloc(sizeof(*pv), GFP_KERNEL);
	if (!pv)
		return -ENOMEM;

	pv->parent.br = br;	/* the bridge this VLAN information belongs to */

	err = __vlan_add(pv, vid, flags);
	if (err)
		kfree(pv);	/* nothing refers to pv on failure */

	return err;
}
/*
 * Add VLAN @vid to the VLAN set @v, which belongs either to a bridge port
 * (v->port_idx != 0) or to the bridge device itself.
 *
 * Returns 0 on success, or the error from vlan_vid_add() / br_fdb_insert().
 */
static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
{
	struct net_bridge_port *p = NULL;
	struct net_bridge *br;
	struct net_device *dev;
	int err;

	/* VLAN already present: only update its PVID / untagged flags. */
	if (test_bit(vid, v->vlan_bitmap)) {
		__vlan_add_flags(v, vid, flags);
		return 0;
	}

	/* New VLAN from here on. */
	if (v->port_idx) {
		p = v->parent.port;
		br = p->br;
		dev = p->dev;
	} else {
		br = v->parent.br;
		dev = br->dev;
	}

	if (p) {
		/* Add VLAN to the device filter if it is supported.
		 * This ensures tagged traffic enters the bridge when
		 * promiscuous mode is disabled by br_manage_promisc().
		 */
		err = vlan_vid_add(dev, br->vlan_proto, vid);
		if (err)
			return err;
	}

	/* Enter the device's own MAC address into the forwarding table. */
	err = br_fdb_insert(br, p, dev->dev_addr, vid);
	if (err)
		goto out_filt;

	set_bit(vid, v->vlan_bitmap);
	v->num_vlans++;
	__vlan_add_flags(v, vid, flags);

	return 0;

out_filt:
	/* Undo the device filter entry added above. */
	if (p)
		vlan_vid_del(dev, br->vlan_proto, vid);
	return err;
}

/*
 * Insert @addr/@vid into the forwarding table of @br on behalf of port
 * @source (NULL when the entry is for the bridge device itself).
 *
 * Returns the result of fdb_insert().
 */
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
		  const unsigned char *addr, u16 vid)
{
	return fdb_insert(br, source, addr, vid);
}
/*
 * Create a local, static forwarding entry for @addr/@vid in @br.
 *
 * Returns 0 on success (including when a local entry already exists),
 * -EINVAL for an address that is not a valid unicast Ethernet address, or
 * -ENOMEM when the entry cannot be created.
 */
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
		      const unsigned char *addr, u16 vid)
{
	/* The hash bucket is selected by the MAC address and VLAN id together. */
	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
	struct net_bridge_fdb_entry *fdb;

	/* All-zero and multicast addresses do not belong in the table. */
	if (!is_valid_ether_addr(addr))
		return -EINVAL;

	/* br->hash[] holds the forwarding table; look for this MAC + vid. */
	fdb = fdb_find(head, addr, vid);
	if (fdb) {
		/* An existing local entry is kept as is. */
		if (fdb->is_local)
			return 0;
		/* A non-local entry is dropped and re-created below. */
		fdb_delete(br, fdb);
	}

	fdb = fdb_create(head, source, addr, vid);
	if (!fdb)
		return -ENOMEM;

	fdb->is_local = fdb->is_static = 1;	/* local and static (never aged out) */
	fdb_add_hw_addr(br, addr);
	fdb_notify(br, fdb, RTM_NEWNEIGH);	/* build and send the netlink message */

	return 0;
}
图中dev没有指向br的指针,那怎么从dev获取到br呢?因为dev与br是同时申请的连续内存空间(br紧跟在dev之后),所以通过dev的指针+dev的size(即netdev_priv(dev)),就可以获得br的指针了。
添加网桥端口
brctl addif br0 eth0
/*
 * User-space brctl side: add network device @dev as a port of bridge
 * @bridge.
 *
 * @bridge: bridge name, e.g. "br0".
 * @dev:    name of the device to enslave, e.g. "eth0".
 *
 * Returns 0 on success, ENODEV when @dev does not exist, or the (positive)
 * errno value left by the failed SIOCBRADDIF ioctl.
 */
int br_add_interface(const char *bridge, const char *dev)
{
	struct ifreq ifr;
	int ifindex = if_nametoindex(dev);

	if (ifindex == 0)
		return ENODEV;

	/* Zero the request first and copy at most IFNAMSIZ - 1 bytes, so the
	 * name is always NUL-terminated and no stack garbage reaches the
	 * kernel. */
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, bridge, IFNAMSIZ - 1);
	ifr.ifr_ifindex = ifindex;

	if (ioctl(br_socket_fd, SIOCBRADDIF, &ifr) < 0)
		return errno;

	return 0;
}
创建的网桥设备“br0”已经挂接了驱动程序br_netdev_ops,所以对“br0”进行添加端口从设备时,会调用到br_netdev_ops->ndo_do_ioctl(),即br_dev_ioctl()。
/* @rq.ifr_name = "br0", @rq.ifr_ifindex = "eth0转化的ifindex" */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
struct net_bridge *br = netdev_priv(dev);
switch (cmd) {
case SIOCBRADDIF:
case SIOCBRDELIF:
return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);
}
}
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
struct net *net = dev_net(br->dev);
struct net_device *dev;
int ret;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* 匹配@ifindex:在创建网桥时,网桥生成了一个ifindex,并通过list_netdevice()把
net->dev_index_head[].first指向dev.index_hlist,这样通过net就获取到了
dev.ifindex */
dev = __dev_get_by_index(net, ifindex);
if (isadd)
br_add_if(br, dev);
else
br_del_if(br, dev);
}
/*
 * Validate @dev and create the port structure that represents it in @br
 * (abridged excerpt).
 *
 * Returns 0 on success, -EINVAL / -ELOOP / -EBUSY / -EOPNOTSUPP when @dev
 * cannot be bridged (see the checks below), or the error encoded in the
 * pointer returned by new_nbp().
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;

	/* Only non-loopback Ethernet devices with an ETH_ALEN-byte, valid
	 * address are accepted, and none for which netdev_uses_dsa() holds. */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
		return -EINVAL;

	/* A bridge must not become a port of another bridge (that would form
	 * a loop); a device whose transmit hook is br_dev_xmit is a bridge. */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* Device is already a port of some bridge. */
	if (br_port_exists(dev))
		return -EBUSY;

	/* Some devices must never be bridged. */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

	/* Allocate and initialise the port data. */
	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/* NOTE(review): the excerpt ends here; the full function presumably
	 * goes on to attach p to the bridge - confirm against the full source. */
	return 0;
}
/*
 * Allocate and initialise the port structure for @dev on bridge @br.
 *
 * Returns the new port, ERR_PTR() of the negative value from find_portno()
 * when no port number is available, or ERR_PTR(-ENOMEM) when the allocation
 * fails. A reference on @dev is taken with dev_hold() before it is stored.
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br,
				       struct net_device *dev)
{
	struct net_bridge_port *port;
	int port_no;

	/* find_portno() supplies the lowest free port number. */
	port_no = find_portno(br);
	if (port_no < 0)
		return ERR_PTR(port_no);

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	if (!port)
		return ERR_PTR(-ENOMEM);

	port->br = br;

	dev_hold(dev);
	port->dev = dev;

	port->path_cost = port_cost(dev);
	port->priority = 0x8000 >> BR_PORT_BITS;
	port->port_no = port_no;
	port->flags = BR_LEARNING | BR_FLOOD;

	br_init_port(port);
	br_set_state(port, BR_STATE_DISABLED);	/* the port starts out disabled */
	br_stp_port_timer_init(port);
	br_multicast_add_port(port);

	return port;
}