在容器里面创建宿主机级别可操作的 ns,在容器内部可正常使用,但是在宿主机上执行 ip netns 时会提示 "Error: Peer netns reference is invalid." 错误。
正确方式,在host级别创建该ns,在daemonset容器内部去用即可
实际上我的 pod 是 daemonset(kube-ovn-cni),而且是 host network 模式
如下代码在宿主机命名空间上,操作ns是没有问题的。
package main
import (
"fmt"
"log"
"os"
"path"
"github.com/containernetworking/plugins/pkg/ns"
"golang.org/x/sys/unix"
)
// bindMountPath is the directory under which named network namespaces are
// bind-mounted.
const bindMountPath = "/run/netns"
// NsHandle is a handle to a network namespace. Its underlying type is int,
// so it can be converted directly to an int and used as a file descriptor.
type NsHandle int
// ClosedNs returns an empty (closed) NsHandle, represented by the value -1.
func ClosedNs() NsHandle {
	const closed NsHandle = -1
	return closed
}
// GetNsFromPath opens the network namespace file at path and returns a handle
// wrapping the resulting file descriptor.
//
// The file is opened read-only with the close-on-exec flag set. If the open
// fails, the returned handle is -1 and the open error is passed through.
func GetNsFromPath(path string) (NsHandle, error) {
	fd, openErr := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if openErr != nil {
		return NsHandle(-1), openErr
	}
	handle := NsHandle(fd)
	return handle, nil
}
// GetNsFromThread returns a handle to the network namespace of thread tid in
// process pid, opened through that thread's /proc entry.
func GetNsFromThread(pid, tid int) (NsHandle, error) {
	procPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", pid, tid)
	return GetNsFromPath(procPath)
}
// GetNs returns a handle to the network namespace of the calling thread.
func GetNs() (NsHandle, error) {
	pid, tid := os.Getpid(), unix.Gettid()
	return GetNsFromThread(pid, tid)
}
// newNs unshares the network namespace of the calling thread, which makes a
// fresh namespace current, and returns a handle to it.
// If the unshare call fails, the handle is -1 and that error is returned.
func newNs() (ns NsHandle, err error) {
	err = unix.Unshare(unix.CLONE_NEWNET)
	if err != nil {
		return -1, err
	}
	return GetNs()
}
// NewNamedNs creates a new named network namespace, makes it the current
// namespace of the calling thread, and returns a handle to it.
//
// The namespace is made persistent by bind-mounting the calling thread's
// /proc/<pid>/task/<tid>/ns/net entry onto bindMountPath/name. bindMountPath
// is created first if it does not exist.
//
// On error the returned handle is ClosedNs(). If a step fails after the
// namespace has been created, the handle is closed, and on a failed mount the
// mount-point file is removed as well, so a failed call leaks no descriptor
// and leaves no stale entry that would make a retry fail on O_EXCL. The
// calling thread nevertheless stays in the freshly unshared namespace.
func NewNamedNs(name string) (NsHandle, error) {
	if _, err := os.Stat(bindMountPath); os.IsNotExist(err) {
		if err := os.MkdirAll(bindMountPath, 0755); err != nil {
			return ClosedNs(), err
		}
	}

	handle, err := newNs()
	if err != nil {
		return ClosedNs(), err
	}

	namedPath := path.Join(bindMountPath, name)

	// O_EXCL makes creation fail when an entry with this name already exists.
	f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444)
	if err != nil {
		// Best-effort cleanup: the open error is the one worth reporting.
		_ = unix.Close(int(handle))
		return ClosedNs(), err
	}
	f.Close()

	nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
	if err := unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, ""); err != nil {
		// Best-effort cleanup: release the descriptor and drop the empty
		// mount-point file created above.
		_ = unix.Close(int(handle))
		_ = os.Remove(namedPath)
		return ClosedNs(), err
	}

	return handle, nil
}
// DeleteNamedNs deletes a named network namespace: it unmounts
// bindMountPath/name with MNT_DETACH and then removes the mount-point file.
// The first error encountered is returned.
func DeleteNamedNs(name string) error {
	target := path.Join(bindMountPath, name)
	if err := unix.Unmount(target, unix.MNT_DETACH); err != nil {
		return err
	}
	return os.Remove(target)
}
// GetNsFromName returns a handle to the named network namespace found at
// /var/run/netns/<name>, such as one created by `ip netns add`.
func GetNsFromName(name string) (NsHandle, error) {
	nsFile := fmt.Sprintf("/var/run/netns/%s", name)
	return GetNsFromPath(nsFile)
}
// Identifiers for the node gateway network namespace.
const (
	// NodeGwNic is presumably the gateway interface name; it is not used in
	// this file — verify against callers.
	NodeGwNic = "ovnext0"
	// NodeGwNs is the name of the node gateway network namespace.
	NodeGwNs = "ovnext"
	// NodeGwNsPath is the filesystem path of that namespace.
	NodeGwNsPath = "/var/run/netns/" + NodeGwNs
)
// Error logs e and terminates the process via log.Fatalln when e is non-nil.
// A nil error is a no-op.
func Error(e error) {
	if e == nil {
		return
	}
	log.Fatalln(e)
}
// SetupNetNamespace makes sure the named network namespace NodeGwNs exists
// and returns a pointer to a handle opened on it.
//
// If the namespace cannot be opened by name it is created with NewNamedNs.
// Any failure is reported through Error, which terminates the process.
//
// NOTE(review): NewNamedNs moves the calling OS thread into the new
// namespace, and this function neither pins the goroutine to that thread
// (runtime.LockOSThread) nor switches back afterwards — confirm that this is
// acceptable for callers that keep running after the call.
func SetupNetNamespace() *NsHandle {
	log.Println("SetupNetNamespace...running")

	// Probe for the namespace; any failure to open it is treated as
	// "does not exist" and triggers creation.
	probe, err := GetNsFromName(NodeGwNs)
	if err != nil {
		probe, err = NewNamedNs(NodeGwNs)
		if err != nil {
			Error(fmt.Errorf("failed to create node gw ns %q, %v", NodeGwNs, err))
		}
	}
	// The probe/creation descriptor is only needed to establish existence;
	// release it so it is not leaked. The close result is irrelevant here.
	_ = unix.Close(int(probe))

	// Open the namespace through the CNI ns package as well, then release it
	// once it has been logged.
	podNS, err := ns.GetNS(NodeGwNsPath)
	Error(err)
	log.Println("PodNs: ", podNS)
	_ = podNS.Close() // best effort: the handle was only opened for logging

	// Named gwNs rather than ns so the imported ns package is not shadowed.
	gwNs, err := GetNsFromName(NodeGwNs)
	if err != nil {
		Error(fmt.Errorf("failed to locate node gw ns %q, %v", NodeGwNs, err))
	}
	log.Println("SetupNetNamespace...done")
	return &gwNs
}
// main sets up the node gateway network namespace and logs the returned
// handle pointer.
func main() {
	log.Println("added ns: ", SetupNetNamespace())
}
但在pod 或者容器内部应该都不行,即使是hostnetwork 模式也不行
package daemon
import (
"fmt"
"os"
"path"
"golang.org/x/sys/unix"
)
// bindMountPath is the directory under which named network namespaces are
// bind-mounted.
const bindMountPath = "/run/netns"
// NsHandle is a handle to a network namespace. Its underlying type is int,
// so it can be converted directly to an int and used as a file descriptor.
type NsHandle int
// ClosedNs returns an empty (closed) NsHandle, represented by the value -1.
func ClosedNs() NsHandle {
	const closed NsHandle = -1
	return closed
}
// GetNsFromPath opens the network namespace file at path and returns a handle
// wrapping the resulting file descriptor.
//
// The file is opened read-only with the close-on-exec flag set. If the open
// fails, the returned handle is -1 and the open error is passed through.
func GetNsFromPath(path string) (NsHandle, error) {
	fd, openErr := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if openErr != nil {
		return NsHandle(-1), openErr
	}
	handle := NsHandle(fd)
	return handle, nil
}
// GetNsFromThread returns a handle to the network namespace of thread tid in
// process pid, opened through that thread's /proc entry.
func GetNsFromThread(pid, tid int) (NsHandle, error) {
	procPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", pid, tid)
	return GetNsFromPath(procPath)
}
// GetNs returns a handle to the network namespace of the calling thread.
func GetNs() (NsHandle, error) {
	pid, tid := os.Getpid(), unix.Gettid()
	return GetNsFromThread(pid, tid)
}
// newNs unshares the network namespace of the calling thread, which makes a
// fresh namespace current, and returns a handle to it.
// If the unshare call fails, the handle is -1 and that error is returned.
func newNs() (ns NsHandle, err error) {
	err = unix.Unshare(unix.CLONE_NEWNET)
	if err != nil {
		return -1, err
	}
	return GetNs()
}
// NewNamedNs creates a new named network namespace, makes it the current
// namespace of the calling thread, and returns a handle to it.
//
// The namespace is made persistent by bind-mounting the calling thread's
// /proc/<pid>/task/<tid>/ns/net entry onto bindMountPath/name. bindMountPath
// is created first if it does not exist.
//
// On error the returned handle is ClosedNs(). If a step fails after the
// namespace has been created, the handle is closed, and on a failed mount the
// mount-point file is removed as well, so a failed call leaks no descriptor
// and leaves no stale entry that would make a retry fail on O_EXCL. The
// calling thread nevertheless stays in the freshly unshared namespace.
func NewNamedNs(name string) (NsHandle, error) {
	if _, err := os.Stat(bindMountPath); os.IsNotExist(err) {
		if err := os.MkdirAll(bindMountPath, 0755); err != nil {
			return ClosedNs(), err
		}
	}

	handle, err := newNs()
	if err != nil {
		return ClosedNs(), err
	}

	namedPath := path.Join(bindMountPath, name)

	// O_EXCL makes creation fail when an entry with this name already exists.
	f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444)
	if err != nil {
		// Best-effort cleanup: the open error is the one worth reporting.
		_ = unix.Close(int(handle))
		return ClosedNs(), err
	}
	f.Close()

	nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
	if err := unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, ""); err != nil {
		// Best-effort cleanup: release the descriptor and drop the empty
		// mount-point file created above.
		_ = unix.Close(int(handle))
		_ = os.Remove(namedPath)
		return ClosedNs(), err
	}

	return handle, nil
}
// DeleteNamedNs deletes a named network namespace: it unmounts
// bindMountPath/name with MNT_DETACH and then removes the mount-point file.
// The first error encountered is returned.
func DeleteNamedNs(name string) error {
	target := path.Join(bindMountPath, name)
	if err := unix.Unmount(target, unix.MNT_DETACH); err != nil {
		return err
	}
	return os.Remove(target)
}
// GetNsFromName returns a handle to the named network namespace found at
// /var/run/netns/<name>, such as one created by `ip netns add`.
func GetNsFromName(name string) (NsHandle, error) {
	nsFile := fmt.Sprintf("/var/run/netns/%s", name)
	return GetNsFromPath(nsFile)
}
参考: https://serverfault.com/questions/961504/cannot-create-nested-network-namespace
参考项目: https://github.com/vishvananda/netns
但是neutron ovn metadata 就是host network 模式的docker内部创建的ns,可能跟容器的其他ns相关的配置还有点关系,可以再分析下kolla neutron metadata相关的docker build内容或者挂载目录。
[root@compute028 ~]# ip netns
ovnmeta-8569c79c-4184-406a-ad55-19bf6d47d549 (id: 0)
ovnmeta-1e8d676f-5d0a-4371-aa3d-a0029598e27a (id: 2)
ovnmeta-fcfd0f86-989f-47c3-9830-cd40e75ba873 (id: 1)
ovnmeta-b052976f-29e0-4a3c-ad66-fa23b058427b (id: 10)
[root@compute028 ~]#
[root@compute028 ~]#
[root@compute028 ~]# docker ps -a | grep neutron
f8d24cbde993 registry.yealinkops.com/third_party/kolla/centos-source-neutron-metadata-agent:victoria "dumb-init --single-…" 11 months ago Up 8 weeks (healthy) neutron_ovn_metadata_agent
[root@compute028 ~]#
实际上这个问题的原因应该和容器bind的模式有关系,具体原因如下:
Shared subtrees 技术
内核特性,用于控制某个挂载点下的子挂载点是否"传播"给其他挂载点,只应用于 bind mount 和 mount namespace 场景中。
个人理解应该是不同的ns,挂载了同一目录,是否将不同ns下的操作的结果,同步到对端ns(peer group)下面。
Shared subtrees 技术引入了两个概念,分别是 peer group 和 propagation type,接下来一一介绍。
2.1 peer group
共享挂载信息的一组挂载点,来源主要两种:
bind mount,此时源和目的挂载点属于同一 peer group,要求源也是挂载点。
新的 namespace 创建,新的 namespace 会拷贝旧的一份挂载信息,于是,新旧中相同挂载点属于同一 peer group。
2.2 propagation type
每个挂载点都有这样的一个元数据(propagation type),用于控制当一个挂载点的下面创建和移除挂载点的时候,是否会传播到属于相同peer group的其他挂载点下去,主要有三种:
MS_SHARED: 挂载信息在同一个 peer group 里会相互传播。比如把节点上的主目录挂载到容器内的 /rootfs,如果节点上的主目录创建了新的挂载点X,则在容器内的 /rootfs 下面也会出现新的挂载点 /rootfs/X。
MS_PRIVATE:挂载信息在同一个 peer group 里不会相互传播。比如把节点上的主目录挂载到容器内的 /rootfs,如果节点上的主目录创建了新的挂载点X,则容器内的 /rootfs 下面不会出现新的挂载点 /rootfs/X。
MS_SLAVE:挂载信息传播是单向的。比如把节点上的主目录挂载到容器内的 /rootfs,如果节点上的主目录创建了新的挂载点 X,则在容器内的 /rootfs 下面也会出现新的挂载点 /rootfs/X ,反之则不行。
这个对应到 k8s 中 Container.volumeMounts 的 mountPropagation 字段,分别是:Bidirectional、None、HostToContainer。
参考文档: https://imroc.cc/k8s/troubleshooting/mount-root-result-device-or-resource-busy/
解决方式
# ...
volumeMounts:
- mountPath: /rootfs
name: host-rootfs
mountPropagation: HostToContainer # 这里显式声明 mountPropagation 为 HostToContainer 或者 Bidirectional
下面证明该bind模式的影响
[root@compute029 ~]# docker inspect neutron_ovn_metadata_agent
# 可以看到 neutron_ovn_metadata_agent 的模式为 shared
{
"Type": "bind",
"Source": "/run/netns",
"Destination": "/run/netns",
"Mode": "shared",
"RW": true,
"Propagation": "shared"
},
# 也就是说容器内部创建的ns 会自动同步到peer group的另一端,简单来说就是在容器外部也就是node上可以在mountinfo中看到该挂载点
[root@compute029 ~]# ip netns
test
ovnmeta-b94f0f1f-111a-4b63-b24b-d78ff82cc16b (id: 0)
ovnmeta-b052976f-29e0-4a3c-ad66-fa23b058427b (id: 6)
[root@compute029 ~]# cat /proc/self/mountinfo | grep test
3048 28 0:4 net:[4026534458] /run/netns/test rw shared:1050 - nsfs nsfs rw
# 而kube-ovn-cni 默认是 rslave
{
"Type": "bind",
"Source": "/var/run/netns",
"Destination": "/var/run/netns",
"Mode": "rslave",
"RW": true,
"Propagation": "rslave"
},
# 也就是说容器内部创建ns并不会同步到node上
[root@pc-node-1 ~]# cat /proc/self/mountinfo | grep test
[root@pc-node-1 ~]# cat /proc/self/mountinfo | grep ovnext
1989 2107 0:4 net:[4026534240] /run/netns/ovnext rw shared:1078 - nsfs nsfs rw
1990 29 0:4 net:[4026534240] /run/netns/ovnext rw shared:1078 - nsfs nsfs rw
# 所以在host ns上查看时会出现如下错误
[root@pc-node-1 ~]# ip netns
Error: Peer netns reference is invalid.
Error: Peer netns reference is invalid.
test
ovnext (id: 22)
cni-e7853d6c-4bcb-5f60-6d43-905445f60494 (id: 18)
cni-5e620aa0-4d9e-2878-acf6-6def9a20a58f (id: 20)
cni-e91a958a-f649-a933-ed61-1a7ece871531 (id: 17)
cni-e6c677c8-5a1b-dbb9-adf1-904ee7be1ef9 (id: 6)
cni-f31aa9ee-5f17-7394-6ba3-95728a3e717f (id: 10)
cni-afae63b5-2742-7a53-17c4-121e54286331 (id: 2)
cni-cf56f44a-810c-0841-118a-9a91e135bc62 (id: 5)
cni-02da359d-10a4-b024-67af-38fa2ef217c0 (id: 9)
cni-be3931cb-8515-2a7a-dea2-667932727533 (id: 8)
cni-d35752a0-099a-eea1-cd85-c27e21ff4486 (id: 7)
cni-9b2b9081-b580-b9e8-c1b6-645ebbc23cba (id: 16)
cni-020f9cd1-3d0d-b6fc-5289-c744024f1d54 (id: 1)
cni-1bcb6e30-2599-df06-383f-ab3fefb71d07 (id: 4)
cni-de4aa47a-02f7-0738-c549-11368d78e69b (id: 0)
cni-52f3e6c3-b717-541a-7d16-3fa790f2699a (id: 15)
cni-cd48c17a-a8fe-6c37-26dc-359ab380daeb (id: 14)
cni-3691f08f-68fc-727d-8fc4-7a68585f8978 (id: 13)
cni-68a73729-61eb-8eef-6226-6f5771f1c48a (id: 12)
cni-688ef314-d25e-e707-9403-c7c966889409 (id: 11)
cni-2cc5ef95-c21a-f0a3-7448-6fb5c4d59b38 (id: 3)
参考:
3.https://feilengcui008.github.io/post/linux%E5%86%85%E6%A0%B8namespace/