# 1. 安装 Docker 19.03.5 (Install Docker 19.03.5)
# Install a pinned Docker CE release from the Aliyun mirror, write the daemon
# configuration to /etc/docker/daemon.json, and enable the service at boot.
sudo apt-get update
sudo apt-get -y install apt-transport-https ca-certificates curl software-properties-common
# Fetch the signing key and the package index over HTTPS so neither can be
# tampered with in transit.
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://mirrors.aliyun.com/docker-ce/linux/ubuntu $(lsb_release -cs) stable"
sudo apt-get -y update
# List the versions the repository offers (informational only).
apt-cache madison docker-ce
# Pin the engine and the CLI to the same release; the codename suffix follows
# the running distribution, matching the repository line above.
docker_version="5:19.03.5~3-0~ubuntu-$(lsb_release -cs)"
sudo apt-get -y install "docker-ce=${docker_version}" "docker-ce-cli=${docker_version}"
sudo docker version
sudo mkdir -p /etc/docker
# Notes on the configuration below (JSON itself cannot carry comments):
# - No "hosts" key: the packaged systemd unit already passes -H fd://, and
#   dockerd refuses to start when hosts is given both as a flag and in
#   daemon.json. The default unix socket is still used.
# - NOTE(review): "insecure-registries" relaxes TLS verification for gcr.io,
#   quay.io and the Aliyun registry, and "metrics-addr" listens on all
#   interfaces -- confirm both are intended.
# - The "nvidia" runtime entry refers to a binary installed in a later step;
#   the daemon is not restarted here.
# The quoted delimiter keeps the shell from expanding anything inside the JSON.
sudo tee /etc/docker/daemon.json >/dev/null <<'EOF'
{
  "mtu": 1454,
  "debug": false,
  "experimental": true,
  "metrics-addr": "0.0.0.0:1337",
  "selinux-enabled": false,
  "registry-mirrors": [ "http://hub-mirror.c.163.com","https://2508xbaf.mirror.aliyuncs.com"],
  "insecure-registries":[
    "gcr.io",
    "quay.io",
    "registry.cn-hangzhou.aliyuncs.com"
  ],
  "exec-opts": [ "native.cgroupdriver=cgroupfs" ],
  "exec-root": "/var/run/docker",
  "data-root": "/data/docker/graph",
  "storage-driver": "overlay2",
  "storage-opts": [ "overlay2.override_kernel_check=true" ],
  "live-restore": true,
  "max-concurrent-downloads": 10,
  "max-concurrent-uploads": 5,
  "shutdown-timeout": 15,
  "oom-score-adjust": -999,
  "default-shm-size": "64M",
  "log-driver": "json-file",
  "log-level": "warn",
  "log-opts": { "max-size": "5000m", "max-file": "10" },
  "default-address-pools":[{"base":"172.16.0.0/16","size":24}],
  "seccomp-profile": "",
  "no-new-privileges": false,
  "runtimes": {
    "nvidia": {
      "path": "nvidia-container-runtime",
      "runtimeArgs": []
    }
  }
}
EOF
sudo systemctl daemon-reload
sudo systemctl enable docker
# 2. 安装 nvidia-container-runtime (Install nvidia-container-runtime)
# Register NVIDIA's apt repository for this distribution, install the
# container toolkit and runtime, then restart Docker so it reloads its
# configuration.
# -f makes curl fail on an HTTP error instead of piping an error page into
# apt-key or the sources list; -S still prints the reason.
curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
# Build the repository id from /etc/os-release, e.g. ID=ubuntu and
# VERSION_ID=18.04 give "ubuntu18.04". The subshell keeps the sourced
# variables out of the current shell.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}")
echo "$distribution"
curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# The stand-alone nvidia-container-runtime-hook .deb (an Ubuntu 14.04 build)
# is not fetched: nothing installs it, and nvidia-container-toolkit below
# provides the hook.
sudo apt-get -y update
sudo apt-get install -y nvidia-container-toolkit nvidia-container-runtime
sudo systemctl restart docker
# 3. 安装内核并重启 (Install the kernel and reboot)
# Download and install the 4.15.0-041500 kernel headers and image, refresh the
# GRUB configuration, and reboot.
# NOTE: the download host is a "*" placeholder; replace it with the real
# mirror before running. It is kept in quotes so the shell never treats the
# "*" as a glob.
kernel_url='https://*/kernel'
wget "${kernel_url}/linux-headers-4.15.0-041500-generic_4.15.0-041500.201802011154_amd64.deb"
wget "${kernel_url}/linux-headers-4.15.0-041500_4.15.0-041500.201802011154_all.deb"
wget "${kernel_url}/linux-image-4.15.0-041500-generic_4.15.0-041500.201802011154_amd64.deb"
# Installing packages requires root; the common headers are listed first, then
# the flavour-specific headers, then the image.
sudo dpkg -i \
  linux-headers-4.15.0-041500_4.15.0-041500.201802011154_all.deb \
  linux-headers-4.15.0-041500-generic_4.15.0-041500.201802011154_amd64.deb \
  linux-image-4.15.0-041500-generic_4.15.0-041500.201802011154_amd64.deb
sudo update-grub
# The remaining steps must be run after the machine has come back up.
sudo reboot
# 4. 安装 nvidia 驱动 (Install the NVIDIA driver)
# Download the NVIDIA 440.33.01 driver installer and run it.
# The installer executes with root privileges, so fetch it over HTTPS rather
# than plain HTTP.
wget https://us.download.nvidia.com/tesla/440.33.01/NVIDIA-Linux-x86_64-440.33.01.run
chmod +x NVIDIA-Linux-x86_64-440.33.01.run
sudo ./NVIDIA-Linux-x86_64-440.33.01.run
# 5. 检测 nvidia 驱动 (Verify the NVIDIA driver)
# Verify the driver on the host and from inside containers.
# Enable persistence mode (changing it requires root).
sudo nvidia-smi -pm 1
# Run nvidia-smi in a CUDA container through both GPU entry points: the
# "nvidia" runtime and the --gpus flag. --rm removes each test container on
# exit so the checks leave nothing behind.
sudo docker run --rm --runtime=nvidia nvidia/cuda:9.0-base nvidia-smi
sudo docker run --rm --gpus all nvidia/cuda:9.0-base nvidia-smi