1. Environment Check
root@llmserver:~# uname -m && cat /etc/*release
x86_64
CentOS Stream release 10 (Coughlan)
NAME="CentOS Stream"
VERSION="10 (Coughlan)"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="10"
PLATFORM_ID="platform:el10"
PRETTY_NAME="CentOS Stream 10 (Coughlan)"
ANSI_COLOR="0;31"
LOGO="fedora-logo-icon"
CPE_NAME="cpe:/o:centos:centos:10"
HOME_URL="https://centos.org/"
VENDOR_NAME="CentOS"
VENDOR_URL="https://centos.org/"
BUG_REPORT_URL="https://issues.redhat.com/"
REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux 10"
REDHAT_SUPPORT_PRODUCT_VERSION="CentOS Stream"
CentOS Stream release 10 (Coughlan)
CentOS Stream release 10 (Coughlan)
root@llmserver:~# gcc --version
gcc (GCC) 14.2.1 20250110 (Red Hat 14.2.1-7)
Copyright © 2024 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
root@llmserver:~# python -V
Python 3.12.9
CUDA is already installed:
root@llmserver:~# nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Wed_Jan_15_19:20:09_PST_2025
Cuda compilation tools, release 12.8, V12.8.61
Build cuda_12.8.r12.8/compiler.35404655_0
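The session above stops at the compiler toolchain; a GPU visibility check with nvidia-smi (the standard NVIDIA driver tool, not shown in the original log) usually rounds out the environment check:
# Confirm the driver sees the GPU(s); driver and CUDA versions appear in the header:
nvidia-smi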
2. Installing LLaMA-Factory
- Installation commands
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip3 install -e ".[torch,metrics]"
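A quick sanity check right after the editable install can catch a broken environment before the CLI is ever invoked. This is a minimal sketch using standard pip and PyTorch commands (not part of the original session); on this machine it would have surfaced the libcusparseLt error below immediately:
# Confirm the package registered and torch can at least report its version:
pip3 show llamafactory | head -n 2
python -c "import torch; print(torch.__version__)"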
- Issues encountered during installation
Issue 1: the libcusparseLt.so.0 library is missing
root@llmserver:~/LLaMA-Factory# llamafactory-cli version
Traceback (most recent call last):
  File "/usr/local/bin/llamafactory-cli", line 5, in <module>
    from llamafactory.cli import main
  File "/root/LLaMA-Factory/src/llamafactory/__init__.py", line 44, in <module>
    from .extras.env import VERSION
  File "/root/LLaMA-Factory/src/llamafactory/extras/env.py", line 20, in <module>
    import accelerate
  File "/usr/local/lib/python3.12/site-packages/accelerate/__init__.py", line 16, in <module>
    from .accelerator import Accelerator
  File "/usr/local/lib/python3.12/site-packages/accelerate/accelerator.py", line 32, in <module>
    import torch
  File "/usr/local/lib64/python3.12/site-packages/torch/__init__.py", line 405, in <module>
    from torch._C import * # noqa: F403
    ^^^^^^^^^^^^^^^^^^^^^^
ImportError: libcusparseLt.so.0: cannot open shared object file: No such file or directory
Solution:
- Create a symbolic link (see the locating tip after the command)
root@llmserver:~/LLaMA-Factory# ln -s /usr/local/lib/python3.12/site-packages/cusparselt/lib/libcusparseLt.so.0 /usr/local/cuda/lib64/libcusparseLt.so.0
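If the wheel placed the library somewhere else on your system, it is safer to locate it before linking; a minimal sketch with standard find/ls (the paths shown are from this particular install):
# Find where pip actually put the library before creating the link:
find /usr/local -name "libcusparseLt.so*" 2>/dev/null
# Verify the link resolves:
ls -l /usr/local/cuda/lib64/libcusparseLt.so.0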
- Configure the environment variable
root@llmserver:~# vim .bash_profile
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
source .bash_profile
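Note that the export above overwrites any existing LD_LIBRARY_PATH; appending is the safer pattern. A quick way to confirm the fix took effect (standard shell and PyTorch checks, assuming the same shell session, not from the original log):
# Safer variant that preserves any pre-existing library path:
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}
# torch should now import without the libcusparseLt error:
python -c "import torch; print(torch.cuda.is_available())"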
Issue 2: the libnccl.so.2 library is missing
root@llmserver:~# llamafactory-cli version
Traceback (most recent call last):
  File "/usr/local/bin/llamafactory-cli", line 5, in <module>
    from llamafactory.cli import main
  File "/root/LLaMA-Factory/src/llamafactory/__init__.py", line 44, in <module>
    from .extras.env import VERSION
  File "/root/LLaMA-Factory/src/llamafactory/extras/env.py", line 20, in <module>
    import accelerate
  File "/usr/local/lib/python3.12/site-packages/accelerate/__init__.py", line 16, in <module>
    from .accelerator import Accelerator
  File "/usr/local/lib/python3.12/site-packages/accelerate/accelerator.py", line 32, in <module>
    import torch
  File "/usr/local/lib64/python3.12/site-packages/torch/__init__.py", line 405, in <module>
    from torch._C import * # noqa: F403
    ^^^^^^^^^^^^^^^^^^^^^^
ImportError: libnccl.so.2: cannot open shared object file: No such file or directory
Solution:
- Create a symbolic link
ln -s /usr/local/lib/python3.12/site-packages/nvidia/nccl/lib/libnccl.so.2 /usr/local/cuda/lib64/libnccl.so.2
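As with the previous fix, a one-liner confirms torch now loads and can see NCCL. torch.cuda.nccl.version() is a standard PyTorch API, though this check is not from the original session:
# Should print the bundled NCCL version instead of raising an ImportError:
python -c "import torch; print(torch.cuda.nccl.version())"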
3. Verifying LLaMA-Factory
If the following output appears, the installation succeeded.
root@llmserver:~# llamafactory-cli version
----------------------------------------------------------
| Welcome to LLaMA Factory, version 0.9.2.dev0           |
|                                                        |
| Project page: https://github.com/hiyouga/LLaMA-Factory |
----------------------------------------------------------
4. Starting the Web UI
- Launch command (a bind-address/port variant follows)
llamafactory-cli webui &
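The Web UI is a Gradio app, so Gradio's standard environment variables can pin the bind address and port; whether you need them depends on your network setup (this variant is an assumption, not from the original notes):
# Bind explicitly to all interfaces on port 7860 via Gradio's standard env vars:
GRADIO_SERVER_NAME=0.0.0.0 GRADIO_SERVER_PORT=7860 llamafactory-cli webui &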
- Configure the firewall policy to open the Web UI port
Add the port:
firewall-cmd --zone=public --add-port=7860/tcp --permanent
Reload the firewall:
firewall-cmd --reload
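To confirm the rule survived the reload and something is actually listening, standard firewalld and ss checks can be used (not shown in the original session):
# The port should appear in the zone's open-port list:
firewall-cmd --zone=public --list-ports
# And the webui process should be listening on it:
ss -tlnp | grep 7860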
Web UI address: http://<deployment-host-IP>:7860/
5. Model Loading Issues and Solutions
- Issue 1: the vllm package is missing
importlib.metadata.PackageNotFoundError: No package metadata was found for The 'vllm>=0.4.3,<=0.7.3' distribution was not found and is required by this application.
To fix: run `pip install vllm>=0.4.3,<=0.7.3` or set `DISABLE_VERSION_CHECK=1` to skip this check.
Solution: install the vllm library
root@llmserver:~# pip3 install vllm
If package conflicts are encountered, manually remove the conflicting RPMs:
rpm -e python3-setuptools-69.0.3-9.el10.noarch --nodeps
rpm -e python3-jsonschema-4.19.1-7.el10.noarch --nodeps
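Because the version check above demands vllm between 0.4.3 and 0.7.3, a plain `pip3 install vllm` may pull a newer release and trip the check again; pinning the range is the safer form (the quotes keep the shell from treating the comparison operators as redirections):
# Install a vllm version inside the range the application requires:
pip3 install "vllm>=0.4.3,<=0.7.3"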
- Issue 2: https://huggingface.co is unreachable
OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.
Solution: set an environment variable to use the mirror https://hf-mirror.com, then restart the Web UI.
root@llmserver:~# vim .bash_profile
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
export HF_ENDPOINT=https://hf-mirror.com
source .bash_profile
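A quick reachability check confirms the variable is set and the mirror answers before restarting the Web UI (standard curl usage; not from the original session):
echo $HF_ENDPOINT
# Expect an HTTP 200 status line from the mirror:
curl -sI https://hf-mirror.com | head -n 1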
root@llmserver:~# kill 227261
root@llmserver:~# llamafactory-cli webui &
[1] 228222
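The PID killed above came from the earlier background job; if it is not at hand, the standard procps tools can find the process by its command line, or kill and relaunch it in one step (a sketch, not from the original session):
# Find the running webui process:
pgrep -af "llamafactory-cli webui"
# Or kill-and-relaunch in one step:
pkill -f "llamafactory-cli webui" && llamafactory-cli webui &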