使用SpringAI + Qwen3-8B + bge-large-zh-v1.5 + milvus 实现本地构建知识库系统
一、环境准备
- 24G+ 显卡
- python 运行环境
- 下载好LLM模型,我这里使用的是 Qwen3-8B
- Embedding 模型,这里使用的 bge-large-zh-v1.5
- 还有一份数据集
运行模型 Qwen3-8B
# Serve Qwen3-8B via vLLM's OpenAI-compatible API server on port 6006,
# detached with nohup so it survives the shell session.
# NOTE(review): "--max-model-len 8k" requires a vLLM version that accepts
# human-readable sizes; older releases expect an integer (8192) — confirm.
nohup python -m vllm.entrypoints.openai.api_server \
--model /code/models/Qwen/Qwen3-8B \
--served-model-name qwen3-8b \
--max-model-len 8k \
--host 0.0.0.0 \
--port 6006 \
--dtype bfloat16 \
--gpu-memory-utilization 0.8 \
--enable-auto-tool-choice \
--tool-call-parser hermes &
运行Embedding模型 bge-large-zh-v1.5
# Serve the bge-large-zh-v1.5 embedding model on port 6007.
# GPU memory is capped at 40% so it can share the card with the chat model;
# --max-model-len 512 matches the embedding model's input window.
nohup python -m vllm.entrypoints.openai.api_server \
--model /code/models/BAAI/bge-large-zh-v1.5 \
--served-model-name bge-large-zh \
--host 0.0.0.0 \
--port 6007 \
--dtype bfloat16 \
--gpu-memory-utilization 0.4 \
--max-model-len 512 &
运行Milvus向量数据库(可参考官网https://milvus.io/docs/zh/install_standalone-docker.md)
Download the installation script
curl -sfL https://raw.githubusercontent.com/milvus-io/milvus/master/scripts/standalone_embed.sh -o standalone_embed.sh
Start the Docker container
bash standalone_embed.sh start
出现下面画面表示Milvus安装完成

milvus安装完成.png
数据集准备
我这里是在魔搭社区拉取的三国演义知识问答数据集。地址:https://modelscope.cn/datasets/ssf2024/sanguoyanyiquestion
二、代码编写
首先pom依赖如下
<dependencies>
    <!-- OpenAI-compatible chat & embedding client (also works with vLLM) -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-starter-model-openai</artifactId>
    </dependency>
    <dependency>
        <groupId>io.projectreactor.netty</groupId>
        <artifactId>reactor-netty-http</artifactId>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- Required by QuestionAnswerAdvisor -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-advisors-vector-store</artifactId>
    </dependency>
    <!-- Required by RetrievalAugmentationAdvisor -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-rag</artifactId>
    </dependency>
    <!-- Milvus VectorStore auto-configuration -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-starter-vector-store-milvus</artifactId>
    </dependency>
    <!-- Tika DocumentReader for parsing documents -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-tika-document-reader</artifactId>
    </dependency>
</dependencies>
配置信息 application.yml
spring:
  application:
    name: mashangjun-ai-rag
  ai:
    openai:
      # Chat endpoint (vLLM serving Qwen3-8B); vLLM ignores the key but the
      # OpenAI client requires a non-empty value.
      base-url: http://192.168.10.102:6006/
      api-key: sk-1234567890abcd
      chat:
        options:
          model: qwen3-8b
      embedding:
        # Embedding requests go to the bge-large-zh server instead.
        base-url: http://192.168.10.102:6007/
        options:
          model: bge-large-zh
    vectorstore:
      milvus:
        client:
          host: 192.168.47.130
          port: 19530
          token: root:Milvus
        database-name: default
        # bge-large-zh-v1.5 produces 1024-dimension vectors.
        embedding-dimension: 1024
        collection-name: msj_ai
        initialize-schema: true
server:
  port: 8081
模型配置
@Configuration
@RequiredArgsConstructor
public class ChatConfig {
final MilvusVectorStore vectorStore;
@Bean
public ChatClient chatClient(ChatModel chatModel) {
return ChatClient.builder(chatModel)
.defaultSystem("你作为一名专业的AI助手,请根据用户提示信息回答问题。" +
"用户提及的问题优先从向量数据库中查询,并且在回答用户提问时,不需要将查询向量数据库的过程展示给用户")
.build();
}
/**
* 配置 RetrievalAugmentationAdvisor
*
* @return RetrievalAugmentationAdvisor
*/
@Bean
public RetrievalAugmentationAdvisor retrievalAugmentationAdvisor() {
VectorStoreDocumentRetriever retriever = VectorStoreDocumentRetriever.builder()
.vectorStore(vectorStore)
.similarityThreshold(0.5)
.topK(5)
.build();
ContextualQueryAugmenter augmenter = ContextualQueryAugmenter.builder()
.allowEmptyContext(true)
.build();
return RetrievalAugmentationAdvisor.builder()
.documentRetriever(retriever)
.queryAugmenter(augmenter)
.build();
}
@Bean
public QuestionAnswerAdvisor questionAnswerAdvisor() {
return QuestionAnswerAdvisor.builder(vectorStore)
.searchRequest(SearchRequest.builder().similarityThreshold(0.2d)
.topK(6).build())
.build();
}
加载准备好的数据
/**
 * One-time data bootstrap: on startup, if the Milvus collection is empty,
 * read classpath:train.json, split it into chunks and store the embeddings.
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class LoadData {

    final MilvusVectorStore vectorStore;

    @Value("${spring.ai.vectorstore.milvus.collection-name}")
    private String collectionName;

    /**
     * Loads the dataset into the vector store unless the collection already
     * contains rows (idempotent across restarts).
     *
     * @throws IOException if train.json cannot be read from the classpath
     */
    @PostConstruct
    public void loadData() throws IOException {
        log.info("开始初始化数据:{}", collectionName);
        Optional<Object> nativeClient = vectorStore.getNativeClient();
        if (nativeClient.isEmpty()) {
            log.error("Milvus 客户端初始化失败");
            return;
        }
        log.info("Milvus 客户端初始化成功");
        MilvusServiceClient client = (MilvusServiceClient) nativeClient.get();
        R<GetCollectionStatisticsResponse> resp = client.getCollectionStatistics(
                GetCollectionStatisticsParam.newBuilder()
                        .withCollectionName(collectionName)
                        .build()
        );
        if (resp.getStatus() != R.Status.Success.getCode()) {
            log.error("Milvus 获取集合统计信息失败:{}", resp.getMessage());
            return;
        }
        long rowCount = new GetCollStatResponseWrapper(resp.getData()).getRowCount();
        if (rowCount > 0) {
            log.info("集合已存在,跳过初始化数据");
            return;
        }
        // Fix: close the stream (try-with-resources) and decode explicitly as
        // UTF-8 — the original leaked the stream and used the platform charset,
        // which corrupts Chinese text on non-UTF-8 defaults.
        String content;
        try (InputStream in = new ClassPathResource("train.json").getInputStream()) {
            content = new String(in.readAllBytes(), StandardCharsets.UTF_8);
        }
        // Split the document and write the chunks into the vector store.
        TokenTextSplitter splitter = TokenTextSplitter.builder()
                .withChunkSize(512)
                .withMinChunkSizeChars(200)
                .withKeepSeparator(true)
                .build();
        var chunks = splitter.split(List.of(new Document(content)));
        vectorStore.add(chunks);
    }
}
聊天接口
/**
 * REST endpoint that streams chat answers grounded in the Milvus vector store.
 */
@RestController
@RequestMapping
@RequiredArgsConstructor
public class TalkController {

    final ChatClient chatClient;
    final QuestionAnswerAdvisor questionAnswerAdvisor;
    // Alternative RAG advisor; injected but not wired into /talk below.
    final RetrievalAugmentationAdvisor retrievalAugmentationAdvisor;

    /**
     * 聊天
     *
     * @param question 问题
     * @return 回复 (streamed token by token)
     */
    @GetMapping("/talk")
    public Flux<String> talk(@RequestParam("question") String question) {
        var request = chatClient.prompt(question)
                .advisors(questionAnswerAdvisor);
        return request.stream().content();
    }
}
效果展示

效果展示

原文