准备环境
jdk 17 https://www.oracle.com/cn/java/technologies/downloads/#java17-windows
ollama https://ollama.com/download
idea 2024.3 https://www.jetbrains.com.cn/en-us/idea/download/?section=windows
核心依赖
<!-- Inherit dependency and plugin management from the Spring Boot 3.4.2 parent POM. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.4.2</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<!-- Spring MVC web starter; no version is given here, so it is taken from the parent POM. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!--
Spring AI starter for Ollama, pinned explicitly to the 1.0.0-M5 milestone.
NOTE(review): milestone builds of this artifact may not be resolvable from Maven Central;
confirm that the Spring Milestones repository is declared in the full pom.xml.
-->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-ollama-spring-boot-starter</artifactId>
<version>1.0.0-M5</version>
</dependency>
yml配置
# Spring AI settings for the Ollama chat client.
# Nesting matters: the keys below resolve to spring.ai.ollama.base-url and
# spring.ai.ollama.chat.options.{model,temperature}.
spring:
  ai:
    ollama:
      # Address of the Ollama server the application talks to.
      base-url: http://127.0.0.1:11434
      chat:
        options:
          # Model tag requested for chat calls.
          model: qwen:0.5b
          # Sampling temperature passed with each chat request.
          temperature: 0.8
统一请求参数实体类
/**
 * Request parameter object carrying the text message sent by the user.
 */
public class UserSendParams {

    /** Message text supplied by the caller. */
    private String message;

    /**
     * @return the current message text, or {@code null} if none was set
     */
    public String getMessage() {
        return this.message;
    }

    /**
     * @param message the message text to store
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Builds a fresh {@code UserSendPojo} and copies this object's properties onto it.
     *
     * <p>NOTE(review): the argument order {@code (this, target)} assumes Spring's
     * {@code BeanUtils.copyProperties(source, target)} — confirm against the import.
     *
     * @return a newly created {@code UserSendPojo} populated from this object
     */
    public UserSendPojo toPojo() {
        final UserSendPojo target = new UserSendPojo();
        BeanUtils.copyProperties(this, target);
        return target;
    }
}
controller
/**
 * REST controller mounted under {@code /ollama} that delegates chat requests
 * to the injected {@link OllamaService}.
 */
@RestController
@RequestMapping("/ollama")
public class OllamaController {

    @Autowired
    private OllamaService ollamaService;

    /**
     * Accepts a JSON request body and streams the service's reply back as
     * {@code text/event-stream}.
     *
     * @param params request body holding the user's message
     * @return the stream of reply fragments produced by the service
     */
    @PostMapping(
            path = "/sendStreamReactor",
            produces = MediaType.TEXT_EVENT_STREAM_VALUE)
    public Flux<String> sendStreamReactor(@RequestBody UserSendParams params) {
        return this.ollamaService.sendStreamReactor(params.toPojo());
    }
}
service
/**
 * Service contract for sending a user message to the chat model and receiving
 * the reply as a reactive stream.
 */
public interface OllamaService {
/**
 * Sends the given request and returns the reply as a stream of strings.
 *
 * @param userSendPojo the request whose message is sent to the model
 * @return a {@code Flux} emitting the reply text piece by piece
 */
Flux<String> sendStreamReactor(UserSendPojo userSendPojo);
}
service实现
public class OllamaServiceImpl implements OllamaService {
private final Logger log = LoggerFactory.getLogger(OllamaServiceImpl.class);
@Autowired
private OllamaChatModel ollamaChatModel;
@Override
public Flux<String> sendStreamReactor(UserSendPojo userSendPojo) {
log.info("sendStreamReactor ollama 调用参数 =>{}", userSendPojo.getMessage());
Prompt prompt = new Prompt(userSendPojo.getMessage());
long startTime = System.currentTimeMillis();
try {
Flux<ChatResponse> fluxResponse = ollamaChatModel.stream(prompt);
return fluxResponse.map(chatResponse ->
chatResponse.getResult().getOutput().getText());
} catch (Exception e) {
log.error("sendStreamReactor ollama 流式调用异常 userSendPojo =>{} error =>",
userSendPojo, e);
} finally {
log.info("sendStreamReactor ollama 调用返回 =>耗时 {}ms",
System.currentTimeMillis() - startTime);
}
return Flux.empty();
}
}
简易效果页面
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
<style>
.chat {
display: block;
margin-left: auto;
margin-right: auto;
width: 700px;
border:1px solid #000;
}
</style>
</head>
<body>
<input id="message1" class="message1">
<button onclick="fetchFunction()">发送文本</button>
<br>
<label>选择文件上传:</label>
<input type="file" id="imageUpload" name="imageUpload" accept="image/*">
<br>
<button onclick="fetchImageFunction()">发送文本和文件</button>
<p id="chat" class="chat"></p>
<script>
/**
 * Extracts the payload of a single server-sent-event frame.
 *
 * Only lines starting with "data:" contribute; several data lines in one frame
 * are joined with "\n". The text after "data:" is kept verbatim (no leading
 * space is stripped) because streamed tokens may begin with a meaningful space.
 *
 * @param {string} frame one event frame, without the terminating blank line
 * @returns {string} the frame's data payload ("" if it has no data lines)
 */
function extractEventData(frame) {
    return frame
        .split("\n")
        .filter((line) => line.startsWith("data:"))
        .map((line) => line.slice("data:".length))
        .join("\n");
}

/**
 * Sends the request and renders the streamed response body into the #chat element.
 *
 * For a text/event-stream response, decoded text is buffered and only complete
 * frames (terminated by a blank line) are rendered, so a frame split across
 * network chunks is still parsed correctly. Any other content type is appended
 * as plain text.
 *
 * @param {string} url endpoint to call
 * @param {RequestInit} requestInit options passed to fetch
 */
async function streamResponseIntoChat(url, requestInit) {
    const chatArea = document.getElementById("chat");
    try {
        const res = await fetch(url, requestInit);
        if (!res.ok || !res.body) {
            console.error("请求失败 => HTTP " + res.status);
            chatArea.innerText = "请求失败: HTTP " + res.status;
            return;
        }
        console.time("fetch流式耗时");
        try {
            const contentType = (res.headers.get("Content-Type") || "").toLowerCase();
            const isEventStream = contentType.includes("text/event-stream");
            const reader = res.body.getReader();
            // Bytes must be decoded to text; a single decoder is reused so that a
            // multi-byte character split across two chunks is reassembled.
            const decoder = new TextDecoder();
            chatArea.innerText = "";
            let pending = "";

            const render = (text) => {
                if (text === "") {
                    return;
                }
                console.log(text);
                if (!isEventStream) {
                    chatArea.innerText += text;
                    return;
                }
                pending += text;
                const frames = pending.split("\n\n");
                // The last piece is an unterminated frame; keep it for the next chunk.
                pending = frames.pop();
                for (const frame of frames) {
                    chatArea.innerText += extractEventData(frame);
                }
            };

            // Keep reading chunks until the body is exhausted.
            while (true) {
                // done: whether the stream has ended; value: the current chunk (Uint8Array).
                const { done, value } = await reader.read();
                if (done) {
                    break;
                }
                render(decoder.decode(value, { stream: true }));
            }
            // Flush bytes still held by the decoder, then any frame left without a terminator.
            render(decoder.decode());
            if (pending !== "") {
                chatArea.innerText += extractEventData(pending);
            }
        } finally {
            console.timeEnd("fetch流式耗时");
        }
    } catch (err) {
        console.error("流式请求异常 =>", err);
        chatArea.innerText = "请求异常: " + err;
    }
}

/**
 * Sends the text in #message1 as JSON to the streaming chat endpoint and
 * renders the reply. Does nothing when the input is empty.
 */
async function fetchFunction() {
    const messageValue = document.getElementById("message1").value;
    if (!messageValue) {
        console.log("无输入 =>" + messageValue);
        return;
    }
    await streamResponseIntoChat("http://localhost:8080/ollama/sendStreamReactor", {
        method: "POST",
        headers: {
            "Content-Type": "application/json"
        },
        body: JSON.stringify({
            "message": messageValue
        })
    });
}

document.getElementById("message1").addEventListener("keydown", function (event) {
    // Ignore Enter presses that only confirm an IME composition (e.g. Chinese input).
    if (event.key === "Enter" && !event.isComposing) {
        fetchFunction();
    }
});

/**
 * Sends the text in #message1, plus the selected image if any, as multipart
 * form data and renders the streamed reply. Does nothing when the text is empty.
 *
 * NOTE(review): the /ollama/sendImageAdvisor endpoint is not part of the
 * controller shown alongside this page — confirm it exists on the backend.
 */
async function fetchImageFunction() {
    const messageValue = document.getElementById("message1").value;
    if (!messageValue) {
        console.log("无输入 =>" + messageValue);
        return;
    }
    const imageInput = document.getElementById("imageUpload");
    const imageFile = imageInput.files[0];
    const formData = new FormData();
    if (imageFile) {
        console.log("有图片 =>" + imageInput.value);
        formData.append('imageFile', imageFile);
    }
    formData.append('message', messageValue);
    // No Content-Type header on purpose: the browser must generate the multipart boundary.
    await streamResponseIntoChat("http://localhost:8080/ollama/sendImageAdvisor", {
        method: "POST",
        body: formData
    });
}
</script>
</body>
</html>