电脑配置
cpu i5 7300Q
显卡 gtx 1050
内存 8g
准备环境
jdk 17 https://www.oracle.com/cn/java/technologies/downloads/#java17-windows
ollama https://ollama.com/download
idea 2024.3 https://www.jetbrains.com.cn/en-us/idea/download/?section=windows
ollama
1.安装完后打开ollama app.exe文件就算启动成功了(桌面右下角有图标)
2.打开cmd 输入ollama,有提示说明ollama正在运行
3.打开cmd 输入ollama run qwen:0.5b自动下载该模型并启动
4.运行完看到输入提示就代表模型启动成功,随便问点问题
5.可以关闭cmd,模型后台运行,关闭ollama时会关闭模型
maven依赖
<!-- Parent POM: pins the project to Spring Boot 3.4.2. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.4.2</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<!-- NOTE(review): in a complete pom.xml the <dependency> entries below sit inside a <dependencies> element. -->
<!-- Web starter; no version is given here, so it is presumably resolved via the parent POM. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring AI starter for Ollama, with an explicit version. -->
<!-- NOTE(review): 1.0.0-M5 is a milestone build; it may require the Spring milestone repository
     (https://repo.spring.io/milestone) to be declared under <repositories> - confirm. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-ollama-spring-boot-starter</artifactId>
<version>1.0.0-M5</version>
</dependency>
yml文件
# Spring AI Ollama settings. The starter binds keys under the `spring.ai.ollama` prefix,
# so the `spring:` root and the nesting below are required for them to take effect.
spring:
  ai:
    ollama:
      # Address of the locally running Ollama server.
      base-url: http://127.0.0.1:11434
      chat:
        options:
          # Same model tag that was started with `ollama run qwen:0.5b`.
          model: qwen:0.5b
          # Sampling temperature passed to the model.
          temperature: 0.8
java 代码
controller
/**
 * HTTP entry points, mapped under {@code /ollama}, that forward user messages to
 * {@link OllamaService}.
 */
@RestController
@RequestMapping("/ollama")
public class OllamaController {

    @Autowired
    private OllamaService ollamaService;

    /**
     * Handles {@code POST /ollama/send}: converts the request body and returns whatever
     * the service returns.
     *
     * @param params JSON request body
     * @return the string produced by {@link OllamaService#send}
     */
    @PostMapping("/send")
    public String send(@RequestBody UserSendParams params) {
        return ollamaService.send(params.toPojo());
    }

    /**
     * Handles {@code POST /ollama/sendStream}: hands the request to the service on a
     * freshly started thread and returns the emitter immediately.
     *
     * @param params JSON request body
     * @return the emitter that the background thread passes to the service
     */
    @PostMapping(value = "/sendStream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
    public SseEmitter sendStream(@RequestBody UserSendParams params) {
        // NOTE(review): the no-arg emitter uses the framework's default async timeout - confirm
        // it is long enough for slow model responses.
        SseEmitter sseEmitter = new SseEmitter();
        // The request object is built inside the task, i.e. on the worker thread.
        Runnable streamingTask = () -> ollamaService.sendStream(params.toPojo(sseEmitter));
        Thread worker = new Thread(streamingTask);
        worker.start();
        return sseEmitter;
    }
}
service
/**
 * Operations for sending a user message to the Ollama chat model.
 */
public interface OllamaService {
/**
 * Sends the message and returns a reply as a single string.
 *
 * @param userSendPojo request data carrying the user message
 * @return the reply; OllamaServiceImpl in this file returns the text of the model's result
 */
String send(UserSendPojo userSendPojo);
/**
 * Sends the message and delivers the reply incrementally.
 *
 * @param userSendPojo request data; OllamaServiceImpl in this file writes each chunk to
 *                     the emitter it carries
 * @return OllamaServiceImpl in this file always returns {@code null}
 */
String sendStream(UserSendPojo userSendPojo);
}
/**
 * {@link OllamaService} implementation that delegates to the injected {@link OllamaChatModel}.
 */
@Service
public class OllamaServiceImpl implements OllamaService {

    private static final Logger log = LoggerFactory.getLogger(OllamaServiceImpl.class);

    @Autowired
    private OllamaChatModel ollamaChatModel;

    /**
     * Performs one blocking chat call and logs how long it took.
     *
     * @param userSendPojo request data; only its message is used
     * @return the text of the model's result
     */
    @Override
    public String send(UserSendPojo userSendPojo) {
        log.info("ollama 调用参数 =>{}", userSendPojo.getMessage());
        Prompt prompt = new Prompt(userSendPojo.getMessage());
        long startTime = System.currentTimeMillis();
        ChatResponse call = ollamaChatModel.call(prompt);
        log.info("ollama 调用返回 =>耗时{}ms 结果{} ",
                System.currentTimeMillis() - startTime, call);
        return call.getResult().getOutput().getText();
    }

    /**
     * Streams the model's reply to the emitter carried by the request.
     *
     * <p>Failures are logged rather than rethrown, and the emitter (when present) is
     * completed in every case.
     *
     * @param userSendPojo request data carrying the message and the target emitter
     * @return always {@code null}; the reply is delivered through the emitter
     */
    @Override
    public String sendStream(UserSendPojo userSendPojo) {
        log.info("ollama 调用参数 =>{}", userSendPojo.getMessage());
        long startTime = System.currentTimeMillis();
        try {
            // Built inside the try block so that a failure while creating the prompt still
            // reaches the finally clause and the emitter is completed instead of left open.
            Prompt prompt = new Prompt(userSendPojo.getMessage());
            Flux<ChatResponse> stream = ollamaChatModel.stream(prompt);
            handleStreamResponse(stream, userSendPojo);
        } catch (Exception e) {
            log.error("ollama 流式调用异常 userSendPojo =>{} error =>", userSendPojo, e);
        } finally {
            SseEmitter emitter = userSendPojo.getEmitter();
            if (emitter != null) {
                emitter.complete();
            }
        }
        log.info("ollama 调用返回 =>耗时 {}ms", System.currentTimeMillis() - startTime);
        return null;
    }

    /**
     * Blocks on the response stream and forwards each chunk's text to the emitter.
     * Stops at the first failed send instead of consuming the rest of the stream.
     *
     * @param stream       streamed chat responses
     * @param userSendPojo request data carrying the target emitter
     */
    private void handleStreamResponse(Flux<ChatResponse> stream, UserSendPojo userSendPojo) {
        SseEmitter emitter = userSendPojo.getEmitter();
        // try-with-resources closes the blocking Stream on every exit path; closing it is
        // expected to release the underlying Flux subscription.
        try (var chunks = stream.toStream()) {
            var iterator = chunks.iterator();
            while (iterator.hasNext()) {
                ChatResponse chatResponse = iterator.next();
                // Skip chunks that carry no result or no text; there is nothing to forward.
                if (chatResponse.getResult() == null) {
                    continue;
                }
                String text = chatResponse.getResult().getOutput().getText();
                if (text == null) {
                    continue;
                }
                try {
                    emitter.send(text);
                } catch (IOException e) {
                    // A failed send is treated as terminal: stop here rather than log one
                    // error for every remaining chunk.
                    log.error("ollama流式解析 异常 => {}", e.getMessage());
                    return;
                }
            }
        }
    }
}
接参数的实体类
/**
 * Request body holding the user's message, with converters to {@code UserSendPojo}.
 */
public class UserSendParams {

    private String message;

    /** @return the user's message */
    public String getMessage() {
        return this.message;
    }

    /** @param message the user's message */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Creates a {@code UserSendPojo} populated from this object's properties.
     *
     * @return a new pojo with no emitter set
     */
    public UserSendPojo toPojo() {
        return copyToNewPojo();
    }

    /**
     * Creates a {@code UserSendPojo} populated from this object's properties and attaches
     * the given emitter.
     *
     * @param emitter emitter to store on the new pojo
     * @return a new pojo carrying the emitter
     */
    public UserSendPojo toPojo(SseEmitter emitter) {
        UserSendPojo target = copyToNewPojo();
        target.setEmitter(emitter);
        return target;
    }

    /** Instantiates a pojo and copies matching bean properties from this object into it. */
    private UserSendPojo copyToNewPojo() {
        UserSendPojo target = new UserSendPojo();
        BeanUtils.copyProperties(this, target);
        return target;
    }
}
/**
 * Internal request object: the user's message plus an optional SSE emitter.
 */
public class UserSendPojo {

    private String message;
    private SseEmitter emitter;

    /** @return the user's message */
    public String getMessage() {
        return this.message;
    }

    /** @param message the user's message */
    public void setMessage(String message) {
        this.message = message;
    }

    /** @return the emitter, or {@code null} if none was set */
    public SseEmitter getEmitter() {
        return this.emitter;
    }

    /** @param emitter the emitter to store */
    public void setEmitter(SseEmitter emitter) {
        this.emitter = emitter;
    }

    /** Renders both fields in the form {@code UserSendPojo{message='...', emitter=...}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("UserSendPojo{");
        text.append("message='").append(message).append('\'');
        text.append(", emitter=").append(emitter);
        text.append('}');
        return text.toString();
    }
}
html 只做了个简易流式输出
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
<style>
.chat {
display: block;
margin-left: auto;
margin-right: auto;
width: 700px;
border:1px solid #000;
}
</style>
</head>
<body>
<input id="message1" class="message1">
<button onclick="fetchFunction()">发送文本</button>
<br>
<label>选择文件上传:</label>
<input type="file" id="imageUpload" name="imageUpload" accept="image/*">
<br>
<button onclick="fetchImageFunction()">发送文本和文件</button>
<p id="chat" class="chat"></p>
<script>
/**
 * Reads a streamed fetch response chunk by chunk and appends the text to `target`.
 *
 * The "data:" markers and blank-line separators are stripped from each decoded chunk.
 * NOTE: stripping is done per chunk, so a marker split across two chunks is not removed.
 *
 * @param {Response} res - response returned by fetch
 * @param {HTMLElement} target - element whose innerText receives the output
 */
async function renderStream(res, target) {
    // Do not render an error body as if it were model output.
    if (!res.ok || !res.body) {
        console.error("请求失败 => " + res.status);
        return;
    }
    console.time("fetch流式耗时");
    const reader = res.body.getReader();
    // Bytes have to be decoded into text.
    const decoder = new TextDecoder();
    target.innerText = "";

    const appendChunk = (text) => {
        if (text === "") {
            return;
        }
        console.log(text);
        target.innerText += text.replaceAll("data:", "").replaceAll("\n\n", "");
    };

    // Keep reading until the body is exhausted.
    while (true) {
        // `done` tells whether the body is finished; `value` is the current byte chunk.
        const { done, value } = await reader.read();
        if (done) {
            break;
        }
        // stream: true keeps an incomplete multi-byte character (e.g. a Chinese character
        // split across two chunks) buffered instead of emitting a replacement character.
        appendChunk(decoder.decode(value, { stream: true }));
    }
    // Flush whatever the decoder is still holding.
    appendChunk(decoder.decode());
    console.timeEnd("fetch流式耗时");
}

/**
 * Sends the text in #message1 as JSON and streams the reply into #chat.
 */
async function fetchFunction() {
    const messageValue = document.getElementById("message1").value;
    if (!messageValue) {
        console.log("无输入 =>" + messageValue);
        return;
    }
    // Matches the streaming endpoint declared by OllamaController (/ollama/sendStream).
    const url = "http://localhost:8080/ollama/sendStream";
    const textArea = document.getElementById("chat");
    const res = await fetch(url, {
        method: "POST",
        headers: {
            "Content-Type": "application/json"
        },
        body: JSON.stringify({
            "message": messageValue
        })
    });
    await renderStream(res, textArea);
}

// Pressing Enter in the input behaves like clicking the send button.
document.getElementById("message1").addEventListener("keydown", function (event) {
    if (event.key === "Enter") {
        fetchFunction();
    }
});

/**
 * Sends the text in #message1, plus the selected image if any, as multipart form data
 * and streams the reply into #chat.
 */
async function fetchImageFunction() {
    const messageValue = document.getElementById("message1").value;
    const imageInput = document.getElementById("imageUpload");
    if (!messageValue) {
        console.log("无输入 =>" + messageValue);
        return;
    }
    const formData = new FormData();
    if (imageInput.value) {
        console.log("有图片 =>" + imageInput.value);
        formData.append('imageFile', imageInput.files[0]);
    }
    formData.append('message', messageValue);
    console.log(formData);
    // NOTE(review): no /ollama/sendImageAdvisor mapping appears in the controller shown in
    // this document - confirm the endpoint exists on the server.
    const url = "http://localhost:8080/ollama/sendImageAdvisor";
    const textArea = document.getElementById("chat");
    // No Content-Type header is set for the FormData body.
    const res = await fetch(url, {
        method: "POST",
        headers: {
        },
        body: formData
    });
    await renderStream(res, textArea);
}
</script>
</body>
</html>