- Prerequisites
Linux + Python 3.9.9
Ollama: llama3.1 + mxbai-embed-large
Vector database: Oracle Database 23ai Free
- Install Ollama
Reference: https://github.com/ollama/ollama/blob/main/docs/linux.md
Download ollama-linux-amd64.tgz:
$ mkdir ollama
$ cd ollama
$ wget https://.../ollama-linux-amd64.tgz
$ tar -zxvf ollama-linux-amd64.tgz
$ mkdir model
$ cd ..
Set the environment variables:
export OLLAMA_HOME=$(pwd)/ollama
export PATH=$PATH:${OLLAMA_HOME}/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OLLAMA_HOME}/lib/ollama
export OLLAMA_MODELS="${OLLAMA_HOME}/model"
Start Ollama and pull the models:
$ OLLAMA_HOST=0.0.0.0:11434 ollama serve   # keep this running; use a second terminal for the commands below
$ ollama pull llama3.1          # pull the language model (or another, e.g. llama3)
$ ollama pull mxbai-embed-large # pull the embedding model
- Verify the installation
$ ollama list
- Other API calls
$ curl http://$(hostname -f):11434/api/generate -d '{
  "model": "llama3.1",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
$ curl http://$(hostname -f):11434/api/generate -d '{
  "model": "llama3.1",
  "prompt": "Why is the sky blue?",
  "stream": false,
  "options": {
    "temperature": 0.5,
    "num_predict": 20
  }
}'
$ curl -X POST http://$(hostname -f):11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3.1",
    "messages": [{"role": "user", "content": "Hello Ollama"}]
  }'
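The /v1/chat/completions path is Ollama's OpenAI-compatible endpoint, so it can also be called from the openai Python client. A minimal sketch, assuming pip3 install openai; the api_key value is a placeholder because Ollama does not check it:

from openai import OpenAI

# Point the OpenAI client at the local Ollama server's OpenAI-compatible API.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

resp = client.chat.completions.create(
    model="llama3.1",
    messages=[{"role": "user", "content": "Hello Ollama"}],
)
print(resp.choices[0].message.content)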
$ curl http://localhost:11434/api/embeddings -d '{
  "model": "mxbai-embed-large",
  "prompt": "Ollama is a local LLM runtime."
}'
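The length of the embedding returned by this endpoint is what determines the dimension of the vector column created in the next step. A quick Python check, which should print 1024 for mxbai-embed-large:

import requests

# Call the same /api/embeddings endpoint and report the vector length.
res = requests.post("http://localhost:11434/api/embeddings",
                    json={"model": "mxbai-embed-large",
                          "prompt": "Ollama is a local LLM runtime."})
res.raise_for_status()
print(len(res.json()["embedding"]))  # mxbai-embed-large should report 1024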
- Prepare the Oracle 23ai vector database
Install the Oracle 23ai Free database; refer to Oracle 23ai vector database for details.
Create the vector table:
SQL> desc DOCUMENTS;
 Name                                      Null?    Type
 ----------------------------------------- -------- ----------------------------
 ID                                        NOT NULL RAW(16)
 TEXT                                               CLOB
 METADATA                                           JSON
 EMBEDDING                                          VECTOR(1024, FLOAT32, DENSE)
Note that the vector dimension of 1024 is determined by the embedding model: mxbai-embed-large produces 1024-dimensional embeddings.
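For reference, the table shown above can be created with DDL along the following lines. This is a minimal sketch run through python-oracledb; the SYS_GUID() default on ID is an assumption (the import script below never supplies an ID value), and the connection details are placeholders:

import oracledb

# Placeholder connection details, matching the style used in the scripts below.
conn = oracledb.connect(user="<db_username>", password="<db_password>", dsn="<host>:1521/<service>")

ddl = """
CREATE TABLE DOCUMENTS (
    ID        RAW(16) DEFAULT SYS_GUID() NOT NULL,  -- the DEFAULT is an assumption
    TEXT      CLOB,
    METADATA  JSON,
    EMBEDDING VECTOR(1024, FLOAT32)  -- DESCRIBE reports the default DENSE storage format
)
"""
with conn.cursor() as cursor:
    cursor.execute(ddl)  # DDL commits implicitly in Oracle
conn.close()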
- Access the LLM from Python
$ pip3 install ollama requests
$ cat access_llm.py
import requests
OLLAMA_URL = "http://localhost:11434/api/generate"
payload = {
    "model": "llama3.1",  # or "mistral", "gemma", etc.
    "prompt": "Explain photosynthesis in one paragraph.",
    "stream": False  # Set to True for a streaming response
}
response = requests.post(OLLAMA_URL, json=payload)
data = response.json()
print("Response:")
print(data["response"])
$ python3 access_llm.py
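Since the ollama Python package was installed above, the same request can also be made through its client API instead of raw HTTP. A minimal sketch; the options values are illustrative, mirroring the earlier curl example:

import ollama

# Equivalent of access_llm.py, using the ollama client library instead of requests.
result = ollama.generate(
    model="llama3.1",
    prompt="Explain photosynthesis in one paragraph.",
    options={"temperature": 0.5, "num_predict": 100},  # illustrative sampling options
)
# Older ollama-python releases return a dict, newer ones a typed response object.
print(result["response"] if isinstance(result, dict) else result.response)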
- Import documents into the vector database
$ pip3 install oracledb
$ cat import_doc.py
import os
import requests
import oracledb
from textwrap import wrap

# --- Config ---
OLLAMA_URL = "http://localhost:11434"
OLLAMA_EMBED_MODEL = "mxbai-embed-large"
OLLAMA_CHUNK_SIZE = 50  # characters per chunk
ORACLE_USERNAME = "<db_username>"
ORACLE_PASSWORD = "<db_password>"
ORACLE_DSN = "<host>:1521/<service>"
ORACLE_CONN = oracledb.connect(user=ORACLE_USERNAME, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)

# --- Step 1: Read file and chunk ---
def load_text_chunks(file_path: str, chunk_size=OLLAMA_CHUNK_SIZE):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    return wrap(text, width=chunk_size), os.path.basename(file_path)

# --- Step 2: Get embedding from Ollama ---
def generate_embedding(text: str):
    res = requests.post(f"{OLLAMA_URL}/api/embeddings", json={"model": OLLAMA_EMBED_MODEL, "prompt": text})
    res.raise_for_status()
    embedding = res.json()["embedding"]
    return str(embedding).replace(" ", "")  # '[0.1,0.2,...]' string for TO_VECTOR

# --- Step 3: Insert into Oracle ---
def insert_chunk(filename, chunk_text, embedding_str):
    with ORACLE_CONN.cursor() as cursor:
        sql = """
            INSERT INTO DOCUMENTS(METADATA, TEXT, EMBEDDING)
            VALUES (JSON(:metadata), :chunk_text, TO_VECTOR(:embedding_str, 1024, FLOAT32))
        """
        cursor.execute(sql, {
            "metadata": '{"docname": "' + filename + '", "creator": "John"}',
            "chunk_text": chunk_text,
            "embedding_str": embedding_str
        })

# --- Step 4: Run full process ---
def main():
    file_path = input("Enter path to .txt file: ").strip()
    if not os.path.isfile(file_path):
        print("ERROR: File not found.")
        return
    chunks, filename = load_text_chunks(file_path)
    print(f"INFO: Loaded {len(chunks)} chunks from: {filename}")
    for i, chunk in enumerate(chunks):
        print(f"INFO: Processing chunk {i+1}/{len(chunks)}...")
        embedding_str = generate_embedding(chunk)
        insert_chunk(filename, chunk, embedding_str)
    ORACLE_CONN.commit()
    print("INFO: All chunks inserted into Oracle.")

# --- Run ---
if __name__ == "__main__":
    main()
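$ python3 import_doc.py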
- Access the RAG vector database
$ cat access_rag.py
import requests
import oracledb

# --- CONFIGURATION ---
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "llama3.1"
OLLAMA_EMBED_MODEL = "mxbai-embed-large"

# Connect to Oracle
ORACLE_USERNAME = "<db_username>"
ORACLE_PASSWORD = "<db_password>"
ORACLE_DSN = "<host>:1521/<service>"
ORACLE_CONN = oracledb.connect(user=ORACLE_USERNAME, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)
ORACLE_CURSOR = ORACLE_CONN.cursor()

# --- Step 1: Get embedding for user query ---
def get_embedding(text):
    res = requests.post(f"{OLLAMA_URL}/api/embeddings", json={
        "model": OLLAMA_EMBED_MODEL,
        "prompt": text
    })
    res.raise_for_status()
    return str(res.json()["embedding"]).replace(" ", "")  # '[0.1,0.2,...]'

# --- Step 2: Vector similarity search in Oracle ---
def search_similar_chunks(query_text, top_k=3):
    embedding_str = get_embedding(query_text)
    sql = """
        SELECT METADATA, TEXT
        FROM DOCUMENTS
        ORDER BY EMBEDDING <-> TO_VECTOR(:vec, 1024, FLOAT32)
        FETCH FIRST :top_k ROWS ONLY
    """
    ORACLE_CURSOR.execute(sql, {"vec": embedding_str, "top_k": top_k})
    return ORACLE_CURSOR.fetchall()

# --- Step 3: Generate answer with context ---
def generate_answer(query, context_chunks):
    # TEXT is a CLOB, so read LOB handles into plain strings before joining
    context = "\n".join([chunk.read() if hasattr(chunk, "read") else chunk for _, chunk in context_chunks])
    prompt = f"""Answer the question using the context below.
Context:
{context}
Question: {query}
Answer:"""
    res = requests.post(f"{OLLAMA_URL}/api/generate", json={
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False
    })
    return res.json()["response"]

# --- Run the RAG pipeline ---
user_query = "who is ababababab?"
chunks = search_similar_chunks(user_query)
answer = generate_answer(user_query, chunks)
print("Final Answer:")
print(answer)

ORACLE_CURSOR.close()
ORACLE_CONN.close()
$ python3 access_rag.py
- Chat with the RAG LLM
$ cat chat_rag.py
import requests
import oracledb

# --- CONFIGURATION ---
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "llama3.1"
OLLAMA_EMBED_MODEL = "mxbai-embed-large"

# Connect to Oracle
ORACLE_USERNAME = "<db_username>"
ORACLE_PASSWORD = "<db_password>"
ORACLE_DSN = "<host>:1521/<service>"
ORACLE_CONN = oracledb.connect(user=ORACLE_USERNAME, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)

# Maintains conversation history
chat_history = []

# Step 1: Generate 1024-dimensional embedding via Ollama
def get_embedding(text):
    res = requests.post(f"{OLLAMA_URL}/api/embeddings", json={
        "model": OLLAMA_EMBED_MODEL,
        "prompt": text
    })
    res.raise_for_status()
    return str(res.json()["embedding"]).replace(" ", "")  # '[0.1,0.2,...]'

# Step 2: Query Oracle vector table with TO_VECTOR
def query_oracle_for_context(query_text, top_k=3):
    embedding_str = get_embedding(query_text)
    with ORACLE_CONN.cursor() as cursor:
        sql = f"""
            SELECT METADATA, TEXT
            FROM DOCUMENTS
            ORDER BY EMBEDDING <-> TO_VECTOR(:vec, 1024, FLOAT32)
            FETCH FIRST {top_k} ROWS ONLY
        """
        cursor.execute(sql, {"vec": embedding_str})
        results = cursor.fetchall()
        clean_results = []
        for meta, chunk in results:
            # Convert LOB to str if needed
            text = chunk.read() if hasattr(chunk, "read") else chunk
            clean_results.append((meta, text))
    return clean_results

# Step 3: Build RAG-style prompt
def build_rag_prompt(user_question, chat_history, retrieved_docs):
    context_block = "\n---\n".join([text for _, text in retrieved_docs])
    chat_block = ""
    for msg in chat_history:
        role = msg["role"].capitalize()
        chat_block += f"{role}: {msg['content']}\n"
    full_prompt = f"""You are a helpful assistant. Use the following context to answer the user's question.
Context:
{context_block}
Conversation:
{chat_block}
User: {user_question}
Assistant:"""
    return full_prompt

# Step 4: Call Ollama with prompt
def ask_ollama(prompt):
    res = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}
    )
    res.raise_for_status()
    return res.json()["response"]

# Main loop
def chat_loop():
    print("Start chatting (type 'exit' to quit)")
    while True:
        user_input = input("======== question ========").strip()
        if user_input.lower() in {"exit", "quit"}:
            break
        # Step 1: retrieve context
        retrieved = query_oracle_for_context(user_input)
        print("======== retrieved ========\n", retrieved)
        # Step 2: build prompt with RAG and history
        prompt = build_rag_prompt(user_input, chat_history, retrieved)
        print("======== prompt ========\n", prompt)
        # Step 3: call LLM
        reply = ask_ollama(prompt)
        print("======== reply ========\n", reply)
        # Update history
        chat_history.append({"role": "user", "content": user_input})
        chat_history.append({"role": "assistant", "content": reply})

if __name__ == "__main__":
    chat_loop()
$ python3 chat_rag.py
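Note that chat_history above grows without bound, so a long conversation will eventually exceed the model's context window. One simple option is to pass only the most recent turns into build_rag_prompt; a minimal sketch, where MAX_TURNS is an arbitrary illustrative limit:

MAX_TURNS = 5  # illustrative; tune to the model's context window

def trimmed_history(history, max_turns=MAX_TURNS):
    # Each turn appends two messages (user + assistant), so keep the last 2*max_turns entries.
    return history[-2 * max_turns:]

# In chat_loop(), build the prompt from the trimmed view instead of the full list:
# prompt = build_rag_prompt(user_input, trimmed_history(chat_history), retrieved)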