- Prerequisites
Linux + Python 3.9.9
Ollama: llama3.1 + mxbai-embed-large
Vector database: Oracle Database 23ai Free
- Install Ollama
Reference: https://github.com/ollama/ollama/blob/main/docs/linux.md
Download ollama-linux-amd64.tgz:
$ mkdir ollama
$ cd ollama
$ wget https://.../ollama-linux-amd64.tgz
$ tar -zxvf ollama-linux-amd64.tgz
$ mkdir model
$ cd ..
Set the environment variables:
export OLLAMA_HOME=$(pwd)/ollama
export PATH=$PATH:${OLLAMA_HOME}/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OLLAMA_HOME}/lib/ollama
export OLLAMA_MODELS="${OLLAMA_HOME}/model"
Start Ollama and pull the models:
$ OLLAMA_HOST=0.0.0.0:11434 ollama serve   # keep this running; use a second terminal for the commands below
$ ollama pull llama3.1          # pull the language model (or another, e.g. llama3)
$ ollama pull mxbai-embed-large # pull the embedding model
- Verify the installation
$ ollama list
- Other API calls
$ curl http://$(hostname -f):11434/api/generate -d '{
  "model": "llama3.1",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
$ curl http://$(hostname -f):11434/api/generate -d '{
  "model": "llama3.1",
  "prompt": "Why is the sky blue?",
  "stream": false,
  "options": {
    "temperature": 0.5,
    "num_predict": 20
  }
}'
$ curl -X POST http://$(hostname -f):11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3.1",
    "messages": [{"role": "user", "content": "Hello Ollama"}]
  }'
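The /v1/chat/completions path is Ollama's OpenAI-compatible endpoint, so it can also be called from the openai Python client. A minimal sketch, assuming pip3 install openai; the api_key value is a placeholder because Ollama does not check it:

from openai import OpenAI

# Point the OpenAI client at the local Ollama server's OpenAI-compatible API.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

resp = client.chat.completions.create(
    model="llama3.1",
    messages=[{"role": "user", "content": "Hello Ollama"}],
)
print(resp.choices[0].message.content)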
$ curl http://localhost:11434/api/embeddings -d '{
  "model": "mxbai-embed-large",
  "prompt": "Ollama is a local LLM runtime."
}'
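The length of the embedding returned by this endpoint is what determines the dimension of the vector column created in the next step. A quick Python check, which should print 1024 for mxbai-embed-large:

import requests

# Call the same /api/embeddings endpoint and report the vector length.
res = requests.post("http://localhost:11434/api/embeddings",
                    json={"model": "mxbai-embed-large",
                          "prompt": "Ollama is a local LLM runtime."})
res.raise_for_status()
print(len(res.json()["embedding"]))  # mxbai-embed-large should report 1024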
- Prepare the Oracle 23ai vector database
Install the Oracle 23ai Free database; refer to Oracle 23ai vector database for details.
Create the vector table:
SQL> desc DOCUMENTS;
 Name                                      Null?    Type
 ----------------------------------------- -------- ----------------------------
 ID                                        NOT NULL RAW(16)
 TEXT                                               CLOB
 METADATA                                           JSON
 EMBEDDING                                          VECTOR(1024, FLOAT32, DENSE)
Note that the vector dimension of 1024 is determined by the embedding model: mxbai-embed-large produces 1024-dimensional embeddings.
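For reference, the table shown above can be created with DDL along the following lines. This is a minimal sketch run through python-oracledb; the SYS_GUID() default on ID is an assumption (the import script below never supplies an ID value), and the connection details are placeholders:

import oracledb

# Placeholder connection details, matching the style used in the scripts below.
conn = oracledb.connect(user="<db_username>", password="<db_password>", dsn="<host>:1521/<service>")

ddl = """
CREATE TABLE DOCUMENTS (
    ID        RAW(16) DEFAULT SYS_GUID() NOT NULL,  -- the DEFAULT is an assumption
    TEXT      CLOB,
    METADATA  JSON,
    EMBEDDING VECTOR(1024, FLOAT32)  -- DESCRIBE reports the default DENSE storage format
)
"""
with conn.cursor() as cursor:
    cursor.execute(ddl)  # DDL commits implicitly in Oracle
conn.close()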
- Access the LLM from Python
$ pip3 install ollama requests
$ cat access_llm.py
import requests
OLLAMA_URL = "http://localhost:11434/api/generate"
payload = {
    "model": "llama3.1",  # or "mistral", "gemma", etc.
    "prompt": "Explain photosynthesis in one paragraph.",
    "stream": False  # Set to True for a streaming response
}
response = requests.post(OLLAMA_URL, json=payload)
data = response.json()
print("Response:")
print(data["response"])
$ python3 access_llm.py
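Since the ollama Python package was installed above, the same request can also be made through its client API instead of raw HTTP. A minimal sketch; the options values are illustrative, mirroring the earlier curl example:

import ollama

# Equivalent of access_llm.py, using the ollama client library instead of requests.
result = ollama.generate(
    model="llama3.1",
    prompt="Explain photosynthesis in one paragraph.",
    options={"temperature": 0.5, "num_predict": 100},  # illustrative sampling options
)
# Older ollama-python releases return a dict, newer ones a typed response object.
print(result["response"] if isinstance(result, dict) else result.response)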
- Import documents into the vector database
$ pip3 install oracledb
$ cat import_doc.py
import os
import requests
import oracledb
from textwrap import wrap

# --- Config ---
OLLAMA_URL = "http://localhost:11434"
OLLAMA_EMBED_MODEL = "mxbai-embed-large"
OLLAMA_CHUNK_SIZE = 50  # characters per chunk
ORACLE_USERNAME = "<db_username>"
ORACLE_PASSWORD = "<db_password>"
ORACLE_DSN = "<host>:1521/<service>"
ORACLE_CONN = oracledb.connect(user=ORACLE_USERNAME, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)

# --- Step 1: Read file and chunk ---
def load_text_chunks(file_path: str, chunk_size=OLLAMA_CHUNK_SIZE):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    return wrap(text, width=chunk_size), os.path.basename(file_path)

# --- Step 2: Get embedding from Ollama ---
def generate_embedding(text: str):
    res = requests.post(f"{OLLAMA_URL}/api/embeddings", json={"model": OLLAMA_EMBED_MODEL, "prompt": text})
    res.raise_for_status()
    embedding = res.json()["embedding"]
    return str(embedding).replace(" ", "")  # '[0.1,0.2,...]' string for TO_VECTOR

# --- Step 3: Insert into Oracle ---
def insert_chunk(filename, chunk_text, embedding_str):
    with ORACLE_CONN.cursor() as cursor:
        sql = """
            INSERT INTO DOCUMENTS(METADATA, TEXT, EMBEDDING)
            VALUES (JSON(:metadata), :chunk_text, TO_VECTOR(:embedding_str, 1024, FLOAT32))
        """
        cursor.execute(sql, {
            "metadata": '{"docname": "' + filename + '", "creator": "John"}',
            "chunk_text": chunk_text,
            "embedding_str": embedding_str
        })

# --- Step 4: Run full process ---
def main():
    file_path = input("Enter path to .txt file: ").strip()
    if not os.path.isfile(file_path):
        print("ERROR: File not found.")
        return
    chunks, filename = load_text_chunks(file_path)
    print(f"INFO: Loaded {len(chunks)} chunks from: {filename}")
    for i, chunk in enumerate(chunks):
        print(f"INFO: Processing chunk {i+1}/{len(chunks)}...")
        embedding_str = generate_embedding(chunk)
        insert_chunk(filename, chunk, embedding_str)
    ORACLE_CONN.commit()
    print("INFO: All chunks inserted into Oracle.")

# --- Run ---
if __name__ == "__main__":
    main()
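$ python3 import_doc.py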
- Access the RAG vector database
$ cat access_rag.py
import requests
import oracledb

# --- CONFIGURATION ---
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "llama3.1"
OLLAMA_EMBED_MODEL = "mxbai-embed-large"

# Connect to Oracle
ORACLE_USERNAME = "<db_username>"
ORACLE_PASSWORD = "<db_password>"
ORACLE_DSN = "<host>:1521/<service>"
ORACLE_CONN = oracledb.connect(user=ORACLE_USERNAME, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)
ORACLE_CURSOR = ORACLE_CONN.cursor()

# --- Step 1: Get embedding for user query ---
def get_embedding(text):
    res = requests.post(f"{OLLAMA_URL}/api/embeddings", json={
        "model": OLLAMA_EMBED_MODEL,
        "prompt": text
    })
    res.raise_for_status()
    return str(res.json()["embedding"]).replace(" ", "")  # '[0.1,0.2,...]'

# --- Step 2: Vector similarity search in Oracle ---
def search_similar_chunks(query_text, top_k=3):
    embedding_str = get_embedding(query_text)
    sql = """
        SELECT METADATA, TEXT
        FROM DOCUMENTS
        ORDER BY EMBEDDING <-> TO_VECTOR(:vec, 1024, FLOAT32)
        FETCH FIRST :top_k ROWS ONLY
    """
    ORACLE_CURSOR.execute(sql, {"vec": embedding_str, "top_k": top_k})
    return ORACLE_CURSOR.fetchall()

# --- Step 3: Generate answer with context ---
def generate_answer(query, context_chunks):
    # TEXT is a CLOB, so read LOB handles into plain strings before joining
    context = "\n".join([chunk.read() if hasattr(chunk, "read") else chunk for _, chunk in context_chunks])
    prompt = f"""Answer the question using the context below.
Context:
{context}
Question: {query}
Answer:"""
    res = requests.post(f"{OLLAMA_URL}/api/generate", json={
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False
    })
    return res.json()["response"]

# --- Run the RAG pipeline ---
user_query = "who is ababababab?"
chunks = search_similar_chunks(user_query)
answer = generate_answer(user_query, chunks)
print("Final Answer:")
print(answer)

ORACLE_CURSOR.close()
ORACLE_CONN.close()
$ python3 access_rag.py
- Chat with the RAG LLM
$ cat chat_rag.py
import requests
import oracledb

# --- CONFIGURATION ---
OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "llama3.1"
OLLAMA_EMBED_MODEL = "mxbai-embed-large"

# Connect to Oracle
ORACLE_USERNAME = "<db_username>"
ORACLE_PASSWORD = "<db_password>"
ORACLE_DSN = "<host>:1521/<service>"
ORACLE_CONN = oracledb.connect(user=ORACLE_USERNAME, password=ORACLE_PASSWORD, dsn=ORACLE_DSN)

# Maintains conversation history
chat_history = []

# Step 1: Generate 1024-dimensional embedding via Ollama
def get_embedding(text):
    res = requests.post(f"{OLLAMA_URL}/api/embeddings", json={
        "model": OLLAMA_EMBED_MODEL,
        "prompt": text
    })
    res.raise_for_status()
    return str(res.json()["embedding"]).replace(" ", "")  # '[0.1,0.2,...]'

# Step 2: Query Oracle vector table with TO_VECTOR
def query_oracle_for_context(query_text, top_k=3):
    embedding_str = get_embedding(query_text)
    with ORACLE_CONN.cursor() as cursor:
        sql = f"""
            SELECT METADATA, TEXT
            FROM DOCUMENTS
            ORDER BY EMBEDDING <-> TO_VECTOR(:vec, 1024, FLOAT32)
            FETCH FIRST {top_k} ROWS ONLY
        """
        cursor.execute(sql, {"vec": embedding_str})
        results = cursor.fetchall()
        clean_results = []
        for meta, chunk in results:
            # Convert LOB to str if needed
            text = chunk.read() if hasattr(chunk, "read") else chunk
            clean_results.append((meta, text))
    return clean_results

# Step 3: Build RAG-style prompt
def build_rag_prompt(user_question, chat_history, retrieved_docs):
    context_block = "\n---\n".join([text for _, text in retrieved_docs])
    chat_block = ""
    for msg in chat_history:
        role = msg["role"].capitalize()
        chat_block += f"{role}: {msg['content']}\n"
    full_prompt = f"""You are a helpful assistant. Use the following context to answer the user's question.
Context:
{context_block}
Conversation:
{chat_block}
User: {user_question}
Assistant:"""
    return full_prompt

# Step 4: Call Ollama with prompt
def ask_ollama(prompt):
    res = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}
    )
    res.raise_for_status()
    return res.json()["response"]

# Main loop
def chat_loop():
    print("Start chatting (type 'exit' to quit)")
    while True:
        user_input = input("======== question ========").strip()
        if user_input.lower() in {"exit", "quit"}:
            break
        # Step 1: retrieve context
        retrieved = query_oracle_for_context(user_input)
        print("======== retrieved ========\n", retrieved)
        # Step 2: build prompt with RAG and history
        prompt = build_rag_prompt(user_input, chat_history, retrieved)
        print("======== prompt ========\n", prompt)
        # Step 3: call LLM
        reply = ask_ollama(prompt)
        print("======== reply ========\n", reply)
        # Update history
        chat_history.append({"role": "user", "content": user_input})
        chat_history.append({"role": "assistant", "content": reply})

if __name__ == "__main__":
    chat_loop()
$ python3 chat_rag.py
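Note that chat_history above grows without bound, so a long conversation will eventually exceed the model's context window. One simple option is to pass only the most recent turns into build_rag_prompt; a minimal sketch, where MAX_TURNS is an arbitrary illustrative limit:

MAX_TURNS = 5  # illustrative; tune to the model's context window

def trimmed_history(history, max_turns=MAX_TURNS):
    # Each turn appends two messages (user + assistant), so keep the last 2*max_turns entries.
    return history[-2 * max_turns:]

# In chat_loop(), build the prompt from the trimmed view instead of the full list:
# prompt = build_rag_prompt(user_input, trimmed_history(chat_history), retrieved)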