LlamaIndex 是一个 RAG(检索增强生成)框架,它提供了必要的抽象,可以更轻松地摄取、构建和访问私有或特定领域的数据,以便将这些数据安全可靠地注入 LLM 中,实现更准确的文本生成。
引入新知识时,RAG 的效果通常比微调(fine-tuning)更好,可控性也更强。RAG 通过检索把新知识注入预训练语言模型的上下文,通过简化问题(让模型基于给定资料作答)来减少幻觉。
LlamaIndex 的优势是自带向量数据库。另外两个 RAG 框架是 LangChain 和 GroundX。LangChain 通常与向量数据库 Pinecone 搭配使用,即所谓的 LCPC。
魔搭社区提供了一个使用 LlamaIndex 做检索增强的例子,可以使用魔搭社区提供的免费 GPU 环境来运行这个例子。
Step1: 安装依赖库
!pip install llama-index llama-index-llms-huggingface ipywidgets
!pip install transformers -U
import logging
import sys
# Route INFO-and-above log records to stdout so they show up in the notebook output.
# Exactly one stdout handler is attached, and only when none is present yet:
# calling basicConfig(stream=sys.stdout) and then adding a second stdout
# StreamHandler prints every record twice, and re-running the cell would stack
# up yet another handler each time.
_root_logger = logging.getLogger()
_root_logger.setLevel(logging.INFO)
if not any(getattr(h, "stream", None) is sys.stdout for h in _root_logger.handlers):
    _stdout_handler = logging.StreamHandler(stream=sys.stdout)
    # Same "LEVEL:name:message" layout that logging.basicConfig() would install.
    _stdout_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    _root_logger.addHandler(_stdout_handler)
from IPython.display import Markdown, display
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts import PromptTemplate
from modelscope import snapshot_download
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
from abc import ABC
from typing import Any, List, Optional, Dict, cast
from llama_index.core import (
VectorStoreIndex,
ServiceContext,
set_global_service_context,
SimpleDirectoryReader,
)
Step2: 加载大语言模型
# Model names
# NOTE: despite the variable name, the id below points at a Qwen1.5 chat checkpoint.
qwen2_4B_CHAT = "qwen/Qwen1.5-4B-Chat"

# Fetch the checkpoint through ModelScope; the returned value is used below as
# both the tokenizer location and the model location.
selected_model = snapshot_download(qwen2_4B_CHAT)

SYSTEM_PROMPT = """You are a helpful AI assistant.
"""

# Qwen1.5 chat models are trained on the ChatML layout
# (<|im_start|>role ... <|im_end|>), not the Llama-2 "[INST] <<SYS>>" layout,
# so the query is wrapped in ChatML and left open on the assistant turn.
query_wrapper_prompt = PromptTemplate(
    "<|im_start|>system\n"
    + SYSTEM_PROMPT.strip()
    + "<|im_end|>\n<|im_start|>user\n{query_str}<|im_end|>\n<|im_start|>assistant\n"
)

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy decoding. `temperature` is only consulted when sampling, so passing
    # it together with do_sample=False just triggers a transformers warning.
    generate_kwargs={"do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # change these settings below depending on your GPU
    model_kwargs={"torch_dtype": torch.float16},
)
Step3: 加载知识库数据文档,markdown格式
!mkdir -p 'data/xianjiaoda/'
!wget 'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md' -O 'data/xianjiaoda/xianjiaoda.md'
documents = SimpleDirectoryReader("/mnt/workspace/data/xianjiaoda/").load_data()
documents
Step4: 使用 GTE 模型构造 Embedding
embedding_model = "iic/nlp_gte_sentence-embedding_chinese-base"


class ModelScopeEmbeddings4LlamaIndex(BaseEmbedding, ABC):
    """Embedding backend for LlamaIndex built on a ModelScope sentence-embedding pipeline.

    The pipeline is created once in ``__init__`` and reused for every query
    and document embedding request.
    """

    # ModelScope pipeline object; populated in __init__.
    embed: Any = None
    # ModelScope model id the pipeline is built from.
    model_id: str = "iic/nlp_gte_sentence-embedding_chinese-base"

    def __init__(
        self,
        model_id: str,
        **kwargs: Any,
    ) -> None:
        """Create the embedding pipeline for ``model_id``.

        Args:
            model_id: ModelScope model id of the sentence-embedding model.
            **kwargs: Forwarded unchanged to the base class initializer.

        Raises:
            ValueError: If the ``modelscope`` package cannot be imported.
        """
        super().__init__(**kwargs)
        # Honour the caller's choice of model. Without this assignment the
        # argument is ignored and the class-level default is always loaded.
        self.model_id = model_id
        # Only the imports are guarded, so that an ImportError raised while the
        # pipeline itself is being built is not misreported as "not installed".
        try:
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
        except ImportError as e:
            raise ValueError(
                "Could not import some python packages. "
                "Please install it with `pip install modelscope`."
            ) from e
        # Build the ModelScope embedding pipeline (this also downloads the model).
        self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)

    def _embed_texts(self, texts: List[str]) -> Any:
        """Run the pipeline on ``texts`` and return its ``text_embedding`` output.

        Newlines are replaced with spaces before the texts are sent to the model.
        """
        cleaned = [text.replace("\n", " ") for text in texts]
        return self.embed(input={"source_sentence": cleaned})["text_embedding"]

    def _get_query_embedding(self, query: str) -> List[float]:
        """Return the embedding vector of a single query string."""
        return self._embed_texts([query])[0].tolist()

    def _get_text_embedding(self, text: str) -> List[float]:
        """Return the embedding vector of a single document text."""
        return self._embed_texts([text])[0].tolist()

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per entry of ``texts``, in order."""
        if not texts:
            # Nothing to embed; skip the model call for an empty batch.
            return []
        return self._embed_texts(texts).tolist()

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Async variant of ``_get_query_embedding``; delegates to the sync call."""
        return self._get_query_embedding(query)
Step5: 建立检索使用的LlamaIndex向量库索引,需要设置embeddings和llm
# ServiceContext / set_global_service_context are deprecated in llama-index and
# raise in current releases; global defaults are configured through Settings.
# Imported here so this cell stays self-contained.
from llama_index.core import Settings

embeddings = ModelScopeEmbeddings4LlamaIndex(model_id=embedding_model)

# Register the LLM and the embedding model as the global defaults that the
# index and the query engine pick up.
Settings.llm = llm
Settings.embed_model = embeddings

# Build the vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents)
Step6: 最后一步查询和问答:基于本地知识库!
# Ask a question against the local knowledge base and print the answer.
question = "西安交大是由哪几个学校合并的?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)