Adaptive RAG¶
This is an approach for doing RAG well:
- first run query analysis to decide whether retrieval is needed at all, and
- then use active / self-corrective RAG to actually do the retrieval well.
The paper describes a methodology that dynamically picks among three routes:
- No Retrieval
- Single-shot RAG
- Iterative RAG
We will implement it with LangGraph, dispatching queries down one of two routes (a minimal sketch of this control flow follows the list):
- Web search
- Self-corrective RAG
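Before building the real graph, here is a minimal, self-contained sketch of that control flow. Every name in it is a hypothetical placeholder; the actual LLM-based router, graders, and retriever are built step by step in the sections below.
def analyze_query(question: str) -> str:
    # Placeholder router; the real one below asks an LLM with structured output.
    return "vectorstore" if "LangSmith" in question else "web_search"
def answer(question: str) -> str:
    if analyze_query(question) == "web_search":
        context = "web search results (placeholder)"
    else:
        # Self-corrective route: retrieve, grade, and rewrite the query as needed.
        context = "retrieved documents (placeholder)"
    return f"answer grounded in: {context}"
print(answer("What is LangSmith?"))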
Setup¶
Let's install the required packages and set up the API keys.
%%capture --no-stderr
! pip install -U langchain_community tiktoken langchain-openai langchainhub chromadb langchain langgraph tavily-python
import getpass
import os
def _set_env(var: str):
if not os.environ.get(var):
os.environ[var] = getpass.getpass(f"{var}: ")
_set_env("OPENAI_API_KEY")
_set_env("TAVILY_API_KEY")
OPENAI_API_KEY: ··········
TAVILY_API_KEY: ··········
Let's also set up LangSmith. It's only used for tracing, so it's fine to skip this step.
_set_env("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "langgraph_adaptive_rag"
LANGCHAIN_API_KEY: ··········
Create Index¶
We index a set of web documents using OpenAIEmbeddings and a Chroma vector DB.
This prepares the data for RAG.
### Build Index
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
# Set embeddings
embd = OpenAIEmbeddings()
# Docs to index
urls = [
"https://jonhpark7966.github.io/LangSmith_Course",
"https://jonhpark7966.github.io/LangSmith_Course/FAQ",
"https://jonhpark7966.github.io/LangSmith_Course/intros/Evaluation_with_dataset",
"https://jonhpark7966.github.io/LangSmith_Course/intros/LLMOps",
"https://jonhpark7966.github.io/LangSmith_Course/intros/LangSmith",
"https://jonhpark7966.github.io/LangSmith_Course/intros/Pricing",
"https://jonhpark7966.github.io/LangSmith_Course/intros/Tracing_concept",
"https://jonhpark7966.github.io/LangSmith_Course/practice/1_LangSmith_Tracing",
"https://jonhpark7966.github.io/LangSmith_Course/practice/2_LangSmith_Evaluation",
"https://jonhpark7966.github.io/LangSmith_Course/tutorial/Datasets",
"https://jonhpark7966.github.io/LangSmith_Course/tutorial/Evaluation",
"https://jonhpark7966.github.io/LangSmith_Course/tutorial/Setup",
"https://jonhpark7966.github.io/LangSmith_Course/tutorial/Tracing_1",
"https://jonhpark7966.github.io/LangSmith_Course/tutorial/Tracing_2",
]
# Load
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=500, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)
# Add to vectorstore
vectorstore = Chroma.from_documents(
documents=doc_splits,
collection_name="rag-chroma",
embedding=embd,
)
retriever = vectorstore.as_retriever()
WARNING:langchain_community.utils.user_agent:USER_AGENT environment variable not set, consider setting it to identify your requests.
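As a quick, optional sanity check, you can query the retriever directly; the exact chunks returned depend on what was indexed:
# Optional smoke test -- output varies with the index contents.
for d in retriever.invoke("What does LangSmith pricing look like?"):
    print(d.metadata.get("source"), "-", d.page_content[:60])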
### Router
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
# Data model
class RouteQuery(BaseModel):
"""Route a user query to the most relevant datasource."""
datasource: Literal["vectorstore", "web_search"] = Field(
...,
description="Given a user question choose to route it to web search or a vectorstore.",
)
# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_router = llm.with_structured_output(RouteQuery)
# Prompt
system = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to LangSmith and LLM operations.
Use the vectorstore for questions on these topics. Otherwise, use web-search."""
route_prompt = ChatPromptTemplate.from_messages(
[
("system", system),
("human", "{question}"),
]
)
question_router = route_prompt | structured_llm_router
print(
question_router.invoke(
{"question": "도원결의가 뭐야?"}
)
)
print(question_router.invoke({"question": "LangSmith에 대해 설명해줘?"}))
datasource='web_search'
datasource='vectorstore'
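Because with_structured_output binds the RouteQuery schema via tool/function calling, the model can only return one of the two literals, so no extra output parsing is needed.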
Retrieval Grader¶
We run retrieval and grade the results.
Query analysis decided to go the RAG route, but the documents actually retrieved may still be unsatisfactory.
So we check whether the retrieved documents are sufficiently relevant to the query.
Here we again delegate to an LLM, which grades relevance with a binary 'yes' or 'no'.
### Retrieval Grader
# Data model
class GradeDocuments(BaseModel):
"""Binary score for relevance check on retrieved documents."""
binary_score: str = Field(
description="Documents are relevant to the question, 'yes' or 'no'"
)
# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)
# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
[
("system", system),
("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
]
)
retrieval_grader = grade_prompt | structured_llm_grader
question = "올해 한국 프로야구 우승팀은?" #"랭스미스"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
binary_score='no'
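To grade every retrieved chunk rather than a single one, a small loop like this (a preview of the grade_documents node defined later) filters the list down to the relevant chunks:
# Keep only the chunks the grader marks relevant (preview of grade_documents).
relevant_docs = [
    d for d in docs
    if retrieval_grader.invoke(
        {"question": question, "document": d.page_content}
    ).binary_score == "yes"
]
print(f"{len(relevant_docs)}/{len(docs)} chunks judged relevant")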
Generate¶
Now the conventional RAG step: we generate an answer from the retrieved documents.
We'll build it as a simple LCEL chain.
### Generate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
# Prompt
prompt = hub.pull("rlm/rag-prompt")
# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
# Post-processing
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
# Chain
rag_chain = prompt | llm | StrOutputParser()
# Run
generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
print(generation)
모르겠습니다.
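This is the expected behavior: the indexed LangSmith pages say nothing about a KBO champion, so the rag-prompt has the model admit it does not know. ("모르겠습니다." means "I don't know.")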
Hallucination Grader¶
We check once more whether the generation hallucinated, i.e. whether it is grounded in the retrieved documents.
Again, the grade is binary.
### Hallucination Grader
# Data model
class GradeHallucinations(BaseModel):
"""Binary score for hallucination present in generation answer."""
binary_score: str = Field(
description="Answer is grounded in the facts, 'yes' or 'no'"
)
# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeHallucinations)
# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
[
("system", system),
("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
]
)
hallucination_grader = hallucination_prompt | structured_llm_grader
hallucination_grader.invoke({"documents": docs, "generation": generation})
GradeHallucinations(binary_score='no')
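Note that even this honest "I don't know" is graded as not grounded in the documents. In the graph below, a 'not supported' grade like this routes the flow back to generate for another attempt.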
Answer Grader¶
Finally, we grade the answer itself: does it actually address the question?
### Answer Grader
# Data model
class GradeAnswer(BaseModel):
"""Binary score to assess answer addresses question."""
binary_score: str = Field(
description="Answer addresses the question, 'yes' or 'no'"
)
# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer)
# Prompt
system = """You are a grader assessing whether an answer addresses / resolves a question. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
[
("system", system),
("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
]
)
answer_grader = answer_prompt | structured_llm_grader
answer_grader.invoke({"question": question, "generation": generation})
GradeAnswer(binary_score='no')
Question Re-writer¶
So far we have passed the user's question to RAG as-is.
However, the question may not be phrased in a form well suited to RAG.
For better retrieval, we interpret the question and rewrite it so that vector similarity search works well.
### Question Re-writer
# LLM
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Prompt
system = """You are a question re-writer that converts an input question to a better version that is optimized \n
for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
[
("system", system),
(
"human",
"Here is the initial question: \n\n {question} \n Formulate an improved question.",
),
]
)
question_rewriter = re_write_prompt | llm | StrOutputParser()
question_rewriter.invoke({"question": question})
'2023년 한국 프로야구의 우승팀은 누구인가요?'
Web Search Tool¶
For the web-search route, we set up the Tavily tool.
### Search
from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(max_results=3)
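Graph State¶
Next we define the state that flows through the graph: the current question, the latest LLM generation, and the working list of documents.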
from typing import List
from typing_extensions import TypedDict
class GraphState(TypedDict):
"""
Represents the state of our graph.
Attributes:
question: question
generation: LLM generation
documents: list of documents
"""
question: str
generation: str
documents: List[str]
Define Graph Flow¶
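Each node function below receives the current graph state and returns only the keys it updates. The functions after the ### Edges ### marker inspect the state and return the name of the next node to run.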
from langchain.schema import Document
def retrieve(state):
"""
Retrieve documents
Args:
state (dict): The current graph state
Returns:
state (dict): New key added to state, documents, that contains retrieved documents
"""
print("---RETRIEVE---")
question = state["question"]
# Retrieval
documents = retriever.invoke(question)
return {"documents": documents, "question": question}
def generate(state):
"""
Generate answer
Args:
state (dict): The current graph state
Returns:
state (dict): New key added to state, generation, that contains LLM generation
"""
print("---GENERATE---")
question = state["question"]
documents = state["documents"]
# RAG generation
    generation = rag_chain.invoke({"context": format_docs(documents), "question": question})
return {"documents": documents, "question": question, "generation": generation}
def grade_documents(state):
"""
Determines whether the retrieved documents are relevant to the question.
Args:
state (dict): The current graph state
Returns:
state (dict): Updates documents key with only filtered relevant documents
"""
print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
question = state["question"]
documents = state["documents"]
# Score each doc
filtered_docs = []
for d in documents:
score = retrieval_grader.invoke(
{"question": question, "document": d.page_content}
)
grade = score.binary_score
if grade == "yes":
print("---GRADE: DOCUMENT RELEVANT---")
filtered_docs.append(d)
else:
print("---GRADE: DOCUMENT NOT RELEVANT---")
continue
return {"documents": filtered_docs, "question": question}
def transform_query(state):
"""
Transform the query to produce a better question.
Args:
state (dict): The current graph state
Returns:
state (dict): Updates question key with a re-phrased question
"""
print("---TRANSFORM QUERY---")
question = state["question"]
documents = state["documents"]
# Re-write question
better_question = question_rewriter.invoke({"question": question})
return {"documents": documents, "question": better_question}
def web_search(state):
"""
Web search based on the re-phrased question.
Args:
state (dict): The current graph state
Returns:
state (dict): Updates documents key with appended web results
"""
print("---WEB SEARCH---")
question = state["question"]
# Web search
docs = web_search_tool.invoke({"query": question})
web_results = "\n".join([d["content"] for d in docs])
web_results = Document(page_content=web_results)
    # Keep documents a list so the state stays consistent with GraphState.
    return {"documents": [web_results], "question": question}
### Edges ###
def route_question(state):
"""
Route question to web search or RAG.
Args:
state (dict): The current graph state
Returns:
str: Next node to call
"""
print("---ROUTE QUESTION---")
question = state["question"]
source = question_router.invoke({"question": question})
if source.datasource == "web_search":
print("---ROUTE QUESTION TO WEB SEARCH---")
return "web_search"
elif source.datasource == "vectorstore":
print("---ROUTE QUESTION TO RAG---")
return "vectorstore"
def decide_to_generate(state):
"""
Determines whether to generate an answer, or re-generate a question.
Args:
state (dict): The current graph state
Returns:
str: Binary decision for next node to call
"""
print("---ASSESS GRADED DOCUMENTS---")
filtered_documents = state["documents"]
if not filtered_documents:
        # All documents were filtered out by the relevance check,
        # so we rewrite the query and try again.
print(
"---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
)
return "transform_query"
else:
# We have relevant documents, so generate answer
print("---DECISION: GENERATE---")
return "generate"
def grade_generation_v_documents_and_question(state):
"""
Determines whether the generation is grounded in the document and answers question.
Args:
state (dict): The current graph state
Returns:
str: Decision for next node to call
"""
print("---CHECK HALLUCINATIONS---")
question = state["question"]
documents = state["documents"]
generation = state["generation"]
score = hallucination_grader.invoke(
{"documents": documents, "generation": generation}
)
grade = score.binary_score
# Check hallucination
if grade == "yes":
print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
# Check question-answering
print("---GRADE GENERATION vs QUESTION---")
score = answer_grader.invoke({"question": question, "generation": generation})
grade = score.binary_score
if grade == "yes":
print("---DECISION: GENERATION ADDRESSES QUESTION---")
return "useful"
else:
print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
return "not useful"
else:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
return "not supported"
Compile Graph¶
from langgraph.graph import END, StateGraph, START
workflow = StateGraph(GraphState)
# Define the nodes
workflow.add_node("web_search", web_search) # web search
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate)  # generate
workflow.add_node("transform_query", transform_query) # transform_query
# Build graph
workflow.add_conditional_edges(
START,
route_question,
{
"web_search": "web_search",
"vectorstore": "retrieve",
},
)
workflow.add_edge("web_search", "generate")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
"grade_documents",
decide_to_generate,
{
"transform_query": "transform_query",
"generate": "generate",
},
)
workflow.add_edge("transform_query", "retrieve")
workflow.add_conditional_edges(
"generate",
grade_generation_v_documents_and_question,
{
"not supported": "generate",
"useful": END,
"not useful": "transform_query",
},
)
# Compile
app = workflow.compile()
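Optionally, you can render the compiled graph to verify the wiring. This assumes a Jupyter environment; draw_mermaid_png renders via an external Mermaid service by default, so skip it if it fails:
from IPython.display import Image, display
# Draw the compiled graph as a Mermaid PNG (optional).
try:
    display(Image(app.get_graph().draw_mermaid_png()))
except Exception:
    pass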
Use Graph¶
from pprint import pprint
# Run
inputs = {
"question": "2024년 KBO 한국 프로야구 우승팀 누굴까?"
}
for output in app.stream(inputs):
for key, value in output.items():
# Node
pprint(f"Node '{key}':")
# Optional: print full state at each node
# pprint.pprint(value["keys"], indent=2, width=80, depth=None)
pprint("\n---\n")
# Final generation
pprint(value["generation"])
---ROUTE QUESTION---
---ROUTE QUESTION TO WEB SEARCH---
---WEB SEARCH---
"Node 'web_search':"
'\n---\n'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
'\n---\n'
('2024년 KBO 한국 프로야구 우승팀에 대한 예측은 LG 트윈스가 가장 유력하다고 합니다. 그러나 실제 우승팀은 시즌이 진행된 후에야 '
 '확정될 것입니다. 현재로서는 정확한 답변을 드릴 수 없습니다.')
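(The generation says the LG Twins are considered the favorite for the 2024 KBO championship, but the actual champion will only be determined once the season plays out, so no exact answer can be given yet.)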
# Run
inputs = {"question": "랭스미스를 쓰면 장점이 뭐야?"}
for output in app.stream(inputs):
for key, value in output.items():
# Node
pprint(f"Node '{key}':")
# Optional: print full state at each node
# pprint.pprint(value["keys"], indent=2, width=80, depth=None)
pprint("\n---\n")
# Final generation
pprint(value["generation"])
---ROUTE QUESTION---
---ROUTE QUESTION TO RAG---
---RETRIEVE---
"Node 'retrieve':"
'\n---\n'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
"Node 'grade_documents':"
'\n---\n'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
'\n---\n'
('랭스미스를 사용하면 LLM 어플리케이션의 성능을 모니터링하고, 사용자 피드백을 수집하여 개선할 수 있습니다. 또한, 버그 추적과 응답 '
 '속도 분석을 통해 운영 중 발생할 수 있는 문제를 신속하게 파악할 수 있습니다. 이러한 기능들은 A/B 테스트와 같은 방법으로 '
 '어플리케이션의 품질을 높이는 데 기여합니다.')
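(The generation: with LangSmith you can monitor LLM application performance and collect user feedback for improvement; bug tracking and response-latency analysis help you quickly spot problems in production; and features such as A/B testing help raise application quality.)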