langchain data connections
This commit is contained in:
143
data_connections/us_constitution_assistant.ipynb
Normal file
143
data_connections/us_constitution_assistant.ipynb
Normal file
@@ -0,0 +1,143 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "925794f5-a4a8-4753-86b9-8fe4dac8455e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Ask a Legal Research Assistant Bot about the US Constitution"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "063dcd9d-b508-499d-a23e-efa892bb628a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
||||
"from langchain.retrievers.document_compressors import LLMChainExtractor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "77afad7d-9c48-44f8-878e-46fc10f4c804",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def us_constitution_helper(question):\n",
|
||||
" \"\"\"\n",
|
||||
" Takes in a question about the US Constitution and returns the most relevant\n",
|
||||
" part of the constitution. Notice it may not directly answer the actual question!\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" loader = TextLoader(\"US_Constitution.txt\")\n",
|
||||
" documents = loader.load()\n",
|
||||
"\n",
|
||||
" text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)\n",
|
||||
" docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
" embedding_function = OpenAIEmbeddings()\n",
|
||||
" db = Chroma.from_documents(docs, embedding_function, persist_directory=\"./constitution\")\n",
|
||||
" db.persist()\n",
|
||||
"\n",
|
||||
" llm = ChatOpenAI(temperature=0)\n",
|
||||
" compressor = LLMChainExtractor.from_llm(llm)\n",
|
||||
" compression_retriever = ContextualCompressionRetriever(base_compressor=compressor,\n",
|
||||
" base_retriever=db.as_retriever())\n",
|
||||
"\n",
|
||||
" compressed_docs = compression_retriever.get_relevant_documents(question)\n",
|
||||
"\n",
|
||||
" return compressed_docs[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "072ef3e4-49d5-48c7-9882-57100401ba35",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/yudhvir/Documents/miniconda3/envs/nlp/lib/python3.11/site-packages/langchain/chains/llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fifth Amendment\n",
|
||||
"No person shall be held to answer for a capital, or otherwise infamous crime, unless on a presentment or indictment of a Grand Jury, except in cases arising in the land or naval forces, or in the Militia, when in actual service in time of War or public danger; nor shall any person be subject for the same offence to be twice put in jeopardy of life or limb; nor shall be compelled in any criminal case to be a witness against himself, nor be deprived of life, liberty, or property, without due process of law; nor shall private property be taken for public use, without just compensation.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(us_constitution_helper(\"What is the 5th Amendment?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "e051813e-3329-4880-ae1f-7500cb6d3bb8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/yudhvir/Documents/miniconda3/envs/nlp/lib/python3.11/site-packages/langchain/chains/llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The powers not delegated to the United States by the Constitution, nor prohibited by it to the States, are reserved to the States respectively, or to the people.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(us_constitution_helper(\"What is the 10th Amendment?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df35b64a-6323-44a6-a076-29f7a3cbdb18",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user