# Build a Q&A application with Bedrock, Langchain and FAISS index

This notebook explains the steps required to build a Question & Answer application using the Retrieval Augmented Generation (RAG) architecture.
RAG combines the power of pre-trained LLMs with information retrieval, enabling more accurate and context-aware responses.

(This notebook was tested on SageMaker Studio ml.m5.2xlarge instance with Datascience 3.0 kernel)

## Pre-requisites

In [None]:
# Install the vector store, LangChain and the PDF parser.
# The %pip magic is used (instead of !pip) so the packages are installed into the
# environment of the running kernel.
# NOTE(review): versions are unpinned and langchain is upgraded to the latest release; the
# `langchain.*` import paths used later in this notebook may have moved in newer releases -
# consider pinning the versions this notebook was tested with.
%pip install faiss-cpu
%pip install langchain --upgrade
%pip install pypdf

In [None]:
# Needed only for the HuggingFace embeddings option (Option 2) further down.
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install sentence_transformers

In [None]:
# Upgrade the SageMaker SDK, which is used later to look up the region of the session.
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install sagemaker --upgrade

In [None]:
# Install the boto3/botocore wheels shipped under bedrock_docs/SDK.
# Both wheels are passed to a single %pip command so that pip resolves boto3's botocore
# requirement against the local wheel, and so that the install targets the running
# kernel's environment (a bare `python3` on PATH is not guaranteed to be the kernel's).
%pip install bedrock_docs/SDK/botocore-1.29.162-py3-none-any.whl bedrock_docs/SDK/boto3-1.26.162-py3-none-any.whl

## Restart Kernel

In [None]:
# Restart the kernel so that the packages installed above are picked up.
# NOTE(review): presumably this interrupts a "Run All" at this cell - continue from the
# next section once the kernel is back.
import IPython

kernel = IPython.Application.instance().kernel
kernel.do_shutdown(True)

## Set up dependencies

In [None]:
# Display the interpreter version string. The next cell asserts Python 3.8 or newer,
# which this notebook requires in order to use LangChain.
import sys
sys.version

In [None]:
# Fail fast, with an explanatory message, when the interpreter is older than Python 3.8.
assert sys.version_info >= (3, 8), "Python 3.8 or newer is required to use LangChain in this notebook"

In [None]:
import langchain

In [None]:
# Show the installed LangChain version (the install cell upgrades to the latest release,
# so this records which one the notebook actually ran against).
langchain.__version__

In [None]:
import os, json
from tqdm import tqdm
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter,NLTKTextSplitter
import pathlib 

## Perform document pre-processing
Load the documents, perform clean-up of the text before generating embeddings

In [None]:
# Splitter used to cut the loaded documents into chunks of at most 800 characters
# (measured with len) with a 100-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=800,
    chunk_overlap=100,
    add_start_index=False,
    keep_separator=False,
    # The separators are left at the library default; a custom list could be passed, e.g.
    # separators=["\n\n", "\n", ".", "!", "?", " ", ",", ""],
)


In [None]:
# Name of the document set: it selects the PDF sub-folder below and is reused later
# as the name of the saved FAISS index.
index_name = "firetv"
# Folder containing the PDFs to index - point this at your own documents.
directory = "pdfs/" + index_name

In [None]:
# Collect the paths of the PDF files in `directory`.
# Entries that are not PDFs (for example a .ipynb_checkpoints folder or other stray files)
# are skipped because they are later handed to PyPDFLoader, and the list is sorted because
# os.listdir returns entries in arbitrary order.
pdf_documents = sorted(
    os.path.join(directory, filename)
    for filename in os.listdir(directory)
    if filename.lower().endswith(".pdf")
)
pdf_documents

In [None]:
# Load every PDF with PyPDFLoader and gather everything it returns in one flat list.
langchain_documents = []
for pdf_path in pdf_documents:
    langchain_documents += PyPDFLoader(pdf_path).load()


In [None]:
# Report how many documents were loaded, split them into chunks with text_splitter,
# then report how many chunks came out.
num_loaded = len(langchain_documents)
print("loaded document pages: ", num_loaded)

print("Splitting all documents")
split_docs = text_splitter.split_documents(langchain_documents)

num_chunks = len(split_docs)
print("Num split pages: ", num_chunks)

In [None]:
# Preview the text of the first chunk (before the clean-up cell below has been run).
split_docs[0].page_content

In [None]:
import re  # the standard-library module supports every pattern used below


def _clean_page_text(text):
    """Tidy up the text of one chunk extracted from a PDF.

    Args:
        text: Raw chunk text.

    Returns:
        The text with hyphenated line breaks re-joined, newlines normalised and
        literal ``\\xNN`` / ``\\uNNNN`` escape sequences removed.
    """
    # Re-join words that were hyphenated across a line break ("exam-\nple" -> "example").
    text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
    # Turn a newline into a space unless it is directly preceded by newline+whitespace
    # or directly followed by whitespace+newline.
    text = re.sub(r"(?<!\n\s)\n(?!\s\n)", " ", text.strip())
    # Collapse newlines separated only by whitespace into a single blank line.
    text = re.sub(r"\n\s*\n", "\n\n", text)
    # Remove literal hexadecimal escapes such as "\x0c". The previous pattern, [/X],
    # deleted every "/" and every "X" from the text instead.
    text = re.sub(r"\\x[0-9A-Fa-f]{2}", "", text)
    # Replace literal unicode escapes such as "\u2022" with a space. Exactly four hex
    # digits are matched so that hex-looking letters following the escape are kept.
    text = re.sub(r"\\u[0-9A-Fa-f]{4}", " ", text)
    return text


# Clean every chunk in place.
for d in split_docs:
    d.page_content = _clean_page_text(d.page_content)

## Generate Embeddings
Use an embeddings model to generate embeddings of the cleaned-up doc

### Option 1- Bedrock Titan Embeddings

In [None]:
import boto3
import sagemaker
from langchain.embeddings import BedrockEmbeddings

# Look up the region of the current SageMaker session and create the Bedrock client in it.
session = boto3.Session()
sagemaker_session = sagemaker.Session()
studio_region = sagemaker_session.boto_region_name
bedrock = session.client("bedrock", region_name=studio_region)

# Titan embeddings model. It uses the same region as the `bedrock` client above rather
# than a hard-coded "us-east-1", so embedding and LLM calls go to the same region.
emb = BedrockEmbeddings(region_name=studio_region, model_id="amazon.titan-e1t-medium")
# NOTE(review): presumably resets the model kwargs to an empty dict because the Titan
# embeddings model takes no extra parameters - confirm against the LangChain version in use.
emb.model_kwargs = {}

### Option 2- Huggingface Embeddings - Requires sentence_transformers

In [None]:
# Alternative to Option 1 - run only one of the two options. This cell rebinds `emb`,
# so running it after Option 1 replaces the Bedrock Titan embeddings for all later cells.
# NOTE(review): no model name is passed, so the library's default model is used - confirm which.
from langchain.embeddings import HuggingFaceEmbeddings
emb = HuggingFaceEmbeddings()

## Setup local Vector store - FAISS 

In [None]:
from langchain.vectorstores import FAISS

In [None]:
# Embed every cleaned chunk with `emb` and build a FAISS index from the result.
print("Embed and create vector index")
db = FAISS.from_documents(documents=split_docs, embedding=emb)

### Save the indices locally as a file

In [None]:
# Folder in which the FAISS index files are saved and from which they are loaded again.
index_path = "faiss_indices"

In [None]:
# Persist the index under index_path, creating the folder first when it does not exist.
print('Save the index created locally')
index_dir = pathlib.Path(index_path)
index_dir.mkdir(exist_ok=True, parents=True)
db.save_local(index_name=index_name, folder_path=index_path)

### Load from local file cache

In [None]:
%%time
# Reload the saved index from disk to verify the save/load round trip (timed by %%time).
# NOTE(review): LangChain's FAISS loader is understood to unpickle the stored docstore -
# only load index files you created yourself.
db_local = FAISS.load_local(
    index_name=index_name,
    folder_path=index_path,
    embeddings=emb,
)

### Perform a similarity search and get top 3 matching docs

In [None]:
# Sample question, reused by every querying cell below.
query = "How to setup Parental controls?"

# Fetch the 3 chunks of the reloaded index that are most similar to the question.
docs = db_local.similarity_search(query=query, k=3)
docs

## Access the LLM with context from the vector store

### Method 1 - Simple query with the vector store wrapper

In [None]:
from langchain.llms.bedrock import Bedrock

# Anthropic Claude served through Bedrock. It reuses the `bedrock` client created in the
# "Option 1" embeddings cell, so that cell must have been run even when Option 2 is used.
model_args = {
    'max_tokens_to_sample': 200,
    'temperature': 0,
}
llm = Bedrock(
    client=bedrock,
    model_id="anthropic.claude-v1",
    model_kwargs=model_args,
)

In [None]:
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

# Wrap the reloaded vector store so that the question can be answered with one query() call.
wrapper_store = VectorStoreIndexWrapper(vectorstore=db_local)
response = wrapper_store.query(llm=llm, question=query)

print(response)

### Method 2- Query with chain

In [None]:
from langchain.chains.question_answering import load_qa_chain

# Retrieve the 5 most similar chunks explicitly, then hand them to a "stuff" QA chain
# together with the question.
documents = db_local.similarity_search(query=query, k=5)
chain = load_qa_chain(llm, chain_type="stuff")
answer = chain.run(input_documents=documents, question=query)
print(answer)

### Method 3- Query with Prompt template (Provides prompt customization)

In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Custom prompt for the RetrievalQA chain. Claude's text-completion format expects the
# prompt to open with "\n\nHuman:" and to end with "\n\nAssistant:", so both turn markers
# are preceded by a blank line here (the original template had neither).
# {context} receives the retrieved chunks and {question} the user question.
prompt_template = (
    "\n\nHuman: Use the following pieces of context to provide a concise answer to the question at the end. \n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Assistant:"
)
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [None]:
# Retriever returning the 3 most similar chunks of the reloaded index for a query.
retriever = db_local.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# "stuff" RetrievalQA chain that uses the custom PROMPT and also returns the retrieved chunks.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)

response = qa({'query': query})
print(response['result'])

In [None]:
# Show the chunks returned alongside the answer above. `response` is reassigned by the
# Kendra cell further down, so run this cell before that one.
response['source_documents']

## Implement RAG architecture with Kendra Index

In [None]:
# ID of the Kendra index to query; it is passed as IndexId to kendra.retrieve in the next cell.
kendra_index = "" # Provide your Kendra index ID here before running the cells below

In [None]:
from langchain.schema.document import Document

# Ask Kendra for passages relevant to `query` and wrap the 'Content' of each result item
# in a LangChain Document. Note that this rebinds `response` and `docs` from earlier cells.
kendra = boto3.client('kendra')
response = kendra.retrieve(QueryText=query, IndexId=kendra_index)
docs = list(
    map(lambda item: Document(page_content=item['Content']), response['ResultItems'])
)
docs

In [None]:
# Answer the same question with a "stuff" QA chain, this time with the Kendra passages as context.
chain = load_qa_chain(llm=llm, chain_type="stuff")
kendra_answer = chain.run(question=query, input_documents=docs)
print(kendra_answer)