In [None]:
!pip install -r requirement.txt

!pip install sentence_transformers

!pip install sagemaker

# Automatic Prompt Engineering (APE)

"APE"

## copy of kendra_chat_llm.py code

In [None]:
##-------------------------------------##
## modified from ../kendra_chat_llm.py
##-------------------------------------##

from langchain.retrievers import AmazonKendraRetriever
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from sentence_transformers import SentenceTransformer, util

from tqdm import tqdm
import pandas as pd
import json
import sys
import os

class bcolors:
    """Named ANSI escape sequences for styling terminal text.

    Prefix a string with one of these constants and append ``ENDC`` to
    return the terminal to its default style afterwards.
    """

    # Colour codes
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset sequence (SGR 0)
    ENDC = '\033[0m'


# NOTE(review): not referenced anywhere in the visible notebook; presumably
# carried over from kendra_chat_llm.py -- confirm before removing.
MAX_HISTORY_LENGTH = 5

# Generation settings handed to SagemakerEndpoint as `model_kwargs`.
model_parameters = dict(
    max_new_tokens=200,
    temperature=0.1,
    seed=0,
    stop=["Human:"],
    num_beams=1,
    return_full_text=False,
)


class ContentHandler(LLMContentHandler):
    """JSON (de)serialisation between prompts and the SageMaker endpoint."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Encode the prompt and generation parameters as a UTF-8 JSON body.

        The payload has the form ``{"inputs": prompt, "parameters": {...}}``.
        """
        payload = {"inputs": prompt, "parameters": {**model_kwargs}}
        return json.dumps(payload).encode('utf-8')

    def transform_output(self, output: bytes) -> str:
        """Extract the generated text from the endpoint response.

        NOTE(review): although annotated as ``bytes``, ``output`` must expose
        ``.read()``; the decoded JSON is indexed as a list whose first item
        carries a ``"generated_text"`` key.
        """
        decoded_body = output.read().decode("utf-8")
        first_generation = json.loads(decoded_body)[0]
        return first_generation["generated_text"]

 
def build_chain(prompt_template=None):
    """Build a ConversationalRetrievalChain backed by Kendra and a SageMaker LLM.

    Connection settings are read from the environment variables
    ``AWS_REGION``, ``KENDRA_INDEX_ID`` and ``SAGEMAKER_LLM_ENDPOINT``.

    Args:
        prompt_template: Template string for the answer-generation step. It is
            declared with the input variables ``context`` and ``question``.
            Required, despite the ``None`` default kept for compatibility.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``
        (configured with ``return_source_documents=False``).

    Raises:
        ValueError: If ``prompt_template`` is ``None``.
        KeyError: If one of the required environment variables is not set.
    """
    # Fail early with a clear message instead of an opaque template error
    # raised only after the AWS clients have been constructed.
    if prompt_template is None:
        raise ValueError(
            "prompt_template is required: pass a template string containing "
            "{context} and {question} placeholders"
        )

    region = os.environ["AWS_REGION"]
    kendra_index_id = os.environ["KENDRA_INDEX_ID"]
    endpoint_name = os.environ["SAGEMAKER_LLM_ENDPOINT"]

    content_handler = ContentHandler()

    llm = SagemakerEndpoint(
        endpoint_name=endpoint_name,
        region_name=region,
        model_kwargs=model_parameters,
        content_handler=content_handler
    )

    # Query Kendra in the same configured region as the LLM endpoint rather
    # than whatever region the default AWS session happens to resolve.
    retriever = AmazonKendraRetriever(index_id=kendra_index_id, region_name=region)

    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # Prompt used to rewrite a follow-up question into a standalone one.
    condense_qa_template = """
 Given the following conversation and a follow up question, rephrase the follow up question 
 to be a standalone question.

 Chat History:
 {chat_history}
 Follow Up Input: {question}
 Standalone question:"""

    standalone_question_prompt = PromptTemplate.from_template(
        condense_qa_template)

    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        condense_question_prompt=standalone_question_prompt,
        return_source_documents=False,
        combine_docs_chain_kwargs={"prompt": PROMPT})
    return qa


def run_chain(chain, prompt: str, history=None):
    """Invoke ``chain`` with a question and an optional chat history.

    Args:
        chain: Callable accepting a dict with ``"question"`` and
            ``"chat_history"`` keys.
        prompt: The question text.
        history: Chat history passed through to the chain as-is. When omitted,
            a fresh empty list is used for every call, so state can never leak
            between calls through a shared default object.

    Returns:
        Whatever ``chain`` returns.
    """
    if history is None:
        history = []
    result = chain({"question": prompt, "chat_history": history})
    return result

### configure the AWS_REGION, KENDRA_INDEX_ID and SAGEMAKER_LLM_ENDPOINT variables

In [None]:
import os

# Settings that build_chain reads from the environment.
# NOTE(review): KENDRA_INDEX_ID is left empty here -- presumably it must be
# replaced with a real index id before the chain can query Kendra.
os.environ.update({
    'AWS_REGION': 'us-east-1',
    "KENDRA_INDEX_ID": '',
    "SAGEMAKER_LLM_ENDPOINT": "falcon-7b-instruct-2xl",
})

### sample of Q & A test bank

In [None]:
def get_test_bank():
    """Return the sample question/answer bank as a dict.

    The bank is written as ``Question: ...`` / ``Answer: ...`` line pairs.
    Lines are stripped and matched by prefix rather than by position, so
    indentation or whitespace-only separator lines cannot misalign the
    question/answer pairing.

    Returns:
        dict mapping each question text to its reference answer text, with
        the ``Question: `` / ``Answer: `` prefixes removed.
    """
    test_bank = """Question: What is AWS Kendra?
 Answer: AWS Kendra is an intelligent search service powered by machine learning that provides natural language search capabilities for various data sources and content.

 Question: What types of data sources does AWS Kendra support for indexing?
 Answer: AWS Kendra supports various data sources, including Amazon S3, SharePoint Online, Salesforce, ServiceNow, Relational Databases, and more.

 Question: How does AWS Kendra handle natural language queries?
 Answer: AWS Kendra uses machine learning models to understand natural language queries and provide relevant results using contextual understanding and ranking.

 Question: What are the benefits of using AWS Kendra for enterprise search?
 Answer: Some benefits of AWS Kendra include improved search accuracy, reduced time to find relevant information, and support for multiple data sources.

 Question: How does AWS Kendra ensure security and compliance?
 Answer: AWS Kendra encrypts data at rest and in transit, provides access control via AWS Identity and Access Management (IAM), and is compliant with various security standards.

 Question: Can AWS Kendra be used for both internal and external search scenarios?
 Answer: Yes, AWS Kendra can be used for both internal enterprise search (intranet) and external customer-facing search (internet) scenarios.

 Question: What are custom data sources in AWS Kendra?
 Answer: Custom data sources in AWS Kendra allow you to integrate your own data repositories or applications into the search index.

 Question: Can AWS Kendra search across multiple languages?
 Answer: Yes, AWS Kendra supports multiple languages and can perform multilingual search queries.

 Question: How does AWS Kendra handle synonyms and acronyms?
 Answer: AWS Kendra automatically recognizes synonyms and acronyms, improving search accuracy and understanding user queries.

 Question: Does AWS Kendra support natural language processing (NLP)?
 Answer: Yes, AWS Kendra uses natural language processing to understand complex user queries and return relevant search results."""

    question_prefix = 'Question: '
    answer_prefix = 'Answer: '

    qa_dict = {}
    pending_question = None
    for raw_line in test_bank.split("\n"):
        line = raw_line.strip()
        if not line:
            # Separator between pairs (may be empty or whitespace-only).
            continue
        if line.startswith(question_prefix):
            pending_question = line[len(question_prefix):]
        elif line.startswith(answer_prefix) and pending_question is not None:
            qa_dict[pending_question] = line[len(answer_prefix):]
            pending_question = None
    return qa_dict

### define the evaluation criterion: cosine similarity between the ground truth and the generated answer

In [None]:
# Sentence-embedding model used to compare generated answers with references
model_name = "paraphrase-mpnet-base-v2"
model = SentenceTransformer(model_name)


def get_ans_score(ground_truth, generated_answer, model):
    """Score a generated answer by cosine similarity to the ground truth.

    Both texts are embedded with ``model.encode(..., convert_to_tensor=True)``
    and compared with ``util.pytorch_cos_sim``.

    Args:
        ground_truth: Reference answer text.
        generated_answer: Answer text produced by the chain.
        model: Object exposing ``encode(text, convert_to_tensor=True)``.

    Returns:
        The value returned by ``util.pytorch_cos_sim``; callers in this
        notebook call ``.item()`` on it to obtain a Python float.
    """
    candidate_embedding = model.encode(generated_answer, convert_to_tensor=True)
    reference_embedding = model.encode(ground_truth, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(candidate_embedding, reference_embedding)
    return similarity

### define prompt template used in RAG + Kendra, and set up the Q & A.

In [None]:
# Baseline answer-generation prompt. build_chain declares {context} and
# {question} as its input variables.
prompt_template = """
 The following is a friendly conversation between a human and an AI. 
 The AI is talkative and provides lots of specific details from its context.
 If the AI does not know the answer to a question, it truthfully says it 
 does not know.
 {context}
 Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" 
 if not present in the document. 
 Helpful Answer:"""

# Question -> reference answer mapping used for every evaluation below
qa_bank = get_test_bank()
# Chain built with the baseline prompt
chain = build_chain(prompt_template=prompt_template)

### get initial score

In [None]:
# Ask every question in the bank with the baseline chain and score each
# generated answer against its reference answer.
score_dict = {}
for question, expected_answer in tqdm(qa_bank.items()):
    response = run_chain(chain, prompt=question)
    generated = response['answer'].strip()
    similarity = get_ans_score(expected_answer, generated, model=model)
    score_dict[question] = {
        'generated_answer': generated,
        'correct_answer': expected_answer,
        'score': similarity.item(),
    }

In [None]:
# Tabulate the baseline results: one column per question, with the rows
# 'generated_answer', 'correct_answer' and 'score' taken from the inner dicts.
score_initial = pd.DataFrame(score_dict)
score_initial

In [None]:
# Mean similarity across all questions. The 'score' row is selected by label
# so the result does not depend on the positional order of the rows.
score_initial.loc['score'].astype(float).mean()

***

### Observe the impact of different prompt templates on performance

In [None]:
# Candidate prompt templates to try in place of the original template.
# Templates are separated by blank lines and each one spans four lines:
# an introduction, the {context} placeholder, an instruction containing
# {question}, and the "Helpful Answer:" cue. The evaluation cell relies on
# this four-line layout when it splits the string.

prompt_template_list = """
 Explore this amiable exchange between a human and a chatty AI, where the AI generously shares numerous specific details from its context. Rest assured that if the AI is uncertain about an answer, it will truthfully express its lack of knowledge.
 {context}
 Instruction: Utilize the above documents to provide a detailed answer for {question}. Respond with "don't know" if the answer is not present in the document.
 Helpful Answer:

 Engage in this delightful conversation between a human and a talkative AI, eager to impart specific contextual information. Should the AI be unable to answer a question, it will honestly say so.
 {context}
 Instruction: Based on the above documents, respond to {question} with a detailed answer. In case the document doesn't contain the answer, write "don't know."
 Helpful Answer:

 Step into a friendly and informative interaction, where a loquacious AI shares abundant specific context. Should the AI encounter an unanswered question, it will readily admit not knowing the answer.
 {context}
 Instruction: Based on the above documents, compose a detailed answer for {question}. Indicate "don't know" if the document lacks the necessary information.
 Helpful Answer:

 Immerse yourself in this warm and engaging exchange between a human and a voluble AI, which willingly provides specific context details. The AI's honesty shines through as it openly admits its lack of knowledge on certain matters.
 {context}
 Instruction: Utilize the above documents to craft a detailed answer to {question}. If the answer is not present, kindly respond with "don't know."
 Helpful Answer:

 Discover a cordial dialogue between a human and an articulate AI, proficient in sharing precise context. If the AI cannot provide an answer, it will truthfully say it doesn't know.
 {context}
 Instruction: Based on the above documents, deliver a comprehensive response to {question}. Answer "don't know" if the information is not present in the document.
 Helpful Answer:

 Witness an enjoyable and information-rich interaction, featuring a talkative AI eager to share specific context details. The AI's integrity prevails as it openly acknowledges when it cannot answer a question.
 {context}
 Instruction: Based on the above documents, provide a detailed answer to {question}. If the answer is missing, respond with "don't know."
 Helpful Answer:

 Prepare yourself for an enlightening conversation, where an AI willingly divulges numerous specific details from its context. Should the AI lack an answer to a question, it will sincerely admit it.
 {context}
 Instruction: Utilize the above documents to provide a detailed answer for {question}. Indicate "don't know" if the answer is not present in the document.
 Helpful Answer:

 Unearth a captivating exchange between a human and an AI, eagerly sharing specific contextual insights. The AI's truthfulness is commendable, as it openly admits not knowing the answer to certain questions.
 {context}
 Instruction: Based on the above documents, respond to {question} with a detailed answer. If the answer is missing, simply state "don't know."
 Helpful Answer:

 Immerse yourself in a friendly and talkative AI conversation, where specific contextual information abounds. Should the AI encounter an unanswered question, it will forthrightly acknowledge its lack of knowledge.
 {context}
 Instruction: Using the above documents, provide a detailed answer to {question}. If the document doesn't contain the necessary information, state "don't know."
 Helpful Answer:

 Delve into this amiable and articulate exchange between a human and an AI, proficient in sharing specific context details. The AI's honesty is evident as it admits not knowing the answer to certain questions.
 {context}
 Instruction: Based on the above documents, furnish a comprehensive response to {question}. Respond with "don't know" if the document lacks the necessary information.
 Helpful Answer:"""

### run Q & A for each prompt template

In [None]:
# Evaluate every candidate template: each one is used to answer all the
# questions in qa_bank with Kendra + RAG, and the per-question scores are
# saved to result/<prompt_id>.csv and collected in `result`.

# Each template spans this many non-blank lines in prompt_template_list.
LINES_PER_TEMPLATE = 4

# Strip first and then drop blanks, so whitespace-only separator lines cannot
# slip through as empty entries and shift the fixed-size grouping below.
tmp = [line.strip() for line in prompt_template_list.split('\n') if line.strip()]

result = list()
result_dir = 'result'
# Create the output directory if needed, without a check-then-create race.
os.makedirs(result_dir, exist_ok=True)

for idx, prompt_template in enumerate(
    ["\n".join(tmp[i: i + LINES_PER_TEMPLATE]) for i in range(0, len(tmp), LINES_PER_TEMPLATE)]
):
    chain = build_chain(prompt_template=prompt_template)

    score_dict = {}
    for qq, aa in tqdm(qa_bank.items()):
        ans = run_chain(chain, prompt=qq)
        generated_answer = ans['answer'].strip()
        score = get_ans_score(aa, generated_answer, model=model)
        score_dict[qq] = {'generated_answer': generated_answer, 'correct_answer': aa, 'score': score.item()}

    # Scalar entry: becomes a 'prompt_id' column repeated on every row.
    score_dict['prompt_id'] = idx
    score_df = pd.DataFrame(score_dict)
    # 'prompt_id' was added last; move it to the front.
    score_df = score_df[['prompt_id'] + list(score_df.columns)[:-1]]
    score_df.to_csv(os.path.join(result_dir, str(idx) + '.csv'), index=False)
    result.append(score_df)

## accumulate score

In [None]:
# Calculate the average score for each prompt, identified by prompt_id.

result_df = pd.concat(result)

# Keep only the 'score' row of every per-prompt frame. Selecting by label
# (with a list, so the result is always a DataFrame) avoids relying on the
# rows appearing in a fixed positional order.
result_df_score = result_df.loc[['score']].copy()

# Average over the question columns only; the cast makes the mean numeric
# even though the source columns also held text in their other rows.
result_df_score['avg_score'] = (
    result_df_score.drop(columns='prompt_id').astype(float).mean(axis=1)
)

result_df_score

## retrieve best score template

In [None]:
# Show the row(s) whose average score equals the highest average observed
top_avg_score = result_df_score['avg_score'].max()
result_df_score.loc[result_df_score['avg_score'] == top_avg_score]

## original template

In [None]:
# Restore the baseline template under the name `prompt_template` (the
# evaluation loop reused that name for the candidates) and print it so it can
# be compared with the best candidate.
prompt_template = """
 The following is a friendly conversation between a human and an AI. 
 The AI is talkative and provides lots of specific details from its context.
 If the AI does not know the answer to a question, it truthfully says it 
 does not know.
 {context}
 Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" 
 if not present in the document. 
 Helpful Answer:"""
print(prompt_template)

## improved template

In [None]:
# Best template among the candidates, looked up from the computed scores
# rather than a hard-coded index so it stays correct when the run is repeated.

# Rebuild the candidate list with the same four-line grouping as the evaluation loop.
candidate_templates = ["\n".join(tmp[i: i+4]) for i in range(0, len(tmp), 4)]

# Position of the first row holding the highest average score.
best_row = result_df_score['avg_score'].reset_index(drop=True).idxmax()
best_prompt_id = int(result_df_score['prompt_id'].iloc[best_row])

print(candidate_templates[best_prompt_id])

***

## improve prompt via LLM

In [None]:
# Connection settings, read from the same environment variables as build_chain
region = os.environ["AWS_REGION"]
kendra_index_id = os.environ["KENDRA_INDEX_ID"]
endpoint_name = os.environ["SAGEMAKER_LLM_ENDPOINT"]

# Stand-alone LLM client used only to rewrite the questions
content_handler = ContentHandler()
llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=region,
    model_kwargs=model_parameters,
    content_handler=content_handler,
)

# Instruction asking the LLM for a more detailed version of a question
prompt_template_improve = """Suggest a more detailed prompt for the following enclosed in three single quotes. 
 The prompt should still be an question. The prompt should be different with the original. 
 '''{input_text}''' """
prompt_improve = PromptTemplate.from_template(prompt_template_improve)

# Re-run the evaluation with the current prompt template, sending the
# LLM-rewritten question to the chain. Results stay keyed by the original question.
score_dict_updated = {}
chain = build_chain(prompt_template=prompt_template)
for original_question, expected_answer in tqdm(qa_bank.items()):
    rewrite_request = prompt_improve.format(input_text=original_question)
    qq_refined = llm(rewrite_request).strip()
    response = run_chain(chain, prompt=qq_refined)
    generated = response['answer'].strip()
    similarity = get_ans_score(expected_answer, generated, model=model)
    score_dict_updated[original_question] = {
        'generated_answer': generated,
        'correct_answer': expected_answer,
        'score': similarity.item(),
    }

df = pd.DataFrame(score_dict_updated)

## with the same prompt template, modifying the actual prompt is not effective

# Mean similarity when the questions are rewritten by the LLM. The 'score'
# row is selected by label so the result does not depend on row order.
df.loc['score'].astype(float).mean()