LangChain MasterClass - Build 12 OpenAI LLM Apps using Python

1. LangChain Introduction


2. OpenAI Introduction

3. Demo & Environment Setup

1. A LangChain Example - Implementation Demo

As of March 2023, the environment seems to be Python 3.10.

4. Langchain - Models Module Concept

1. LangChain’s Modules Overview

5. ✅Beginner level

6. Project #1 - Simple Question & Answer App

1. LLMs Walkthrough

Hugging Face offers free models.

2. LLM Practical Implementation using Python

pip install openai
conda install langchain -c conda-forge
pip install "langchain[all]"
#!/usr/bin/env python
# coding: utf-8

# pip install is the command you use to install Python packages with the pip package manager.
# Installing LangChain package

# In[20]:


# get_ipython().system('pip install langchain')


# # Let's use a proprietary LLM from OpenAI
#
# Installing the openai package, which includes the classes that we can use to communicate with OpenAI services

# In[21]:


# get_ipython().system('pip install openai')


# Imports the Python built-in module called "os".
# This module provides a way to interact with the operating system, such as accessing environment variables, working with files and directories, executing shell
# commands, etc.
# The environ attribute is a dictionary-like object that contains the environment variables of the current operating system session.
# By accessing os.environ, you can retrieve and manipulate environment variables within your Python program. For example, you can retrieve the value of a
# specific environment variable using the syntax os.environ["VARIABLE_NAME"], where "VARIABLE_NAME" is the name of the environment variable you want to
# access.

# In[22]:


import os

import httpx

os.environ["OPENAI_API_KEY"] = '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'
# os.environ["OPENAI_API_BASE"] = 'https://oneapi.xty.app/v1'

# LangChain has built a wrapper around the OpenAI APIs, using which we can get access to all the services OpenAI provides.
# The code snippet below imports a specific class called 'OpenAI' (a wrapper around OpenAI large language models) from the 'llms' module of the 'langchain'
# library.
# https://python.langchain.com/en/latest/modules/langchain/llms/openai.html

# In[23]:


# from langchain.llms import OpenAI
from langchain_community.llms import OpenAI

# Here we are instantiating a language model object called OpenAI, for our natural language processing tasks.
# The parameter model_name is provided with the value of a specific version or variant of a language model (examples: text-davinci-003,
# code-davinci-002, gpt-3.5-turbo, text-ada-001 and more).

# In[24]:


llm = OpenAI(
    temperature=0.9,
    base_url="https://oneapi.xty.app/v1",
    api_key="sk-Qg2sQRe2BTRDkjXCAaCa243017994a8aBf2e5bC26aE8Af99",
    # model_name="text-davinci-003", # model 不存在
    model_name="gpt-3.5-turbo-instruct", # OK 
    http_client=httpx.Client(
        base_url="https://oneapi.xty.app/v1",
        follow_redirects=True,
    ),
)

# Here the language model is represented by the object "llm", which is being utilized to generate a completion or response based on a specific query.
# The query, stored in the "our_query" variable, is being passed to the model through the llm object.
 
our_query = "What is the currency of India?"
completion = llm(our_query) 

print(completion) 

pip install huggingface_hub

Generate an access token under Settings on huggingface.co.

# https://huggingface.co/docs/transformers/model_doc/flan-t5
# paste your Hugging Face access token below
import os

os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your-huggingfacehub-api-token'
# from langchain.llms import HuggingFaceHub
# from langchain_community.llms import HuggingFaceHub

# F:\anaconda\envs\py39f\lib\site-packages\huggingface_hub\utils\_deprecation.py:131:
# FutureWarning: 'InferenceApi' (from 'huggingface_hub.inference_api') is deprecated
# and will be removed from version '1.0'.
# `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`.
# Check out this guide to learn how to convert your script to use it:
# https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.
# llm = HuggingFaceHub(repo_id="google/flan-t5-large")
# model = "google/flan-t5-large"
# llm = HuggingFaceHub(repo_id=model)

# The LLM takes a prompt as an input and outputs a completion
# our_query = 'What is the currency of India?'
# completion = llm(our_query)
# print(completion)

from langchain import PromptTemplate, HuggingFaceHub, LLMChain

template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(
    prompt=prompt,
    llm=HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs={
            "temperature": 0,
            "max_length": 64
        }
    )
)

question = "What is the capital of France?"

print(llm_chain.run(question))
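Note that HuggingFaceHub and LLMChain are deprecated in newer LangChain releases. A minimal sketch of the same example with the community HuggingFaceEndpoint wrapper and the runnable (`prompt | llm`) style; class and parameter names assume a recent langchain-community release and that the serverless Inference API still serves this model:

# Sketch: the same flan-t5 example with the newer community wrapper (HUGGINGFACEHUB_API_TOKEN must be set)
from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    template="Question: {question}\n\nAnswer: Let's think step by step.",
    input_variables=["question"],
)
llm = HuggingFaceEndpoint(
    repo_id="google/flan-t5-large",
    temperature=0.1,      # must be strictly positive here
    max_new_tokens=64,
)
chain = prompt | llm     # runnable pipeline, the replacement for LLMChain
print(chain.invoke({"question": "What is the capital of France?"}))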

3. Project Environment Setup


requirements.txt

langchain
openai
streamlit

4. Let’s Build Simple Question Answering Application

Uploading app.py overwrites the original empty file in the Space.

#Streamlit is an open-source framework for building web applications with interactive data visualizations and machine learning models.
#Make sure it is installed in your Python environment, then import it with the import statement.

import streamlit as st


from langchain.llms import OpenAI

#Function to return the response
def load_answer(question):
    llm = OpenAI(model_name="gpt-3.5-turbo-instruct",temperature=0)
    answer=llm(question)
    return answer


#App UI starts here
st.set_page_config(page_title="LangChain Demo", page_icon=":robot:")
st.header("LangChain Demo")

#Gets the user input
def get_text():
    input_text = st.text_input("You: ", key="input")
    return input_text


user_input = get_text()

submit = st.button('Generate')

#If the Generate button is clicked (the model is only queried on click, not on every rerun)
if submit:
    response = load_answer(user_input)

    st.subheader("Answer:")

    st.write(response)
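To try the app locally, save it as app.py (the filename is just an example) and run:

streamlit run app.py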

Add the OPENAI_API_KEY as a secret under the Space's Settings.

7. Project #2 - Simple Conversational App

1. Chat Model Walkthrough

2. Chat Model Practical Implementation using Python

# %% env
import os

# os.environ["OPENAI_API_KEY"] = 'sk-Qg2sQRe2BTRDkjXCAaCa243017994a8aBf2e5bC26aE8Af99'
# os.environ["OPENAI_API_BASE"] = 'https://oneapi.xty.app/v/1'

# %% import model
'''
LangChain has built a wrapper around the OpenAI APIs, using which we can get access to all the services OpenAI provides.
The code snippet below imports a specific class called 'ChatOpenAI' (a wrapper around OpenAI chat models) from the 'chat_models' module of the
'langchain' library.
'''
from langchain_community.chat_models import ChatOpenAI

# %%
'''
The code snippet below imports HumanMessage, SystemMessage and
AIMessage from the 'schema' module of the 'langchain' library.
'''

from langchain.schema import HumanMessage, SystemMessage, AIMessage

# %% create model
'''
Initialize the ChatOpenAI object.
We'll set temperature=0.7 to add randomness and make the outputs more creative.
The parameter model_name is provided with the value "gpt-3.5-turbo", a specific version or variant of a chat language model.
'''

chat = ChatOpenAI(
    temperature=0.7,
    base_url="https://oneapi.xty.app/v1",
    api_key="sk-Qg2sQRe2BTRDkjXCAaCa243017994a8aBf2e5bC26aE8Af99",
    # 报错307 就换一个模型试试
    model_name="gpt-3.5-turbo",  # OK
)

# %% chat with
'''
Chats with the ChatGPT model 'gpt-3.5-turbo' are typically structured like so:

System: You are a helpful assistant.
User: Hi AI, how are you today?
Assistant: I'm great, thank you. How can I help you?
User: I'd like to understand string theory.
Assistant:

The final "Assistant:" without a response is what prompts the model to continue the conversation.
'''

result = chat(
    [
        # set the system persona
        SystemMessage(content="You are a sarcastic AI assistant"),
        # the user's question
        HumanMessage(content="Please answer in 30 words:How can I learn driving a car")
    ]
)

print(result.content)
# %% more detail chat
'''
In the scenario below:
We are asking the model to behave in a specific way,
passing our question,
and also passing more context so that it can elaborate on that specific topic.
This gives us a better way to hold a conversation with the model, which can be used to build chatbots.
'''

ourConversation = chat(
    [
        SystemMessage(content="You are a 3 years old girl who answers very cutely and in a funny way"),
        HumanMessage(content="How can I learn driving a car"),
        AIMessage(content="I can't drive yet! But I have a driver,my dad..."),
        HumanMessage(content="Can you teach me driving?")
    ]
)
print(ourConversation.content)

3. Let’s Build Simple Conversational Application

Same as before: create it in a Hugging Face Space.

upload file


import streamlit as st


from langchain_community.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

# From here down is all the StreamLit UI.
st.set_page_config(page_title="LangChain Demo", page_icon=":robot:")
st.header("Hey, I'm your Chat GPT")



if "sessionMessages" not in st.session_state:
     st.session_state.sessionMessages = [
        SystemMessage(content="You are a helpful assistant.")
    ]



def load_answer(question):

    st.session_state.sessionMessages.append(HumanMessage(content=question))

    assistant_answer  = chat(st.session_state.sessionMessages )

    st.session_state.sessionMessages.append(AIMessage(content=assistant_answer.content))

    return assistant_answer.content


def get_text():
    input_text = st.text_input("You: ", key= input)
    return input_text


chat = ChatOpenAI(
    temperature=0, 
    api_key="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",
    # 报错307 就换一个模型试试
    model_name="gpt-3.5-turbo",  # OK
)




user_input=get_text()
submit = st.button('Generate')  

if submit:
    
    response = load_answer(user_input)
    st.subheader("Answer:")

    st.write(response)

8. Project #3 - Find Similar Things App For Kids

1. Text Embedding Walkthrough

2. Text Embeddings Practical Implementation using Python

# %% env
import os

os.environ["OPENAI_API_KEY"] = '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'
# %% import embeddings
# ImportError: Could not import tiktoken python package.
# This is needed in order to for OpenAIEmbeddings. Please install it with `pip install tiktoken`.
from langchain_community.embeddings import OpenAIEmbeddings

# %% Initialize the OpenAIEmbeddings object
embeddings = OpenAIEmbeddings()

# %%
our_txt = 'hey buddy'
txt_embedding = embeddings.embed_query(our_txt)
print(f'Our embedding has length {len(txt_embedding)}')
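If you want to pin the embedding model explicitly, it can be passed in; the model name below is an assumption (the library default at the time of these notes):

# Optional: pin the embedding model explicitly (model name is the assumed default)
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
print(len(embeddings.embed_query(our_txt)))  # vector dimensionality, e.g. 1536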

3. Embeddings Example using Python

data.xlsx

# %% env
import os

os.environ["OPENAI_API_KEY"] = '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'
# %% import embeddings
# ImportError: Could not import tiktoken python package.
# This is needed in order to for OpenAIEmbeddings. Please install it with `pip install tiktoken`.
from langchain_community.embeddings import OpenAIEmbeddings

# %% Initialize the OpenAIEmbeddings object
embeddings = OpenAIEmbeddings()

# %% data
import pandas as pd

# ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.
df = pd.read_excel('data.xlsx')
print(df)

# %%
'''
We can use "apply"to apply the get_embedding function to each row in the dataframe because our words are stored in a pandas dataframe.In order to save
time and to save the calculated word embeddings in a new csv file called "word_embeddings.csv"rather than calling OpenAl once more to carry out these
computations.
'''
# this can take a long time ...
df['embedding'] = df['Words'].apply(lambda x: embeddings.embed_query(x))
df.to_csv('word_embeddings.csv')

# %%
'''
Let's load the existing file, which contains the embeddings,
so that we can save charges by not hitting the API repeatedly
'''
new_df = pd.read_csv('word_embeddings.csv')
print(new_df)

# %% Let's get the embeddings for our text
our_txt = 'Mango'
txt_embedding = embeddings.embed_query(our_txt)
print(f'Our embedding is {txt_embedding}')

# %%
'''
Once we have a vector representing a word, we can determine how similar that word is to the other words in our DataFrame,
by computing the cosine similarity of the search term's word vector with each word embedding in the DataFrame.
'''
from openai import OpenAI

client = OpenAI()


def get_embedding(text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    return client.embeddings.create(
        input=[text],
        model=model
    ).data[0].embedding


# df['ada_embedding'] = df.combined.apply(lambda x: get_embedding(x, model='text-embedding-ada-002'))
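For a quick check, the helper above can be called on a single word (illustrative usage only):

# Illustrative call of the get_embedding helper defined above
mango_vec = get_embedding('Mango')
print(len(mango_vec))  # dimensionality of the vector, e.g. 1536 for text-embedding-ada-002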
# %% utils
import numpy as np


def calculate_cosine_similarity(vector_a, vector_b):
    # Compute the dot product of the two vectors
    dot_product = np.dot(vector_a, vector_b)

    # Compute the L2 norms (magnitudes) of each vector
    norm_a = np.linalg.norm(vector_a)
    norm_b = np.linalg.norm(vector_b)

    # Compute the cosine similarity
    # Note: We add a small epsilon value to the denominator for numerical stability
    epsilon = 1e-10
    cosine_similarity = dot_product / (norm_a * norm_b + epsilon)

    return cosine_similarity
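A quick sanity check with toy vectors (numpy is already imported above):

# Identical vectors give similarity ~1.0, orthogonal vectors ~0.0
a = np.array([1.0, 0.0])
b = np.array([0.0, 1.0])
print(calculate_cosine_similarity(a, a))  # ~1.0
print(calculate_cosine_similarity(a, b))  # ~0.0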


# def cosine_similarity(asingle, bsingle) -> np.double:
#     """return normalized dot product of two arrays"""
#     a = asingle.astype(np.double)
#     b = bsingle.astype(np.double)
#     return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


# %% similarity
# old openai versions (< 1.0) shipped a ready-made helper:
# from openai.embeddings_utils import cosine_similarity
# sklearn's cosine_similarity expects 2D arrays, so here we apply the helper defined above to the 1D vectors
df['similarity score'] = df['embedding'].apply(lambda x: calculate_cosine_similarity(x, txt_embedding))
print(df)

# %%
'''
Sorting the DataFrame by similarity score reveals that Banana, Orange, and Apple are closest to the searched term, Mango.
'''

res = df.sort_values('similarity score', ascending=False).head(10)
print(res)

4. Let’s build Similar Words Finder Application

#Allows you to use Streamlit, a framework for building interactive web applications.
#It provides functions for creating UIs, displaying data, and handling user inputs.
import streamlit as st


#This module provides a way to interact with the operating system, such as accessing environment variables, working with files
#and directories, executing shell commands, etc
import os

#Helps us generate embeddings
#An embedding is a vector (list) of floating point numbers. The distance between two vectors measures their relatedness. 
#Small distances suggest high relatedness and large distances suggest low relatedness.
from langchain_community.embeddings import OpenAIEmbeddings


#FAISS is an open-source library developed by Facebook AI Research for efficient similarity search and clustering of large-scale datasets, particularly with high-dimensional vectors. 
#It provides optimized indexing structures and algorithms for tasks like nearest neighbor search and recommendation systems.
from langchain_community.vectorstores import FAISS


#load_dotenv() is a function that loads variables from a .env file into environment variables in a Python script. 
#It allows you to store sensitive information or configuration settings separate from your code
#and access them within your application.

# pip3 install python-dotenv
from dotenv import load_dotenv


load_dotenv()
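For reference, the .env file placed next to the script just holds the key (same format as shown later in Project #4):

OPENAI_API_KEY="your-openai-api-key"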


#By using st.set_page_config(), you can customize the appearance of your Streamlit application's web page
st.set_page_config(page_title="Educate Kids", page_icon=":robot:")
st.header("Hey, Ask me something & I will give out similar things")

#Initialize the OpenAIEmbeddings object
embeddings = OpenAIEmbeddings()

#The below snippet helps us to import CSV file data for our tasks
from langchain_community.document_loaders.csv_loader import CSVLoader
loader = CSVLoader(file_path='myData.csv', csv_args={
    'delimiter': ',',
    'quotechar': '"',
    'fieldnames': ['Words']
})

#Assigning the data inside the csv to our variable here
data = loader.load()

#Display the data
print(data)

db = FAISS.from_documents(data, embeddings)

#Function to receive input from user and store it in a variable
def get_text():
    input_text = st.text_input("You: ", key= input)
    return input_text


user_input=get_text()
submit = st.button('Find similar Things')  

if submit:
    
    #If the button is clicked, the below snippet will fetch us the similar text
    docs = db.similarity_search(user_input)
    print(docs)
    st.subheader("Top Matches:")
    st.text(docs[0])
    st.text(docs[1].page_content)
streamlit run <your_file>.py --server.port 8888
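As an aside, the FAISS index built above can be persisted so the embeddings are not recomputed on every restart. A minimal sketch; the folder name "faiss_index" is arbitrary, and the allow_dangerous_deserialization flag is only needed (and accepted) on newer langchain-community versions:

# Persist the FAISS index to disk
db.save_local("faiss_index")

# ... and reload it later instead of rebuilding it
db = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,  # required by newer versions for pickle-based loading
)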

9. Langchain - Prompt Module Concept & Implementation Using Python

1. Prompts Module Introduction

2. Prompt Template Walkthrough

# %%
import os

os.environ['OPENAI_API_KEY'] = 'your-openai-api-key'
# %%
from langchain_community.llms import OpenAI

llm = OpenAI(model_name="gpt-3.5-turbo-instruct")

# %%
our_prompt = """
I love trips,and I have been to 6 countries.
I plan to visit few more soon.

Can you create a post for tweet in 10 words for the above?
"""  # 10 words -> 这个10 我们希望能动态指定

print(our_prompt)
# %%
res = llm(our_prompt)
print(res)

# %% Prompt Template
from langchain_community.llms import OpenAI
from langchain import PromptTemplate

llm = OpenAI(model_name="gpt-3.5-turbo-instruct")

# %% Using F-String
'''
F-strings are a Python feature that allows easy string formatting by placing variables inside curly braces within a string, making code more readable and efficient.
__Code:__
name = "Alice"
age = 25
message = f"My name is {name} and I am {age} years old."
print(message)

__Output:__
My name is Alice and I am 25 years old.
'''

wordsCount = 5
our_txt = "I love trips,and I have been to 6 countries. I plan to visit few more soon."
our_prompt = f"""
{our_txt}

Can you create a post for tweet in {wordsCount} words for the above?
"""
print(our_prompt)

# %%
print(llm(our_prompt))

# %% Using Prompt Template
# Prompt templates help us keep our code neat and clean when we are building more complex applications

template = """
{our_txt}

Can you create a post for tweet in {wordsCount} words for the above?
"""

# %%
prompt = PromptTemplate(
    input_variables=['wordsCount', 'our_txt'],
    template=template
)

final_prompt = prompt.format(
    wordsCount='3',
    our_txt=our_txt
)

print(final_prompt)
print(llm(final_prompt))
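Because the template is parameterized, the same PromptTemplate object can be reused with other values, for example:

# Reuse the same template with a different word count
print(llm(prompt.format(wordsCount='10', our_txt=our_txt)))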

3. Example Selectors Walkthrough

# %%
import os

os.environ['OPENAI_API_KEY'] = 'your-openai-api-key'
# %% Few Shot Templates
"""
Few-shot learning is a way to teach computers to make predictions using only a small amount of information. Instead of needing lots of examples, computers
can learn from just a few examples.
They find patterns in the examples and use those patterns to understand and recognize new things. It helps computers learn quickly and accurately with only a
little bit of information.
"""
from langchain_community.llms import OpenAI

# %%
'''
A prompt in NLP (Natural Language Processing) is a text
or instruction given to a language model to generate a response.
'''

our_prompt = """
You are a 5 year old girl, who is very funny, mischievous and sweet:

Question: What is a house?
Response: 
"""

llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo-instruct")
print(llm(our_prompt))

# %%
'''
We observe that though we have instructed the model to act as a little girl,
it's unable to do so, as it is very generic by nature.
So we will try to provide some external knowledge to get the perfect answers from it.
'''

our_prompt = """You are a s year old girl,who is very funny, mischievous and sweet:
Here are some examples:
Question: What is a mobile?
Response: A mobile is a magical device that fits in your pocket,like a mini-enchanted playground. It has games, videos, and talking pictures,but be careful,it can turn grown-ups into

Question: What are your dreams?
Response: My dreams are like colorful adventures,where I become a superhero and save the day!I dream of giggles,ice cream parties, and having a pet dragon named sparkles.

Question: What is a house?
Response: 
"""
print(llm(our_prompt))

# %%
'''
The FewShotPromptTemplate feature offered by LangChain allows for few-shot learning using prompts.

In the context of large language models (LLMs), the primary sources of knowledge are parametric knowledge (learned during model training) and source
knowledge (provided within the model input at inference time).

The FewShotPromptTemplate enables the inclusion of a few examples within prompts, which the model can read and use to apply to the user input, enhancing the
model's ability to handle specific tasks or scenarios.
'''

from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate  # few-shot prompt template

# %%
# Let's create a list of examples, that can be passed to the model later for our task
# the keys of each dict in examples correspond to the {variable} names in example_template
examples = [
    {
        'query': 'What is a mobile?',
        'answer': 'A mobile is a magical device that fits in your pocket,like a mini-enchanted playground. It has games, videos, and talking pictures,but be careful,it can turn grown-ups into'
    },
    {
        'query': 'What are your dreams?',
        'answer': 'My dreams are like colorful adventures,where I become a superhero and save the day!I dream of giggles,ice cream parties, and having a pet dragon named sparkles.'
    }
]

# %% Let's create an example template
example_template = """
Question: {query}
Response: {answer}
"""

# %% Let's create a prompt example from the example template created above
# example_template -> defines the example_prompt template
example_prompt = PromptTemplate(
    input_variables=['query', 'answer'],
    template=example_template
)

# %%
'''
The previous original prompt can be divided into a prefix and a suffix.
The prefix consists of the instructions or context given to the model, while the suffix includes the user input and the output indicator.
'''
prefix = """You are a s year old girl,who is very funny, mischievous and sweet:
Here are some examples:
"""

suffix = """
Question: {userInput}
Response: """

# %% Let's create a few shot prompt template,by using the above details

few_shot_prompt_template = FewShotPromptTemplate(
    # link the examples to the example_prompt template
    examples=examples,
    example_prompt=example_prompt,
    # prefix/suffix wrap the examples; input_variables names the user-supplied variable
    prefix=prefix,
    suffix=suffix,
    input_variables=['userInput'],
    example_separator="\n\n"
)

query = 'What is a house?'
print(few_shot_prompt_template.format(userInput=query))

print(llm(few_shot_prompt_template.format(userInput=query)))

4. Adding More Examples To Input Prompt

When passing many examples, you may exceed the model's context-window limit.

# %%
examples = [
    {
        'query': 'What is a mobile?',
        'answer': 'A mobile is a magical device that fits in your pocket,like a mini-enchanted playground. It has games, videos, and talking pictures,but be careful,it can turn grown-ups into'
    },
    {
        'query': 'What are your dreams?',
        'answer': 'My dreams are like colorful adventures,where I become a superhero and save the day!I dream of giggles,ice cream parties, and having a pet dragon named sparkles.'
    },
    {
        'query': 'What are your ambitions?',
        'answer': "I want to be a super funny comedian,spreading laughter everywhere I go!I also want to be a master cooki"
    },
    {
        'query': "what happens when you get sick?",
        'answer': "when I get sick,it's like a sneaky monster visits.I feel tired,sniffly,and need lots of cuddles.But"
    },
    {
        'query': "WHow much do you love your dad?",
        'answer': "oh,I love my dad to the moon and back,with sprinkles and unicorns on top!He's my superhero,my partner "
    },
    {
        'query': "Tell me about your friend?",
        'answer': "My friend is like a sunshine rainbow!We laugh,play,and have magical parties together.They always list"
    },
    {
        'query': "what math means to you?",
        'answer': "Math i like a puzzle game,full of numbers and shapes.It helps me count my toys,build towers,and shar"
    },
    {
        'query': "what is your fear?",
        'answer': "Sometimes I'm scared of thunderstorms and monsters under my bed.But with my teddy bear by my side and lo"
    }
]
# %%
'''
In the explanation above, we have been using a 'FewShotPromptTemplate' and an 'examples' dictionary, as this is a more robust approach than using a single f-string.

It offers features such as the ability to include or exclude examples based on the length of the query.
This is important because there is a maximum context-window limitation for prompt and generation output length.

The goal is to provide as many examples as possible for few-shot learning without exceeding the context window or increasing processing times excessively.
The dynamic inclusion/exclusion of examples means that we choose which examples to use based on certain rules. This helps us use the model's abilities in
the best way possible.

It allows us to be efficient and make the most out of the few-shot learning process.
'''
from langchain.prompts.example_selector import LengthBasedExampleSelector

# %%
'''
LengthBasedExampleSelector - this ExampleSelector chooses examples based on length, useful to prevent the prompt from exceeding the context window.
It selects fewer examples for longer inputs and more for shorter ones, ensuring the prompt fits within limits.

The maximum length of the formatted examples is set via 'max_length' (measured in words by default).
To determine which examples to include, the length of a string is measured using the get_text_length function, which is provided as a default value if not
specified.
'''
example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=50
)

# %%
'''
Creating a new dynamic few shot prompt template
And we are passing example_selector instead of examples as earlier
'''
new_prompt_template = FewShotPromptTemplate(
    example_selector=example_selector,  # use example_selector instead of examples
    example_prompt=example_prompt,
    # prefix/suffix wrap the examples; input_variables names the user-supplied variable
    prefix=prefix,
    suffix=suffix,
    input_variables=['userInput'],
    example_separator="\n\n"
)

# %%
query = 'What is a house?'
print(new_prompt_template.format(userInput=query))

print(llm(new_prompt_template.format(userInput=query)))

# %% We can also add an extra example to the example selector we already have.
new_example = {"query": "What's your favorite work?", 'answer': "sleep"}
new_prompt_template.example_selector.add_example(new_example)

# the template's selector now also considers the new example
print(new_prompt_template.format(userInput=query))
print(llm(new_prompt_template.format(userInput=query)))

5. Output Parsers Walkthrough

CommaSeparatedListOutputParser

# %%
import os

os.environ['OPENAI_API_KEY'] = 'your-openai-api-key'
from langchain.prompts import PromptTemplate
from langchain_community.llms import OpenAI
# %% Comma Separated List
from langchain.output_parsers import CommaSeparatedListOutputParser

# Creating an object of CommaSeparatedListOutputParser
# its format instructions ask the model for a comma-separated list, e.g. `foo, bar, baz`
# (a structured JSON example follows below)
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

# Your response should be a list of comma separated values, eg: `foo, bar, baz`
print(format_instructions)
# %%
prompt = PromptTemplate(
    template="Provide 5 examples of {query}.\n{format_instructions}",
    input_variables=['query'],
    partial_variables={'format_instructions': format_instructions}
)

# llm = OpenAI(temperature=0.9, model="gpt-3.5-turbo-instruct")
llm = OpenAI(temperature=0.9)

prompt = prompt.format(query="Currencies")  # Currencies: 货币实例
print(prompt)

output = llm(prompt)
print(output)
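The raw completion can then be turned into a Python list with the same parser (a small illustrative step, assuming the model followed the format instructions):

# Parse the comma-separated completion into a Python list
parsed = output_parser.parse(output)
print(parsed)  # e.g. ['US Dollar', 'Euro', 'Japanese Yen', 'British Pound', 'Indian Rupee']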

# %% JSON format
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# specify which fields the model should return
'''json
{
    'currency': '',
    'abbrevation': ''
}
'''
response_schema = [
    ResponseSchema(name='currency', description="answer to the user's question"),
    # abbrevation: abbreviation (the misspelling is kept, since it is the field name used throughout this example)
    ResponseSchema(name='abbrevation', description="What's the abbrevation of that currency"),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schema)
print(output_parser)

format_instructions = output_parser.get_format_instructions()
print(format_instructions)

prompt = PromptTemplate(
    template="answer the users question as best as possible.\n{format_instructions}\n{query}",
    input_variables=['query'],  # the user's input
    partial_variables={'format_instructions': format_instructions}  # the format instructions injected into the prompt
)
print(prompt)

prompt = prompt.format(query='what is the currency of india?')
print(llm(prompt))
'''
json
{
    "currency": "Indian rupee",
    "abbrevation": "INR"
}
'''
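Likewise, the StructuredOutputParser can turn the model's fenced JSON into a Python dict (illustrative, assuming the completion follows the format instructions):

# Parse the structured completion into a dict, e.g. {'currency': 'Indian rupee', 'abbrevation': 'INR'}
response = llm(prompt)
parsed = output_parser.parse(response)
print(parsed['currency'], parsed['abbrevation'])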

10. Project #4 - Marketing Campaign App

1. Convert Jupyter Notebook to Python Script

pip install -r requirements.txt

# %%
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector

import os

os.environ["OPENAI_API_KEY"] = "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"

llm = OpenAI(temperature=.9)

examples = [
    {
        "query": "What is a mobile?",
        "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
    }, {
        "query": "What are your dreams?",
        "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
    }, {
        "query": " What are your ambitions?",
        "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
    }, {
        "query": "What happens when you get sick?",
        "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
    }, {
        "query": "WHow much do you love your dad?",
        "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
    }, {
        "query": "Tell me about your friend?",
        "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
    }, {
        "query": "What math means to you?",
        "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
    }, {
        "query": "What is your fear?",
        "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
    }
]

example_template = """
Question: {query}
Response: {answer}
"""

example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)

prefix = """You are a 5 year old girl, who is very funny,mischievous and sweet: 
Here are some examples: 
"""

suffix = """
Question: {userInput}
Response: """

example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=200
)

new_prompt_template = FewShotPromptTemplate(
    example_selector=example_selector,  # use example_selector instead of examples
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["userInput"],
    example_separator="\n"
)

query = "What is a house?"
print(new_prompt_template.format(userInput=query))

print(llm(new_prompt_template.format(userInput=query)))

2. Building the App’s frontend

import streamlit as st
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from dotenv import load_dotenv

load_dotenv()

llm = OpenAI(temperature=.9)

examples = [
    {
        "query": "What is a mobile?",
        "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
    }, {
        "query": "What are your dreams?",
        "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
    }, {
        "query": " What are your ambitions?",
        "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
    }, {
        "query": "What happens when you get sick?",
        "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
    }, {
        "query": "WHow much do you love your dad?",
        "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
    }, {
        "query": "Tell me about your friend?",
        "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
    }, {
        "query": "What math means to you?",
        "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
    }, {
        "query": "What is your fear?",
        "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
    }
]

example_template = """
Question: {query}
Response: {answer}
"""

example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)

prefix = """You are a 5 year old girl, who is very funny,mischievous and sweet: 
Here are some examples: 
"""

suffix = """
Question: {userInput}
Response: """

example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=200
)

new_prompt_template = FewShotPromptTemplate(
    example_selector=example_selector,  # use example_selector instead of examples
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["userInput"],
    example_separator="\n"
)

query = "What is a house?"
print(new_prompt_template.format(userInput=query))

# print(llm(new_prompt_template.format(userInput=query)))

# %% UI
# %% basic page configuration
st.set_page_config(
    page_title='Marketing Tool',
    page_icon='✔',
    layout='centered',
    initial_sidebar_state='collapsed'
)
# page header
st.header("Hey, How can I help you?")

# create the text area for the user's input
form_input = st.text_area('Enter text', height=275)

# create a select box for the task type
tasktype_option = st.selectbox(
    'Please select the action to be performed?',
    # options
    ('Write a sales copy', 'Create a tweet', 'Write a product description'),
    key=1
)

# create a select box for the target age group
age_option = st.selectbox(
    'For which age group?',
    # options
    ('Kid', 'Adult', 'Senior Citizen'),
    key=2
)

# create a slider: range 1-200, default 25
numberOfWords = st.slider('Words limit', 1, 200, 25)

# create the Generate button
submit = st.button("Generate")
streamlit run app.py --server.port 8888

3. Integration of Frontend & Backend

# OPENAI_API_KEY="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"
OPENAI_API_KEY="sk-Qg2sQRe2BTRDkjXCAaCa243017994a8aBf2e5bC26aE8Af99"
OPENAI_API_BASE="https://oneapi.xty.app/v1"

what is a laptop?

import streamlit as st
# from langchain_community.llms import OpenAI
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
# from langchain import FewShotPromptTemplate
from langchain.prompts import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from dotenv import load_dotenv

load_dotenv()

llm = OpenAI(temperature=.9)

examples = [
    {
        "query": "What is a mobile?",
        "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
    }, {
        "query": "What are your dreams?",
        "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
    }, {
        "query": " What are your ambitions?",
        "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
    }, {
        "query": "What happens when you get sick?",
        "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
    }, {
        "query": "WHow much do you love your dad?",
        "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
    }, {
        "query": "Tell me about your friend?",
        "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
    }, {
        "query": "What math means to you?",
        "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
    }, {
        "query": "What is your fear?",
        "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
    }
]

# %% UI
# %% basic page configuration
st.set_page_config(
    page_title='Marketing Tool',
    page_icon=':robot:',
    layout='centered',
    initial_sidebar_state='collapsed'
)
# page header
st.header("Hey, How can I help you?")

# create the text area for the user's input
form_input = st.text_area('Enter text', height=275)

# create a select box for the task type
tasktype_option = st.selectbox(
    'Please select the action to be performed?',
    # options
    ('Write a sales copy', 'Create a tweet', 'Write a product description'),
    key=1
)

# create a select box for the target age group
age_option = st.selectbox(
    'For which age group?',
    # options
    ('Kid', 'Adult', 'Senior Citizen'),
    key=2
)

# create a slider: range 1-200, default 25
numberOfWords = st.slider('Words limit', 1, 200, 25)

# create the Generate button
submit = st.button("Generate")

# %% backend
example_template = """
Question: {query}
Response: {answer}
"""

example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)

prefix = """You are a {template_age_option}, and {template_tasktype_option}: 
Here are some examples: 
"""

suffix = """
Question: {template_userInput}
Response: """

example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=200
)

new_prompt_template = FewShotPromptTemplate(
    example_selector=example_selector,  # use example_selector instead of examples
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["template_userInput", 'template_age_option', 'template_tasktype_option'],
    example_separator="\n"
)

query = form_input

# print(new_prompt_template.format(
#     template_userInput=query,
#     template_age_option=age_option,
#     template_tasktype_option=tasktype_option
# ))

print(llm(new_prompt_template.format(
    template_userInput=query,
    template_age_option=age_option,
    template_tasktype_option=tasktype_option
)))

4. Modularization of Code

import streamlit as st
# from langchain_community.llms import OpenAI
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
# from langchain import FewShotPromptTemplate
from langchain.prompts import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from dotenv import load_dotenv

load_dotenv()

# %% UI
# %% basic page configuration
st.set_page_config(
    page_title='Marketing Tool',
    page_icon=':robot:',
    layout='centered',
    initial_sidebar_state='collapsed'
)
# page header
st.header("Hey, How can I help you?")

# create the text area for the user's input
form_input = st.text_area('Enter text', height=275)

# create a select box for the task type
tasktype_option = st.selectbox(
    'Please select the action to be performed?',
    # options
    ('Write a sales copy', 'Create a tweet', 'Write a product description'),
    key=1
)

# create a select box for the target age group
age_option = st.selectbox(
    'For which age group?',
    # options
    ('Kid', 'Adult', 'Senior Citizen'),
    key=2
)

# create a slider: range 1-200, default 25
numberOfWords = st.slider('Words limit', 1, 200, 25)

# create the Generate button
submit = st.button("Generate")


# %% backend
def getLLMResponse(query, age_option, tasktype_option):
    llm = OpenAI(temperature=.9)

    examples = [
        {
            "query": "What is a mobile?",
            "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
        }, {
            "query": "What are your dreams?",
            "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
        }, {
            "query": " What are your ambitions?",
            "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
        }, {
            "query": "What happens when you get sick?",
            "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
        }, {
            "query": "WHow much do you love your dad?",
            "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
        }, {
            "query": "Tell me about your friend?",
            "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
        }, {
            "query": "What math means to you?",
            "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
        }, {
            "query": "What is your fear?",
            "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
        }
    ]

    example_template = """
    Question: {query}
    Response: {answer}
    """

    example_prompt = PromptTemplate(
        input_variables=["query", "answer"],
        template=example_template
    )

    prefix = """You are a {template_age_option}, and {template_tasktype_option}: 
    Here are some examples: 
    """

    suffix = """
    Question: {template_userInput}
    Response: """

    example_selector = LengthBasedExampleSelector(
        examples=examples,
        example_prompt=example_prompt,
        max_length=200
    )

    new_prompt_template = FewShotPromptTemplate(
        example_selector=example_selector,  # use example_selector instead of examples
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["template_userInput", 'template_age_option', 'template_tasktype_option'],
        example_separator="\n"
    )

    # print(new_prompt_template.format(
    #     template_userInput=query,
    #     template_age_option=age_option,
    #     template_tasktype_option=tasktype_option
    # ))

    res = llm(new_prompt_template.format(
        template_userInput=query,
        template_age_option=age_option,
        template_tasktype_option=tasktype_option
    ))
    print(res)

    return res


# if the Generate button was clicked
if submit:
    # write the LLM's response to the page
    st.write(getLLMResponse(form_input, age_option, tasktype_option))

5. Adding Examples - Kids, Adult & Senior Citizen

import streamlit as st
# from langchain_community.llms import OpenAI
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
# from langchain import FewShotPromptTemplate
from langchain.prompts import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from dotenv import load_dotenv

load_dotenv()

# %% UI
# %% basic page configuration
st.set_page_config(
    page_title='Marketing Tool',
    page_icon=':robot:',
    layout='centered',
    initial_sidebar_state='collapsed'
)
# page header
st.header("Hey, How can I help you?")

# create the text area for the user's input
form_input = st.text_area('Enter text', height=275)

# create a select box for the task type
tasktype_option = st.selectbox(
    'Please select the action to be performed?',
    # options
    ('Write a sales copy', 'Create a tweet', 'Write a product description'),
    key=1
)

# create a select box for the target age group
age_option = st.selectbox(
    'For which age group?',
    # options
    ('Kid', 'Adult', 'Senior Citizen'),
    key=2
)

# create a slider: range 1-200, default 25
numberOfWords = st.slider('Words limit', 1, 200, 25)

# create the Generate button
submit = st.button("Generate")


# %% backend
def getLLMResponse(query, age_option, tasktype_option):
    llm = OpenAI(temperature=.9)

    if age_option == "Kid":

        examples = [
            {
                "query": "What is a mobile?",
                "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
            }, {
                "query": "What are your dreams?",
                "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
            }, {
                "query": " What are your ambitions?",
                "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
            }, {
                "query": "What happens when you get sick?",
                "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
            }, {
                "query": "WHow much do you love your dad?",
                "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
            }, {
                "query": "Tell me about your friend?",
                "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
            }, {
                "query": "What math means to you?",
                "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
            }, {
                "query": "What is your fear?",
                "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
            }
        ]

    elif age_option == "Adult":
        examples = [
            {
                "query": "What is a mobile?",
                "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
            }, {
                "query": "What are your dreams?",
                "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
            }, {
                "query": " What are your ambitions?",
                "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
            }, {
                "query": "What happens when you get sick?",
                "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
            }, {
                "query": "WHow much do you love your dad?",
                "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
            }, {
                "query": "Tell me about your friend?",
                "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
            }, {
                "query": "What math means to you?",
                "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
            }, {
                "query": "What is your fear?",
                "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
            }
        ]

    elif age_option == "Senior Citizen":
        examples = [
            {
                "query": "What is a mobile?",
                "answer": "A mobile is a magical device that fits in your pocket, like a mini-enchanted playground. It has games, videos, and talking pictures, but be careful, it can turn grown-ups into screen-time monsters too!"
            }, {
                "query": "What are your dreams?",
                "answer": "My dreams are like colorful adventures, where I become a superhero and save the day! I dream of giggles, ice cream parties, and having a pet dragon named Sparkles.."
            }, {
                "query": " What are your ambitions?",
                "answer": "I want to be a super funny comedian, spreading laughter everywhere I go! I also want to be a master cookie baker and a professional blanket fort builder. Being mischievous and sweet is just my bonus superpower!"
            }, {
                "query": "What happens when you get sick?",
                "answer": "When I get sick, it's like a sneaky monster visits. I feel tired, sniffly, and need lots of cuddles. But don't worry, with medicine, rest, and love, I bounce back to being a mischievous sweetheart!"
            }, {
                "query": "WHow much do you love your dad?",
                "answer": "Oh, I love my dad to the moon and back, with sprinkles and unicorns on top! He's my superhero, my partner in silly adventures, and the one who gives the best tickles and hugs!"
            }, {
                "query": "Tell me about your friend?",
                "answer": "My friend is like a sunshine rainbow! We laugh, play, and have magical parties together. They always listen, share their toys, and make me feel special. Friendship is the best adventure!"
            }, {
                "query": "What math means to you?",
                "answer": "Math is like a puzzle game, full of numbers and shapes. It helps me count my toys, build towers, and share treats equally. It's fun and makes my brain sparkle!"
            }, {
                "query": "What is your fear?",
                "answer": "Sometimes I'm scared of thunderstorms and monsters under my bed. But with my teddy bear by my side and lots of cuddles, I feel safe and brave again!"
            }
        ]

    example_template = """
    Question: {query}
    Response: {answer}
    """

    example_prompt = PromptTemplate(
        input_variables=["query", "answer"],
        template=example_template
    )

    prefix = """You are a {template_age_option}, and {template_tasktype_option}: 
    Here are some examples: 
    """

    suffix = """
    Question: {template_userInput}
    Response: """

    example_selector = LengthBasedExampleSelector(
        examples=examples,
        example_prompt=example_prompt,
        max_length=200
    )

    new_prompt_template = FewShotPromptTemplate(
        example_selector=example_selector,  # use example_selector instead of examples
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["template_userInput", 'template_age_option', 'template_tasktype_option'],
        example_separator="\n"
    )

    # print(new_prompt_template.format(
    #     template_userInput=query,
    #     template_age_option=age_option,
    #     template_tasktype_option=tasktype_option
    # ))

    res = llm(new_prompt_template.format(
        template_userInput=query,
        template_age_option=age_option,
        template_tasktype_option=tasktype_option
    ))
    print(res)

    return res


# if the Generate button was clicked
if submit:
    # write the LLM's response to the page
    st.write(getLLMResponse(form_input, age_option, tasktype_option))

11. Langchain - Memory Module Concept

1. Importance of Memory in LLM powered Apps


There are multiple types of Memory.

pip install langchain openai tiktoken

# %% import
from langchain_openai import OpenAI
from langchain.chains import LLMChain, ConversationChain
from langchain.chains.conversation.memory import (
    ConversationBufferMemory, ConversationSummaryMemory, ConversationBufferWindowMemory
)
import tiktoken
from langchain.memory import ConversationTokenBufferMemory

# %%
'''
Tiktoken, developed by OpenAI, is a tool used for text tokenization.
Tokenization involves dividing a text into smaller units, such as letters or words. Tiktoken allows you to count tokens and estimate the cost of using the OpenAI
API, which is based on token usage. It utilizes byte pair encoding (BPE), a compression algorithm that replaces frequently occurring pairs of bytes with a single
byte.
In summary, Tiktoken helps with efficient text processing, token counting, and cost estimation for using OpenAI's API.
'''
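A tiny illustration of what tiktoken does; the encoding name cl100k_base is an assumption (it is the one used by gpt-3.5-turbo-class models):

# Count tokens for a short string with tiktoken
enc = tiktoken.get_encoding("cl100k_base")
print(len(enc.encode("Good morning AI")))  # number of tokens, e.g. 3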
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"

# %% model
# llm = OpenAI(temperature=0, model_name='gpt-4')
llm = OpenAI(temperature=0)

# %%
'''
What is a Memory?
Chains and Agents operate as stateless, treating each query independently. However, in applications like chatbots, it's crucial to remember past interactions.
The concept of "Memory" serves that purpose.
'''

# %% Different Types Of Memories
'''
Imagine you're having a conversation with someone, and you want to remember what you've discussed so far.
The ConversationBufferMemory does exactly that in a chatbot or similar system. It keeps a record, or "buffer," of the past parts of the conversation.

This buffer is an essential part of the context, which helps the chatbot generate better responses. The unique thing about this memory is that it stores the
previous conversation exactly as it was, without any changes.

It preserves the raw form of the conversation, allowing the chatbot to refer back to specific parts accurately. In summary, the
ConversationBufferMemory helps the chatbot remember the conversation history, enhancing the overall conversational experience.

Pros of ConversationBufferMemory:
    * Complete conversation history: It retains the entire conversation history, ensuring comprehensive context for the chatbot.
    * Accurate references: By storing conversation excerpts in their original form, it enables precise referencing to past interactions, enhancing accuracy.
    * Contextual understanding: The preserved raw form of the conversation helps the chatbot maintain a deep understanding of the ongoing dialogue.
    * Enhanced responses: With access to the complete conversation history, the chatbot can generate more relevant and coherent responses.

Cons of ConversationBufferMemory:
    * Increased memory usage: Storing the entire conversation history consumes memory resources, potentially leading to memory constraints.
    * Potential performance impact: Large conversation buffers may slow down processing and response times, affecting the overall system performance.
    * Limited scalability: As the conversation grows, the memory requirements and processing load may become impractical for extremely long conversations.
    * Privacy concerns: Storing the entire conversation history raises privacy considerations, as sensitive or personal information may be retained in the buffer.
'''
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    # ConversationBufferMemory is just a wrapper around ChatMessageHistory, used to extract the messages into a variable
    memory=ConversationBufferMemory()
)
# %%
# Let's have a look at the prompt template that is being sent to the LLM
print(conversation.prompt.template)

conversation("Good morning AI")
conversation("My name is Steve")  # 如果名称小写, ai可能不认
conversation("My sister is Sandy")
print(conversation.predict(input="I stay in hyderabad, India"))

print(conversation.memory.buffer)
print(conversation.predict(input="What is my name?"))
print(conversation.predict(input="Who is my sister?"))

2. Different Types of Memory

With BufferWindow we specify how many (k) context messages to keep, but we often cannot know in advance how many is appropriate

# %% import
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import (
    ConversationBufferMemory, ConversationSummaryMemory, ConversationBufferWindowMemory
)
from langchain_openai import OpenAI

# %%
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"

# %% model
# llm = OpenAI(temperature=0, model_name='gpt-4')
llm = OpenAI(temperature=0)


# %% Different Types Of Memories
# ConversationBufferMemory
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    # ConversationBufferMemory is just a wrapper around ChatMessageHistory, used to extract the messages into a variable
    memory=ConversationBufferMemory()
)
# %%
# Let's have a look at the prompt template that is being sent to the LLM
print(conversation.prompt.template)

conversation("Good morning AI")
conversation("My name is Steve")  # 如果名称小写, ai可能不认
conversation("My sister is Sandy")
print(conversation.predict(input="I stay in hyderabad, India"))

print(conversation.memory.buffer)
print(conversation.predict(input="What is my name?"))
print(conversation.predict(input="Who is my sister?"))

# %%
'''
ConversationBufferWindowMemory

Imagine you have a limited space in your memory to remember recent conversations.

The ConversationBufferWindowMemory is like having a short-term memory that only keeps track of the most recent interactions. It intentionally
drops the oldest ones to make room for new ones.

This helps manage the memory load and reduces the number of tokens used. The important thing is that it still keeps the latest parts of the conversation in
their original form, without any modifications.
So, it retains the most recent information for the chatbot to refer to, ensuring a more efficient and up-to-date conversation experience.

Pros of ConversationBufferWindowMemory:

    * Efficient memory utilization: It maintains a limited memory space by only retaining the most recent interactions, optimizing memory usage.
    * Reduced token count: Dropping the oldest interactions helps to keep the token count low, preventing potential token limitations.
    * Unmodified context retention: The latest parts of the conversation are preserved in their original form, ensuring accurate references and contextual understanding.
    * Up-to-date conversations: By focusing on recent interactions, it allows the chatbot to stay current and provide more relevant responses.

Cons of ConversationBufferWindowMemory:
    * Limited historical context: Since older interactions are intentionally dropped, the chatbot loses access to the complete conversation history, potentially impacting long-term context and accuracy.
    * Loss of older information: Valuable insights or details from earlier interactions are not retained, limiting the chatbot's ability to refer back to past conversations.
    * Reduced depth of understanding: Without the full conversation history, the chatbot may have a shallower understanding of the user's context and needs.
    * Potential loss of context relevance: Important information or context from older interactions may be disregarded, affecting the chatbot's ability to provide comprehensive responses in certain scenarios.
'''
# ConversationBufferWindowMemory
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    # ConversationBufferWindowMemory keeps a list of the interactions over time.
    # It only uses the last K interactions, keeping a sliding window of the most recent exchanges so the buffer does not grow too large.
    memory=ConversationBufferWindowMemory(k=1)
)

# %%
# Let's have a look at the prompt template that is being sent to the LLM
'''
Current conversation:
{history}
Human: {input}
AI:
'''
print(conversation.prompt.template)

# predict() also adds the exchange to the history; earlier we triggered this by calling conversation("...") directly
conversation.predict(input="Good morning AI")
conversation.predict(input="My name is Steve")  # 如果名称小写, ai可能不认
conversation.predict(input="My sister is Sandy")

# We set k=1, so only one interaction is kept
'''
memory:
    Human: My sister is Sandy
'''
print(f'memory:\n\t{conversation.memory.buffer}')
# Your sister is Sandy.
print(conversation.predict(input="Who is my sister?"))
# Your name is not specified in my database.
print(conversation.predict(input="What is my name?"))

# %% ConversationSummaryMemory -> stores a summary of the conversation instead of the raw turns
'''
With the ConversationBufferMemory, the length of the conversation keeps increasing, which can become a problem if it becomes too large for our LLM to
handle.

To overcome this, we introduce ConversationSummaryMemory. It keeps a summary of our past conversation snippets as our history. But how does it
summarize? Here comes the LLM to the rescue! The LLM (Language Model) helps in condensing or summarizing the conversation, capturing the key
information.

So, instead of storing the entire conversation, we store a summarized version. This helps manage the token count and allows the LLM to process the
conversation effectively. In summary, ConversationSummaryMemory keeps a condensed version of previous conversations using the power of LLM
summarization.

Pros of ConversationSummaryMemory:
    * Efficient memory management: It keeps the conversation history in a summarized form, reducing the memory load.
    * Improved processing: By condensing the conversation snippets, it makes it easier for the language model to process and generate responses.
    * Avoiding maxing out limitations: It helps prevent exceeding the token count limit, ensuring the prompt remains within the processing capacity of the model.
    * Retains important information: The summary captures the essential aspects of previous interactions, allowing relevant context to be maintained.

Cons of ConversationSummaryMemory:
    * Potential loss of detail: Since the conversation is summarized, some specific details or nuances from earlier interactions might be omitted.
    * Reliance on summarization quality: The accuracy and effectiveness of the summarization process depend on the language model's capability, which might introduce potential errors or misinterpretations.
    * Limited historical context: Due to summarization, the model's access to the complete conversation history may be limited, potentially impacting the depth of understanding.
    * Reduced granularity: The summarized form may lack the fine-grained information present in the original conversation, potentially affecting the accuracy of responses in certain scenarios.
'''

conversation = ConversationChain(
    llm=llm,
    verbose=True,
    memory=ConversationSummaryMemory(llm=llm)
)

# %%
# Let's have a look at the prompt template that is being sent to the LLM
'''
template:
    The following is a friendly conversation between a human and an AI. 
    The AI is talkative and provides lots of specific details from its context. 
    If the AI does not know the answer to a question, it truthfully says it does not know.
'''
print(f'template:\n\t{conversation.prompt.template}')

conversation.predict(input="Good morning AI")
conversation.predict(input="My name is Steve")  # 如果名称小写, ai可能不认
conversation.predict(input="My sister is Sandy")

'''
memory:
    
The human greets the AI and the AI responds with the current time and conditions in its server room. 
The AI then asks how it can assist the human, addressing them by name. 
The human reveals their sister's name and the AI greets her by name, providing the current time and conditions in the server room. 
The AI then asks how it can assist the human today.
'''
print(f'memory:\n\t{conversation.memory.buffer}')
# Your name is [insert name here]. How can I assist you today?
print(conversation.predict(input="What is my name?"))

# %% ConversationTokenBufferMemory
import tiktoken
from langchain.memory import ConversationTokenBufferMemory

'''
ConversationTokenBufferMemory is a memory mechanism that stores recent interactions in a buffer within the system's memory.
Unlike other methods that rely on the number of interactions, this memory system determines when to clear or flush interactions based on the length of tokens
used.

Tokens are units of text, like words or characters, and the buffer is cleared when the token count exceeds a certain threshold. By using token length as a
criterion, the memory system ensures that the buffer remains manageable in terms of memory usage.
This approach helps maintain efficient memory management and enables the system to handle conversations of varying lengths effectively.

Pros of ConversationTokenBufferMemory:
    * Efficient memory management: By using token length instead of the number of interactions, the memory system optimizes memory usage and prevents excessive memory consumption.
    * Flexible buffer size: The system adapts to conversations of varying lengths, ensuring that the buffer remains manageable and scalable.
    * Accurate threshold determination: Flushing interactions based on token count provides a more precise measure of memory usage, resulting in a better balance between memory efficiency and retaining relevant context.
    * Improved system performance: With efficient memory utilization, the overall performance of the system, including response times and processing speed, can be enhanced.

Cons of ConversationTokenBufferMemory:
    * Potential loss of context: Flushing interactions based on token length may result in the removal of earlier interactions that could contain important context or information, potentially affecting the accuracy of responses.
    * Complexity in threshold setting: Determining the appropriate token count threshold for flushing interactions may require careful consideration and experimentation to find the optimal balance between memory usage and context retention.
    * Difficulty in long-term context retention: Due to the dynamic nature of token-based flushing, retaining long-term context in the conversation may pose challenges as older interactions are more likely to be removed from the buffer.
    * Impact on response quality: In situations where high-context conversations are required, the token-based flushing approach may lead to a reduction in the depth of understanding and the quality of responses.
'''

conversation = ConversationChain(
    llm=llm,
    verbose=True,
    # ConversationTokenBufferMemory keeps the most recent conversation content in memory and uses token length, not the number of interactions, to decide when to flush it
    memory=ConversationTokenBufferMemory(llm=llm, max_token_limit=60)
)

# %%
# Let's have a look at the prompt template that is being sent to the LLM
'''
template:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
{history}
Human: {input}
AI: 
'''
print(f'template:\n\t{conversation.prompt.template}')

conversation.predict(input="Good morning AI")
conversation.predict(input="My name is Steve")  # 如果名称小写, ai可能不认
conversation.predict(input="My sister is Sandy")

'''
memory:
    Human: My sister is Sandy 
'''
print(f'memory:\n\t{conversation.memory.buffer}')
# Your name is Steve. Did you forget?
print(conversation.predict(input="What is my name?"))

12. Project #5 - ChatGPT Clone with Summarization Option

1. ChatGPT Clone Demo

2. Setting up the Project

# %% import
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import (
    ConversationBufferMemory, ConversationSummaryMemory, ConversationBufferWindowMemory
)
from langchain_openai import OpenAI

# %% env     
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"

# %% model
# llm = OpenAI(temperature=0, model_name='gpt-4')
llm = OpenAI(temperature=0)

# %% conversation
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    # ConversationBufferMemory is just a wrapper around ChatMessageHistory, used to extract the messages into a variable
    memory=ConversationBufferMemory()
)

conversation("Good morning AI")
conversation("My name is Steve")
print(conversation.predict(input="I stay in hyderabad, India"))
print(conversation.memory.buffer)
print(conversation.predict(input="Who is my sister?"))

requirements.txt

langchain
streamlit

3. Implementing the Frontend

# %% import
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import (
    ConversationBufferMemory, ConversationSummaryMemory, ConversationBufferWindowMemory
)
from langchain_openai import OpenAI

# %%
import streamlit as st

st.set_page_config(page_title="Chat GPT Clone", page_icon=":robot_face:")
st.markdown("<h1 style='text-align: center;'>How can I assist you? </h1>", unsafe_allow_html=True)

st.sidebar.title("😎")
# Sidebar text input to capture the API key
api_key = st.sidebar.text_input("What's your API key?", type="password")
# api_base = st.sidebar.text_input("What's your API base?", type="default")
# Design: when this button is clicked, the conversation summary will be shown
summarise_button = st.sidebar.button("Summarise the conversation", key='summarise')
# When the button is clicked
if summarise_button:
    summarise_placeholder = st.sidebar.write("Nice chatting with you my friend ❤:\n\n" + "Hello friend")

# %% env
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"

# %% model
# llm = OpenAI(temperature=0, model_name='gpt-4')
llm = OpenAI(temperature=0)

# %% conversation
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    # ConversationBufferMemory is just a wrapper around ChatMessageHistory, used to extract the messages into a variable
    memory=ConversationBufferMemory()
) 

# %%
# Container that holds the responses
response_container = st.container()
# Container that holds the user's input
container = st.container()

with container:
    # clear_on_submit clears the form contents after submission
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_area("Your question goes here:", key='input', height=100)
        submit_btn = st.form_submit_button('Send')

with response_container:
    st.write("Response container")

4. Modularizing the Code

###
# %% env
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"


# %% model
def get_response(userInput):
    # llm = OpenAI(temperature=0, model_name='gpt-4')
    llm = OpenAI(temperature=0)

    # conversation
    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        # ConversationBufferMemory is just a wrapper around ChatMessageHistory, used to extract the messages into a variable
        memory=ConversationBufferMemory()
    )

    # conversation("Good morning AI")
    # conversation("My name is Steve")
    # print(conversation.predict(input="I stay in hyderabad, India"))
    # print(conversation.memory.buffer)
    res = conversation.predict(input=userInput)

    return res


# %%
# Container that holds the responses
response_container = st.container()
# Container that holds the user's input
container = st.container()

# After the form is submitted, the input container is pushed down; once the response is ready, the response container above it shows the AI's answer
with container:
    # clear_on_submit clears the form contents after submission
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_area("Your question goes here:", key='input', height=100)
        submit_btn = st.form_submit_button('Send')
        if submit_btn:
            answer = get_response(user_input)

            # Echo the answer back
            with response_container:
                st.write(answer)

5. Passing Dynamic Data

# Create a session-state entry to hold the conversation
if 'conversation' not in st.session_state:
    st.session_state['conversation'] = None
 
...

# %% model
def get_response(userInput):
    # If there is no conversation yet, create one; creating it only once prevents every call from rebuilding the chain and wiping the previous history
    if st.session_state['conversation'] is None:
        # llm = OpenAI(temperature=0, model_name='gpt-4')
        llm = OpenAI(temperature=0)

        # conversation
        st.session_state['conversation'] = ConversationChain(
            llm=llm,
            verbose=True,
            # ConversationBufferMemory is just a wrapper around ChatMessageHistory, used to extract the messages into a variable
            memory=ConversationBufferMemory()
        )

    # conversation("Good morning AI")
    # conversation("My name is Steve")
    # print(conversation.predict(input="I stay in hyderabad, India"))
    # print(conversation.memory.buffer)
    res = st.session_state['conversation'].predict(input=userInput)

    return res
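
Why session state? Streamlit re-runs the whole script on every widget interaction, so anything created at module level (like a ConversationChain) would be rebuilt and its memory lost. st.session_state survives re-runs. A tiny standalone sketch of the pattern (not part of the app code):

import streamlit as st

# st.session_state keeps values across re-runs of the script
if 'counter' not in st.session_state:
    st.session_state['counter'] = 0

if st.button("Click me"):
    st.session_state['counter'] += 1

st.write(f"Button clicked {st.session_state['counter']} times")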

6. Implementing Chatbot Conversational View

# Create a session-state entry to hold the message history
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
    
# 

# After the form is submitted, the input container is pushed down; once the response is ready, the response container above it shows the AI's answer
with container:
    # clear_on_submit clears the form contents after submission
    with st.form(key='my_form', clear_on_submit=True):
        #
        if submit_btn:
            #
            # Show the message history in the container
            # st.write(st.session_state['messages'])

            # Echo the messages back
            with response_container:
                # st.write(answer)
                for i in range(len(st.session_state['messages'])):
                    # from streamlit_chat import message
                    if (i % 2) == 0:
                        message(st.session_state['messages'][i], is_user=True, key=str(i) + '_user')
                    else:
                        message(st.session_state['messages'][i], key=str(i) + '_AI')

7. Conversation Summarization & API key feature

# %% import
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import (
    ConversationBufferMemory, ConversationSummaryMemory, ConversationBufferWindowMemory
)
from langchain_openai import OpenAI
import streamlit as st
from streamlit_chat import message

# Create a session-state entry to hold the conversation
if 'conversation' not in st.session_state:
    st.session_state['conversation'] = None
# Create a session-state entry to hold the message history
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'API_Key' not in st.session_state:
    st.session_state['API_Key'] = ''

# %%
st.set_page_config(page_title="Chat GPT Clone", page_icon=":robot_face:")
st.markdown("<h1 style='text-align: center;'>How can I assist you? </h1>", unsafe_allow_html=True)

st.sidebar.title("😎")
# Sidebar text input to capture the API key
# api_key = st.sidebar.text_input("What's your API key?", type="password")
st.session_state['API_Key'] = st.sidebar.text_input("What's your API key?", type="password")
# api_base = st.sidebar.text_input("What's your API base?", type="default")
# Design: when this button is clicked, the conversation summary will be shown
summarise_button = st.sidebar.button("Summarise the conversation", key='summarise')
# When the button is clicked
if summarise_button:
    summarise_placeholder = st.sidebar.write("Nice chatting with you my friend ❤:\n\n" +
                                             st.session_state['conversation'].memory.buffer)

# %% env
import os

# os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"


# %% model
def get_response(userInput, api_key):
    # If there is no conversation yet, create one; creating it only once prevents every call from rebuilding the chain and wiping the previous history
    if st.session_state['conversation'] is None:
        # llm = OpenAI(temperature=0, model_name='gpt-4')
        llm = OpenAI(temperature=0, api_key=api_key)

        # conversation
        st.session_state['conversation'] = ConversationChain(
            llm=llm,
            verbose=True,
            memory=ConversationSummaryMemory(llm=llm)
        )
 
    res = st.session_state['conversation'].predict(input=userInput)

    return res


# %%
# Container that holds the responses
response_container = st.container()
# Container that holds the user's input
container = st.container()

# After the form is submitted, the input container is pushed down; once the response is ready, the response container above it shows the AI's answer
with container:
    # clear_on_submit clears the form contents after submission
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_area("Your question goes here:", key='input', height=100)
        submit_btn = st.form_submit_button('Send')
        if submit_btn:
            # Save the user input to the message history
            st.session_state['messages'].append(user_input)
            answer = get_response(user_input, st.session_state['API_Key'])
            # Save the AI reply to the message history
            st.session_state['messages'].append(answer)
            # Show the message history in the container
            # st.write(st.session_state['messages'])

            # Echo the messages back
            with response_container:
                # st.write(answer)
                for i in range(len(st.session_state['messages'])):
                    # from streamlit_chat import message
                    if (i % 2) == 0:
                        message(st.session_state['messages'][i], is_user=True, key=str(i) + '_user')
                    else:
                        message(st.session_state['messages'][i], key=str(i) + '_AI')
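
One caveat in the code above: clicking "Summarise the conversation" before any message has been sent would fail, because st.session_state['conversation'] is still None at that point. A hedged sketch of a defensive variant of that button handler (not from the course code):

if summarise_button:
    if st.session_state['conversation'] is not None:
        st.sidebar.write("Nice chatting with you my friend ❤:\n\n" +
                         st.session_state['conversation'].memory.buffer)
    else:
        # No chain exists yet, so there is no memory buffer to summarise
        st.sidebar.warning("Start a conversation first, then ask for a summary.")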

13. Langchain - Data Connection Module Concept

1. Data Connection Module Introduction

2. Data Connection Module - Python Implementation Part 1

# %% import
# pip install chromadb sentence_transformers
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# %% document loaders
loader = TextLoader('Sample.txt', encoding='utf-8')
documents = loader.load()

# The whole file is treated as a single document
print(len(documents))  # 1
# %% Document transformers
# Split the document into smaller chunks so a single request does not exceed the LLM's limit
text_splitter = CharacterTextSplitter(
    # size of each chunk
    chunk_size=200,
    # overlap: the start of the next chunk repeats the end of the previous one
    chunk_overlap=0
)

texts = text_splitter.split_documents(documents)

print(len(texts))  # 19

# %% Text embedding models
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"
embeddings = OpenAIEmbeddings()

# os.environ['HUGGINGFACEHUB_API_TOKEN'] = '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'
# huggingface SentenceTransformerEmbeddings sentence_transformers
# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
print(embeddings)
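
To get a feel for what the embedding model returns, you can embed a single string directly. A quick sketch using the embeddings object created above (for OpenAI's ada-002 embeddings the vector typically has 1536 dimensions; other models differ):

# Embed one sentence and inspect the resulting vector
vector = embeddings.embed_query("Delhi is the capital of India")
print(type(vector), len(vector))  # a plain Python list of floats, e.g. 1536 for ada-002
print(vector[:5])                 # first few dimensions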

3. Data Connection Module - Python Implementation Part 2

# %% Vector stores -> Store and search over embedded data
# Load Embeddings of Text into Chroma
db = Chroma.from_documents(texts, embeddings)  # text -> vector
# Let's have a look at embeddings -Numeric representation
print(db._collection.get(include=['embeddings']))

# %% Retrievers
# Query your data
retriever = db.as_retriever(search_kwargs={"k": 1})  # k controls how many results are returned (k=2 would return two)
# vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001DE2C751E20> search_kwargs={'k': 1}
print(retriever)

# %% question
# 1
# [Document(page_content='Delhi is the capital of India', metadata={'source': 'Sample.txt'})]
# with k=2 two Documents would be returned
docs = retriever.get_relevant_documents("What is the capital of india?")
print(docs)
# 2
# [Document(page_content='The Indian rupee is the official currency in the Republic of India. The rupee is subdivided into 100 paise. The issuance of the currency is controlled by the Reserve Bank of India.', metadata={'source': 'Sample.txt'})]
docs = retriever.get_relevant_documents("What is the currency of india?")
print(docs)
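
RetrievalQA was imported at the top of this script but never used above. It wires the retriever and an LLM into a single question-answering chain; a hedged sketch of how it could be plugged in here, assuming the db built above:

from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

llm = OpenAI(temperature=0)

# "stuff" simply pastes the retrieved chunks into the prompt
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 1}),
)
print(qa_chain.invoke({"query": "What is the currency of India?"}))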

14. ✅Intermediate level

15. Project #6 - Quiz MCQ Creator App

1. Loading Documents & Creating Chunks

# %%
# pip install unstructured tiktoken pinecone-client pypdf

# import dependencies
import openai
import pinecone
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain_openai.llms import OpenAI
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"
os.environ['HUGGINGFACEHUB_API_TOKEN'] = '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'


# %% load documents
# function to read documents
def load_docs(directory):
    loader = PyPDFDirectoryLoader(directory)
    docs = loader.load()
    return docs


# passing the directory to the 'load_docs' function
directory = 'Docs/'
documents = load_docs(directory)
print(len(documents))  # 3: each PDF page counts as one document


# %% transform documents
# split docs into chunks
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """
    分割文档
    :param documents:
    :param chunk_size: 每块大小
    :param chunk_overlap: 块和块之间重叠的数量
    :return:
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    docs = splitter.split_documents(documents)
    return docs


docs = split_docs(documents)
print(len(docs))  # 7

2. Generate Embeddings & Store Them


Pinecone shipped a breaking API update

# %% vector store
'''
AttributeError: init is no longer a top-level attribute of the pinecone package.

Please create an instance of the Pinecone class instead.
'''
# pinecone.init(
#     api_key="752a3862-5a0d-4403-b952-7465c3fa087d",
#     # environment=
# )
#
# index_name = "mcq-creator"
#
# index = Pinecone.from_documents(docs, embeddings, index_name=index_name)

from langchain_community.vectorstores import Chroma

# Using Chroma instead of Pinecone; create the embeddings instance from the OpenAIEmbeddings import above
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embeddings)  # text -> vector
# Let's have a look at embeddings - numeric representation
print(db._collection.get(include=['embeddings']))

3. Retrieving Answer

# %% Retrieve Answers
# This function will help us in fetching the top relevant documents from our vector store
def get_similiar_docs(query, k=2):
    retriever = db.as_retriever(search_kwargs={"k": k})  # k controls how many results are returned
    docs = retriever.get_relevant_documents(query)
    return docs

4. Creating Structured Output

# %% Creating Structured Output
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub

# llm=HuggingFaceHub(repo_id="bigscience/bloom", model_kwargs={"temperature":1e-10})
# llm
llm = OpenAI()
chain = load_qa_chain(llm, chain_type="stuff")


# This function will help us get the answer to the question that we raise
def get_answer(query):
    relevant_docs = get_similiar_docs(query)
    print(relevant_docs)
    response = chain.run(input_documents=relevant_docs, question=query)
    return response


our_query = "How is India's economy?"
answer = get_answer(our_query)
print(answer)
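
The section title promises structured output, but the code above only returns free text. One common LangChain approach is a StructuredOutputParser built from ResponseSchemas; a hedged sketch (the schema names are illustrative, not from the course code):

from langchain.output_parsers import ResponseSchema, StructuredOutputParser

# Describe the fields we want the LLM to return for each MCQ
response_schemas = [
    ResponseSchema(name="question", description="The multiple-choice question"),
    ResponseSchema(name="choices", description="Four answer options, comma separated"),
    ResponseSchema(name="answer", description="The correct option"),
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# These instructions are appended to the prompt so the LLM answers in parseable JSON
format_instructions = output_parser.get_format_instructions()
print(format_instructions)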

16. Langchain - Chains Module Concept

1. Chains Overview

2. Generic Chains

# %% import
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"

from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# %%
llm = OpenAI()
prompt = PromptTemplate(
    input_variables=['place'],
    template="Best places to visit in {place}?"
)

chain = LLMChain(llm=llm, prompt=prompt)

# run the chain only specifying the input variables
''' 
LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. 
Use invoke instead.
'''
# print(chain.run("India"))
# print(chain.invoke({'place': 'India'}))
print(chain.invoke({'place': 'India'}).get("text"))

# %% simple sequential chains
from langchain.chains import SimpleSequentialChain

# from langchain import HuggingFaceHub

template = """You have to suggest 5 best places to visit in {place}?

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(
    input_variables=['place'],
    template=template
)

# HF_11m = HuggingFaceHub(repo_id="google/flan-t5-large")
place_chain = LLMChain(llm=llm, prompt=prompt_template)

template = """Given a list a places, please estimate the expenses to visit all of them in local currency and also the days needed
{expenses}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(
    input_variables=['expenses'],
    template=template
)

llm = OpenAI()

expense_chain = LLMChain(llm=llm, prompt=prompt_template)

# the output of place_chain is the input of expense_chain
# verbose=True lets you see what happens internally
final_chain = SimpleSequentialChain(chains=[place_chain, expense_chain], verbose=True)

# review = final_chain.run("India")
# SimpleSequentialChain returns its result under the "output" key
review = final_chain.invoke({'input': 'India'}).get("output")

3. Utility Chains

# utility chains overview
# %% load_summarization_chain
import os

os.environ['OPENAI_API_KEY'] = "sk-dsO3Z1fD9nrDFq1SF7C35437A2F74c1aA769F006A57eA65f"
os.environ['OPENAI_API_BASE'] = "https://oneapi.xty.app/v1"

from langchain_openai.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.docstore.document import Document

# %%
llm = OpenAI(temperature=0.9)

# reading the document
with open("Sample.txt", encoding='utf-8') as f:
    data = f.read()

# split txt
text_splitter = CharacterTextSplitter()
texts = text_splitter.split_text(data)

# create multiple documents
docs = [Document(page_content=t) for t in texts]
print(docs)

chain = load_summarize_chain(llm, chain_type='map_reduce', verbose=True)
# chain.run(docs)
# print the summary
print(chain.invoke(docs).get('output_text'))

# %% http request
## llm requests chain
from langchain.chains import LLMRequestsChain, LLMChain

template = """
Extract the answer to the question '{query}' or say "not found" if the information is not available.
{requests_result}
"""

PROMPT = PromptTemplate(
    input_variables=["query", "requests_result"],
    template=template,
)

llm = OpenAI()
chain = LLMRequestsChain(llm_chain=LLMChain(llm=llm, prompt=PROMPT))

question = "What is the capital of india?"
inputs = {
    'query': question,
    'url': "https://cn.bing.com/search?q=" + question.replace(" ", "+"),
}
print(chain(inputs))

import inspect

print(inspect.getsource(chain._call))

17. Langchain - Agents Module Concept

1. Agents Overview

Large language models (LLMs) are extremely powerful, but they lack specific abilities that even the "dumbest" computer programs can handle with ease. Logic, calculation and search are areas where computers typically excel, but where LLMs struggle. Computers can solve enormously complex math problems, yet if we ask GPT-4 for the answer to 4.1 raised to the power of 2.1, it fails.

Asking GPT-4 to perform a simple calculation usually yields a wrong answer, while a simple calculator performs the same calculation without any problem.

According to a simple calculator, the answer is 19.357, to three decimal places. Isn't it interesting that a simple calculator program can do this while an enormously sophisticated AI engine fails?

GPT-4 also cannot tell us anything about LangChain, because it has no connection to the outside world. Its only knowledge comes from its training data, which was cut off in late 2021.

Today's generation of LLMs has significant shortcomings, and we need to find ways around them. One "suite of solutions" comes in the form of Agents.

Agents not only solve the problems we saw above, they solve many other problems as well. In fact, there is almost no limit to how far Agents can extend the capabilities of LLMs.

What are Agents?
We can think of Agents as Tools for LLMs. Just as humans use a calculator for math or search Google for information, Agents allow an LLM to do the same, as the sketch below illustrates.
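
A minimal sketch of giving an LLM a calculator via an agent, using the built-in llm-math tool (older initialize_agent style, matching the LangChain versions used elsewhere in these notes):

from langchain_openai import OpenAI
from langchain.agents import load_tools, initialize_agent, AgentType

llm = OpenAI(temperature=0)

# llm-math wraps a calculator the agent can call instead of guessing the arithmetic
tools = load_tools(["llm-math"], llm=llm)

agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
print(agent.run("What is 4.1 raised to the power of 2.1?"))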

18. Project #7 - CSV Data Analysis Tool

1. CSV Data Analysis Tool Demo

2. CSV Data Analysis Tool - Frontend

# %% use env
from dotenv import load_dotenv

load_dotenv()
import streamlit as st

# %%
st.title("Let's do some analysis on your CSV")
st.header("Please upload your CSV file here:")

# Capture the CSV file (file-upload widget)
data = st.file_uploader("Upload CSV file", type="csv")

query = st.text_area("Enter your query")
button = st.button("Generate Response")

if button:
    # Get Response 
    st.write('answer')

3. CSV Data Analysis Tool - Backend

utils.py

'''
ImportError: create_pandas_dataframe_agent has been moved to langchain experimental. See https://github.com/langchain-ai/langchain/discussions/11680for more information.
Please update your import statement from: `langchain.agents.create_pandas_dataframe_agent` to `langchain_experimental.agents.create_pandas_dataframe_agent`.
'''
# from langchain.agents import create_pandas_dataframe_agent
from langchain_openai.llms import OpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd


def query_agent(data, query):
    # parse the csv file and create a pandas dataframe from its contents
    df = pd.read_csv(data)

    llm = OpenAI()

    # create a pandas dataframe agent
    agent = create_pandas_dataframe_agent(llm, df, verbose=True)

    # Python REPL: a Python shell used to evaluate and execute Python commands.
    # It takes Python code as input and outputs the result. The input Python code can be generated by another tool in the LangChain agent.
    return agent.run(query)

app.py

if button:
    # Get Response
    answer = query_agent(data, query)
    st.write(answer)

q: can you please name the columns
a: [‘EMPLOYEE_ID’, ‘FIRST_NAME’, ‘LAST_NAME’, ‘EMAIL’, ‘PHONE_NUMBER’, ‘HIRE_DATE’, ‘JOB_ID’, ‘SALARY’, ‘COMMISSION_PCT’, ‘MANAGER_ID’, ‘DEPARTMENT_ID’]
q: how many unique managers are there?
a: 14
q: give the average of salary column
a: The average salary is 6182.32.

The agent runs pandas operations based on the question

19. ✅Advanced level

20. Project #8 - Youtube Script Writing Tool

1. Youtube Script Writing Tool Demo

Generates the video's title and script

2. Youtube Script Writing tool - Frontend

requirements.txt

langchain
streamlit
openai
tiktoken
python-dotenv
pinecone-client
duckduckgo_search

app.py

import streamlit as st

# Applying Styling
st.markdown("""
<style>
div.stButton > button:first-child {
    background-color: #0099ff;
    color:#ffffff;
}
div.stButton > button:hover {
    background-color: #00ff00;
    color:#FFFFFF;
    }
</style>""", unsafe_allow_html=True)

# Creating Session State Variable
if 'API_Key' not in st.session_state:
    st.session_state['API_Key'] = ''

st.title('❤️ YouTube Script Writing Tool')

# Sidebar to capture the OpenAi API key
st.sidebar.title("😎🗝️")
st.session_state['API_Key'] = st.sidebar.text_input("What's your API key?", type="password")
st.sidebar.image('./Youtube.jpg', width=300, use_column_width=True)

# Captures User Inputs
prompt = st.text_input('Please provide the topic of the video', key="prompt")  # The box for the text prompt
video_length = st.text_input('Expected Video Length 🕒 (in minutes)', key="video_length")  # The box for the text prompt
creativity = st.slider('Creativity ✨ - (0 LOW || 1 HIGH)', 0.0, 1.0, 0.2, step=0.1)

submit = st.button("Generate Script for me")

if submit:

    if st.session_state['API_Key']:
        # Let's generate the script
        st.success('Hope you like this script ❤️')

        # Display Title
        st.subheader("Title:🔥")

        # Display Video Script
        st.subheader("Your Video Script:📝")

        # Display Search Engine Result
        st.subheader("Check Out - DuckDuckGo Search:🔍")
        with st.expander('Show me 👀'):
            st.info('Search data')

    else:
        st.error("Ooopssss!!! Please provide API key.....")

3. Youtube Script Writing tool - Backend

utils.py

from langchain_openai.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.tools import DuckDuckGoSearchRun


# Function to generate video script
def generate_script(prompt, video_length, creativity, api_key):
    # Template for generating 'Title'
    title_template = PromptTemplate(
        input_variables=['subject'],
        template='Please come up with a title for a YouTube video on the  {subject}.'
    )

    # Template for generating 'Video Script' using search engine
    script_template = PromptTemplate(
        input_variables=['title', 'DuckDuckGo_Search', 'duration'],
        template='Create a script for a YouTube video based on this title for me. TITLE: {title} of duration: {duration} minutes using this search data {DuckDuckGo_Search} '
    )

    # Setting up OpenAI LLM
    llm = OpenAI(temperature=creativity, openai_api_key=api_key, model_name='gpt-3.5-turbo')

    # Creating chain for 'Title' & 'Video Script'
    title_chain = LLMChain(llm=llm, prompt=title_template, verbose=True)
    script_chain = LLMChain(llm=llm, prompt=script_template, verbose=True)

    # https://python.langchain.com/docs/modules/agents/tools/integrations/ddg
    search = DuckDuckGoSearchRun()

    # Executing the chains we created for 'Title'
    title = title_chain.run(prompt)

    # Executing the chains we created for 'Video Script' by taking help of search engine 'DuckDuckGo'
    search_result = search.run(prompt)
    script = script_chain.run(title=title, DuckDuckGo_Search=search_result, duration=video_length)

    # Returning the output
    return search_result, title, script

4. Youtube Script Writing tool - Integration

utils.py

from langchain_openai.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, LLMRequestsChain
from langchain_community.tools import DuckDuckGoSearchRun


# Function to generate video script
def generate_script(prompt, video_length, creativity, api_key):
    # Template for generating 'Title'
    title_template = PromptTemplate(
        input_variables=['subject'],
        template='Please come up with a title for a YouTube video on the  {subject}.'
    )

    # Template for generating 'Video Script' using search engine
    script_template = PromptTemplate(
        input_variables=['title', 'DuckDuckGo_Search', 'duration'],
        template='Create a script for a YouTube video based on this title for me. TITLE: {title} of duration: {duration} minutes using this search data {DuckDuckGo_Search} '
    )

    # Setting up OpenAI LLM
    # llm = OpenAI(temperature=creativity, openai_api_key=api_key, model_name='gpt-3.5-turbo')
    llm = OpenAI(temperature=creativity, openai_api_key=api_key)

    # Creating chain for 'Title' & 'Video Script'
    title_chain = LLMChain(llm=llm, prompt=title_template, verbose=True)
    script_chain = LLMChain(llm=llm, prompt=script_template, verbose=True)

    # https://python.langchain.com/docs/modules/agents/tools/integrations/ddg
    # search = DuckDuckGoSearchRun()

    # Executing the chains we created for 'Title'
    title = title_chain.invoke({'subject': prompt}).get("text")

    # Executing the chains we created for 'Video Script' by taking help of search engine 'DuckDuckGo'
    # search_result = search.run(prompt)

    # DuckDuckGoSearchRun has a bug here, so switch to Bing + LangChain's LLMRequestsChain instead
    llm = OpenAI()
    template = """
    Extract the answer to the question '{query}' or say "not found" if the information is not available.
    {requests_result}
    """
    PROMPT = PromptTemplate(
        input_variables=["query", "requests_result"],
        template=template,
    )
    chain = LLMRequestsChain(llm_chain=LLMChain(llm=llm, prompt=PROMPT))

    inputs = {
        'query': prompt,
        'url': "https://cn.bing.com/search?q=" + prompt.replace(" ", "+"),
    }
    search_result = chain(inputs)
    # ------------------------------

    script = script_chain.run(title=title, DuckDuckGo_Search=search_result, duration=video_length)

    # Returning the output
    return search_result, title, script

app.py

if submit:

    if st.session_state['API_Key']:
        search_result, title, script = generate_script(
            prompt, video_length, creativity, st.session_state['API_Key']
        )

        # Let's generate the script
        st.success('Hope you like this script ❤️')

        # Display Title
        st.subheader("Title:🔥")
        st.write(title)

        # Display Video Script
        st.subheader("Your Video Script:📝")
        st.write(script)

        # Display Search Engine Result
        st.subheader("Check Out - DuckDuckGo Search:🔍")
        with st.expander('Show me 👀'):
            st.info(search_result)

    else:
        st.error("Ooopssss!!! Please provide API key.....")

21. Project #9 - Support Chat Bot For Your Website

1. Support Chat Bot For Your Website Demo

3. Implement Frontend for Pushing Data to Pinecone

requirements.txt

langchain
pinecone-client
openai
tiktoken
nest_asyncio

import streamlit as st

# Creating Session State Variable
if 'HuggingFace_API_Key' not in st.session_state:
    st.session_state['HuggingFace_API_Key'] = ''
if 'Pinecone_API_Key' not in st.session_state:
    st.session_state['Pinecone_API_Key'] = ''

#
st.title('🤖 AI Assistance For Website')

# ********SIDE BAR Functionality started*******

# Sidebar to capture the API keys
st.sidebar.title("😎🗝️")
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input("What's your HuggingFace API key?", type="password")
st.session_state['Pinecone_API_Key'] = st.sidebar.text_input("What's your Pinecone API key?", type="password")

load_button = st.sidebar.button("Load data to Pinecone", key="load_button")

# If the above button is clicked, push the data to Pinecone...
if load_button:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Fetch data from site
        st.write("Data pull done...")

        # Split data into chunks
        st.write("Spliting data done...")

        # Creating embeddings instance
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")

# ********SIDE BAR Functionality ended*******

4. Implementing Backend for Scraping the Data

app.py

# If the above button is clicked, push the data to Pinecone...
if load_button:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Fetch data from the site
        site_data = get_website_data("https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml")
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Splitting data done...")

        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")

utils.py

from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.vectorstores import Pinecone
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
# import pinecone
import asyncio
from langchain.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# %% Retrieve Answers
# db - Chroma
db = None
persist_directory = 'db'


# This function will help us in fetching the top relevant documents from our vector store
def from_existing_index(query, k=2):
    docs = None
    if db is not None:
        retriever = db.as_retriever(search_kwargs={"k": k})  # k controls how many results are returned
        docs = retriever.get_relevant_documents(query)
    return docs


# Function to fetch data from website
# https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/sitemap
def get_website_data(sitemap_url):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # SitemapLoader loads the website's sitemap (which lists the site's pages)
    loader = SitemapLoader(
        sitemap_url
    )

    docs = loader.load()

    return docs


# Function to split data into smaller chunks
def split_data(docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )

    docs_chunks = text_splitter.split_documents(docs)
    return docs_chunks


# Function to create embeddings instance
def create_embeddings():
    embeddings = OpenAIEmbeddings()
    # embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings


# Function to push data to Pinecone
def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
    # pinecone.init(
    #     api_key=pinecone_apikey,
    #     environment=pinecone_environment
    # )

    index_name = pinecone_index_name

    # index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    db.persist()  # persist to disk
    return db


# Function to pull index data from Pinecone
def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
    # pinecone.init(
    #     api_key=pinecone_apikey,
    #     environment=pinecone_environment
    # )

    index_name = pinecone_index_name

    # index = Pinecone.from_existing_index(index_name, embeddings)
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    return db


# This function will help us in fetching the top relevant documents from our vector store index
def get_similar_docs(index, query, k=2):
    # db - index
    similar_docs = index.similarity_search(query, k=k)
    return similar_docs

5. Implementing Backend for Pushing the Data to Pinecone

if load_button:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Fetch data from the site
        site_data = get_website_data("https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml")
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Splitting data done...")

        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        # pass all arguments expected by the utils.py signature (the Pinecone key/environment are unused in the Chroma version)
        push_to_pinecone(st.session_state['Pinecone_API_Key'], "us-west1-gcp-free", 'chatbot', embeddings, chunks_data)
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")

6. Handling the Hardcoded Values

# constants.py
WEBSITE_URL="https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml"
PINECONE_ENVIRONMENT="us-west1-gcp-free"
PINECONE_INDEX="chatbot"
# app.py
# If the bove button is clicked, pushing the data to Pinecone...
if load_button:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Fetch data from the site
        site_data = get_website_data(WEBSITE_URL)
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Splitting data done...")

        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(st.session_state['Pinecone_API_Key'], PINECONE_ENVIRONMENT, PINECONE_INDEX, embeddings, chunks_data)
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")

7. Implementing Information Retrieval System

# Captures User Inputs
prompt = st.text_input('How can I help you my friend ❓', key="prompt")  # The box for the text prompt
document_count = st.slider('No.Of links to return 🔗 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)

submit = st.button("Search")

if submit:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Pull index data from Pinecone
        index = pull_from_pinecone(st.session_state['Pinecone_API_Key'], PINECONE_ENVIRONMENT, PINECONE_INDEX, embeddings)
        st.write("Pinecone index retrieval done...")

        # Fetch relevant documents from the index
        relavant_docs = get_similar_docs(index, prompt, document_count)
        st.write(relavant_docs)

        # Displaying search results
        st.success("Please find the search results :")
        # Displaying search results
        st.write("search results list....")

        for document in relavant_docs:
            st.write("👉**Result : " + str(relavant_docs.index(document) + 1) + "**")
            st.write("**Info**: " + document.page_content)
            st.write("**Link**: " + document.metadata['source'])


    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")

22. Project #10 - Automatic Ticket Classification Tool

1. Automatic Ticket Classification Tool - Demo


2. Upload Documents To Pinecone - Frontend & Backend

requirements.txt

langchain
streamlit
openai
tiktoken
python-dotenv
pinecone-client
pypdf
joblib
pandas
# Load_Data_Store.py
import streamlit as st
from dotenv import load_dotenv
from admin_utils import *


def main():
    load_dotenv()
    st.set_page_config(page_title="Dump PDF to Pinecone - Vector Store")
    st.title("Please upload your files...📁 ")

    # Upload the pdf file
    pdf = st.file_uploader("Only PDF files allowed", type=["pdf"])

    # Extract the whole text from the uploaded pdf file
    if pdf is not None:
        with st.spinner('Wait for it...'):
            text = read_pdf_data(pdf)
            st.write("👉Reading PDF done")

            # Create chunks
            docs_chunks = split_data(text)
            # st.write(docs_chunks)
            st.write("👉Splitting data into chunks done")

            # Create the embeddings
            embeddings = create_embeddings_load_data()
            st.write("👉Creating embeddings instance done")

            # Build the vector store (Push the PDF data embeddings)
            push_to_pinecone(embeddings, docs_chunks)

        st.success("Successfully pushed the embeddings to Pinecone")


if __name__ == '__main__':
    main()
# admin_utils.py
from langchain_community.vectorstores.chroma import Chroma
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.llms import OpenAI
import pinecone
from langchain.vectorstores import Pinecone
import pandas as pd
from sklearn.model_selection import train_test_split

# db - Chroma
db = None
persist_directory = 'db'


# **********Functions to help you load documents to PINECONE***********

# Read PDF data
def read_pdf_data(pdf_file):
    pdf_page = PdfReader(pdf_file)
    text = ""
    for page in pdf_page.pages:
        text += page.extract_text()
    return text


# Split data into chunks
def split_data(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    docs = text_splitter.split_text(text)
    docs_chunks = text_splitter.create_documents(docs)
    return docs_chunks


# Create embeddings instance
def create_embeddings_load_data():
    embeddings = OpenAIEmbeddings()
    # embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings


# Function to push data to Pinecone
# def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
def push_to_pinecone(embeddings, docs):
    # pinecone.init(
    #     api_key=pinecone_apikey,
    #     environment=pinecone_environment
    # )

    # index_name = pinecone_index_name
    # index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
    # return index
    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    db.persist()  # persist to disk
    return db


# *********Functions for dealing with Model related tasks...************

# Read dataset for model creation
def read_data(data):
    df = pd.read_csv(data, delimiter=',', header=None)
    return df


# Create embeddings instance
def get_embeddings():
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings


# Generating embeddings for our input dataset
def create_embeddings(df, embeddings):
    df[2] = df[0].apply(lambda x: embeddings.embed_query(x))
    return df


# Splitting the data into train & test
def split_train_test__data(df_sample):
    # Split into training and testing sets
    sentences_train, sentences_test, labels_train, labels_test = train_test_split(
        list(df_sample[2]), list(df_sample[1]), test_size=0.25, random_state=0)
    print(len(sentences_train))
    return sentences_train, sentences_test, labels_train, labels_test


# Get the accuracy score on test data
def get_score(svm_classifier, sentences_test, labels_test):
    score = svm_classifier.score(sentences_test, labels_test)
    return score
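
push_to_pinecone() above keeps the course's original Pinecone calls commented out and persists to a local Chroma directory instead (its counterpart pull_from_pinecone() in user_utils.py below does the same). If you prefer the hosted Pinecone route, a minimal sketch reconstructed from that commented-out code (assuming a pinecone-client release that still exposes pinecone.init and an index you have already created in the Pinecone console) would look like this:

# pinecone_variant.py - illustrative only, mirrors the commented-out course code
import pinecone
from langchain_community.vectorstores import Pinecone


def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
    pinecone.init(api_key=pinecone_apikey, environment=pinecone_environment)
    # Embed the document chunks and upsert them into the existing index
    index = Pinecone.from_documents(docs, embeddings, index_name=pinecone_index_name)
    return index


def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
    pinecone.init(api_key=pinecone_apikey, environment=pinecone_environment)
    # Wrap the already-populated index so it can be queried with similarity_search()
    index = Pinecone.from_existing_index(pinecone_index_name, embeddings)
    return index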

3. Chatbot Interaction - Frontend & Backend

# app.py
from dotenv import load_dotenv
import streamlit as st
from user_utils import *


def main():
    load_dotenv()

    st.header("Automatic Ticket Classification Tool")
    # Capture user input
    st.write("We are here to help you, please ask your question:")
    user_input = st.text_input("🔍")

    if user_input:
        # creating embeddings instance
        embeddings = create_embeddings()

        # Function to pull index data from Pinecone
        index = pull_from_pinecone(embeddings)

        # This function helps us fetch the top relevant documents from our vector store
        relavant_docs = get_similar_docs(index, user_input, k=2)

        # This will return the LLM's answer generated from the retrieved documents
        response = get_answer(relavant_docs, user_input)
        st.write(response)


if __name__ == '__main__':
    main()
# user_utils.py
import pinecone
from langchain_community.vectorstores import Pinecone
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_openai import OpenAI
from langchain_community.callbacks import get_openai_callback
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
import joblib
from langchain_community.vectorstores.chroma import Chroma

persist_directory = 'db'


# Function to pull index data from Pinecone
# def pull_from_pinecone(pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings):
def pull_from_pinecone(embeddings):
    # pinecone.init(
    #     api_key=pinecone_apikey,
    #     environment=pinecone_environment
    # )
    #
    # index_name = pinecone_index_name
    #
    # index = Pinecone.from_existing_index(index_name, embeddings)
    # return index
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    return db


def create_embeddings():
    # embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    embeddings = OpenAIEmbeddings()
    return embeddings


# This function helps us fetch the top relevant documents from our vector store
def get_similar_docs(index, query, k=2):
    similar_docs = index.similarity_search(query, k=k)
    return similar_docs


def get_answer(docs, user_input):
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_input)
    return response


def predict(query_result):
    Fitmodel = joblib.load('modelsvm.pk1')
    result = Fitmodel.predict([query_result])
    return result[0]

4. Organizing Different Pages in Streamlit

Files placed under the pages/ directory are automatically shown in the Streamlit sidebar.
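
A minimal sketch of the layout this relies on (the file names match the pages added later in this project):

# Streamlit multipage layout
#
#   app.py                    <- entry point: streamlit run app.py
#   pages/
#       Create_ML_Model.py    <- appears as "Create ML Model" in the sidebar
#       Pending_tickets.py    <- appears as "Pending tickets" in the sidebar
#
# Every .py file under pages/ is discovered automatically and listed in the
# sidebar; no extra registration code is needed.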

5. Classification Model Creation & 6. Model Training Process

# pages/Create_ML_Model.py
import streamlit as st
from pages.admin_utils import *
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import joblib

if 'cleaned_data' not in st.session_state:
    st.session_state['cleaned_data'] = ''
if 'sentences_train' not in st.session_state:
    st.session_state['sentences_train'] = ''
if 'sentences_test' not in st.session_state:
    st.session_state['sentences_test'] = ''
if 'labels_train' not in st.session_state:
    st.session_state['labels_train'] = ''
if 'labels_test' not in st.session_state:
    st.session_state['labels_test'] = ''
if 'svm_classifier' not in st.session_state:
    st.session_state['svm_classifier'] = ''

st.title("Let's build our Model...")

# Create tabs
tab_titles = ['Data Preprocessing', 'Model Training', 'Model Evaluation', "Save Model"]
tabs = st.tabs(tab_titles)

# Adding content to each tab

# Data Preprocessing TAB
with tabs[0]:
    st.header('Data Preprocessing')
    st.write('Here we preprocess the data...')

    # Capture the CSV file
    data = st.file_uploader("Upload CSV file", type="csv")

    button = st.button("Load data", key="data")

    if button:
        with st.spinner('Wait for it...'):
            our_data = read_data(data)
            embeddings = get_embeddings()
            st.session_state['cleaned_data'] = create_embeddings(our_data, embeddings)
        st.success('Done!')

# Model Training TAB
with tabs[1]:
    st.header('Model Training')
    st.write('Here we train the model...')
    button = st.button("Train model", key="model")

    if button:
        with st.spinner('Wait for it...'):
            st.session_state['sentences_train'], st.session_state['sentences_test'], st.session_state['labels_train'], \
            st.session_state['labels_test'] = split_train_test__data(st.session_state['cleaned_data'])

            # Initialize a support vector machine inside a scaling pipeline.
            # class_weight='balanced' weights each department class inversely to its
            # frequency, which helps when the ticket categories are not evenly represented.
            st.session_state['svm_classifier'] = make_pipeline(StandardScaler(), SVC(class_weight='balanced'))

            # fit the support vector machine
            st.session_state['svm_classifier'].fit(st.session_state['sentences_train'],
                                                   st.session_state['labels_train'])
        st.success('Done!')

# Model Evaluation TAB
with tabs[2]:
    st.header('Model Evaluation')
    st.write('Here we evaluate the model...')
    button = st.button("Evaluate model", key="Evaluation")

    if button:
        with st.spinner('Wait for it...'):
            accuracy_score = get_score(st.session_state['svm_classifier'], st.session_state['sentences_test'],
                                       st.session_state['labels_test'])
            st.success(f"Validation accuracy is {100 * accuracy_score}%!")

            st.write("A sample run:")

            # text="lack of communication regarding policy updates salary, can we please look into it?"
            text = "Rude driver with scary driving"
            st.write("***Our issue*** : " + text)

            # Converting our TEXT into a NUMERICAL representation (an embedding)
            embeddings = get_embeddings()
            query_result = embeddings.embed_query(text)

            # Sample prediction using our trained model
            result = st.session_state['svm_classifier'].predict([query_result])
            st.write("***Department it belongs to*** : " + result[0])

        st.success('Done!')

# Save model TAB
with tabs[3]:
    st.header('Save model')
    st.write('Here we save the model...')

    button = st.button("Save model", key="save")
    if button:
        with st.spinner('Wait for it...'):
            joblib.dump(st.session_state['svm_classifier'], 'modelsvm.pk1')
        st.success('Done!')
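
read_data() loads the uploaded file with header=None, so this page assumes a headerless two-column CSV: column 0 holds the ticket text and column 1 the department label (HR / IT / Transport, matching the labels app.py checks for). A tiny, purely illustrative file for testing could be generated like this:

# make_sample_tickets.py - hypothetical sample data, rows are made up for illustration
import pandas as pd

sample = pd.DataFrame([
    ["My laptop cannot connect to the office VPN", "IT"],
    ["Need clarification on the updated leave policy", "HR"],
    ["The shuttle bus was 30 minutes late again today", "Transport"],
])
# header=False / index=False so the file matches what read_data() expects
sample.to_csv("sample_tickets.csv", index=False, header=False)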

7. Ticket Raising Feature Implementation

# user_utils.py

def predict(query_result):
    Fitmodel = joblib.load('modelsvm.pk1')
    result = Fitmodel.predict([query_result])
    return result[0]
# app.py

def main():
    load_dotenv()

    st.header("Automatic Ticket Classification Tool")
    # Capture user input
    st.write("We are here to help you, please ask your question:")
    user_input = st.text_input("🔍")

    if user_input:
        ###

        # This will return the LLM's answer generated from the retrieved documents
        response = get_answer(relavant_docs, user_input)
        st.write(response)

        # Button to create a ticket with respective department
        button = st.button("Submit ticket?")

        if button:
            # Get Response

            embeddings = create_embeddings()
            query_result = embeddings.embed_query(user_input)

            # Load the ML model so we can use it to predict which department this complaint belongs to
            department_value = predict(query_result)
            st.write("your ticket has been sumbitted to : " + department_value)

            # Appending the tickets to below list, so that we can view/use them later on...
            if department_value == "HR":
                st.session_state['HR_tickets'].append(user_input)
            elif department_value == "IT":
                st.session_state['IT_tickets'].append(user_input)
            else:
                st.session_state['Transport_tickets'].append(user_input)

8. Viewing Pending Tickets Tab

# pages/Pending_tickets.py
import streamlit as st

st.title('Departments')

# Create tabs
tab_titles = ['HR Support', 'IT Support', 'Transportation Support']
tabs = st.tabs(tab_titles)

# Add content to each tab
with tabs[0]:
    st.header('HR Support tickets')
    for i, ticket in enumerate(st.session_state['HR_tickets'], start=1):
        st.write(str(i) + " : " + ticket)

with tabs[1]:
    st.header('IT Support tickets')
    for i, ticket in enumerate(st.session_state['IT_tickets'], start=1):
        st.write(str(i) + " : " + ticket)

with tabs[2]:
    st.header('Transportation Support tickets')
    for i, ticket in enumerate(st.session_state['Transport_tickets'], start=1):
        st.write(str(i) + " : " + ticket)
# app.py

#Creating session variables
if 'HR_tickets' not in st.session_state:
    st.session_state['HR_tickets'] =[]
if 'IT_tickets' not in st.session_state:
    st.session_state['IT_tickets'] =[]
if 'Transport_tickets' not in st.session_state:
    st.session_state['Transport_tickets'] =[]

23. Project #11 - HR - Resume Screening Assistance

1. HR - Resume Screening Assistance - Demo

3. Resume Screening Assistance Frontend

# app.py
import streamlit as st
from dotenv import load_dotenv
import uuid

if 'unique_id' not in st.session_state:
    st.session_state['unique_id'] = ''


def main():
    load_dotenv()

    st.set_page_config(page_title="Resume Screening Assistance")
    st.title("HR - Resume Screening Assistance...💁 ")
    st.subheader("I can help you in resume screening process")

    job_description = st.text_area("Please paste the 'JOB DESCRIPTION' here...", key="1")
    document_count = st.text_input("No. of 'RESUMES' to return", key="2")
    # Upload the Resumes (pdf files)
    pdf = st.file_uploader("Upload resumes here, only PDF files allowed", type=["pdf"], accept_multiple_files=True)

    submit = st.button("Help me with the analysis")

    if submit:
        with st.spinner('Wait for it...'):
            st.write("our process")
            # Creating a unique ID so we can use it to query and fetch only this user's uploaded documents from the vector store
            st.session_state['unique_id'] = uuid.uuid4().hex
            st.write(st.session_state['unique_id'])

            # Create a documents list out of all the user uploaded pdf files
            
            # Displaying the count of resumes that have been uploaded

            # Create embeddings instance

            # Push data to PINECONE

            # Fetch relevant documents from PINECONE

            # st.write(relavant_docs)

            # Introducing a line separator

            # For each item in relavant docs - we are displaying some info of it on the UI

        st.success("Hope I was able to save your time❤️")


# Invoking main function
if __name__ == '__main__':
    main()

4. Loading Documents and Adding Metadata

# utils.py
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain_openai.llms import OpenAI
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.schema import Document
import pinecone
from langchain_community.vectorstores.chroma import Chroma
from pypdf import PdfReader
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain import HuggingFaceHub


# Extract Information from PDF file
def get_pdf_text(pdf_doc):
    text = ""
    pdf_reader = PdfReader(pdf_doc)
    for page in pdf_reader.pages:
        text += page.extract_text()
    return text


# iterate over files in
# that user uploaded PDF files, one by one
def create_docs(user_pdf_list, unique_id):
    docs = []
    for filename in user_pdf_list:
        chunks = get_pdf_text(filename) 

        # Adding items to our list - Adding data & its metadata
        docs.append(Document(
            page_content=chunks,
            # Attach some extra information as metadata
            metadata={"name": filename.name,
                      # Streamlit's 'UploadedFile' object has no 'id' attribute, so file_id is used
                      "id": filename.file_id,
                      "type": filename.type,
                      "size": filename.size,
                      "unique_id": unique_id},
        ))

    return docs


# Create embeddings instance
def create_embeddings_load_data():
    embeddings = OpenAIEmbeddings()
    return embeddings


# db - Chroma
persist_directory = 'db'


# Function to push data to Pinecone
# def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
def push_to_pinecone(docs, embeddings):
    # pinecone.init(
    #     api_key=pinecone_apikey,
    #     environment=pinecone_environment
    # )

    # index_name = pinecone_index_name
    # index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
    # return index
    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    db.persist()  # persist the vector store to disk
    return db


# Function to pull index data from Pinecone
# def pull_from_pinecone(pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings):
def pull_from_pinecone(embeddings):
    # pinecone.init(
    #     api_key=pinecone_apikey,
    #     environment=pinecone_environment
    # )
    #
    # index_name = pinecone_index_name
    #
    # index = Pinecone.from_existing_index(index_name, embeddings)
    # return index
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    return db


# Function to help us get relevant documents from the vector store - based on user input
# def similar_docs(query,k,pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings,unique_id):
def similar_docs(query, k, embeddings, unique_id):
    # pinecone.init(
    # api_key=pinecone_apikey,
    # environment=pinecone_environment
    # )

    # index_name = pinecone_index_name

    # index = pull_from_pinecone(pinecone_apikey,pinecone_environment,index_name,embeddings)
    index = pull_from_pinecone(embeddings)
    similar_docs = index.similarity_search_with_score(query, k=int(k), filter={"unique_id": unique_id})
    # print(similar_docs)
    return similar_docs


# Helps us get the summary of a document
def get_summary(current_doc):
    llm = OpenAI(temperature=0)
    # llm = HuggingFaceHub(repo_id="bigscience/bloom", model_kwargs={"temperature":1e-10})
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run([current_doc])

    return summary
# app.py

    if submit:
        with st.spinner('Wait for it...'):
            st.write("our process")
            # Creating a unique ID so we can use it to query and fetch only this user's uploaded documents from the vector store
            st.session_state['unique_id'] = uuid.uuid4().hex
            st.write(st.session_state['unique_id'])

            # Create a documents list out of all the user uploaded pdf files
            docs = create_docs(pdf, st.session_state['unique_id'])
            st.write(docs)

            # Displaying the count of resumes that have been uploaded
            st.write(len(docs))

            # Create embeddings instance

            # Push data to PINECONE

            # Fetch relevant documents from PINECONE

            # st.write(relavant_docs)

            # Introducing a line separator

            # For each item in relavant docs - we are displaying some info of it on the UI

            st.success("Hope I was able to save your time❤️")

5. Push & Pull Data From Pinecone

# app.py

    if submit:
        with st.spinner('Wait for it...'):
            st.write("our process")
            # Creating a unique ID so we can use it to query and fetch only this user's uploaded documents from the vector store
            st.session_state['unique_id'] = uuid.uuid4().hex

            # Create a documents list out of all the user uploaded pdf files
            docs = create_docs(pdf, st.session_state['unique_id'])

            # Displaying the count of resumes that have been uploaded
            st.write(len(docs))

            # Create embeddings instance
            embeddings = create_embeddings_load_data()

            # Push data to PINECONE
            push_to_pinecone(docs, embeddings)

            # Fetch relevant documents from PINECONE
            relavant_docs = similar_docs(
                job_description, document_count, embeddings, st.session_state['unique_id']
            )
            st.write(relavant_docs)

            # Introducing a line separator -----
            st.write(":heavy_minus_sign:" * 30)

            # For each item in relavant docs - we are displaying some info of it on the UI

            st.success("Hope I was able to save your time❤️")

6. Draft

7. Finetuning Output

# app.py

    if submit:
        with st.spinner('Wait for it...'):
            # st.write("our process")
            # Creating a unique ID so we can use it to query and fetch only this user's uploaded documents from the vector store
            st.session_state['unique_id'] = uuid.uuid4().hex

            # Create a documents list out of all the user uploaded pdf files
            docs = create_docs(pdf, st.session_state['unique_id'])

            # Displaying the count of resumes that have been uploaded
            st.write(len(docs))

            # Create embeddings instance
            embeddings = create_embeddings_load_data()

            # Push data to PINECONE
            push_to_pinecone(docs, embeddings)

            # Fetch relevant documents from PINECONE
            relavant_docs = similar_docs(
                job_description, document_count, embeddings, st.session_state['unique_id']
            )

            # Introducing a line separator -----
            st.write(":heavy_minus_sign:" * 30)

            # For each item in relavant docs - we are displaying some info of it on the UI
            for item in range(len(relavant_docs)):

                st.subheader("👉 " + str(item + 1))

                # Displaying Filepath
                st.write("**File** : " + relavant_docs[item][0].metadata['name'])

                # Introducing Expander feature
                with st.expander('Show me 👀'):
                    st.info("**Match Score** : " + str(relavant_docs[item][1]))
                    # st.write("***"+relavant_docs[item][0].page_content)

                    # Get a summary of the current item using the 'get_summary' function we created, which uses the LLM and a LangChain summarize chain
                    summary = get_summary(relavant_docs[item][0])
                    st.write("**Summary** : " + summary)

        st.success("Hope I was able to save your time❤️")

24. LLAMA 2 Introduction

1. LLAMA 2 Introduction & Download Guide

25. Project #12 - Email Generator Using LLAMA 2 Streamlit App

1. Email Generator Front End & Module Creation

import streamlit as st
from langchain_openai.llms import OpenAI
from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate 


st.set_page_config(
    page_title='Generate Emails',
    page_icon='📧',
    layout='centered',
    initial_sidebar_state='collapsed'
)
st.header("Generate Emails 📧")

form_input = st.text_area('Enter the email topic', height=275)

# Creating columns for the UI -To receive inputs from user
col1, col2, col3 = st.columns([10, 10, 5])
with col1:
    email_sender = st.text_input("Sender Name")
with col2:
    email_recipient = st.text_input("Recipient Name")
with col3:
    email_style = st.selectbox(
        "Writing Style", ("Formal", "Appreciation", "Not Satisfied", "Neutral"), index=0
    )

submit = st.button("Generate")

# When 'Generate' button is clicked,execute the below code
if submit:
    st.write('Response') 

2. Using LLAMA 2 as LLM & Execution

import streamlit as st
from langchain_openai.llms import OpenAI
from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate


# function to get the response
def getLLMResponse(form_input, email_sender, email_recipient, email_style):
    # llm = OpenAI(temperature=0.9)

    # Wrapper for Llama-2-7B-Chat, running Llama 2 on CPU

    # Quantization reduces model precision by converting weights from 16-bit floats
    # to 8-bit integers, enabling efficient deployment on resource-limited devices and
    # reducing model size while largely maintaining performance.

    # C Transformers offers support for various open-source models,
    # among them popular ones like Llama, GPT4All-J, MPT, and Falcon.

    # C Transformers is the Python library that provides bindings for transformer
    # models implemented in C/C++ using the GGML library
    # (note: one run hit FileNotFoundError 'D:\\NIVDIA\\development\\bin' here)
    llm = CTransformers(
        model='models/llama-2-7b-chat.ggmlv3.q3_K_S.bin',
        model_type='llama',
        config={'max_new_tokens': 256, 'temperature': 0.01}
    )

    # Template for building the PROMPT.
    template = """
    Write an email in a {style} style that includes the topic: {email_topic}.\n\nSender: {sender}\nRecipient: {recipient}
    \n\nEmail Text:
    
    """

    # Creating the final PROMPT
    prompt = PromptTemplate(
        input_variables=['style', 'email_topic', 'sender', 'recipient'],
        template=template
    )

    # Generating the response
    response = llm(prompt.format(
        email_topic=form_input, sender=email_sender, recipient=email_recipient, style=email_style
    ))

    return response


st.set_page_config(
    page_title='Generate Emails',
    page_icon='📧',
    layout='centered',
    initial_sidebar_state='collapsed'
)
st.header("Generate Emails 📧")

form_input = st.text_area('Enter the email topic', height=275)

# Creating columns for the UI -To receive inputs from user
col1, col2, col3 = st.columns([10, 10, 5])
with col1:
    email_sender = st.text_input("Sender Name")
with col2:
    email_recipient = st.text_input("Recipient Name")
with col3:
    email_style = st.selectbox(
        "Writing Style", ("Formal", "Appreciation", "Not Satisfied", "Neutral"), index=0
    )

submit = st.button("Generate")

# When 'Generate' button is clicked,execute the below code
if submit:
    st.write(getLLMResponse(form_input, email_sender, email_recipient, email_style))
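
The quantization comment inside getLLMResponse() above is the reason a 7B model fits and runs on a CPU here. As a rough illustration of the idea (a single per-tensor scale mapping float weights to int8 and back; this is only a sketch and not how GGML's quantization formats work internally):

import numpy as np

# Pretend these are one layer's float32 weights
weights = np.random.randn(4, 4).astype(np.float32)

# Per-tensor scale so the largest magnitude maps to 127
scale = np.abs(weights).max() / 127.0
q_weights = np.round(weights / scale).astype(np.int8)   # stored as 8-bit integers

# Dequantize on the fly when the layer is used
approx = q_weights.astype(np.float32) * scale
print("max abs error:", np.abs(weights - approx).max())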
