llama-cpp-python/examples/gradio_chat/server.py at main · XyLearningProgramming/llama-cpp-python

History

59 lines (44 loc) · 1.28 KB

Raw

import gradio as gr

from openai import OpenAI

# OpenAI-compatible client aimed at an endpoint on localhost:8000.
# NOTE(review): presumably a locally running llama-cpp-python server, with the
# API key acting as a placeholder rather than a real secret — confirm.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="llama.cpp")

# Model name sent with every chat completion request made by `predict`.
model = "gpt-3.5-turbo"

def predict(message, history):
    """Stream the assistant's reply to ``message`` as progressively longer text.

    Args:
        message: The newest user message.
        history: Earlier turns as ``(user_message, assistant_message)`` pairs.

    Yields:
        The reply accumulated so far, once for every streamed chunk that
        carries text.
    """
    messages = []
    for user_message, assistant_message in history:
        # A turn may have a missing side (None); skip it rather than send a
        # message with null content to the server.
        if user_message is not None:
            messages.append({"role": "user", "content": user_message})
        if assistant_message is not None:
            messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model, messages=messages, stream=True
    )

    text = ""
    for chunk in response:
        # Streamed chunks are not guaranteed to contain a choice (e.g. a
        # usage-only chunk), so guard the index instead of raising IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text

# JavaScript function handed to gr.Blocks(js=...): reloads the page with the
# `__theme=dark` query parameter unless it is already set. The URL API is used
# so that existing query parameters and fragments are preserved, and no
# implicit global variable is created.
js = """function () {
    const url = new URL(window.location.href);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.replace(url.toString());
    }
}"""

# Extra CSS handed to gr.Blocks(css=...): hides the page footer and sets a
# 100% height rule.
# NOTE(review): `full-height` is written as an element (type) selector; if a
# CSS class was intended it would need to be `.full-height` — confirm.
css = """

footer {

visibility: hidden;

}

full-height {

height: 100%;

}

"""

# Build the UI: one chat interface that fills the page and streams its replies
# from `predict`.
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    demo.launch()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

server.py

Latest commit

History

server.py

File metadata and controls