# TBgen_App/LLM_call.py
"""
Description : LLM-call utilities: functions for calling GPT/Claude/Qwen models and for running the model in a ChatGPT-style interactive mode
Author : Ruidi Qiu (ruidi.qiu@tum.de)
Time : 2023/11/17 15:01:06
LastEdited : 2024/9/3 16:52:31
"""
from config import Config
from openai import OpenAI
from anthropic import Anthropic
import loader_saver as ls
from utils.utils import Timer
import tiktoken
import copy
import re
import requests
import json
import httpx
import time
import openai
import os
from datetime import datetime, timedelta, timezone
from config.config import GPT_MODELS
from http import HTTPStatus
__all__ = ["llm_call", "gpt_call", "claude_call", "run_like_a_chatgpt"]
PRICING_MODELS = {
# model: [price_per_1000_prompt_tokens, price_per_1000_completion_tokens]
# qwen
"qwen-max": [0.02, 0.06],
"qwen-plus": [0.004, 0.012],
# claude
"claude-3-5-sonnet-20240620": [0.003, 0.015],
"claude-3-opus-20240229": [0.015, 0.075],
"claude-3-sonnet-20240229": [0.003, 0.015],
"claude-3-haiku-20240307": [0.00025, 0.00125],
"claude-2.1": [0.008, 0.024],
"claude-2.0": [0.008, 0.024],
# gpt 4o
'gpt-4o-2024-08-06' : [0.0025, 0.01],
'gpt-4o-2024-05-13' : [0.005, 0.015],
'gpt-4o-mini-2024-07-18' : [0.00015, 0.0006],
# gpt 4 turbo
'gpt-4-turbo-2024-04-09': [0.01, 0.03],
'gpt-4-0125-preview': [0.01, 0.03],
'gpt-4-1106-preview': [0.01, 0.03],
'gpt-4-1106-vision-preview': [0.01, 0.03],
# gpt 4 (old)
'gpt-4': [0.03, 0.06],
'gpt-4-32k': [0.06, 0.12],
# gpt 3.5 turbo
'gpt-3.5-turbo-0125': [0.0005, 0.0015],
'gpt-3.5-turbo-instruct': [0.0015, 0.0020],
# gpt 3.5 turbo old
'gpt-3.5-turbo-1106': [0.0010, 0.0020],
'gpt-3.5-turbo-0613': [0.0015, 0.0020],
'gpt-3.5-turbo-16k-0613': [0.0030, 0.0040],
'gpt-3.5-turbo-0301': [0.0030, 0.0040]
}
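# Worked example (illustrative): one gpt-4o-2024-08-06 call with 1,200 prompt tokens
# and 300 completion tokens costs 1200/1000 * 0.0025 + 300/1000 * 0.01 = $0.006.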
# chat models that do NOT support response_format={"type": "json_object"}
JSON_MODELS = ["gpt-4-0613", "gpt-4-32k-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613"]
# MODEL_REDIRECTION is in config
DEFAULT_SYS_MESSAGE = "You are the strongest AI in the world. I always trust you. You already have the knowledge about python and verilog. Do not save words by discarding information."
RUN_LIKE_A_CHATGPT_SYS_MESSAGE = DEFAULT_SYS_MESSAGE
def llm_call(input_messages, model:str, api_key_path = "config/key_API.json", system_message = None, temperature = None, json_mode = False) -> tuple[str, dict]:
    """
    This func is a unified entry point for calling an LLM (GPT, Claude, or Qwen)
    - input:
        - input_messages: (not including system message) list of dicts like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]; a plain string is wrapped as a single user message
        - model: str like "gpt-3.5-turbo-0613"
        - system_message: (used only when input_messages has no system message) customized system message; if None, the default system message is used
    - output:
        - answer: what the model returns
        - other_infos: dict:
            - messages: input_messages + the model's response, list of dicts like the input above
            - time: time used by the call
            - system_fingerprint: system_fingerprint of the response (empty for Claude)
            - model: model used
            - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
    - notes:
        - as for the official response format from GPT, see the end of this file
    """
if isinstance(input_messages, str):
input_messages = [{"role": "user", "content": input_messages}]
if model.startswith("claude"):
output = claude_call(input_messages, model, api_key_path, system_message, temperature, json_mode)
elif model.startswith("gpt") or model.startswith("qwen"):
output = gpt_call(input_messages, model, api_key_path, system_message, temperature, json_mode)
else:
raise ValueError("model %s is not supported."%(model))
    usage = output[1].get("usage")
    if usage:  # gpt_call returns ("", {}) when the key cannot be loaded, so guard the lookup
        llm_manager.update_usage(usage["prompt_tokens"], usage["completion_tokens"], model)
    return output
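# Minimal usage sketch (assumes config/key_API.json holds a valid key):
#   answer, infos = llm_call("Write a 4-bit counter in Verilog.", model="gpt-4o-mini-2024-07-18")
#   print(answer)
#   print(infos["usage"])  # e.g. {"completion_tokens": ..., "prompt_tokens": ..., "total_tokens": ...}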
def gpt_call(input_messages, model, api_key_path, system_message=None, temperature=None, json_mode=False):
    """
    GPT call with timeout protection (5 min connect, 30 min read)
    """
    # 1. System message handling (same logic as before)
has_sysmessage = False
for message in input_messages:
if message["role"] == "system":
has_sysmessage = True
break
    if not has_sysmessage:
        sys_content = system_message if system_message is not None else "You are a helpful assistant."
        # [fix] make sure sys_content is a string
        if not isinstance(sys_content, str):
            print(f" [Warning] system_message is not a string, got type: {type(sys_content)}")
            sys_content = str(sys_content)
        messages = [{"role": "system", "content": sys_content}]
    else:
        messages = []
    messages.extend(input_messages)
    # [new] validate the message format
    for i, msg in enumerate(messages):
        if not isinstance(msg.get("content"), str):
            print(f" [Error] messages[{i}]['content'] is not a string!")
            print(f" Type: {type(msg.get('content'))}")
            print(f" Value: {msg.get('content')}")
            # fix: coerce to string
            messages[i]["content"] = str(msg.get("content"))
    # 2. Parameter preparation (same logic as before)
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    if json_mode and "gpt" in model:  # Qwen sometimes rejects json_object, so only enable it for GPT models
        more_completion_kwargs["response_format"] = {"type": "json_object"}
    # 3. Client initialization (rebuilt here so httpx timeouts can be injected)
    api_key = ""
    base_url = None
    # === Load the key manually (a custom client is needed to configure timeouts) ===
    try:
        with open(api_key_path, 'r') as f:
            keys = json.load(f)
        if model.startswith("qwen"):
            api_key = keys.get("dashscope")  # the key file must contain a "dashscope" entry
            base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
        else:
            api_key = keys.get("openai") or keys.get("gpt")  # accept either an "openai" or a "gpt" key name
        if not api_key:
            print(f"❌ [Error] API Key not found in {api_key_path} for model {model}!")
            return "", {}
    except Exception as e:
        print(f"❌ [Error] Failed to load API Key: {e}")
        return "", {}
    # === Configure timeouts ===
    http_client = httpx.Client(
        timeout=httpx.Timeout(
            connect=300.0,  # give up on connecting after 5 minutes, then retry
            read=1800.0,    # allow up to 30 minutes of generation before timing out
            write=60.0,
            pool=60.0
        )
    )
client = openai.OpenAI(
api_key=api_key,
base_url=base_url,
http_client=http_client
)
    # 4. Call loop with retries
    MAX_RETRIES = 5
    answer = ""
    system_fingerprint = ""
    usage = {}
    time_used = 0.0
for attempt in range(MAX_RETRIES):
try:
with Timer(print_en=False) as gpt_response:
completion = client.chat.completions.create(
model=model,
messages=messages,
**more_completion_kwargs
)
answer = completion.choices[0].message.content
system_fingerprint = completion.system_fingerprint
usage = {
"completion_tokens": completion.usage.completion_tokens,
"prompt_tokens": completion.usage.prompt_tokens,
"total_tokens": completion.usage.total_tokens
}
time_used = round(gpt_response.interval, 2)
            # debug print to confirm success
            if answer:
                print(f" [LLM] Success. Time: {time_used}s. Length: {len(answer)}")
            break  # success, leave the retry loop
except httpx.ConnectTimeout:
print(f"[Timeout] Connection failed (>5min). Retrying {attempt+1}/{MAX_RETRIES}...")
time.sleep(5)
except httpx.ReadTimeout:
print(f" [Timeout] Generation too slow (>30min). Retrying {attempt+1}/{MAX_RETRIES}...")
time.sleep(5)
        except Exception as e:
            error_msg = str(e)
            if 'RequestTimeOut' in error_msg or '500' in error_msg:
                wait_time = 15 * (attempt + 1)  # longer backoff for server-side timeouts
                print(f"🔄 [Server Timeout] API server busy. Retrying...")
                time.sleep(wait_time)
            else:
                wait_time = 5 * (attempt + 1)  # linear backoff for other errors
                print(f"⚠️ [Error] Attempt {attempt+1} failed: {e}")
                time.sleep(wait_time)
    # 5. Return results (same format as before)
    if answer:
        messages.append({"role": "assistant", "content": answer})
    else:
        # fallback on failure so downstream code doesn't crash on missing usage keys
        usage = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}
other_infos = {
"messages": messages,
"time": time_used,
"system_fingerprint": system_fingerprint,
"model": model,
"usage": usage
}
return answer, other_infos
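# Illustrative direct call (assumes config/key_API.json contains an "openai" entry;
# normally you would go through llm_call instead):
#   answer, infos = gpt_call([{"role": "user", "content": "ping"}],
#                            "gpt-4o-mini-2024-07-18", "config/key_API.json")
#   if not answer: print("call failed after retries")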
def claude_call(input_messages, model, api_key_path, system_message = None, temperature = None, json_mode = False):
    """
    This func is used to call Claude
    #### input:
    - input_messages: (not including system message) list of dicts like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
    - model: str like "claude-3-5-sonnet-20240620"
    - system_message: (used only when input_messages has no system message) customized system message; if None, the default system message is used
    #### output:
    - answer: what the model returns
    - other_infos: dict:
        - messages: input_messages + the model's response, list of dicts like the input above
        - time: time used by the call
        - system_fingerprint: always "" (Anthropic responses carry no system_fingerprint)
        - model: model used
        - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
    #### notes:
    as for the official response format, see the end of this file
    """
    client = enter_api_key(api_key_path, provider="anthropic")
    prefill = None
    # system message: the Anthropic API does not accept {"role": "system"} entries
    # in `messages`, so collect the system content and pass it via the `system` kwarg
    has_sysmessage = False
    system_content = None
    for message in input_messages:
        if message["role"] == "system":
            has_sysmessage = True
            system_content = message["content"]
            break
    if not has_sysmessage:
        system_content = DEFAULT_SYS_MESSAGE if system_message is None else system_message
    # drop system entries with a filter; calling list.remove() inside a for-loop skips elements
    messages = [m for m in input_messages if m["role"] != "system"]
# other parameters
more_completion_kwargs = {}
if temperature is not None:
more_completion_kwargs["temperature"] = temperature
if json_mode:
messages[-1]["content"] += "\nYour reply should be in JSON format."
prefill = {"role": "assistant", "content": "{"}
messages.append(prefill)
# call claude
if model == "claude-3-5-sonnet-20240620":
max_tokens = 8192
else:
max_tokens = 4096
    with Timer(print_en=False) as gpt_response:
        completion = client.messages.create(
            max_tokens=max_tokens,
            model=model,
            system=system_content,
            messages=messages,
            **more_completion_kwargs
        )
    answer = completion.content[0].text
    if prefill is not None:
        answer = prefill["content"] + answer
        messages.pop()  # replace the "{" prefill stub with the full reply below
    messages.append({"role": "assistant", "content": answer})
    time_used = round(gpt_response.interval, 2)  # renamed from `time` to avoid shadowing the time module
    system_fingerprint = ""  # Anthropic responses carry no system_fingerprint
    usage = {"completion_tokens": completion.usage.output_tokens, "prompt_tokens": completion.usage.input_tokens, "total_tokens": completion.usage.input_tokens + completion.usage.output_tokens}
    other_infos = {"messages": messages, "time": time_used, "system_fingerprint": system_fingerprint, "model": model, "usage": usage}
    return answer, other_infos
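# Illustrative call (assumes an ANTHROPIC_API_KEY entry in the key file): with
# json_mode=True the assistant turn is prefilled with "{", so `answer` is the
# prefill plus the completion and should parse as JSON:
#   answer, infos = claude_call([{"role": "user", "content": "Reply with {\"ok\": true}"}],
#                               "claude-3-haiku-20240307", "config/key_API.json", json_mode=True)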
def run_like_a_chatgpt():
config = Config()
gpt_model = config.gpt.model
gpt_key_path = config.gpt.key_path
if config.gpt.chatgpt.start_form == 'prompt':
preloaded_prompt = ls.load_txt(config.load.prompt.path)
else:
preloaded_prompt = None
if gpt_model.startswith("gpt"):
llm_name = "ChatGPT"
elif gpt_model.startswith("claude"):
llm_name = "Claude"
else:
llm_name = "LLM"
messages = [{"role": "system", "content": RUN_LIKE_A_CHATGPT_SYS_MESSAGE}]
response_data_dicts = [] # this is to record other data of gpt's response like seed and time
while True:
# load prompt
if preloaded_prompt is not None:
content = preloaded_prompt
preloaded_prompt = None
print("User (preloaded prompt): %s"%(content))
ls.save_log_line("(the first user message is from preloaded prompt)", config)
else:
content = input("User: ")
# break loop
if content in ["exit", "quit", "break", "", None]:
break
# send prompt to gpt
messages.append({"role": "user", "content": content})
# run gpt
answer, other_infos = llm_call(
input_messages = messages,
model = gpt_model,
api_key_path = gpt_key_path,
system_message = RUN_LIKE_A_CHATGPT_SYS_MESSAGE,
temperature = config.gpt.temperature
)
    messages, time_used, system_fingerprint = other_infos["messages"], other_infos["time"], other_infos["system_fingerprint"]
    # get data from response (renamed `time` -> `time_used` to avoid shadowing the time module)
    data_dict = {}
    data_dict["system_fingerprint"] = system_fingerprint
    data_dict["model"] = gpt_model
    data_dict["time"] = time_used
    response_data_dicts.append(data_dict)
    # print
    print(f'{llm_name}: {answer}')
    print("(%ss used)" % (time_used))
if config.gpt.chatgpt.one_time_talk:
break
messages_plus = gen_messages_more_info(messages, response_data_dicts)
if config.save.log.en:
ls.save_messages_to_log(messages_plus, config)
if config.save.message.en:
ls.gpt_message_individual_save(messages, config, file_name="messages")
ls.gpt_message_individual_save(messages_plus, config, file_name="messages_plus")
def enter_api_key(api_key_path, provider="openai"):
    if provider == "openai":
        # 1. read the key from the key file
        key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"]
        # 2. important: take the base URL from the environment;
        #    without this, requests go to openai.com and fail with 401 when a proxy endpoint is expected
        base_url = os.environ.get("OPENAI_BASE_URL")
        if base_url:
            print(f"DEBUG: Connecting to {base_url}...")  # debug print showing which endpoint is used
            client = OpenAI(api_key=key, base_url=base_url)
        else:
            client = OpenAI(api_key=key)
    elif provider == "anthropic":
        # the Claude branch is unchanged
        key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"]
        client = Anthropic(api_key=key)
    else:
        raise ValueError("provider %s is not supported."%(provider))
    return client
def gen_messages_more_info(original_messages, response_data_dicts):
# additional info only at: role = "assistant"
messages = copy.deepcopy(original_messages)
idx_response = 0
for i in range(len(messages)):
if messages[i]["role"] == "assistant":
# messages[i].extend(response_data_dicts[idx_response]) # wrong syntax
messages[i] = {**messages[i], **response_data_dicts[idx_response]}
idx_response += 1
# add idx to each message
for i in range(len(messages)):
messages[i]["idx"] = i
return messages
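# Example: with response_data_dicts = [{"model": "gpt-4", "time": 1.2}], each assistant
# entry such as {"role": "assistant", "content": "hi"} becomes
# {"role": "assistant", "content": "hi", "model": "gpt-4", "time": 1.2, "idx": <its position>}.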
def dalle3():
    """
    This function hasn't been packaged properly yet.
    Note: Microsoft Copilot now offers DALL-E 3 generation for free.
    """
def download_image(url, folder_path):
response = requests.get(url)
file_path = os.path.join(folder_path, os.path.basename(url))
with open(file_path, "wb") as file:
file.write(response.content)
return file_path
model_name = "dall-e-3"
image_size = "1024x1024" # 1792x1024, 1024x1024, 1024x1792
download_folder = r"saves/dalle3/"
os.makedirs(download_folder, exist_ok=True)
while True:
name = input("please name the generated figure (\"exit\" to exit): ")
if name == "exit":
break
prompt = input("please input the prompt(\"exit\" to exit): ")
if prompt == "exit":
break
try:
# num_images = int(input("please input the number of figures (default=1)") or "1")
num_images = 1
print("generating your figure...")
client = enter_api_key('config/key_API.json')
response = client.images.generate(
model=model_name,
prompt=prompt,
size=image_size,
quality="standard",
n=num_images,
)
image_url = response.data[0].url
# the name should end with .png
file_name = name + ".png"
file_path = download_image(image_url, download_folder)
new_file_path = os.path.join(download_folder, file_name)
os.rename(file_path, new_file_path)
print("figure was downloaded to %s" %(new_file_path))
except requests.exceptions.HTTPError as err:
print("Request Error: ", err.response.text)
except Exception as e:
print("Error: ", str(e))
############### utils of gpt ###############
def num_tokens_from_string(string: str, model_name="gpt-4") -> int:
"""
Returns the number of tokens in a single text string.
https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
"""
try:
encoding = tiktoken.encoding_for_model(model_name)
except KeyError:
encoding = tiktoken.get_encoding("cl100k_base")
num_tokens = len(encoding.encode(string))
return num_tokens
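# e.g. num_tokens_from_string("hello world", "gpt-4") should return 2
# ("hello" and " world" are one token each under cl100k_base).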
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
"""Returns the number of tokens used by a list of messages."""
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
encoding = tiktoken.get_encoding("cl100k_base")
    if model in PRICING_MODELS.keys():  # the pricing table doubles as the list of known chat models
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens -= 1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens
else:
raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
def extract_code(text, code_type):
    """
    [enhanced] Handles fenced blocks with a language tag, without one, and replies that
    mix code with prose. Note: code_type is currently unused; any fenced block matches.
    """
    # print the raw reply so you can see in the terminal exactly what the model returned (debugging aid)
    print("\n[DEBUG] Raw LLM Response:\n", text)
    print("-" * 30)
    # 1. Try to match Markdown code blocks (``` ... ```).
    # Regex: a ``` opener, an optional language name (e.g. verilog), the content, then ```
    pattern = r"```(?:\w+)?\s*(.*?)```"
    matches = re.findall(pattern, text, re.DOTALL)
    if matches:
        # if fenced blocks were found, return only their contents
        return matches
    # 2. No Markdown fences: the reply may be bare code that still contains "module",
    #    so try to extract just the module ... endmodule span (simple version)
    if "module" in text and "endmodule" in text:
        # crude but effective fallback: slice from the first "module" to the last "endmodule"
        start_idx = text.find("module")
        end_idx = text.rfind("endmodule") + len("endmodule")
        clean_code = text[start_idx:end_idx]
        return [clean_code]
    # 3. Last resort: return the raw text (previous behavior)
    return [text]
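# Example: extract_code("Sure:\n```verilog\nmodule t; endmodule\n```", "verilog")
# returns ["module t; endmodule\n"]; a bare reply like "module t; ... endmodule"
# is instead sliced from the first "module" to the last "endmodule".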
def get_dict_from_gpt_json(gpt_json_string):
    """
    - parse a Python dict out of a GPT JSON reply, stripping any Markdown fences first
    """
    gpt_json_string = gpt_json_string.replace("```json", "").replace("```", "").strip()
    print(gpt_json_string)  # debug: show the cleaned JSON before parsing
    return json.loads(gpt_json_string)
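# e.g. get_dict_from_gpt_json('```json\n{"a": 1}\n```') returns {"a": 1}.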
def cost_calculator(usages:list, model="gpt-4-0125-preview"):
    """
    - calculate the total cost (in USD) of a list of LLM usages
    - usages: list of dicts, [{"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}, ...]
    """
if model not in PRICING_MODELS:
raise ValueError(f"model {model} is not supported in the pricing calculator.")
price = 0
for usage in usages:
price += usage["prompt_tokens"] * PRICING_MODELS[model][0] / 1000.0 + usage["completion_tokens"] * PRICING_MODELS[model][1] / 1000.0
return price
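# e.g. cost_calculator([{"completion_tokens": 300, "prompt_tokens": 1200, "total_tokens": 1500}],
#                      model="gpt-4-0125-preview")
# = 1200/1000 * 0.01 + 300/1000 * 0.03 = $0.021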
def message_to_conversation(messages):
    """
    - convert a message list into a printable conversation string
    """
conversation = ""
for message in messages:
if message["role"] == "system":
conversation += "############################## conversation begin ##############################\n"
conversation += '########## %s ##########\n%s\n\n' % (message['role'], message['content'])
return conversation
class LLM_Manager:
_instance = None
_initialized = False
def __new__(cls, *args, **kwargs):
if not cls._instance:
cls._instance = super(LLM_Manager, cls).__new__(cls)
return cls._instance
    def __init__(self, api_key="config/key_API.json") -> None:
        if not self._initialized:
            # total
            self.tokens_in_total = 0
            self.tokens_out_total = 0
            self.tokens_both_total = 0
            self.cost_total = 0
            # section
            self.tokens_in_section = 0
            self.tokens_out_section = 0
            self.tokens_both_section = 0
            self.cost_section = 0
            # per-model usage: {"model1": {...}, "model2": {...}, ...}
            self.usage_info = {}
            # chat
            self._llm_model_now = None
            self._temperature = None
            self.messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}]
            if not os.path.exists(api_key):
                raise ValueError(f"api key path {api_key} is not valid.")
            self._api_key_path = api_key
            self._initialized = True  # bug fix: without this, re-instantiating the singleton reset all counters
    def update_usage(self, tokens_in:int, tokens_out:int, model:str):
        prices = PRICING_MODELS.get(model, [0.0, 0.0])  # unknown models are counted but priced at zero
        cost = tokens_in * prices[0] / 1000.0 + tokens_out * prices[1] / 1000.0
# dict
if model not in self.usage_info.keys():
self.usage_info[model] = {"tokens_in": 0, "tokens_out": 0, "tokens_both": 0, "cost": 0}
self.usage_info[model]["tokens_in"] += tokens_in
self.usage_info[model]["tokens_out"] += tokens_out
self.usage_info[model]["tokens_both"] += tokens_in + tokens_out
self.usage_info[model]["cost"] += cost
# total
self.tokens_in_total += tokens_in
self.tokens_out_total += tokens_out
self.tokens_both_total += tokens_in + tokens_out
self.cost_total += cost
# section
self.tokens_in_section += tokens_in
self.tokens_out_section += tokens_out
self.tokens_both_section += tokens_in + tokens_out
self.cost_section += cost
def new_section(self):
"""
new usage section (only reset the tokens and cost of the section)
"""
self.tokens_in_section = 0
self.tokens_out_section = 0
self.tokens_both_section = 0
self.cost_section = 0
def set_model(self, model:str):
self._llm_model_now = model
def set_temperature(self, temperature:float):
self._temperature = temperature
def chat(self, prompt:str, clear_mem:bool=False, model:str=None, temperature:float=None, sys_prompt:str=DEFAULT_SYS_MESSAGE)->str:
model = self._llm_model_now if model is None else model
temperature = self._temperature if temperature is None else temperature
if clear_mem:
self.messages = [{"role": "system", "content": sys_prompt}]
self.messages.append({"role": "user", "content": prompt})
answer, other_infos = llm_call(
input_messages = self.messages,
model = model,
api_key_path = self._api_key_path,
temperature = temperature
) # usage already updated in llm_call
# update messages
self.messages = other_infos["messages"]
return answer
llm_manager = LLM_Manager()
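# Minimal usage sketch of the singleton (assumes a valid key file at config/key_API.json):
#   llm_manager.set_model("gpt-4o-mini-2024-07-18")
#   reply = llm_manager.chat("Summarize the SPI protocol in one sentence.")
#   print(llm_manager.cost_total, llm_manager.usage_info)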
if __name__ == "__main__":
# print("GPT_call.py does not support running as a main file.")
print('we are testing dalle3')
dalle3()
"""
(see more in https://platform.openai.com/docs/guides/text-generation/chat-completions-api)
An example Chat Completions API response looks as follows:
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "The 2020 World Series was played in Texas at Globe Life Field in Arlington.",
"role": "assistant"
}
}
],
"created": 1677664795,
"id": "chatcmpl-7QyqpwdfhqwajicIEznoc6Q47XAyW",
"model": "gpt-3.5-turbo-0613",
"object": "chat.completion",
"usage": {
"completion_tokens": 17,
"prompt_tokens": 57,
"total_tokens": 74
}
}
"""