Files
TBgen_App/LLM_call.py

799 lines
32 KiB
Python
Raw Normal View History

2026-03-30 16:46:48 +08:00
"""
Description : Helpers for calling LLM APIs (GPT, Qwen and Claude), including a function that runs an interactive ChatGPT-style session
Author : Ruidi Qiu (ruidi.qiu@tum.de)
Time : 2023/11/17 15:01:06
LastEdited : 2024/9/3 16:52:31
"""
from config import Config
from openai import OpenAI
from anthropic import Anthropic
import loader_saver as ls
from utils.utils import Timer
import tiktoken
import copy
import re
import requests
import json
import httpx
import time
# import Image
import openai
import os
from datetime import datetime, timedelta, timezone
from config.config import GPT_MODELS
from http import HTTPStatus
# Names exported by `from LLM_call import *`.
__all__ = ["llm_call", "gpt_call", "claude_call", "run_like_a_chatgpt"]
# Price table consulted by cost_calculator() and LLM_Manager.update_usage();
# num_tokens_from_messages() also uses membership in this table as its
# "supported model" check.
# NOTE(review): the currency of each entry is not stated here - confirm before
# summing costs across providers.
PRICING_MODELS = {
# model: [price_per_1000_prompt_tokens, price_per_1000_completion_tokens]
# qwen
"qwen-max": [0.02, 0.06],
"qwen-plus": [0.004, 0.012],
# claude
"claude-3-5-sonnet-20240620": [0.003, 0.015],
"claude-3-opus-20240229": [0.015, 0.075],
"claude-3-sonnet-20240229": [0.003, 0.015],
"claude-3-haiku-20240307": [0.00025, 0.00125],
"claude-2.1": [0.008, 0.024],
"claude-2.0": [0.008, 0.024],
# gpt 4o
'gpt-4o-2024-08-06' : [0.0025, 0.01],
'gpt-4o-2024-05-13' : [0.005, 0.015],
'gpt-4o-mini-2024-07-18' : [0.00015, 0.0006],
# gpt 4 turbo
'gpt-4-turbo-2024-04-09': [0.01, 0.03],
'gpt-4-0125-preview': [0.01, 0.03],
'gpt-4-1106-preview': [0.01, 0.03],
'gpt-4-1106-vision-preview': [0.01, 0.03],
# gpt 4 (old)
'gpt-4': [0.03, 0.06],
'gpt-4-32k': [0.06, 0.12],
# gpt 3.5 turbo
'gpt-3.5-turbo-0125': [0.0005, 0.0015],
'gpt-3.5-turbo-instruct': [0.0015, 0.0020],
# gpt 3.5 turbo old
'gpt-3.5-turbo-1106': [0.0010, 0.0020],
'gpt-3.5-turbo-0613': [0.0015, 0.0020],
'gpt-3.5-turbo-16k-0613': [0.0030, 0.0040],
'gpt-3.5-turbo-0301': [0.0030, 0.0040]
}
# Only the commented-out legacy gpt_call below consults this list; no live code
# in the visible part of this file reads it.
JSON_MODELS = ["gpt-4-0613", "gpt-4-32k-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613"]
# MODEL_REDIRECTION is in config
# Earlier, terser variant of the default system message, kept for reference:
# DEFAULT_SYS_MESSAGE = "You are the strongest AI in the world. I always trust you. Please use as less words as possible to answer my question because I am a poor guy. But do not save words by discarding information."
# System message used by claude_call() and LLM_Manager when the caller supplies none.
DEFAULT_SYS_MESSAGE = "You are the strongest AI in the world. I always trust you. You already have the knowledge about python and verilog. Do not save words by discarding information."
# The interactive session (run_like_a_chatgpt) currently shares the default system message.
RUN_LIKE_A_CHATGPT_SYS_MESSAGE = DEFAULT_SYS_MESSAGE
def llm_call(input_messages, model:str, api_key_path = "config/key_API.json", system_message = None, temperature = None, json_mode = False) -> tuple[str, dict]:
    """
    Send a chat request to the backend that serves `model` and record its token usage.

    - input:
        - input_messages: (not including system message) list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...];
          a plain str is accepted too and is wrapped as a single user message
        - model: str like "gpt-3.5-turbo-0613"; names starting with "claude" go to claude_call,
          names starting with "gpt" or "qwen" go to gpt_call
        - api_key_path: path of the JSON file holding the API keys
        - system_message: (valid when input_messages have no sys_message) customized system message, if None, the backend's default is used
        - temperature: sampling temperature, omitted from the request when None
        - json_mode: ask the backend for a JSON reply
    - output:
        - answer: what the model returns
        - other_infos: dict:
            - messages: input_messages + the model's response, list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
            - time: time used by the call
            - system_fingerprint: system_fingerprint of the response
            - model: model used
            - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
    - raises:
        - ValueError: the model name has none of the supported prefixes
    - notes:
        - usage is added to the module-level llm_manager; this step is skipped when the
          backend returns no usage (e.g. the request could not be sent at all)
        - as for the official response format from gpt, see the end of this file
    """
    if isinstance(input_messages, str):
        input_messages = [{"role": "user", "content": input_messages}]
    if model.startswith("claude"):
        backend = claude_call
    elif model.startswith(("gpt", "qwen")):
        backend = gpt_call
    else:
        raise ValueError("model %s is not supported."%(model))
    output = backend(input_messages, model, api_key_path, system_message, temperature, json_mode)
    # A backend may hand back an empty info dict on failure; do not crash on bookkeeping.
    usage = output[1].get("usage")
    if usage:
        llm_manager.update_usage(usage["prompt_tokens"], usage["completion_tokens"], model)
    return output
# def gpt_call(input_messages, model, api_key_path, system_message = None, temperature = None, json_mode = False):
# """
# This func is used to call gpt
# - input:
# - input_messages: (not including system message) list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
# - gpt_model: str like "gpt-3.5-turbo-0613"
# - system_message: (valid when input_messages have no sys_message) customized system message, if None, use default system message
# - output:
# - answer: what gpt returns
# - other_infos: dict:
# - messages: input_messages + gpt's response, list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
# - time: time used by gpt
# - system_fingerprint: system_fingerprint of gpt's response
# - model: model used by gpt
# - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
# - notes:
# - as for the official response format from gpt, see the end of this file
# """
# client = enter_api_key(api_key_path)
# # system message
# has_sysmessage = False
# for message in input_messages:
# if message["role"] == "system":
# has_sysmessage = True
# break
# if not has_sysmessage:
# if system_message is None:
# messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}]
# else:
# messages = [{"role": "system", "content": system_message}]
# else:
# messages = []
# messages.extend(input_messages)
# # other parameters
# more_completion_kwargs = {}
# if temperature is not None:
# more_completion_kwargs["temperature"] = temperature
# if json_mode:
# if not model in JSON_MODELS:
# more_completion_kwargs["response_format"] = {"type": "json_object"}
# # call gpt
# with Timer(print_en=False) as gpt_response:
# completion = client.chat.completions.create(
# model=model,
# messages=messages,
# **more_completion_kwargs
# )
# answer = completion.choices[0].message.content
# messages.append({"role": "assistant", "content": answer})
# time = round(gpt_response.interval, 2)
# system_fingerprint = completion.system_fingerprint
# usage = {"completion_tokens": completion.usage.completion_tokens, "prompt_tokens": completion.usage.prompt_tokens, "total_tokens": completion.usage.total_tokens}
# model = completion.model
# other_infos = {"messages": messages, "time": time, "system_fingerprint": system_fingerprint, "model": model, "usage": usage}
# # return answer, messages, time, system_fingerprint
# return answer, other_infos
def gpt_call(input_messages, model, api_key_path, system_message=None, temperature=None, json_mode=False):
    """
    Call an OpenAI-compatible chat endpoint with explicit timeouts and retries.

    Models whose name starts with "qwen" are sent to the DashScope
    OpenAI-compatible endpoint with the "dashscope" key; every other model uses
    the "openai" (or, failing that, "gpt") key and the SDK's default base URL.

    - input:
        - input_messages: list of dict like [{"role": "user", "content": "hello"}, ...];
          the dicts are copied, so the caller's objects are never modified
        - model: str, model name
        - api_key_path: path of the JSON file holding the API keys
        - system_message: used only when input_messages has no system message;
          if None, "You are a helpful assistant." is used
        - temperature: forwarded to the API when not None
        - json_mode: request a JSON object reply (only applied when "gpt" is in the model name)
    - output:
        - answer: str, the reply text ("" when the key could not be loaded or every attempt failed)
        - other_infos: dict with keys "messages", "time", "system_fingerprint", "model", "usage";
          always fully populated, "usage" is all zeros when the API reported none
    - notes:
        - timeouts: 5 min connect, 30 min read, 60 s write, 60 s pool
        - at most 5 attempts; the wait between attempts grows linearly with the attempt number
    """
    zero_usage = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}

    # 1. Build the outgoing message list (system message first when the caller gave none).
    has_sysmessage = any(message["role"] == "system" for message in input_messages)
    messages = []
    if not has_sysmessage:
        sys_content = system_message if system_message is not None else "You are a helpful assistant."
        # Make sure the system content is a string.
        if not isinstance(sys_content, str):
            print(f" [Warning] system_message is not string, got type: {type(sys_content)}")
            sys_content = str(sys_content)
        messages.append({"role": "system", "content": sys_content})
    # Shallow copies: the content coercion below must not leak into the caller's dicts.
    messages.extend(dict(message) for message in input_messages)
    # Validate the message format; non-string content is reported and converted.
    for i, msg in enumerate(messages):
        if not isinstance(msg.get("content"), str):
            print(f" [Error] messages[{i}]['content'] is not string!")
            print(f" Type: {type(msg.get('content'))}")
            print(f" Value: {msg.get('content')}")
            msg["content"] = str(msg.get("content"))

    # 2. Optional request parameters.
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    if json_mode and "gpt" in model:  # Qwen does not always support json_object
        more_completion_kwargs["response_format"] = {"type": "json_object"}

    # 3. Read the key by hand: the client is built here so the httpx timeouts can be injected.
    base_url = None
    api_key = ""
    try:
        with open(api_key_path, 'r', encoding="utf-8") as f:
            keys = json.load(f)
        if model.startswith("qwen"):
            api_key = keys.get("dashscope")
            base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
        else:
            api_key = keys.get("openai") or keys.get("gpt")  # accept either key name
        if not api_key:
            print(f"❌ [Error] API Key not found in {api_key_path} for model {model}!")
    except Exception as e:  # deliberate best-effort boundary: report and return a failure result
        print(f"❌ [Error] Failed to load API Key: {e}")
        api_key = ""
    if not api_key:
        # Same shape as a normal result so callers can index it safely.
        return "", {
            "messages": messages,
            "time": 0.0,
            "system_fingerprint": "",
            "model": model,
            "usage": dict(zero_usage),
        }

    # 4. Call loop.
    MAX_RETRIES = 5
    answer = ""
    system_fingerprint = ""
    usage = {}
    time_used = 0.0
    timeout = httpx.Timeout(
        connect=300.0,  # give up connecting after 5 minutes
        read=1800.0,    # allow up to 30 minutes for generation
        write=60.0,
        pool=60.0,
    )
    # The context manager closes the connection pool once the call is over.
    with httpx.Client(timeout=timeout) as http_client:
        client = openai.OpenAI(
            api_key=api_key,
            base_url=base_url,
            http_client=http_client
        )
        for attempt in range(MAX_RETRIES):
            try:
                with Timer(print_en=False) as gpt_response:
                    completion = client.chat.completions.create(
                        model=model,
                        messages=messages,
                        **more_completion_kwargs
                    )
                # Parse into temporaries first so a failure here cannot leave a
                # half-updated result behind for the next attempt.
                reply = completion.choices[0].message.content or ""
                fingerprint = completion.system_fingerprint
                if completion.usage is not None:
                    reported_usage = {
                        "completion_tokens": completion.usage.completion_tokens,
                        "prompt_tokens": completion.usage.prompt_tokens,
                        "total_tokens": completion.usage.total_tokens
                    }
                else:
                    reported_usage = dict(zero_usage)
                answer, system_fingerprint, usage = reply, fingerprint, reported_usage
                time_used = round(gpt_response.interval, 2)
                if answer:
                    print(f" [LLM] Success. Time: {time_used}s. Length: {len(answer)}")
                break  # the request went through; stop retrying
            # NOTE(review): the openai SDK may surface timeouts as its own exception
            # type, in which case the generic handler below runs instead - confirm.
            except httpx.ConnectTimeout:
                print(f"[Timeout] Connection failed (>5min). Retrying {attempt+1}/{MAX_RETRIES}...")
                wait_time = 5
            except httpx.ReadTimeout:
                print(f" [Timeout] Generation too slow (>30min). Retrying {attempt+1}/{MAX_RETRIES}...")
                wait_time = 5
            except Exception as e:
                error_msg = str(e)
                if 'RequestTimeOut' in error_msg or '500' in error_msg:
                    # server-side timeout / overload: back off longer
                    wait_time = 15 * (attempt + 1)
                    print("🔄 [Server Timeout] API server busy. Retrying...")
                else:
                    # linear back-off
                    wait_time = 5 * (attempt + 1)
                    print(f"⚠️ [Error] Attempt {attempt+1} failed: {e}")
            # No point in sleeping when there is no attempt left.
            if attempt < MAX_RETRIES - 1:
                time.sleep(wait_time)

    # 5. Assemble the result in the usual format.
    if answer:
        messages.append({"role": "assistant", "content": answer})
    if not usage:
        # Nothing was reported (every attempt failed): fall back to zeros so callers do not break.
        usage = dict(zero_usage)
    other_infos = {
        "messages": messages,
        "time": time_used,
        "system_fingerprint": system_fingerprint,
        "model": model,
        "usage": usage
    }
    return answer, other_infos
def claude_call(input_messages, model, api_key_path, system_message = None, temperature = None, json_mode = False):
    """
    This func is used to call claude through the Anthropic messages API
    #### input:
    - input_messages: list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...];
      the dicts are copied, so the caller's objects are never modified
    - model: str like "claude-3-5-sonnet-20240620"
    - api_key_path: path of the JSON file holding the API keys
    - system_message: (valid when input_messages have no sys_message) customized system message, if None, use default system message
    - temperature: forwarded to the API when not None
    - json_mode: append a JSON instruction to the last message and prefill the reply with "{"
    #### output:
    - answer: what claude returns (in json_mode the "{" prefill is put back in front)
    - other_infos: dict:
        - messages: system message + conversation + claude's response, list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
        - time: time used by the call
        - system_fingerprint: always "" for claude
        - model: model requested
        - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
    #### notes:
    - system messages are not sent as chat turns; their text is passed through the
      request's `system` parameter instead
    - the json_mode prefill turn is only part of the request, it is not kept in the
      returned messages
    """
    client = enter_api_key(api_key_path, provider="anthropic")
    # system message
    system_parts = [message["content"] for message in input_messages if message["role"] == "system"]
    messages = []
    if not system_parts:
        system_parts = [DEFAULT_SYS_MESSAGE if system_message is None else system_message]
        messages.append({"role": "system", "content": system_parts[0]})
    # Shallow copies: the json_mode edit below must not leak into the caller's dicts.
    messages.extend(dict(message) for message in input_messages)
    system_prompt = "\n\n".join(str(part) for part in system_parts)
    # The chat turns actually sent (same dict objects as in `messages`).
    conversation = [message for message in messages if message["role"] != "system"]
    # other parameters
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    prefill = None
    request_messages = list(conversation)
    if json_mode:
        conversation[-1]["content"] += "\nYour reply should be in JSON format."
        # Prefilling the assistant turn with "{" nudges the model to answer with a JSON object.
        prefill = {"role": "assistant", "content": "{"}
        request_messages.append(prefill)
    # call claude
    if model == "claude-3-5-sonnet-20240620":
        max_tokens = 8192
    else:
        max_tokens = 4096
    with Timer(print_en=False) as gpt_response:
        completion = client.messages.create(
            max_tokens=max_tokens,
            model=model,
            system=system_prompt,
            messages=request_messages,
            **more_completion_kwargs
        )
    answer = completion.content[0].text
    if prefill is not None:
        answer = prefill["content"] + answer
    messages.append({"role": "assistant", "content": answer})
    time_used = round(gpt_response.interval, 2)
    system_fingerprint = ""
    usage = {"completion_tokens": completion.usage.output_tokens, "prompt_tokens": completion.usage.input_tokens, "total_tokens": completion.usage.input_tokens + completion.usage.output_tokens}
    other_infos = {"messages": messages, "time": time_used, "system_fingerprint": system_fingerprint, "model": model, "usage": usage}
    return answer, other_infos
def run_like_a_chatgpt():
    """
    Run an interactive chat session in the terminal.

    The model, key path, temperature and save options all come from Config().
    The first user message may be preloaded from the prompt file; afterwards the
    user is asked for input until an exit word (or an empty line) is entered, or
    after one round when one_time_talk is set. The conversation is then logged
    and/or saved according to the config.
    """
    config = Config()
    gpt_model = config.gpt.model
    gpt_key_path = config.gpt.key_path
    preloaded_prompt = None
    if config.gpt.chatgpt.start_form == 'prompt':
        preloaded_prompt = ls.load_txt(config.load.prompt.path)
    # display name used when printing the reply
    llm_name = "LLM"
    if gpt_model.startswith("gpt"):
        llm_name = "ChatGPT"
    elif gpt_model.startswith("claude"):
        llm_name = "Claude"
    messages = [{"role": "system", "content": RUN_LIKE_A_CHATGPT_SYS_MESSAGE}]
    # one entry per reply: extra response data such as fingerprint and time
    response_data_dicts = []
    exit_words = ["exit", "quit", "break", "", None]
    while True:
        # get the next user message
        if preloaded_prompt is None:
            content = input("User: ")
        else:
            content, preloaded_prompt = preloaded_prompt, None
            print("User (preloaded prompt): %s"%(content))
            ls.save_log_line("(the first user message is from preloaded prompt)", config)
        # leave the session
        if content in exit_words:
            break
        # ask the model
        messages.append({"role": "user", "content": content})
        answer, other_infos = llm_call(
            input_messages = messages,
            model = gpt_model,
            api_key_path = gpt_key_path,
            system_message = RUN_LIKE_A_CHATGPT_SYS_MESSAGE,
            temperature = config.gpt.temperature
        )
        messages = other_infos["messages"]
        elapsed = other_infos["time"]
        # keep the response metadata
        response_data_dicts.append({
            "system_fingerprint": other_infos["system_fingerprint"],
            "model": gpt_model,
            "time": elapsed,
        })
        # show the reply
        print(f'{llm_name}: {answer}')
        print("(%ss used)" % (elapsed))
        if config.gpt.chatgpt.one_time_talk:
            break
    messages_plus = gen_messages_more_info(messages, response_data_dicts)
    if config.save.log.en:
        ls.save_messages_to_log(messages_plus, config)
    if config.save.message.en:
        ls.gpt_message_individual_save(messages, config, file_name="messages")
        ls.gpt_message_individual_save(messages_plus, config, file_name="messages_plus")
# def enter_api_key(api_key_path, provider="openai"):
# if provider == "openai":
# key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"]
# client = OpenAI(api_key=key)
# elif provider == "anthropic":
# key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"]
# client = Anthropic(api_key=key)
# else:
# raise ValueError("provider %s is not supported."%(provider))
# return client
def enter_api_key(api_key_path, provider="openai"):
    """
    Build an API client for `provider` from the key file at `api_key_path`.

    - "openai": reads "OPENAI_API_KEY"; when the OPENAI_BASE_URL environment
      variable is set (non-empty) the client is pointed at that URL
    - "anthropic": reads "ANTHROPIC_API_KEY"
    - anything else raises ValueError
    """
    if provider == "anthropic":
        key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"]
        return Anthropic(api_key=key)
    if provider != "openai":
        raise ValueError("provider %s is not supported."%(provider))
    key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"]
    # Without a base URL the SDK talks to its default endpoint.
    base_url = os.environ.get("OPENAI_BASE_URL")
    if not base_url:
        return OpenAI(api_key=key)
    print(f"DEBUG: Connecting to {base_url}...")  # printed to ease debugging
    return OpenAI(api_key=key, base_url=base_url)
def gen_messages_more_info(original_messages, response_data_dicts):
    """
    Return a deep copy of the messages enriched with response data and an index.

    The k-th assistant message is merged with the k-th entry of
    response_data_dicts; every message then gets an "idx" key holding its
    position. The input list is left untouched.
    """
    enriched = copy.deepcopy(original_messages)
    next_response = 0
    for position, message in enumerate(enriched):
        # additional info only at: role = "assistant"
        if message["role"] == "assistant":
            enriched[position] = {**message, **response_data_dicts[next_response]}
            next_response += 1
        # index is written last so it always reflects the position
        enriched[position]["idx"] = position
    return enriched
def dalle3():
    """
    Interactive DALL-E 3 helper: ask for a figure name and a prompt, generate one
    image and save it as saves/dalle3/<name>.png. Entering "exit" at either
    question ends the loop.

    This function hasn't been well packaged
    now we have the free dalle3 application: microsoft - copilot
    """
    def download_image(url, file_path):
        # Fetch `url` and write the bytes to `file_path`.
        # The timeout keeps a stalled download from hanging the session, and
        # raise_for_status() turns an HTTP error into an exception so an error
        # page is never saved as an image.
        response = requests.get(url, timeout=120)
        response.raise_for_status()
        with open(file_path, "wb") as file:
            file.write(response.content)
        return file_path
    model_name = "dall-e-3"
    image_size = "1024x1024" # 1792x1024, 1024x1024, 1024x1792
    download_folder = r"saves/dalle3/"
    os.makedirs(download_folder, exist_ok=True)
    while True:
        name = input("please name the generated figure (\"exit\" to exit): ")
        if name == "exit":
            break
        prompt = input("please input the prompt(\"exit\" to exit): ")
        if prompt == "exit":
            break
        try:
            num_images = 1
            print("generating your figure...")
            client = enter_api_key('config/key_API.json')
            response = client.images.generate(
                model=model_name,
                prompt=prompt,
                size=image_size,
                quality="standard",
                n=num_images,
            )
            image_url = response.data[0].url
            # Save straight to the requested name (always ending in .png); the URL's
            # own basename may carry a query string and is not a usable file name.
            new_file_path = os.path.join(download_folder, name + ".png")
            download_image(image_url, new_file_path)
            print("figure was downloaded to %s" %(new_file_path))
        except requests.exceptions.HTTPError as err:
            print("Request Error: ", err.response.text)
        except Exception as e:
            print("Error: ", str(e))
############### utils of gpt ###############
def num_tokens_from_string(string: str, model_name="gpt-4") -> int:
    """
    Count the tokens of one text string with the tokenizer of `model_name`.

    Falls back to the "cl100k_base" encoding when tiktoken does not know the model.
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model_name)
    except KeyError:
        tokenizer = tiktoken.get_encoding("cl100k_base")
    return len(tokenizer.encode(string))
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """
    Return the number of tokens used by a list of messages.

    Only models listed in PRICING_MODELS are accepted; any other model raises
    NotImplementedError. The tokenizer falls back to "cl100k_base" when tiktoken
    does not know the model.
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model)
    except KeyError:
        tokenizer = tiktoken.get_encoding("cl100k_base")
    if model not in PRICING_MODELS.keys():
        raise NotImplementedError(
            f"num_tokens_from_messages() is not presently implemented for model {model}.\n"
            "See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."
        )
    total = 0
    for message in messages:
        total += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        for key, value in message.items():
            total += len(tokenizer.encode(value))
            if key == "name":  # if there's a name, the role is omitted
                total -= 1  # role is always required and always 1 token
    total += 2  # every reply is primed with <im_start>assistant
    return total
# def extract_code(text, code_type):
# """
# #### function:
# - extract code from text
# #### input:
# - text: str, gpt's response
# - code_type: str, like "verilog"
# #### output:
# - list of found code blocks
# """
# code_type = code_type.lower()
# start = "```" + code_type
# end = "```"
# verilog_blocks = re.findall(start + r'\s*(.*?)'+ end, text, re.DOTALL)
# if verilog_blocks:
# return verilog_blocks
# else:
# # return [""]
# return [text]
def extract_code(text, code_type):
    """
    Extract code blocks from an LLM reply and return them as a list of str.

    Handles replies with a language tag on the fence, without one, and replies
    with explanatory prose around the code. `code_type` is accepted for interface
    compatibility but is not consulted. Lookup order:
    1. every Markdown fenced block (``` ... ```), with or without a language tag
    2. the span from the first "module" to the last "endmodule"
    3. the whole reply, unchanged
    """
    # Print the raw reply so it can be inspected directly in the terminal (debug aid).
    print("\n[DEBUG] Raw LLM Response:\n", text)
    print("-" * 30)
    # Opening fence, optional language name, then the content up to the closing fence.
    fence_pattern = re.compile(r"```(?:\w+)?\s*(.*?)```", re.DOTALL)
    fenced_blocks = fence_pattern.findall(text)
    if fenced_blocks:
        # Only the content of the fenced blocks is returned.
        return fenced_blocks
    if "module" not in text or "endmodule" not in text:
        # Nothing recognisable: hand back the raw reply.
        return [text]
    # Crude fallback for unfenced code: cut from the first "module" to the end of
    # the last "endmodule".
    first_module = text.find("module")
    after_last_endmodule = text.rfind("endmodule") + len("endmodule")
    return [text[first_module:after_last_endmodule]]
def get_dict_from_gpt_json(gpt_json_string):
    """
    - parse the JSON reply of an LLM into a python object
    - Markdown fences ("```json" and "```") are removed and the text is stripped first
    - the cleaned text is printed before parsing
    """
    cleaned = gpt_json_string
    for fence in ("```json", "```"):
        cleaned = cleaned.replace(fence, "")
    cleaned = cleaned.strip()
    print(cleaned)
    return json.loads(cleaned)
def cost_calculator(usages:list, model="gpt-4-0125-preview"):
    """
    - compute the total price of a list of usages for one model
    - usages: list of dicts, [{"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}, ...]
    - model: must be a key of PRICING_MODELS, otherwise ValueError is raised
    - returns the summed price (0 for an empty list)
    """
    if model not in PRICING_MODELS:
        raise ValueError(f"model {model} is not supported in the pricing calculator.")
    rates = PRICING_MODELS[model]
    prompt_rate = rates[0]
    completion_rate = rates[1]
    total_price = 0
    for record in usages:
        # rates are per 1000 tokens
        total_price += record["prompt_tokens"] * prompt_rate / 1000.0 + record["completion_tokens"] * completion_rate / 1000.0
    return total_price
def message_to_conversation(messages):
    """
    - render a list of messages as one readable text
    - every message becomes a "########## role ##########" header followed by its content
    - a "conversation begin" banner is put in front of each system message
    """
    banner = "############################## conversation begin ##############################\n"
    pieces = []
    for message in messages:
        if message["role"] == "system":
            pieces.append(banner)
        pieces.append('########## %s ##########\n%s\n\n' % (message['role'], message['content']))
    return "".join(pieces)
class LLM_Manager:
    """
    Process-wide singleton that tracks token usage / cost and keeps a chat history.

    Every LLM_Manager() call returns the same object. The state is set up only
    on the first successful construction; later constructions leave the
    counters and the chat history untouched.
    """
    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            cls._instance = super(LLM_Manager, cls).__new__(cls)
        return cls._instance

    def __init__(self, api_key="config/key_API.json") -> None:
        """
        - api_key: path of the JSON key file; must exist
        - raises ValueError when the path does not exist
        """
        if self._initialized:
            # Already set up: keep the accumulated usage and the chat history.
            return
        if not os.path.exists(api_key):
            raise ValueError(f"api key path {api_key} is not valid.")
        # total
        self.tokens_in_total = 0
        self.tokens_out_total = 0
        self.tokens_both_total = 0
        self.cost_total = 0
        # section
        self.tokens_in_section = 0
        self.tokens_out_section = 0
        self.tokens_both_section = 0
        self.cost_section = 0
        # dict {"model1": {}, "model2": {}, ...}
        self.usage_info = {}
        # chat
        self._llm_model_now = None
        self._temperature = None
        self.messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}]
        self._api_key_path = api_key
        # Mark the singleton as set up so later constructions do not wipe the state.
        self._initialized = True

    def update_usage(self, tokens_in:int, tokens_out:int, model:str):
        """
        Add one call's token counts and cost to the per-model, total and section counters.

        A model without an entry in PRICING_MODELS is still counted; its cost is
        taken as 0 and a warning is printed the first time the model is seen.
        """
        pricing = PRICING_MODELS.get(model)
        if pricing is None:
            if model not in self.usage_info:
                print(f"[Warning] no pricing entry for model {model}; its cost is counted as 0.")
            cost = 0.0
        else:
            cost = tokens_in * pricing[0] / 1000.0 + tokens_out * pricing[1] / 1000.0
        # dict
        if model not in self.usage_info:
            self.usage_info[model] = {"tokens_in": 0, "tokens_out": 0, "tokens_both": 0, "cost": 0}
        self.usage_info[model]["tokens_in"] += tokens_in
        self.usage_info[model]["tokens_out"] += tokens_out
        self.usage_info[model]["tokens_both"] += tokens_in + tokens_out
        self.usage_info[model]["cost"] += cost
        # total
        self.tokens_in_total += tokens_in
        self.tokens_out_total += tokens_out
        self.tokens_both_total += tokens_in + tokens_out
        self.cost_total += cost
        # section
        self.tokens_in_section += tokens_in
        self.tokens_out_section += tokens_out
        self.tokens_both_section += tokens_in + tokens_out
        self.cost_section += cost

    def new_section(self):
        """
        new usage section (only reset the tokens and cost of the section)
        """
        self.tokens_in_section = 0
        self.tokens_out_section = 0
        self.tokens_both_section = 0
        self.cost_section = 0

    def set_model(self, model:str):
        """Set the model used by chat() when none is passed explicitly."""
        self._llm_model_now = model

    def set_temperature(self, temperature:float):
        """Set the temperature used by chat() when none is passed explicitly."""
        self._temperature = temperature

    def chat(self, prompt:str, clear_mem:bool=False, model:str=None, temperature:float=None, sys_prompt:str=DEFAULT_SYS_MESSAGE)->str:
        """
        Send `prompt` as the next user message of the stored conversation and return the answer.

        - clear_mem: start a new conversation whose system message is `sys_prompt`
          (`sys_prompt` is only used in that case)
        - model / temperature: override the values set by set_model / set_temperature
        - raises ValueError when no model was passed or set
        """
        model = self._llm_model_now if model is None else model
        temperature = self._temperature if temperature is None else temperature
        if model is None:
            # Fail before touching the history instead of deep inside llm_call.
            raise ValueError("no model selected: pass `model` or call set_model() first.")
        if clear_mem:
            self.messages = [{"role": "system", "content": sys_prompt}]
        self.messages.append({"role": "user", "content": prompt})
        answer, other_infos = llm_call(
            input_messages = self.messages,
            model = model,
            api_key_path = self._api_key_path,
            temperature = temperature
        ) # usage already updated in llm_call
        # update messages (keep the current history when the call returned none)
        self.messages = other_infos.get("messages", self.messages)
        return answer
# Module-wide manager instance; llm_call() records token usage on it.
llm_manager = LLM_Manager()

if __name__ == "__main__":
    # print("GPT_call.py does not support running as a main file.")
    # Running this file directly starts the interactive DALL-E 3 helper.
    print('we are testing dalle3')
    dalle3()
"""
(see more in https://platform.openai.com/docs/guides/text-generation/chat-completions-api)
An example Chat Completions API response looks as follows:
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "The 2020 World Series was played in Texas at Globe Life Field in Arlington.",
"role": "assistant"
}
}
],
"created": 1677664795,
"id": "chatcmpl-7QyqpwdfhqwajicIEznoc6Q47XAyW",
"model": "gpt-3.5-turbo-0613",
"object": "chat.completion",
"usage": {
"completion_tokens": 17,
"prompt_tokens": 57,
"total_tokens": 74
}
}
"""