""" Description : This file is related to GPT call, include the function of calling GPT and the function of running GPT in chatgpt mode Author : Ruidi Qiu (ruidi.qiu@tum.de) Time : 2023/11/17 15:01:06 LastEdited : 2024/9/3 16:52:31 """ from config import Config from openai import OpenAI from anthropic import Anthropic import loader_saver as ls from utils.utils import Timer import tiktoken import copy import re import requests import json import httpx import time # import Image import openai import os from datetime import datetime, timedelta, timezone from config.config import GPT_MODELS from http import HTTPStatus __all__ = ["llm_call", "gpt_call", "claude_call", "run_like_a_chatgpt"] PRICING_MODELS = { # model: [price_per_1000_prompt_tokens, price_per_1000_completion_tokens] # qwen "qwen-max": [0.02, 0.06], "qwen-plus": [0.004, 0.012], # claude "claude-3-5-sonnet-20240620": [0.003, 0.015], "claude-3-opus-20240229": [0.015, 0.075], "claude-3-sonnet-20240229": [0.003, 0.015], "claude-3-haiku-20240307": [0.00025, 0.00125], "claude-2.1": [0.008, 0.024], "claude-2.0": [0.008, 0.024], # gpt 4o 'gpt-4o-2024-08-06' : [0.0025, 0.01], 'gpt-4o-2024-05-13' : [0.005, 0.015], 'gpt-4o-mini-2024-07-18' : [0.00015, 0.0006], # gpt 4 turbo 'gpt-4-turbo-2024-04-09': [0.01, 0.03], 'gpt-4-0125-preview': [0.01, 0.03], 'gpt-4-1106-preview': [0.01, 0.03], 'gpt-4-1106-vision-preview': [0.01, 0.03], # gpt 4 (old) 'gpt-4': [0.03, 0.06], 'gpt-4-32k': [0.06, 0.12], # gpt 3.5 turbo 'gpt-3.5-turbo-0125': [0.0005, 0.0015], 'gpt-3.5-turbo-instruct': [0.0015, 0.0020], # gpt 3.5 turbo old 'gpt-3.5-turbo-1106': [0.0010, 0.0020], 'gpt-3.5-turbo-0613': [0.0015, 0.0020], 'gpt-3.5-turbo-16k-0613': [0.0030, 0.0040], 'gpt-3.5-turbo-0301': [0.0030, 0.0040] } JSON_MODELS = ["gpt-4-0613", "gpt-4-32k-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613"] # MODEL_REDIRECTION is in config # DEFAULT_SYS_MESSAGE = "You are the strongest AI in the world. I always trust you. 
# (legacy commented-out default prompt truncated; kept out of the active code)
DEFAULT_SYS_MESSAGE = "You are the strongest AI in the world. I always trust you. You already have the knowledge about python and verilog. Do not save words by discarding information."
RUN_LIKE_A_CHATGPT_SYS_MESSAGE = DEFAULT_SYS_MESSAGE

def llm_call(input_messages, model:str, api_key_path = "config/key_API.json", system_message = None, temperature = None, json_mode = False) -> tuple[str, dict]:
    """
    Dispatch a chat request to the right backend (Claude vs GPT/Qwen).

    - input:
        - input_messages: a plain str (wrapped as a single user turn) or a list of
          dicts like [{"role": "user", "content": "hello"}, ...]; a system turn is optional
        - model: str like "gpt-4o-2024-08-06", "qwen-plus" or "claude-3-haiku-20240307"
        - api_key_path: json file holding the provider API keys
        - system_message: used only when input_messages carries no system turn;
          None means the backend's default system message
        - temperature / json_mode: forwarded to the backend call
    - output: (answer, other_infos) where other_infos is a dict with keys:
        - messages: input_messages + the model's response
        - time: seconds spent in the request
        - system_fingerprint: fingerprint of the response ("" for Claude)
        - model: model name reported for the call
        - usage: {"completion_tokens": ..., "prompt_tokens": ..., "total_tokens": ...}
    - raises: ValueError for unsupported model prefixes
    - side effect: accumulates token usage / cost in the global llm_manager
    - notes: for the official response format, see the end of this file
    """
    if isinstance(input_messages, str):
        input_messages = [{"role": "user", "content": input_messages}]
    if model.startswith("claude"):
        output = claude_call(input_messages, model, api_key_path, system_message, temperature, json_mode)
    elif model.startswith("gpt") or model.startswith("qwen"):
        output = gpt_call(input_messages, model, api_key_path, system_message, temperature, json_mode)
    else:
        raise ValueError("model %s is not supported."%(model))
    # FIX: gpt_call may return an incomplete info dict on hard failure; do not
    # let the usage bookkeeping turn that into a KeyError.
    usage = output[1].get("usage") or {}
    llm_manager.update_usage(usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0), model)
    return output
# (removed a large commented-out legacy copy of gpt_call here; see git history)
def gpt_call(input_messages, model, api_key_path, system_message=None, temperature=None, json_mode=False):
    """
    Call an OpenAI-compatible chat endpoint (GPT or Qwen) with timeout
    protection (5 min connect / 30 min read) and up to 5 retries.

    - input:
        - input_messages: list of dicts like [{"role": "user", "content": "hi"}, ...]
        - model: e.g. "gpt-4o-2024-08-06" or "qwen-plus"
        - api_key_path: json file with an "openai"/"gpt" (or "dashscope") entry
        - system_message: used only when input_messages has no system turn;
          None falls back to DEFAULT_SYS_MESSAGE (the contract llm_call documents)
        - temperature / json_mode: optional request parameters
    - output: (answer, other_infos); on unrecoverable failure answer is "" and
      other_infos carries a zeroed usage dict so callers can keep working.
    """
    MAX_RETRIES = 5

    def _failure_infos():
        # Consistent fallback shape so llm_call's usage bookkeeping never
        # KeyErrors (the old code returned a bare {} here).
        return {
            "messages": list(input_messages),
            "time": 0.0,
            "system_fingerprint": "",
            "model": model,
            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
        }

    # 1. Load the API key first so we fail fast before any other work.
    #    (Read manually instead of via enter_api_key because we must inject an
    #    httpx client with custom timeouts below.)
    api_key = ""
    base_url = None
    try:
        with open(api_key_path, 'r') as f:
            keys = json.load(f)
        if model.startswith("qwen"):
            api_key = keys.get("dashscope")  # expects a "dashscope" entry in the key file
            base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
        else:
            api_key = keys.get("openai") or keys.get("gpt")  # accept either key name
        if not api_key:
            print(f"❌ [Error] API Key not found in {api_key_path} for model {model}!")
            return "", _failure_infos()
    except Exception as e:
        print(f"❌ [Error] Failed to load API Key: {e}")
        return "", _failure_infos()

    # 2. System message handling (FIX: fall back to DEFAULT_SYS_MESSAGE for
    #    consistency with claude_call; a rewrite had regressed this to a
    #    generic "helpful assistant" prompt).
    has_sysmessage = any(m["role"] == "system" for m in input_messages)
    if not has_sysmessage:
        sys_content = system_message if system_message is not None else DEFAULT_SYS_MESSAGE
        if not isinstance(sys_content, str):
            print(f" [Warning] system_message is not string, got type: {type(sys_content)}")
            sys_content = str(sys_content)
        messages = [{"role": "system", "content": sys_content}]
    else:
        messages = []
    messages.extend(input_messages)
    # Defensive check: every content field must be a plain string.
    for i, msg in enumerate(messages):
        if not isinstance(msg.get("content"), str):
            print(f" [Error] messages[{i}]['content'] is not string!")
            print(f" Type: {type(msg.get('content'))}")
            print(f" Value: {msg.get('content')}")
            messages[i]["content"] = str(msg.get("content"))

    # 3. Request parameters.
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    if json_mode and "gpt" in model:
        # Qwen endpoints do not always accept json_object, so gate on "gpt".
        more_completion_kwargs["response_format"] = {"type": "json_object"}

    # 4. Client with generous timeouts: 5 min to connect, 30 min to read a
    #    slow generation.
    http_client = httpx.Client(
        timeout=httpx.Timeout(connect=300.0, read=1800.0, write=60.0, pool=60.0)
    )
    client = openai.OpenAI(api_key=api_key, base_url=base_url, http_client=http_client)

    # 5. Retry loop.
    answer = ""
    system_fingerprint = ""
    usage = {}
    time_used = 0.0
    try:
        for attempt in range(MAX_RETRIES):
            try:
                with Timer(print_en=False) as gpt_response:
                    completion = client.chat.completions.create(
                        model=model,
                        messages=messages,
                        **more_completion_kwargs
                    )
                answer = completion.choices[0].message.content
                system_fingerprint = completion.system_fingerprint
                usage = {
                    "completion_tokens": completion.usage.completion_tokens,
                    "prompt_tokens": completion.usage.prompt_tokens,
                    "total_tokens": completion.usage.total_tokens
                }
                time_used = round(gpt_response.interval, 2)
                if answer:
                    print(f" [LLM] Success. Time: {time_used}s. Length: {len(answer)}")
                break  # success: leave the retry loop
            except httpx.ConnectTimeout:
                print(f"[Timeout] Connection failed (>5min). Retrying {attempt+1}/{MAX_RETRIES}...")
                time.sleep(5)
            except httpx.ReadTimeout:
                print(f" [Timeout] Generation too slow (>30min). Retrying {attempt+1}/{MAX_RETRIES}...")
                time.sleep(5)
            except Exception as e:
                error_msg = str(e)
                if 'RequestTimeOut' in error_msg or '500' in error_msg:
                    # Server-side timeout / 5xx: back off harder.
                    wait_time = 15 * (attempt + 1)
                    print(f"🔄 [Server Timeout] API server busy. Retrying...")
                    time.sleep(wait_time)
                else:
                    wait_time = 5 * (attempt + 1)  # linear backoff
                    print(f"⚠️ [Error] Attempt {attempt+1} failed: {e}")
                    time.sleep(wait_time)
    finally:
        # FIX: the old code leaked one httpx connection pool per call.
        http_client.close()

    # 6. Package the result in the shared other_infos format.
    if answer:
        messages.append({"role": "assistant", "content": answer})
    else:
        # Failure fallback so downstream bookkeeping keeps working.
        usage = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}
    other_infos = {
        "messages": messages,
        "time": time_used,
        "system_fingerprint": system_fingerprint,
        "model": model,
        "usage": usage
    }
    return answer, other_infos

def claude_call(input_messages, model, api_key_path, system_message = None, temperature = None, json_mode = False):
    """
    Call an Anthropic Claude model.
    #### input:
    - input_messages: list of dicts like [{"role": "user", "content": "hello"}, ...]
    - model: str like "claude-3-5-sonnet-20240620"
    - system_message: used when input_messages has no system turn; None -> default
    #### output:
    - answer: the model's reply text
    - other_infos: dict with keys "messages", "time", "system_fingerprint"
      (always "" for Claude), "model", "usage"
    #### notes:
    - for the official response format, see the end of this file
    """
    client = enter_api_key(api_key_path, provider="anthropic")
    prefill = None
    # System message handling (mirrors gpt_call).
    has_sysmessage = any(m["role"] == "system" for m in input_messages)
    if not has_sysmessage:
        if system_message is None:
            messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}]
        else:
            messages = [{"role": "system", "content": system_message}]
    else:
        messages = []
    messages.extend(input_messages)
    # The messages API rejects "system" roles inside `messages`, so drop them.
    # FIX: filter instead of list.remove() inside a `for` loop over the same
    # list, which can skip elements when the list is mutated mid-iteration.
    messages = [m for m in messages if m["role"] != "system"]
    # Other parameters.
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    if json_mode:
        # Ask for JSON and prefill "{" so the reply starts as a JSON object.
        messages[-1]["content"] += "\nYour reply should be in JSON format."
        prefill = {"role": "assistant", "content": "{"}
        messages.append(prefill)
    # Call claude.
    if model == "claude-3-5-sonnet-20240620":
        max_tokens = 8192
    else:
        max_tokens = 4096
    with Timer(print_en=False) as gpt_response:
        completion = client.messages.create(
            max_tokens=max_tokens,
            model=model,
            messages=messages,
            **more_completion_kwargs
        )
    answer = completion.content[0].text
    if prefill is not None:
        answer = prefill["content"] + answer  # re-attach the prefilled "{"
    messages.append({"role": "assistant", "content": answer})
    time_used = round(gpt_response.interval, 2)
    system_fingerprint = ""  # Anthropic responses carry no fingerprint field
    usage = {"completion_tokens": completion.usage.output_tokens,
             "prompt_tokens": completion.usage.input_tokens,
             "total_tokens": completion.usage.input_tokens + completion.usage.output_tokens}
    other_infos = {"messages": messages, "time": time_used, "system_fingerprint": system_fingerprint, "model": model, "usage": usage}
    return answer, other_infos

def run_like_a_chatgpt():
    config = Config()
    gpt_model = config.gpt.model
    gpt_key_path = config.gpt.key_path
    if config.gpt.chatgpt.start_form == 'prompt':
        preloaded_prompt = ls.load_txt(config.load.prompt.path)
    else:
        preloaded_prompt = None
    if gpt_model.startswith("gpt"):
        llm_name = "ChatGPT"
    elif gpt_model.startswith("claude"):
        llm_name = "Claude"
    else:
        llm_name = "LLM"
    # messages = [{"role": "system", "content": "You are a hardware code expert, skilled in understanding and generating verilog hardware language. You are the strongest AI hardware expert in the world. I totally believe you can fulfill the task I give you. You always give me the most detailed solution.
Your reply should only contain code."}] messages = [{"role": "system", "content": RUN_LIKE_A_CHATGPT_SYS_MESSAGE}] response_data_dicts = [] # this is to record other data of gpt's response like seed and time while True: # load prompt if preloaded_prompt is not None: content = preloaded_prompt preloaded_prompt = None print("User (preloaded prompt): %s"%(content)) ls.save_log_line("(the first user message is from preloaded prompt)", config) else: content = input("User: ") # break loop if content in ["exit", "quit", "break", "", None]: break # send prompt to gpt messages.append({"role": "user", "content": content}) # run gpt answer, other_infos = llm_call( input_messages = messages, model = gpt_model, api_key_path = gpt_key_path, system_message = RUN_LIKE_A_CHATGPT_SYS_MESSAGE, temperature = config.gpt.temperature ) messages, time, system_fingerprint = other_infos["messages"], other_infos["time"], other_infos["system_fingerprint"] # get data from response data_dict = {} data_dict["system_fingerprint"] = system_fingerprint data_dict["model"] = gpt_model data_dict["time"] = time response_data_dicts.append(data_dict) # print print(f'{llm_name}: {answer}') print("(%ss used)" % (time)) if config.gpt.chatgpt.one_time_talk: break messages_plus = gen_messages_more_info(messages, response_data_dicts) if config.save.log.en: ls.save_messages_to_log(messages_plus, config) if config.save.message.en: ls.gpt_message_individual_save(messages, config, file_name="messages") ls.gpt_message_individual_save(messages_plus, config, file_name="messages_plus") # def enter_api_key(api_key_path, provider="openai"): # if provider == "openai": # key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"] # client = OpenAI(api_key=key) # elif provider == "anthropic": # key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"] # client = Anthropic(api_key=key) # else: # raise ValueError("provider %s is not supported."%(provider)) # return client def enter_api_key(api_key_path, provider="openai"): if 
provider == "openai": # 1. 读取文件里的 Key (刚才第一步改好的) key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"] # 2. 关键:从环境变量获取 Base URL # 如果没有这一步,请求会发给 openai.com 导致 401 base_url = os.environ.get("OPENAI_BASE_URL") if base_url: print(f"DEBUG: Connecting to {base_url}...") # 加一行打印方便调试 client = OpenAI(api_key=key, base_url=base_url) else: client = OpenAI(api_key=key) elif provider == "anthropic": # Claude 部分保持原样 key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"] client = Anthropic(api_key=key) else: raise ValueError("provider %s is not supported."%(provider)) return client def gen_messages_more_info(original_messages, response_data_dicts): # additional info only at: role = "assistant" messages = copy.deepcopy(original_messages) idx_response = 0 for i in range(len(messages)): if messages[i]["role"] == "assistant": # messages[i].extend(response_data_dicts[idx_response]) # wrong syntax messages[i] = {**messages[i], **response_data_dicts[idx_response]} idx_response += 1 # add idx to each message for i in range(len(messages)): messages[i]["idx"] = i return messages def dalle3(): """ This function hasn't been well packaged now we have the free dalle3 application: microsoft - copilot """ def download_image(url, folder_path): response = requests.get(url) file_path = os.path.join(folder_path, os.path.basename(url)) with open(file_path, "wb") as file: file.write(response.content) return file_path model_name = "dall-e-3" image_size = "1024x1024" # 1792x1024, 1024x1024, 1024x1792 download_folder = r"saves/dalle3/" os.makedirs(download_folder, exist_ok=True) while True: name = input("please name the generated figure (\"exit\" to exit): ") if name == "exit": break prompt = input("please input the prompt(\"exit\" to exit): ") if prompt == "exit": break try: # num_images = int(input("please input the number of figures (default=1):") or "1") num_images = 1 print("generating your figure...") # response = requests.post( # "https://api.openai-proxy.org/v1/images/generations", # 
headers={"Authorization": ""}, # json={"model": model_name, "size": image_size, "prompt": prompt, "n": num_images}, # ) client = enter_api_key('config/key_API.json') response = client.images.generate( model=model_name, prompt=prompt, size=image_size, quality="standard", n=num_images, ) # response.raise_for_status() # data = response.json()["data"] image_url = response.data[0].url # the name should end with .png file_name = name + ".png" file_path = download_image(image_url, download_folder) new_file_path = os.path.join(download_folder, file_name) os.rename(file_path, new_file_path) print("figure was downloaded to %s" %(new_file_path)) # file_path = download_image(image_url, download_folder) # print("图片已下载至:", file_path) # current_time = datetime.now(timezone.utc) + timedelta(hours=8) # current_time_str = current_time.strftime("%Y%m%d-%H%M") # for i, image in enumerate(data): # image_url = image["url"] # file_name = current_time_str + f"-{str(i+1).zfill(3)}.png" # file_path = download_image(image_url, download_folder) # new_file_path = os.path.join(download_folder, file_name) # os.rename(file_path, new_file_path) # print("图片已下载至:", new_file_path) except requests.exceptions.HTTPError as err: print("Request Error: ", err.response.text) except Exception as e: print("Error: ", str(e)) ############### utils of gpt ############### def num_tokens_from_string(string: str, model_name="gpt-4") -> int: """ Returns the number of tokens in a single text string. 
https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb """ try: encoding = tiktoken.encoding_for_model(model_name) except KeyError: encoding = tiktoken.get_encoding("cl100k_base") num_tokens = len(encoding.encode(string)) return num_tokens def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"): """Returns the number of tokens used by a list of messages.""" try: encoding = tiktoken.encoding_for_model(model) except KeyError: encoding = tiktoken.get_encoding("cl100k_base") if model in PRICING_MODELS.keys(): num_tokens = 0 for message in messages: num_tokens += 4 # every message follows {role/name}\n{content}\n for key, value in message.items(): num_tokens += len(encoding.encode(value)) if key == "name": # if there's a name, the role is omitted num_tokens += -1 # role is always required and always 1 token num_tokens += 2 # every reply is primed with assistant return num_tokens else: raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""") # def extract_code(text, code_type): # """ # #### function: # - extract code from text # #### input: # - text: str, gpt's response # - code_type: str, like "verilog" # #### output: # - list of found code blocks # """ # code_type = code_type.lower() # start = "```" + code_type # end = "```" # verilog_blocks = re.findall(start + r'\s*(.*?)'+ end, text, re.DOTALL) # if verilog_blocks: # return verilog_blocks # else: # # return [""] # return [text] def extract_code(text, code_type): """ [增强版] 能够处理带 verilog 标签、不带标签、甚至带中文解释的情况 """ # 打印原始回复,让你在终端直接能看到 Qwen 到底回了什么 (调试神器) print("\n[DEBUG] Raw LLM Response:\n", text) print("-" * 30) # 1. 尝试匹配 Markdown 代码块 (``` ... 
```) # 正则解释:找 ``` 开头,中间可能跟着语言名(如verilog),然后是内容,最后是 ``` pattern = r"```(?:\w+)?\s*(.*?)```" matches = re.findall(pattern, text, re.DOTALL) if matches: # 如果找到了代码块,只返回代码块里的内容 return matches # 2. 如果没找到 Markdown 标记,可能是纯代码,但也可能包含 "module" 关键字 # 我们尝试只提取 module ... endmodule 之间的内容 (简易版) if "module" in text and "endmodule" in text: # 这是一个非常粗暴但有效的兜底:找到第一个 module 和最后一个 endmodule start_idx = text.find("module") end_idx = text.rfind("endmodule") + len("endmodule") clean_code = text[start_idx:end_idx] return [clean_code] # 3. 实在没招了,返回原始内容 (之前的逻辑) return [text] def get_dict_from_gpt_json(gpt_json_string): """ - this function is used to get the dict from the gpt json string """ gpt_json_string = gpt_json_string.replace("```json", "").replace("```", "").strip() print(gpt_json_string) return json.loads(gpt_json_string) def cost_calculator(usages:list, model="gpt-4-0125-preview"): """ - this function is used to calculate the price of gpt - usage: list of dicts, [{"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}, ...] 
""" if model not in PRICING_MODELS: raise ValueError(f"model {model} is not supported in the pricing calculator.") price = 0 for usage in usages: price += usage["prompt_tokens"] * PRICING_MODELS[model][0] / 1000.0 + usage["completion_tokens"] * PRICING_MODELS[model][1] / 1000.0 return price def message_to_conversation(messages): """ - this function is used to convert messages to conversation """ conversation = "" for message in messages: if message["role"] == "system": conversation += "############################## conversation begin ##############################\n" conversation += '########## %s ##########\n%s\n\n' % (message['role'], message['content']) return conversation class LLM_Manager: _instance = None _initialized = False def __new__(cls, *args, **kwargs): if not cls._instance: cls._instance = super(LLM_Manager, cls).__new__(cls) return cls._instance def __init__(self, api_key="config/key_API.json") -> None: if not self._initialized: # total self.tokens_in_total = 0 self.tokens_out_total = 0 self.tokens_both_total = 0 self.cost_total = 0 # section self.tokens_in_section = 0 self.tokens_out_section = 0 self.tokens_both_section = 0 self.cost_section = 0 # dict {"model1": {}, "model2": {}, ...} self.usage_info = {} # chat self._llm_model_now = None self._temperature = None self.messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}] if not os.path.exists(api_key): raise ValueError(f"api key path {api_key} is not valid.") self._api_key_path = api_key def update_usage(self, tokens_in:int, tokens_out:int, model:str): cost = tokens_in * PRICING_MODELS[model][0] / 1000.0 + tokens_out * PRICING_MODELS[model][1] / 1000.0 # dict if model not in self.usage_info.keys(): self.usage_info[model] = {"tokens_in": 0, "tokens_out": 0, "tokens_both": 0, "cost": 0} self.usage_info[model]["tokens_in"] += tokens_in self.usage_info[model]["tokens_out"] += tokens_out self.usage_info[model]["tokens_both"] += tokens_in + tokens_out self.usage_info[model]["cost"] += cost # 
total self.tokens_in_total += tokens_in self.tokens_out_total += tokens_out self.tokens_both_total += tokens_in + tokens_out self.cost_total += cost # section self.tokens_in_section += tokens_in self.tokens_out_section += tokens_out self.tokens_both_section += tokens_in + tokens_out self.cost_section += cost def new_section(self): """ new usage section (only reset the tokens and cost of the section) """ self.tokens_in_section = 0 self.tokens_out_section = 0 self.tokens_both_section = 0 self.cost_section = 0 def set_model(self, model:str): self._llm_model_now = model def set_temperature(self, temperature:float): self._temperature = temperature def chat(self, prompt:str, clear_mem:bool=False, model:str=None, temperature:float=None, sys_prompt:str=DEFAULT_SYS_MESSAGE)->str: model = self._llm_model_now if model is None else model temperature = self._temperature if temperature is None else temperature if clear_mem: self.messages = [{"role": "system", "content": sys_prompt}] self.messages.append({"role": "user", "content": prompt}) answer, other_infos = llm_call( input_messages = self.messages, model = model, api_key_path = self._api_key_path, temperature = temperature ) # usage already updated in llm_call # update messages self.messages = other_infos["messages"] return answer llm_manager = LLM_Manager() if __name__ == "__main__": # print("GPT_call.py does not support running as a main file.") print('we are testing dalle3') dalle3() """ (see more in https://platform.openai.com/docs/guides/text-generation/chat-completions-api) An example Chat Completions API response looks as follows: { "choices": [ { "finish_reason": "stop", "index": 0, "message": { "content": "The 2020 World Series was played in Texas at Globe Life Field in Arlington.", "role": "assistant" } } ], "created": 1677664795, "id": "chatcmpl-7QyqpwdfhqwajicIEznoc6Q47XAyW", "model": "gpt-3.5-turbo-0613", "object": "chat.completion", "usage": { "completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74 } } 
"""