799 lines
32 KiB
Python
799 lines
32 KiB
Python
|
|
"""
Description :   This file is related to LLM calls; it includes the functions for calling GPT/Qwen/Claude models and the function for running an LLM in an interactive ChatGPT-like mode
Author      :   Ruidi Qiu (ruidi.qiu@tum.de)
Time        :   2023/11/17 15:01:06
LastEdited  :   2024/9/3 16:52:31
"""
|
|||
|
|
from config import Config
|
|||
|
|
from openai import OpenAI
|
|||
|
|
from anthropic import Anthropic
|
|||
|
|
import loader_saver as ls
|
|||
|
|
from utils.utils import Timer
|
|||
|
|
import tiktoken
|
|||
|
|
import copy
|
|||
|
|
import re
|
|||
|
|
import requests
|
|||
|
|
import json
|
|||
|
|
import httpx
|
|||
|
|
import time
|
|||
|
|
# import Image
|
|||
|
|
import openai
|
|||
|
|
import os
|
|||
|
|
from datetime import datetime, timedelta, timezone
|
|||
|
|
from config.config import GPT_MODELS
|
|||
|
|
from http import HTTPStatus
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Names exported by a star-import of this module.
__all__ = ["llm_call", "gpt_call", "claude_call", "run_like_a_chatgpt"]
|
|||
|
|
|
|||
|
|
# Price table used for cost accounting.
# Each entry maps a model name to
#   [price_per_1000_prompt_tokens, price_per_1000_completion_tokens]
PRICING_MODELS = {
    # --- qwen ---
    "qwen-max": [0.02, 0.06],
    "qwen-plus": [0.004, 0.012],
    # --- claude ---
    "claude-3-5-sonnet-20240620": [0.003, 0.015],
    "claude-3-opus-20240229": [0.015, 0.075],
    "claude-3-sonnet-20240229": [0.003, 0.015],
    "claude-3-haiku-20240307": [0.00025, 0.00125],
    "claude-2.1": [0.008, 0.024],
    "claude-2.0": [0.008, 0.024],
    # --- gpt 4o ---
    "gpt-4o-2024-08-06": [0.0025, 0.01],
    "gpt-4o-2024-05-13": [0.005, 0.015],
    "gpt-4o-mini-2024-07-18": [0.00015, 0.0006],
    # --- gpt 4 turbo ---
    "gpt-4-turbo-2024-04-09": [0.01, 0.03],
    "gpt-4-0125-preview": [0.01, 0.03],
    "gpt-4-1106-preview": [0.01, 0.03],
    "gpt-4-1106-vision-preview": [0.01, 0.03],
    # --- gpt 4 (old) ---
    "gpt-4": [0.03, 0.06],
    "gpt-4-32k": [0.06, 0.12],
    # --- gpt 3.5 turbo ---
    "gpt-3.5-turbo-0125": [0.0005, 0.0015],
    "gpt-3.5-turbo-instruct": [0.0015, 0.0020],
    # --- gpt 3.5 turbo (old) ---
    "gpt-3.5-turbo-1106": [0.0010, 0.0020],
    "gpt-3.5-turbo-0613": [0.0015, 0.0020],
    "gpt-3.5-turbo-16k-0613": [0.0030, 0.0040],
    "gpt-3.5-turbo-0301": [0.0030, 0.0040],
}

# Model names that the legacy (now commented-out) gpt_call excluded from
# `response_format={"type": "json_object"}`.
JSON_MODELS = [
    "gpt-4-0613",
    "gpt-4-32k-0613",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-16k-0613",
]

# MODEL_REDIRECTION is in config

# System prompt used when the caller does not supply one.
# (An earlier variant that additionally asked the model to answer with as few
# words as possible has been retired.)
DEFAULT_SYS_MESSAGE = (
    "You are the strongest AI in the world. "
    "I always trust you. "
    "You already have the knowledge about python and verilog. "
    "Do not save words by discarding information."
)

# System prompt of the interactive chat mode; currently the same as the default.
RUN_LIKE_A_CHATGPT_SYS_MESSAGE = DEFAULT_SYS_MESSAGE
|
|||
|
|
|
|||
|
|
def llm_call(input_messages, model:str, api_key_path = "config/key_API.json", system_message = None, temperature = None, json_mode = False) -> tuple[str, dict]:
    """
    Dispatch a chat request to the backend matching `model` and record its token usage.

    - input:
        - input_messages: either a plain string (treated as a single user message) or a
          list of dicts like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
        - model: str like "gpt-3.5-turbo-0613"; names starting with "claude" go to
          claude_call, names starting with "gpt" or "qwen" go to gpt_call
        - api_key_path: path of the JSON file holding the API keys
        - system_message: (used only when input_messages contain no system message) customized system message
        - temperature: sampling temperature, forwarded only when not None
        - json_mode: ask the backend for a JSON reply
    - output: (answer, other_infos)
        - answer: what the model returns
        - other_infos: dict with the keys "messages", "time", "system_fingerprint", "model", "usage";
          "usage" looks like {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
    - raises:
        - ValueError: if `model` has none of the supported prefixes
    - notes:
        - as for the official response format from gpt, see the end of this file
    """
    if isinstance(input_messages, str):
        input_messages = [{"role": "user", "content": input_messages}]

    if model.startswith("claude"):
        backend = claude_call
    elif model.startswith(("gpt", "qwen")):
        backend = gpt_call
    else:
        raise ValueError("model %s is not supported."%(model))

    output = backend(input_messages, model, api_key_path, system_message, temperature, json_mode)

    # A backend that failed early may hand back an incomplete info dict; treat
    # missing usage data as "no tokens consumed" instead of raising KeyError.
    usage = output[1].get("usage") or {}
    try:
        llm_manager.update_usage(usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0), model)
    except KeyError:
        # No pricing entry for this model: keep the answer that was already
        # obtained rather than losing it to a bookkeeping error.
        print(f" [Warning] no pricing entry for model {model}; usage not recorded.")
    return output
|
|||
|
|
|
|||
|
|
|
|||
|
|
# def gpt_call(input_messages, model, api_key_path, system_message = None, temperature = None, json_mode = False):
|
|||
|
|
# """
|
|||
|
|
# This func is used to call gpt
|
|||
|
|
# - input:
|
|||
|
|
# - input_messages: (not including system message) list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
|
|||
|
|
# - gpt_model: str like "gpt-3.5-turbo-0613"
|
|||
|
|
# - system_message: (valid when input_messages have no sys_message) customized system message, if None, use default system message
|
|||
|
|
# - output:
|
|||
|
|
# - answer: what gpt returns
|
|||
|
|
# - other_infos: dict:
|
|||
|
|
# - messages: input_messages + gpt's response, list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...]
|
|||
|
|
# - time: time used by gpt
|
|||
|
|
# - system_fingerprint: system_fingerprint of gpt's response
|
|||
|
|
# - model: model used by gpt
|
|||
|
|
# - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
|
|||
|
|
# - notes:
|
|||
|
|
# - as for the official response format from gpt, see the end of this file
|
|||
|
|
# """
|
|||
|
|
# client = enter_api_key(api_key_path)
|
|||
|
|
# # system message
|
|||
|
|
# has_sysmessage = False
|
|||
|
|
# for message in input_messages:
|
|||
|
|
# if message["role"] == "system":
|
|||
|
|
# has_sysmessage = True
|
|||
|
|
# break
|
|||
|
|
# if not has_sysmessage:
|
|||
|
|
# if system_message is None:
|
|||
|
|
# messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}]
|
|||
|
|
# else:
|
|||
|
|
# messages = [{"role": "system", "content": system_message}]
|
|||
|
|
# else:
|
|||
|
|
# messages = []
|
|||
|
|
# messages.extend(input_messages)
|
|||
|
|
# # other parameters
|
|||
|
|
# more_completion_kwargs = {}
|
|||
|
|
# if temperature is not None:
|
|||
|
|
# more_completion_kwargs["temperature"] = temperature
|
|||
|
|
# if json_mode:
|
|||
|
|
# if not model in JSON_MODELS:
|
|||
|
|
# more_completion_kwargs["response_format"] = {"type": "json_object"}
|
|||
|
|
# # call gpt
|
|||
|
|
# with Timer(print_en=False) as gpt_response:
|
|||
|
|
# completion = client.chat.completions.create(
|
|||
|
|
# model=model,
|
|||
|
|
# messages=messages,
|
|||
|
|
# **more_completion_kwargs
|
|||
|
|
# )
|
|||
|
|
# answer = completion.choices[0].message.content
|
|||
|
|
# messages.append({"role": "assistant", "content": answer})
|
|||
|
|
# time = round(gpt_response.interval, 2)
|
|||
|
|
# system_fingerprint = completion.system_fingerprint
|
|||
|
|
# usage = {"completion_tokens": completion.usage.completion_tokens, "prompt_tokens": completion.usage.prompt_tokens, "total_tokens": completion.usage.total_tokens}
|
|||
|
|
# model = completion.model
|
|||
|
|
# other_infos = {"messages": messages, "time": time, "system_fingerprint": system_fingerprint, "model": model, "usage": usage}
|
|||
|
|
# # return answer, messages, time, system_fingerprint
|
|||
|
|
# return answer, other_infos
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _gpt_call_failure_infos(messages, model):
    """Build the `other_infos` dict returned by gpt_call when no request could be made."""
    return {
        "messages": messages,
        "time": 0.0,
        "system_fingerprint": "",
        "model": model,
        "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
    }


def _load_openai_compatible_credentials(api_key_path, model):
    """Return (api_key, base_url) for `model`, read from the JSON key file."""
    with open(api_key_path, 'r') as f:
        keys = json.load(f)
    if model.startswith("qwen"):
        # Qwen is served through DashScope's OpenAI-compatible endpoint.
        return keys.get("dashscope"), "https://dashscope.aliyuncs.com/compatible-mode/v1"
    # Accept "openai" / "gpt", plus the "OPENAI_API_KEY" name used by enter_api_key.
    return keys.get("openai") or keys.get("gpt") or keys.get("OPENAI_API_KEY"), None


def gpt_call(input_messages, model, api_key_path, system_message=None, temperature=None, json_mode=False):
    """
    Call an OpenAI-compatible chat model (GPT or Qwen) with timeout protection and retries.

    Timeouts: 5 min to connect, 30 min to read the response. Up to 5 attempts are made.

    - input:
        - input_messages: list of dicts like [{"role": "user", "content": "hello"}, ...];
          the dicts are copied, the caller's objects are never modified
        - model: model name; names starting with "qwen" are sent to DashScope
        - api_key_path: JSON file with the key under "dashscope" (Qwen) or
          "openai" / "gpt" / "OPENAI_API_KEY" (GPT)
        - system_message: used only when input_messages contain no system message
        - temperature: forwarded only when not None
        - json_mode: request a JSON object reply (only applied when "gpt" is in the model name)
    - output: (answer, other_infos)
        - answer: reply text, "" when every attempt failed or the key could not be loaded
        - other_infos: dict with "messages", "time", "system_fingerprint", "model", "usage";
          always complete, with an all-zero "usage" on failure
    """
    # 1. System message handling.
    # NOTE(review): the fallback prompt here is a generic one, not the module's
    # DEFAULT_SYS_MESSAGE that claude_call uses - confirm whether that is intended.
    messages = []
    if not any(message["role"] == "system" for message in input_messages):
        sys_content = system_message if system_message is not None else "You are a helpful assistant."
        if not isinstance(sys_content, str):
            print(f" [Warning] system_message is not string, got type: {type(sys_content)}")
            sys_content = str(sys_content)
        messages.append({"role": "system", "content": sys_content})
    # Shallow copies, so that the coercion below cannot leak into the caller's dicts.
    messages.extend(dict(message) for message in input_messages)

    # Validate the message format: the API expects string contents.
    for i, msg in enumerate(messages):
        if not isinstance(msg.get("content"), str):
            print(f" [Error] messages[{i}]['content'] is not string!")
            print(f" Type: {type(msg.get('content'))}")
            print(f" Value: {msg.get('content')}")
            msg["content"] = str(msg.get("content"))

    # 2. Optional request parameters.
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    if json_mode and "gpt" in model:  # Qwen does not always support json_object
        more_completion_kwargs["response_format"] = {"type": "json_object"}

    # 3. Credentials (read here because the client is built with a custom timeout).
    try:
        api_key, base_url = _load_openai_compatible_credentials(api_key_path, model)
    except (OSError, ValueError, AttributeError, TypeError) as e:
        print(f"❌ [Error] Failed to load API Key: {e}")
        return "", _gpt_call_failure_infos(messages, model)
    if not api_key:
        print(f"❌ [Error] API Key not found in {api_key_path} for model {model}!")
        return "", _gpt_call_failure_infos(messages, model)

    timeout = httpx.Timeout(
        connect=300.0,  # give up connecting after 5 minutes
        read=1800.0,    # allow up to 30 minutes for the generation
        write=60.0,
        pool=60.0,
    )

    # 4. Request loop.
    MAX_RETRIES = 5
    answer = ""
    system_fingerprint = ""
    usage = {}
    time_used = 0.0
    succeeded = False

    # The context manager closes the HTTP connection pool on every exit path.
    with httpx.Client(timeout=timeout) as http_client:
        client = openai.OpenAI(
            api_key=api_key,
            base_url=base_url,
            http_client=http_client,
        )
        for attempt in range(MAX_RETRIES):
            try:
                with Timer(print_en=False) as gpt_response:
                    completion = client.chat.completions.create(
                        model=model,
                        messages=messages,
                        **more_completion_kwargs
                    )
                answer = completion.choices[0].message.content or ""
                system_fingerprint = completion.system_fingerprint
                usage = {
                    "completion_tokens": completion.usage.completion_tokens,
                    "prompt_tokens": completion.usage.prompt_tokens,
                    "total_tokens": completion.usage.total_tokens,
                }
                time_used = round(gpt_response.interval, 2)
                succeeded = True
                if answer:
                    print(f" [LLM] Success. Time: {time_used}s. Length: {len(answer)}")
                break
            # NOTE(review): the openai client may wrap httpx timeouts in its own
            # exception type, in which case they reach the generic handler below.
            except httpx.ConnectTimeout:
                print(f"[Timeout] Connection failed (>5min). Retrying {attempt+1}/{MAX_RETRIES}...")
                wait_time = 5
            except httpx.ReadTimeout:
                print(f" [Timeout] Generation too slow (>30min). Retrying {attempt+1}/{MAX_RETRIES}...")
                wait_time = 5
            except Exception as e:
                error_msg = str(e)
                if 'RequestTimeOut' in error_msg or '500' in error_msg:
                    # Server-side timeout / overload: back off longer.
                    wait_time = 15 * (attempt + 1)
                    print(f"🔄 [Server Timeout] API server busy. Retrying...")
                else:
                    # Linearly growing back-off.
                    wait_time = 5 * (attempt + 1)
                    print(f"⚠️ [Error] Attempt {attempt+1} failed: {e}")
            # Waiting is pointless once no attempt is left.
            if attempt < MAX_RETRIES - 1:
                time.sleep(wait_time)

    # 5. Assemble the result.
    if answer:
        messages.append({"role": "assistant", "content": answer})
    if not succeeded:
        # Fallback so that callers reading the usage never hit a missing key.
        usage = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}

    other_infos = {
        "messages": messages,
        "time": time_used,
        "system_fingerprint": system_fingerprint,
        "model": model,
        "usage": usage,
    }
    return answer, other_infos
|
|||
|
|
|
|||
|
|
def claude_call(input_messages, model, api_key_path, system_message = None, temperature = None, json_mode = False):
    """
    Call an Anthropic Claude model through the Messages API.

    #### input:
    - input_messages: list of dict like [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}, ...];
      may contain system messages; the caller's dicts are copied and never modified
    - model: str like "claude-3-5-sonnet-20240620"
    - api_key_path: JSON key file, read through enter_api_key
    - system_message: (used only when input_messages contain no system message) customized system message; if None, DEFAULT_SYS_MESSAGE is used
    - temperature: forwarded only when not None
    - json_mode: append a JSON instruction to the last message and prefill the reply with "{"

    #### output:
    - answer: what the model returns (including the "{" prefill in json mode)
    - other_infos: dict:
        - messages: system message + conversation + the model's response
        - time: time used by the request
        - system_fingerprint: always "" (not provided for Claude here)
        - model: model name that was requested
        - usage: dict: {"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}
    """
    client = enter_api_key(api_key_path, provider="anthropic")

    # The Messages API takes the system prompt as a separate `system` argument
    # rather than as a message, so split it off from the conversation.
    system_parts = [message["content"] for message in input_messages if message["role"] == "system"]
    conversation = [dict(message) for message in input_messages if message["role"] != "system"]
    if system_parts:
        system_text = "\n\n".join(str(part) for part in system_parts)
    elif system_message is not None:
        system_text = system_message
    else:
        system_text = DEFAULT_SYS_MESSAGE

    # other parameters
    more_completion_kwargs = {}
    if temperature is not None:
        more_completion_kwargs["temperature"] = temperature
    if system_text:
        more_completion_kwargs["system"] = system_text

    prefill = None
    request_messages = conversation
    if json_mode:
        conversation[-1]["content"] += "\nYour reply should be in JSON format."
        # Prefilling the assistant turn with "{" nudges the model into JSON.
        # It is sent with the request only and kept out of the returned history.
        prefill = {"role": "assistant", "content": "{"}
        request_messages = conversation + [prefill]

    # call claude
    max_tokens = 8192 if model == "claude-3-5-sonnet-20240620" else 4096
    with Timer(print_en=False) as claude_response:
        completion = client.messages.create(
            max_tokens=max_tokens,
            model=model,
            messages=request_messages,
            **more_completion_kwargs
        )

    answer = completion.content[0].text
    if prefill is not None:
        answer = prefill["content"] + answer

    messages = []
    if system_text:
        messages.append({"role": "system", "content": system_text})
    messages.extend(conversation)
    messages.append({"role": "assistant", "content": answer})

    time_used = round(claude_response.interval, 2)
    usage = {
        "completion_tokens": completion.usage.output_tokens,
        "prompt_tokens": completion.usage.input_tokens,
        "total_tokens": completion.usage.input_tokens + completion.usage.output_tokens,
    }
    other_infos = {"messages": messages, "time": time_used, "system_fingerprint": "", "model": model, "usage": usage}
    return answer, other_infos
|
|||
|
|
|
|||
|
|
def run_like_a_chatgpt():
    """
    Run an interactive chat session in the terminal.

    The model, key path, temperature and start mode come from Config(). The
    first user message can be preloaded from the prompt file; later ones are
    read from stdin. Typing "exit", "quit", "break" or an empty line ends the
    session, after which the conversation is logged/saved according to the
    config.
    """
    config = Config()
    gpt_model = config.gpt.model
    gpt_key_path = config.gpt.key_path

    preloaded_prompt = None
    if config.gpt.chatgpt.start_form == 'prompt':
        preloaded_prompt = ls.load_txt(config.load.prompt.path)

    # Display name of the speaker on the model side.
    llm_name = "LLM"
    if gpt_model.startswith("gpt"):
        llm_name = "ChatGPT"
    elif gpt_model.startswith("claude"):
        llm_name = "Claude"

    messages = [{"role": "system", "content": RUN_LIKE_A_CHATGPT_SYS_MESSAGE}]
    response_data_dicts = []  # per-response metadata (fingerprint, model, time)

    while True:
        # Fetch the next user message.
        if preloaded_prompt is None:
            content = input("User: ")
        else:
            content, preloaded_prompt = preloaded_prompt, None
            print("User (preloaded prompt): %s"%(content))
            ls.save_log_line("(the first user message is from preloaded prompt)", config)

        # Leave the loop on an exit keyword or empty input.
        if content in ["exit", "quit", "break", "", None]:
            break

        # Send the conversation to the model.
        messages.append({"role": "user", "content": content})
        answer, other_infos = llm_call(
            input_messages = messages,
            model = gpt_model,
            api_key_path = gpt_key_path,
            system_message = RUN_LIKE_A_CHATGPT_SYS_MESSAGE,
            temperature = config.gpt.temperature
        )
        messages = other_infos["messages"]
        elapsed = other_infos["time"]
        fingerprint = other_infos["system_fingerprint"]

        # Keep the metadata of this response.
        response_data_dicts.append({
            "system_fingerprint": fingerprint,
            "model": gpt_model,
            "time": elapsed,
        })

        print(f'{llm_name}: {answer}')
        print("(%ss used)" % (elapsed))

        if config.gpt.chatgpt.one_time_talk:
            break

    messages_plus = gen_messages_more_info(messages, response_data_dicts)
    if config.save.log.en:
        ls.save_messages_to_log(messages_plus, config)
    if config.save.message.en:
        ls.gpt_message_individual_save(messages, config, file_name="messages")
        ls.gpt_message_individual_save(messages_plus, config, file_name="messages_plus")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# def enter_api_key(api_key_path, provider="openai"):
|
|||
|
|
# if provider == "openai":
|
|||
|
|
# key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"]
|
|||
|
|
# client = OpenAI(api_key=key)
|
|||
|
|
# elif provider == "anthropic":
|
|||
|
|
# key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"]
|
|||
|
|
# client = Anthropic(api_key=key)
|
|||
|
|
# else:
|
|||
|
|
# raise ValueError("provider %s is not supported."%(provider))
|
|||
|
|
# return client
|
|||
|
|
|
|||
|
|
|
|||
|
|
def enter_api_key(api_key_path, provider="openai"):
    """
    Create an API client for `provider` using the key stored in `api_key_path`.

    - provider "openai": reads "OPENAI_API_KEY" from the JSON file; when the
      environment variable OPENAI_BASE_URL is set, the client is pointed at it
    - provider "anthropic": reads "ANTHROPIC_API_KEY" from the JSON file
    - any other provider raises ValueError
    """
    if provider not in ("openai", "anthropic"):
        raise ValueError("provider %s is not supported."%(provider))

    if provider == "anthropic":
        anthropic_key = ls.load_json_dict(api_key_path)["ANTHROPIC_API_KEY"]
        return Anthropic(api_key=anthropic_key)

    openai_key = ls.load_json_dict(api_key_path)["OPENAI_API_KEY"]
    # A custom endpoint (e.g. a proxy) is taken from the environment.
    endpoint = os.environ.get("OPENAI_BASE_URL")
    if not endpoint:
        return OpenAI(api_key=openai_key)
    print(f"DEBUG: Connecting to {endpoint}...")
    return OpenAI(api_key=openai_key, base_url=endpoint)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def gen_messages_more_info(original_messages, response_data_dicts):
    """
    Return a deep copy of the messages enriched with response metadata and an index.

    - original_messages: list of message dicts; left untouched
    - response_data_dicts: metadata dicts, one per assistant message, in order;
      each is merged into the corresponding assistant message
    - every returned message gets an "idx" key holding its position

    Assistant messages for which no metadata dict is left are kept as they are
    (only "idx" is added) instead of raising IndexError.
    """
    messages = copy.deepcopy(original_messages)
    remaining_data = iter(response_data_dicts)
    for idx, message in enumerate(messages):
        # additional info only at: role = "assistant"
        if message["role"] == "assistant":
            extra = next(remaining_data, None)
            if extra is not None:
                message.update(extra)
        message["idx"] = idx
    return messages
|
|||
|
|
|
|||
|
|
|
|||
|
|
def dalle3():
    """
    Interactive DALL-E 3 loop: ask for a figure name and a prompt, generate one
    image and save it as saves/dalle3/<name>.png. Typing "exit" at either
    question ends the loop.

    This function hasn't been well packaged
    now we have the free dalle3 application: microsoft - copilot
    """
    def download_image(url, file_path):
        """Download `url` to `file_path` and return the path; raises on HTTP errors."""
        # A timeout keeps a stalled download from hanging the loop forever;
        # raise_for_status makes a failed download surface as HTTPError instead
        # of writing an error page to disk.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        with open(file_path, "wb") as file:
            file.write(response.content)
        return file_path

    model_name = "dall-e-3"
    image_size = "1024x1024"  # 1792x1024, 1024x1024, 1024x1792
    download_folder = r"saves/dalle3/"
    os.makedirs(download_folder, exist_ok=True)

    while True:
        name = input("please name the generated figure (\"exit\" to exit): ")
        if name == "exit":
            break
        prompt = input("please input the prompt(\"exit\" to exit): ")
        if prompt == "exit":
            break

        try:
            num_images = 1
            print("generating your figure...")
            client = enter_api_key('config/key_API.json')
            response = client.images.generate(
                model=model_name,
                prompt=prompt,
                size=image_size,
                quality="standard",
                n=num_images,
            )
            image_url = response.data[0].url
            # Save straight to the requested name: deriving a temporary file
            # name from the URL would drag its query string into the name.
            new_file_path = os.path.join(download_folder, name + ".png")
            download_image(image_url, new_file_path)
            print("figure was downloaded to %s" %(new_file_path))
        except requests.exceptions.HTTPError as err:
            print("Request Error: ", err.response.text)
        except Exception as e:
            # Best effort: report the problem and let the user try again.
            print("Error: ", str(e))
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
############### utils of gpt ###############
|
|||
|
|
def num_tokens_from_string(string: str, model_name="gpt-4") -> int:
    """
    Count the tokens of one text string with the tokenizer of `model_name`.

    When tiktoken does not know the model (KeyError), the "cl100k_base"
    encoding is used instead.
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model_name)
    except KeyError:
        tokenizer = tiktoken.get_encoding("cl100k_base")
    return len(tokenizer.encode(string))
|
|||
|
|
|
|||
|
|
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """
    Count the tokens used by a list of chat messages.

    Each message costs 4 framing tokens plus the tokens of all its values
    (minus 1 when it carries a "name" key); 2 more tokens prime the reply.
    Raises NotImplementedError for models that are not in PRICING_MODELS.
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model)
    except KeyError:
        tokenizer = tiktoken.get_encoding("cl100k_base")

    if model not in PRICING_MODELS.keys():
        raise NotImplementedError(
            f"num_tokens_from_messages() is not presently implemented for model {model}.\n"
            "See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."
        )

    total = 0
    for message in messages:
        total += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        total += sum(len(tokenizer.encode(value)) for value in message.values())
        if "name" in message:  # if there's a name, the role is omitted
            total -= 1  # role is always required and always 1 token
    return total + 2  # every reply is primed with <im_start>assistant
|
|||
|
|
|
|||
|
|
# def extract_code(text, code_type):
|
|||
|
|
# """
|
|||
|
|
# #### function:
|
|||
|
|
# - extract code from text
|
|||
|
|
# #### input:
|
|||
|
|
# - text: str, gpt's response
|
|||
|
|
# - code_type: str, like "verilog"
|
|||
|
|
# #### output:
|
|||
|
|
# - list of found code blocks
|
|||
|
|
# """
|
|||
|
|
# code_type = code_type.lower()
|
|||
|
|
# start = "```" + code_type
|
|||
|
|
# end = "```"
|
|||
|
|
# verilog_blocks = re.findall(start + r'\s*(.*?)'+ end, text, re.DOTALL)
|
|||
|
|
# if verilog_blocks:
|
|||
|
|
# return verilog_blocks
|
|||
|
|
# else:
|
|||
|
|
# # return [""]
|
|||
|
|
# return [text]
|
|||
|
|
|
|||
|
|
def extract_code(text, code_type):
    """
    Extract code from an LLM reply.

    #### input:
    - text: str, the model's response
    - code_type: str, like "verilog" (compared case-insensitively with the
      language tag of fenced blocks)

    #### output:
    - list of code strings, chosen by the first rule that applies:
        1. fenced blocks tagged with `code_type`
        2. all fenced blocks, when none carries that tag
        3. the span from the first standalone `module` keyword to the last
           `endmodule`, when the text has no fenced block
        4. the whole text
    """
    # Show the raw reply on the terminal (debugging aid).
    print("\n[DEBUG] Raw LLM Response:\n", text)
    print("-" * 30)

    # 1./2. Markdown code fences: ``` + optional language tag + content + ```
    fenced = re.findall(r"```(\w+)?\s*(.*?)```", text, re.DOTALL)
    if fenced:
        wanted = (code_type or "").lower()
        if wanted:
            tagged = [body for tag, body in fenced if tag.lower() == wanted]
            if tagged:
                return tagged
        return [body for _tag, body in fenced]

    # 3. No fences: cut out module ... endmodule. The word boundary keeps the
    # search from starting inside "endmodule" or "submodule".
    first_module = re.search(r"\bmodule\b", text)
    if first_module and "endmodule" in text:
        start_idx = first_module.start()
        end_idx = text.rfind("endmodule") + len("endmodule")
        if start_idx < end_idx:
            return [text[start_idx:end_idx]]

    # 4. Nothing recognizable: hand back the raw reply.
    return [text]
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_dict_from_gpt_json(gpt_json_string):
    """
    Parse the JSON object contained in an LLM reply.

    - Markdown fences (```json / ```) are removed first and the cleaned string is printed.
    - If the cleaned string is not valid JSON as a whole (e.g. the model added
      explanatory text around it), the span from the first "{" to the last "}"
      is parsed instead.
    - raises json.JSONDecodeError (a ValueError) when no JSON can be parsed.
    """
    cleaned = gpt_json_string.replace("```json", "").replace("```", "").strip()
    print(cleaned)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        first_brace = cleaned.find("{")
        last_brace = cleaned.rfind("}")
        if first_brace == -1 or last_brace <= first_brace:
            raise
        return json.loads(cleaned[first_brace:last_brace + 1])
|
|||
|
|
|
|||
|
|
def cost_calculator(usages:list, model="gpt-4-0125-preview"):
    """
    Compute the total price of a list of usages for one model.

    - usages: list of dicts, [{"completion_tokens": 17, "prompt_tokens": 57, "total_tokens": 74}, ...]
    - model: key into PRICING_MODELS; a ValueError is raised for unknown models
    - returns the summed price (0 for an empty list)
    """
    if model not in PRICING_MODELS:
        raise ValueError(f"model {model} is not supported in the pricing calculator.")
    total_price = 0
    for entry in usages:
        prompt_cost = entry["prompt_tokens"] * PRICING_MODELS[model][0] / 1000.0
        completion_cost = entry["completion_tokens"] * PRICING_MODELS[model][1] / 1000.0
        total_price += prompt_cost + completion_cost
    return total_price
|
|||
|
|
|
|||
|
|
def message_to_conversation(messages):
    """
    Render a list of chat messages as one readable text block.

    - every message becomes a "########## <role> ##########" header followed by its content
    - a "conversation begin" banner is written in front of each system message
    """
    begin_banner = "############################## conversation begin ##############################\n"
    pieces = []
    for msg in messages:
        if msg["role"] == "system":
            pieces.append(begin_banner)
        pieces.append('########## %s ##########\n%s\n\n' % (msg['role'], msg['content']))
    return "".join(pieces)
|
|||
|
|
|
|||
|
|
class LLM_Manager:
    """
    Process-wide singleton that tracks token usage / cost and offers a simple
    stateful chat interface on top of llm_call.

    Usage is tracked at three levels: per model (`usage_info`), in total, and
    for the current "section" (reset with `new_section`).
    """
    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        # Every construction returns the same shared instance.
        if not cls._instance:
            cls._instance = super(LLM_Manager, cls).__new__(cls)
        return cls._instance

    def __init__(self, api_key="config/key_API.json") -> None:
        """
        Initialise the counters once.

        - api_key: path of the JSON key file; ValueError if it does not exist
        """
        # __init__ runs on every LLM_Manager() call; without this guard each
        # call would wipe the counters accumulated on the shared instance.
        if self._initialized:
            return
        # total
        self.tokens_in_total = 0
        self.tokens_out_total = 0
        self.tokens_both_total = 0
        self.cost_total = 0
        # section
        self.tokens_in_section = 0
        self.tokens_out_section = 0
        self.tokens_both_section = 0
        self.cost_section = 0
        # dict {"model1": {}, "model2": {}, ...}
        self.usage_info = {}
        # chat
        self._llm_model_now = None
        self._temperature = None
        self.messages = [{"role": "system", "content": DEFAULT_SYS_MESSAGE}]
        if not os.path.exists(api_key):
            raise ValueError(f"api key path {api_key} is not valid.")
        self._api_key_path = api_key
        self._initialized = True

    def update_usage(self, tokens_in:int, tokens_out:int, model:str):
        """
        Add one request's token counts and cost to the per-model, total and section counters.

        A model without an entry in PRICING_MODELS is counted with cost 0 (a
        warning is printed) so that bookkeeping never fails after a request
        has already been answered.
        """
        prices = PRICING_MODELS.get(model)
        if prices is None:
            print(f" [Warning] no pricing entry for model {model}; its cost is counted as 0.")
            prices = (0.0, 0.0)
        cost = tokens_in * prices[0] / 1000.0 + tokens_out * prices[1] / 1000.0
        # dict
        if model not in self.usage_info:
            self.usage_info[model] = {"tokens_in": 0, "tokens_out": 0, "tokens_both": 0, "cost": 0}
        self.usage_info[model]["tokens_in"] += tokens_in
        self.usage_info[model]["tokens_out"] += tokens_out
        self.usage_info[model]["tokens_both"] += tokens_in + tokens_out
        self.usage_info[model]["cost"] += cost
        # total
        self.tokens_in_total += tokens_in
        self.tokens_out_total += tokens_out
        self.tokens_both_total += tokens_in + tokens_out
        self.cost_total += cost
        # section
        self.tokens_in_section += tokens_in
        self.tokens_out_section += tokens_out
        self.tokens_both_section += tokens_in + tokens_out
        self.cost_section += cost

    def new_section(self):
        """
        new usage section (only reset the tokens and cost of the section)
        """
        self.tokens_in_section = 0
        self.tokens_out_section = 0
        self.tokens_both_section = 0
        self.cost_section = 0

    def set_model(self, model:str):
        """Set the model used by chat() when none is passed explicitly."""
        self._llm_model_now = model

    def set_temperature(self, temperature:float):
        """Set the temperature used by chat() when none is passed explicitly."""
        self._temperature = temperature

    def chat(self, prompt:str, clear_mem:bool=False, model:str=None, temperature:float=None, sys_prompt:str=DEFAULT_SYS_MESSAGE)->str:
        """
        Send `prompt` as the next user message of the stored conversation and return the answer.

        - clear_mem: start a new conversation whose system message is `sys_prompt`
        - model / temperature: override the values set via set_model / set_temperature
        - raises ValueError when no model was given and none has been set
        """
        model = self._llm_model_now if model is None else model
        if model is None:
            raise ValueError("no model specified: pass `model` or call set_model() first.")
        temperature = self._temperature if temperature is None else temperature
        if clear_mem:
            self.messages = [{"role": "system", "content": sys_prompt}]
        self.messages.append({"role": "user", "content": prompt})
        answer, other_infos = llm_call(
            input_messages = self.messages,
            model = model,
            api_key_path = self._api_key_path,
            temperature = temperature
        ) # usage already updated in llm_call
        # update messages
        self.messages = other_infos["messages"]
        return answer
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Module-level LLM_Manager instance; llm_call() records token usage on it.
llm_manager = LLM_Manager()
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # print("GPT_call.py does not support running as a main file.")
    # Running this file directly starts the interactive dalle3() loop.
    print('we are testing dalle3')
    dalle3()
|
|||
|
|
|
|||
|
|
|
|||
|
|
"""
|
|||
|
|
(see more in https://platform.openai.com/docs/guides/text-generation/chat-completions-api)
|
|||
|
|
An example Chat Completions API response looks as follows:
|
|||
|
|
{
|
|||
|
|
"choices": [
|
|||
|
|
{
|
|||
|
|
"finish_reason": "stop",
|
|||
|
|
"index": 0,
|
|||
|
|
"message": {
|
|||
|
|
"content": "The 2020 World Series was played in Texas at Globe Life Field in Arlington.",
|
|||
|
|
"role": "assistant"
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"created": 1677664795,
|
|||
|
|
"id": "chatcmpl-7QyqpwdfhqwajicIEznoc6Q47XAyW",
|
|||
|
|
"model": "gpt-3.5-turbo-0613",
|
|||
|
|
"object": "chat.completion",
|
|||
|
|
"usage": {
|
|||
|
|
"completion_tokens": 17,
|
|||
|
|
"prompt_tokens": 57,
|
|||
|
|
"total_tokens": 74
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
"""
|