-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchain_of_thought.py
More file actions
84 lines (70 loc) · 2.57 KB
/
chain_of_thought.py
File metadata and controls
84 lines (70 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from openai import OpenAI
import os
from pathlib import Path
import json
from datetime import datetime
from tqdm import tqdm
class Prompter:
    """Pair the prompt templates with the callbacks applied to a model reply.

    Attributes:
        sys_prompt: Text sent verbatim as the system message.
        usr_prompt: Template for the user message; it is filled in with
            ``str.format(**theorem)``.
        parse: Callable stored as-is. ``make`` invokes it as ``parse(resp)``
            on the raw reply text.
        format: Callable stored as-is. ``make`` invokes it as
            ``format(thm, llm_result)`` to build the record that is saved.
    """

    def __init__(self, sys_prompt, usr_prompt, parse, format):
        # Plain attribute storage: nothing is validated or copied here.
        self.sys_prompt, self.usr_prompt = sys_prompt, usr_prompt
        self.parse, self.format = parse, format

    def prompt(self, theorem):
        """Build the two-message chat (system, then user) for ``theorem``.

        ``theorem`` is unpacked as keyword arguments into the user template,
        so it must be a mapping providing every placeholder the template
        names.
        """
        system_turn = {"role": "system", "content": self.sys_prompt}
        user_turn = {
            "role": "user",
            "content": self.usr_prompt.format(**theorem),
        }
        return [system_turn, user_turn]
def log(logfile, messages: list[dict[str, str]], resp: str):
    """Append one prompt/response exchange to ``logfile``.

    Args:
        logfile: Path of the log file; it is opened in append mode, so
            earlier entries are kept.
        messages: Chat messages, each a mapping with a ``"content"`` key.
            Only the contents are logged, joined with newlines.
        resp: The reply text to store next to the prompt.

    Raises:
        KeyError: If a message has no ``"content"`` key.

    Each entry is written as an indented (multi-line) JSON object followed
    by a newline.
    """
    # Build the entry before touching the file so that a malformed message
    # fails without creating or modifying the log.
    entry = {
        "message": "\n".join(m["content"] for m in messages),
        "response": resp,
    }
    with open(logfile, "a", encoding="utf-8") as file:
        file.write(json.dumps(entry, indent=2) + "\n")
def response(
    client,
    model,
    temperature,
    logger,
    messages: list[dict[str, str]],
    max_tokens: int = 4096,
) -> str:
    """Request one chat completion and return the text of its first choice.

    Args:
        client: Object exposing ``chat.completions.create(...)`` (an OpenAI
            style client).
        model: Model identifier forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        logger: Callable invoked as ``logger(messages, resp)`` after the
            reply has been received.
        messages: Chat messages (role/content mappings) sent as the prompt.
        max_tokens: Upper bound on generated tokens forwarded to the API.
            Defaults to 4096, the value that used to be hard-coded.

    Returns:
        The ``content`` of the first choice's message.
    """
    raw = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # NOTE(review): the OpenAI SDK types ``message.content`` as optional, so
    # this may be None for some replies -- confirm whether callers need a
    # guard before parsing.
    resp = raw.choices[0].message.content
    logger(messages, resp)
    return resp
def _read_json_records(path):
    """Return every JSON value stored back-to-back in the text file ``path``.

    Records may be separated by any amount of whitespace, so one-object-per-
    line files and indented multi-line objects are both accepted.
    """
    with open(path, "r", encoding="utf-8") as datas:
        text = datas.read()
    decoder = json.JSONDecoder()
    records = []
    pos, end = 0, len(text)
    while True:
        # raw_decode does not skip leading whitespace, so do it here.
        while pos < end and text[pos].isspace():
            pos += 1
        if pos >= end:
            return records
        record, pos = decoder.raw_decode(text, pos)
        records.append(record)


def make(model, vllm_address, prompter, data_info, temperature=1.0):
    """Add chains of thought to a dataset of theorems.

    Reads ``data/selected/{data_info}.jsonl``, queries the model once per
    record and writes the formatted results to
    ``data/CoT/{provider}/{model_name}/{data_info}/{timestamp}.jsonl``.
    Every exchange is also appended to a log file under
    ``log/{provider}/{model_name}/{data_info}/``.

    Args:
        model: Model identifier of the form ``"provider/model_name"``.
        vllm_address: Base URL handed to the ``OpenAI`` client.
        prompter: Object providing ``prompt(thm)``, ``parse(resp)`` and
            ``format(thm, llm_result)``.
        data_info: Dataset name; selects the input file and names the
            output and log directories.
        temperature: Sampling temperature for every request.

    Returns:
        The tuple ``(provider, model_name, data_info, dt)`` where ``dt`` is
        the ``%y%m%d_%H%M%S`` timestamp used in the output file names.

    Raises:
        ValueError: If ``model`` is not exactly ``provider/model_name``, or
            if the input file contains invalid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    dt = datetime.now().strftime("%y%m%d_%H%M%S")

    parts = model.split('/')
    if len(parts) != 2:
        raise ValueError(
            f"model must have the form 'provider/model_name', got {model!r}"
        )
    provider, model_name = parts

    savefile = Path("data", "CoT", provider, model_name, data_info, f"{dt}.jsonl")
    savefile.parent.mkdir(parents=True, exist_ok=True)

    print("Reading the data ...")
    datafile = f"data/selected/{data_info}.jsonl"
    # Parse the whole input up front: malformed data fails before any model
    # call is made, and the layout of each record (number of lines or keys)
    # no longer matters.
    thms = _read_json_records(datafile)

    client = OpenAI(
        api_key="EMPTY",
        base_url=vllm_address
    )

    logfile = Path("log", provider, model_name, data_info, f"{dt}.jsonl")
    logfile.parent.mkdir(parents=True, exist_ok=True)

    def logger(messages, resp):
        log(logfile, messages, resp)

    print("Generating chains of thought ...")
    with open(savefile, "w", encoding="utf-8") as new_datas:
        for thm in tqdm(thms):
            messages = prompter.prompt(thm)
            resp = response(client, model, temperature, logger, messages)
            llm_result = prompter.parse(resp)
            result = prompter.format(thm, llm_result)
            # Kept as indented JSON to match the existing output format.
            new_datas.write(json.dumps(result, indent=2) + "\n")

    print("DONE!")
    return provider, model_name, data_info, dt