commit 04c4625cfe
Author: Guanhua Zhang
Date:   2025-04-10 20:14:17 +02:00
11 changed files with 1330 additions and 0 deletions

inference/inference.py (new file, 158 lines)

@@ -0,0 +1,158 @@
from transformers import AutoTokenizer, AutoModelForCausalLM
import argparse
import os
import numpy as np
import json
from pathlib import Path
from tqdm import tqdm
import evaluate

# Limit CUDA allocator block size; must be set before torch initialises CUDA.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"

import logging
import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

from sentence_transformers import SentenceTransformer, util
def initialize():
    parser = argparse.ArgumentParser("")
    parser.add_argument("--model_name_or_path", type=str, default='')
    parser.add_argument("--embedding_model_path", type=str, default='')
    parser.add_argument("--train_data_dir", type=str, default='')
    parser.add_argument("--test_data_dir", type=str, default='')
    parser.add_argument("--prompt_file", type=str, default=None, help="The file for loading the prompt")
    args = parser.parse_args()
    return args
def get_tokenizer(args):
    # Tokenizers are device-agnostic; device_map only applies to model loading.
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = 'left'
    return tokenizer


def get_model(args):
    model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, device_map={"": 0})
    return model
def setup_model_and_tokenizer(args):
    tokenizer = get_tokenizer(args)
    model = get_model(args)
    return tokenizer, model


def read_json_file(filename):
    with open(filename, 'r') as infile:
        data = json.load(infile)
    return data


def format_one_action(action):
    return f"- {action}\n"


def format_actions_list(actions):
    actions_str = ""
    for action in actions:
        actions_str += format_one_action(action)
    return actions_str
def preprocess_data(task, args):
    # The prompt file holds a preamble and a suffix separated by the literal '==='.
    with open(args.prompt_file, 'r') as file:
        task_description = file.read().split('===')
    input_str = f"## Website:\n{task['website_en']}\n\n## Domain:\n{task['domain_en']}\n\n## Sub-domain:\n{task['subdomain_en']}\n\n## Actions (Each line is one action):\n{format_actions_list(task['task_subintention'])}\n## Sub-intentions summarised from these actions:\n{format_actions_list(task['steps'])}"
    query_inputs = f"{task_description[0]}\n{input_str}{task_description[1]}\n"
    # Capitalise the reference summary and terminate it with a full stop.
    summary_str = task['task_description']
    summary_str = summary_str[0].upper() + summary_str[1:] + "."
    test_prompt = f"User: {query_inputs}\nAgent:"
    return {"task": summary_str, "prompt": test_prompt}
def load_raw_dataset(data, args):
    tasks = []
    for d in tqdm(data):
        processed_task = preprocess_data(d, args)
        tasks.append(processed_task)
    return tasks
def main_loop(args, test_dataset, tokenizer, model, sacrebleu, rouge, meteor, embedding_model, mark):
    os.makedirs(args.model_name_or_path + "/results/", exist_ok=True)
    global_sacrebleu, global_rouge1, global_rouge2, global_rougeL, global_rougeLsum, global_meteor, global_cosine, global_distance = [], [], [], [], [], [], [], []
    for i, data in tqdm(enumerate(test_dataset)):
        save_task_response_filename = args.model_name_or_path + f"/results/{mark}_{i}_insert_mistral.json"
        if os.path.exists(save_task_response_filename):
            # Reuse cached per-task results so interrupted runs can resume.
            with open(save_task_response_filename, 'r') as f:
                save_dict = json.load(f)
        else:
            prompt = data["prompt"]
            task = data["task"]
            save_dict = {}
            model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
            # Greedy decoding; top_p would be ignored with do_sample=False, so it is omitted.
            generated_ids = model.generate(**model_inputs, max_new_tokens=1024, do_sample=False, repetition_penalty=1.2)
            pred = tokenizer.batch_decode(generated_ids)[0]
            response = pred.split("[SUMMARY]")[-1].replace('</s>', '').strip()
            rouge_calc = rouge.compute(predictions=[response], references=[[task]], use_aggregator=True)
            sacrebleu_calc = sacrebleu.compute(predictions=[response], references=[[task]])
            meteor_calc = meteor.compute(predictions=[response], references=[[task]])
            GT_Embedding = embedding_model.encode(task.lower(), convert_to_tensor=True)
            Prediction_Embedding = embedding_model.encode(response.lower(), convert_to_tensor=True)
            cosine_similarity = util.cos_sim(GT_Embedding, Prediction_Embedding).item()
            euclidean_distance = torch.sqrt(torch.sum(torch.pow(torch.subtract(GT_Embedding, Prediction_Embedding), 2))).item()
            save_dict["prompt"] = prompt
            save_dict["prediction"] = response
            save_dict["task"] = task
            save_dict["sacrebleu"] = sacrebleu_calc
            save_dict["rouge"] = rouge_calc
            save_dict["meteor"] = meteor_calc
            save_dict["cosine_similarity"] = cosine_similarity
            save_dict["euclidean_distance"] = euclidean_distance
            with open(save_task_response_filename, 'w') as f:
                json.dump(save_dict, f)
        global_sacrebleu.append(save_dict["sacrebleu"]["score"])
        global_rouge1.append(save_dict["rouge"]["rouge1"])
        global_rouge2.append(save_dict["rouge"]["rouge2"])
        global_rougeL.append(save_dict["rouge"]["rougeL"])
        global_rougeLsum.append(save_dict["rouge"]["rougeLsum"])
        global_meteor.append(save_dict["meteor"]["meteor"])
        global_cosine.append(save_dict["cosine_similarity"])
        global_distance.append(save_dict["euclidean_distance"])
    return global_sacrebleu, global_rouge1, global_rouge2, global_rougeL, global_rougeLsum, global_meteor, global_cosine, global_distance
def main(mark):
    args = initialize()
    assert 'Mind2Web' in args.test_data_dir
    tokenizer, model = setup_model_and_tokenizer(args)
    sacrebleu = evaluate.load('sacrebleu', module_type="metric")
    rouge = evaluate.load('rouge', module_type="metric")
    meteor = evaluate.load('meteor', module_type="metric")
    embedding_model = SentenceTransformer(args.embedding_model_path, device="cuda")
    test_folders_names = ["test_domain", "test_task", "test_website"]
    for name in test_folders_names:
        test_folder_path = Path(os.path.join(args.test_data_dir, name))
        global_sacrebleu, global_rouge1, global_rouge2, global_rougeL, global_rougeLsum, global_meteor, global_cosine, global_distance = [], [], [], [], [], [], [], []
        for json_file in test_folder_path.rglob('*_with_steps_insert_mistral.json'):
            with json_file.open('r') as f:
                data = json.load(f)
            raw_tasks = load_raw_dataset(data, args)
            sacrebleu_calc, rouge1_calc, rouge2_calc, rougeL_calc, rougeLsum_calc, meteor_calc, cosine_calc, distance_calc = main_loop(args, raw_tasks, tokenizer, model, sacrebleu, rouge, meteor, embedding_model, f'test_{name}')
            global_sacrebleu.extend(sacrebleu_calc)
            global_rouge1.extend(rouge1_calc)
            global_rouge2.extend(rouge2_calc)
            global_rougeL.extend(rougeL_calc)
            global_rougeLsum.extend(rougeLsum_calc)
            global_meteor.extend(meteor_calc)
            global_cosine.extend(cosine_calc)
            global_distance.extend(distance_calc)
        print(mark, name)
        print("%.3f" % (np.mean(global_cosine)))
        # sacrebleu scores are on a 0-100 scale; rescale to 0-1 like the other metrics.
        print("%.3f" % (np.mean(global_sacrebleu) / 100.0))
        print("%.3f" % (np.mean(global_rougeL)))
        print("%.3f" % (np.mean(global_meteor)))


if __name__ == "__main__":
    main('test')
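
A note on the prompt format: inference.py splits the --prompt_file contents on the literal separator '===', wraps the formatted task between the two segments, and takes everything after the marker [SUMMARY] in the decoded output as the model's answer. The actual summarisation_prompt.txt is not part of this commit; a minimal sketch of the expected shape, with purely hypothetical wording, would be:

    Summarise the recorded web actions below into a single task description.
    ===
    Now give the one-sentence task summary after the marker.
    [SUMMARY]

Presumably the suffix ends with [SUMMARY] so that splitting on the marker isolates only the newly generated text.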

inference/inference.sh (new file, 30 lines)

@@ -0,0 +1,30 @@
#!/bin/bash
# Placeholder: point PROJECT_PATH at the repository root before running.
PROJECT_PATH="your-project-path"
EMBEDDING_MODEL_PATH="${PROJECT_PATH}/sentence-transformer/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/e4ce9877abf3edfe10b0d82785e83bdcb973e22e"

OPTS=""
OPTS+=" --embedding_model_path ${EMBEDDING_MODEL_PATH}"
OPTS+=" --test_data_dir ${PROJECT_PATH}/data/Mind2Web/test"
OPTS+=" --train_data_dir ${PROJECT_PATH}/data/Mind2Web/train/train_with_steps_insert_mistral.json"
OPTS+=" --prompt_file ${PROJECT_PATH}/prompts/summarisation/summarisation_prompt.txt"

MODEL_NAME_OR_PATH_BMT="${PROJECT_PATH}/ckpts/experiment/epoch_14"
MODEL_NAME_OR_PATH_HF="${MODEL_NAME_OR_PATH_BMT}-hf"
MODEL_NAME_OR_PATH_ORIGINAL_MISTRAL="${PROJECT_PATH}/Mistral-7B-v0.1/snapshots/26bca36bde8333b5d7f72e9ed20ccda6a618af24"

# Convert the BMTrain checkpoint to Hugging Face format once, if not already done.
if [ ! -f "${MODEL_NAME_OR_PATH_HF}/config.json" ]; then
    CMD="python3 ${PROJECT_PATH}/hf_bmt/bmt_hf.py --in_path ${MODEL_NAME_OR_PATH_BMT} --output_path ${MODEL_NAME_OR_PATH_HF} --original_mistral_path ${MODEL_NAME_OR_PATH_ORIGINAL_MISTRAL}"
    echo "-------BMT -> HF CMD is------"
    echo "CMD: ${CMD}"
    echo "-------BMT -> HF CMD end------"
    eval ${CMD}
fi

OPTS+=" --model_name_or_path ${MODEL_NAME_OR_PATH_HF}"

CMD="python3 inference.py ${OPTS}"
echo "-------final CMD is------"
echo "${CMD}"
echo "-------final CMD end------"
eval ${CMD}
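
Usage sketch, assuming the directory layout hard-coded above: replace the "your-project-path" placeholder with the repository root, then run the script from the inference/ directory:

    cd inference
    bash inference.sh

The script converts the BMTrain checkpoint to Hugging Face format on the first run (skipped once ${MODEL_NAME_OR_PATH_HF}/config.json exists) and then launches inference.py with the assembled OPTS.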