103 lines
3.0 KiB
Python
103 lines
3.0 KiB
Python
import os
|
||
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
|
||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||
|
||
from PIL import Image
|
||
import requests
|
||
import torch
|
||
from torchvision import io
|
||
from typing import Dict
|
||
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
|
||
import pickle
|
||
import re
|
||
from tqdm import tqdm
|
||
from peft import PeftModel
|
||
|
||
# Load the model in half-precision on the available device(s)
|
||
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
||
"Qwen2.5-VL-3B", torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2"
|
||
)
|
||
|
||
# 加载LoRA权重,添加load_in_8bit=False和device_map参数
|
||
# model = PeftModel.from_pretrained(
|
||
# model,
|
||
# "hybrid_train_output/checkpoint-100",
|
||
# load_in_8bit=False,
|
||
# device_map="auto",
|
||
# is_trainable=False,
|
||
# assign = True
|
||
# )
|
||
|
||
# 确保模型处于评估模式
|
||
model.eval()
|
||
|
||
processor = AutoProcessor.from_pretrained("Qwen2.5-VL-3B")
|
||
|
||
# Image
|
||
# image = Image.open("Tesla.jpg")
|
||
|
||
# 定义提示文本
|
||
text_prompt = (
|
||
"<|im_start|>system\n"
|
||
"You are a helpful assistant.<|im_end|>\n"
|
||
"<|im_start|>user\n"
|
||
"<|vision_start|><|image_pad|><|vision_end|>"
|
||
"Please tell me the brand of the product in the picture between labels <answer/> and </answer> "
|
||
"and explain the reason between labels <thinking/> and </thinking>"
|
||
"<|im_end|>\n"
|
||
"<|im_start|>assistant"
|
||
)
|
||
|
||
# 加载测试数据
|
||
with open("../work/bal_data/test_data.pkl", "rb") as f:
|
||
test_data = pickle.load(f)
|
||
|
||
# 批处理大小
|
||
batch_size = 20
|
||
correct = 0
|
||
total = 0
|
||
|
||
# 遍历测试数据
|
||
for i in tqdm(range(0, len(test_data), batch_size)):
|
||
# 准备当前批次的数据
|
||
batch = test_data[i:i+batch_size]
|
||
batch_images = [item['image'] for item in batch]
|
||
batch_brands = [item['brand'] for item in batch]
|
||
batch_prompts = [text_prompt] * len(batch_images)
|
||
|
||
# 模型处理
|
||
inputs = processor(
|
||
text=batch_prompts,
|
||
images=batch_images,
|
||
padding=True,
|
||
return_tensors="pt"
|
||
)
|
||
inputs = inputs.to("cuda")
|
||
|
||
# 生成输出
|
||
output_ids = model.generate(**inputs, max_new_tokens=128)
|
||
generated_ids = [
|
||
output_ids[len(input_ids):]
|
||
for input_ids, output_ids in zip(inputs.input_ids, output_ids)
|
||
]
|
||
output_texts = processor.batch_decode(
|
||
generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
|
||
)
|
||
|
||
# 提取预测的品牌名称并比较
|
||
for pred_text, true_brand in zip(output_texts, batch_brands):
|
||
# 使用正则表达式提取<answer>标签中的内容
|
||
match = re.search(r'<answer>(.*?)</answer>', pred_text)
|
||
if match:
|
||
pred_brand = match.group(1).strip().lower()
|
||
true_brand = true_brand.lower()
|
||
|
||
# 比较预测结果
|
||
if pred_brand == true_brand:
|
||
correct += 1
|
||
total += 1
|
||
|
||
# 计算并输出准确率
|
||
accuracy = correct / total if total > 0 else 0
|
||
print(f"准确率: {accuracy:.2%} ({correct}/{total})")
|