Optimized on 2 GPUs

Zixiao Wang 2025-01-26 20:42:56 +08:00
parent 5c00be4edd
commit 7b80572f09
10 changed files with 222 additions and 180 deletions

Binary file not shown.

View File

@@ -8,44 +8,44 @@ from PIL import Image
 from pipeline_setup import pipe, IMAGE_TOKEN
 from utils.image_processing import encode_image_base64
-# async def image_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process an image with the user's query.
-#     """
-#     try:
-#         if file.content_type not in ["image/jpeg", "image/png"]:
-#             return JSONResponse({"query": question, "error": "Unsupported file type."})
-#         image_data = await file.read()
-#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
-#         encoded_image_base64 = encode_image_base64(image)
-#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-#         response = await asyncio.to_thread(pipe, (question, image))
-#         return JSONResponse({"query": question, "response": response.text})
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
+async def image_query(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process an image with the user's query.
+    """
+    try:
+        if file.content_type not in ["image/jpeg", "image/png"]:
+            return JSONResponse({"query": question, "error": "Unsupported file type."})
+        image_data = await file.read()
+        image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
+        encoded_image_base64 = encode_image_base64(image)
+        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+        response = await asyncio.to_thread(pipe, (question, image))
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})
 # import mimetypes
-async def image_query(image: np.ndarray, question: str):
-    """
-    API endpoint to process an image (as numpy array) with the user's query.
-    """
-    try:
-        # Convert the numpy array to a PIL Image
-        image = Image.fromarray(image).convert("RGB").resize((512, 512))
-        # Encode the image to base64 (optional, if needed by your pipeline)
-        buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
-        encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        # Prepare the query with the image token
-        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-        # Query the model
-        response = await asyncio.to_thread(pipe, (question, image))
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}
+# async def image_query(image: np.ndarray, question: str):
+#     """
+#     API endpoint to process an image (as numpy array) with the user's query.
+#     """
+#     try:
+#         # Convert the numpy array to a PIL Image
+#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
+#         # Encode the image to base64 (optional, if needed by your pipeline)
+#         buffered = io.BytesIO()
+#         image.save(buffered, format="JPEG")
+#         encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+#         # Prepare the query with the image token
+#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+#         # Query the model
+#         response = await asyncio.to_thread(pipe, (question, image))
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

View File

@@ -1,24 +1,25 @@
-import asyncio
 from fastapi import Form
 from fastapi.responses import JSONResponse
 from asyncio import to_thread
 from pipeline_setup import pipe
-async def text_query(question: str = Form(...)):
-    """
-    API endpoint to process text input with the user's query.
-    """
-    try:
-        response = await asyncio.to_thread(pipe, question)
-        return JSONResponse({"query": question, "response": response.text})
-    except Exception as e:
-        return JSONResponse({"query": question, "error": str(e)})
 # async def text_query(question: str = Form(...)):
 #     """
 #     API endpoint to process text input with the user's query.
 #     """
 #     try:
 #         response = await to_thread(pipe, question)
-#         return JSONResponse({"query": question, "response": response.text})
+#         return {"query": question, "response": response.text}
 #     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
+#         return {"query": question, "error": str(e)}
+async def text_query(question: str = Form(...)):
+    """
+    API endpoint to process text input with the user's query.
+    """
+    try:
+        response = await to_thread(pipe, question)
+        return {"query": question, "response": response.text}
+    except Exception as e:
+        return {"query": question, "error": str(e)}

View File

@@ -4,140 +4,39 @@ from pipeline_setup import pipe
 from utils.image_processing import encode_image_base64
 from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
 from utils.audio_transcription import transcribe_audio
+import time
 import asyncio
 import mimetypes
 from concurrent.futures import ThreadPoolExecutor
-# async def video_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process a video file with the user's query.
-#     """
-#     try:
-#         print("Processing video...")
-#         # Validate file type
-#         if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
-#             return JSONResponse({"query": question, "error": "Unsupported video file type."})
-#         # Start overall timer
-#         overall_start_time = time.time()
-#         # Save the uploaded video to a temporary file
-#         print("Reading video...")
-#         video_data = await file.read()
-#         temp_video_path = "/tmp/temp_video.mp4"
-#         with open(temp_video_path, "wb") as temp_video_file:
-#             temp_video_file.write(video_data)
-#         print(f"Temp video saved to: {temp_video_path}")
-#         # Record the time after reading the video
-#         video_reading_time = time.time()
-#         # Split the video into segments
-#         print("Splitting video...")
-#         segments = split_video_into_segments(temp_video_path, segment_duration=30)
-#         print(f"Video split into {len(segments)} segments.")
-#         aggregated_responses = []
-#         segment_timings = []
-#         for i, segment_path in enumerate(segments):
-#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
-#             # Start timing for the segment
-#             segment_start_time = time.time()
-#             # Extract key frames
-#             frame_start_time = time.time()
-#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
-#             frame_time = time.time()
-#             # Extract audio and transcribe
-#             audio_start_time = time.time()
-#             audio_path = extract_audio_from_video(segment_path)
-#             transcribed_text = transcribe_audio(audio_path)
-#             audio_time = time.time()
-#             # Combine transcribed text with the query
-#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
-#             # Prepare content for the pipeline
-#             question_with_frames = ""
-#             for j, img in enumerate(imgs):
-#                 question_with_frames += f"Frame{j+1}: {{IMAGE_TOKEN}}\n"
-#             question_with_frames += combined_query
-#             content = [{"type": "text", "text": question_with_frames}]
-#             for img in imgs:
-#                 content.append({
-#                     "type": "image_url",
-#                     "image_url": {
-#                         "max_dynamic_patch": 1,
-#                         "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"
-#                     }
-#                 })
-#             # Query the model
-#             inference_start_time = time.time()
-#             messages = [dict(role="user", content=content)]
-#             response = await asyncio.to_thread(pipe, messages)
-#             inference_time = time.time()
-#             # Aggregate response
-#             aggregated_responses.append(response.text)
-#             # Calculate timing for the segment
-#             segment_timings.append({
-#                 "segment_index": i + 1,
-#                 "segment_processing_time": inference_time - segment_start_time,
-#                 "frame_extraction_time": frame_time - frame_start_time,
-#                 "audio_extraction_time": audio_time - audio_start_time,
-#                 "model_inference_time": inference_time - inference_start_time
-#             })
-#             print(f"transcription: {transcribed_text}")
-#             # print(f"content: {content}")
-#         overall_end_time = time.time()
-#         # Aggregate total timings
-#         total_timings = {
-#             "video_reading_time": video_reading_time - overall_start_time,
-#             "total_segments": len(segments),
-#             "total_processing_time": overall_end_time - overall_start_time,
-#             "segment_details": segment_timings
-#         }
-#         return JSONResponse({
-#             "question": question,
-#             "responses": aggregated_responses,
-#             "timings": total_timings,
-#         })
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
-async def video_query(video_path: str, question: str):
+async def video_query(file: UploadFile, question: str = Form(...)):
     """
     API endpoint to process a video file with the user's query.
     """
     try:
         print("Processing video...")
-        if not video_path or not isinstance(video_path, str):
-            return {"query": question, "error": "No video file provided or invalid file input."}
-        # Determine the file type using the file extension
-        file_type, _ = mimetypes.guess_type(video_path)
-        if file_type is None or not file_type.startswith("video/"):
-            return {"query": question, "error": "Unsupported video file type."}
-        # Log the video path
-        print(f"Video path: {video_path}")
+        # Validate file type
+        if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
+            return JSONResponse({"query": question, "error": "Unsupported video file type."})
+        # Start overall timer
+        overall_start_time = time.time()
+        # Save the uploaded video to a temporary file
+        print("Reading video...")
+        video_data = await file.read()
+        temp_video_path = "/tmp/temp_video.mp4"
+        with open(temp_video_path, "wb") as temp_video_file:
+            temp_video_file.write(video_data)
+        print(f"Temp video saved to: {temp_video_path}")
+        # Record the time after reading the video
+        video_reading_time = time.time()
         # Split the video into segments
         print("Splitting video...")
-        segments = split_video_into_segments(video_path, segment_duration=30)
+        segments = split_video_into_segments(temp_video_path, segment_duration=30)
         print(f"Video split into {len(segments)} segments.")
         aggregated_responses = []
@@ -146,12 +45,19 @@ async def video_query(video_path: str, question: str):
         for i, segment_path in enumerate(segments):
             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+            # Start timing for the segment
+            segment_start_time = time.time()
             # Extract key frames
+            frame_start_time = time.time()
             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+            frame_time = time.time()
             # Extract audio and transcribe
+            audio_start_time = time.time()
             audio_path = extract_audio_from_video(segment_path)
             transcribed_text = transcribe_audio(audio_path)
+            audio_time = time.time()
             # Combine transcribed text with the query
             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
@@ -173,15 +79,110 @@ async def video_query(video_path: str, question: str):
                 })
             # Query the model
+            inference_start_time = time.time()
             messages = [dict(role="user", content=content)]
             response = await asyncio.to_thread(pipe, messages)
+            inference_time = time.time()
             # Aggregate response
             aggregated_responses.append(response.text)
-        return {
+            # Calculate timing for the segment
+            segment_timings.append({
+                "segment_index": i + 1,
+                "segment_processing_time": inference_time - segment_start_time,
+                "frame_extraction_time": frame_time - frame_start_time,
+                "audio_extraction_time": audio_time - audio_start_time,
+                "model_inference_time": inference_time - inference_start_time
+            })
+            print(f"transcription: {transcribed_text}")
+            # print(f"content: {content}")
+        overall_end_time = time.time()
+        # Aggregate total timings
+        total_timings = {
+            "video_reading_time": video_reading_time - overall_start_time,
+            "total_segments": len(segments),
+            "total_processing_time": overall_end_time - overall_start_time,
+            "segment_details": segment_timings
+        }
+        return JSONResponse({
             "question": question,
             "responses": aggregated_responses,
-        }
+            "timings": total_timings,
+        })
     except Exception as e:
-        return {"query": question, "error": str(e)}
+        return JSONResponse({"query": question, "error": str(e)})
+# async def video_query(video_path: str, question: str):
+#     """
+#     API endpoint to process a video file with the user's query.
+#     """
+#     try:
+#         print("Processing video...")
+#         if not video_path or not isinstance(video_path, str):
+#             return {"query": question, "error": "No video file provided or invalid file input."}
+#         # Determine the file type using the file extension
+#         file_type, _ = mimetypes.guess_type(video_path)
+#         if file_type is None or not file_type.startswith("video/"):
+#             return {"query": question, "error": "Unsupported video file type."}
+#         # Log the video path
+#         print(f"Video path: {video_path}")
+#         # Split the video into segments
+#         print("Splitting video...")
+#         segments = split_video_into_segments(video_path, segment_duration=30)
+#         print(f"Video split into {len(segments)} segments.")
+#         aggregated_responses = []
+#         segment_timings = []
+#         for i, segment_path in enumerate(segments):
+#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+#             # Extract key frames
+#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#             # Extract audio and transcribe
+#             audio_path = extract_audio_from_video(segment_path)
+#             transcribed_text = transcribe_audio(audio_path)
+#             # Combine transcribed text with the query
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#             # Prepare content for the pipeline
+#             question_with_frames = ""
+#             for j, img in enumerate(imgs):
+#                 question_with_frames += f"Frame{j+1}: {{IMAGE_TOKEN}}\n"
+#             question_with_frames += combined_query
+#             content = [{"type": "text", "text": question_with_frames}]
+#             for img in imgs:
+#                 content.append({
+#                     "type": "image_url",
+#                     "image_url": {
+#                         "max_dynamic_patch": 1,
+#                         "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"
+#                     }
+#                 })
+#             # Query the model
+#             messages = [dict(role="user", content=content)]
+#             response = await asyncio.to_thread(pipe, messages)
+#             # Aggregate response
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

locustfile.py Normal file (+18)
View File

@@ -0,0 +1,18 @@
from locust import HttpUser, task, between

class MultiModalUser(HttpUser):
    wait_time = between(1, 3)

    @task
    def text_query(self):
        self.client.post("/api/predict", json={"input": "What is the capital of France?"})

    # @task
    # def image_query(self):
    #     with open("test_image.jpg", "rb") as f:
    #         self.client.post("/api/image_predict", files={"file": f}, data={"question": "What is in this image?"})

    # @task
    # def video_query(self):
    #     with open("test_video.mp4", "rb") as f:
    #         self.client.post("/api/video_predict", files={"file": f}, data={"question": "What is happening in this video?"})

View File

@@ -12,7 +12,7 @@ app.post("/api/video")(video_query)
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=True)
+    uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True)
 # python main.py
 # uvicorn main:app --reload

multi-user.py Normal file (+22)
View File

@@ -0,0 +1,22 @@
import requests
import threading

def send_request(user_id):
    url = "http://localhost:8002"
    data = {"input": f"Test input from user {user_id}"}
    response = requests.post(url, json=data)
    print(f"User {user_id} response: {response.text}")

def main(num_users):
    threads = []
    for i in range(num_users):
        thread = threading.Thread(target=send_request, args=(i,))
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()

if __name__ == "__main__":
    num_users = 2
    main(num_users)

View File

@@ -1,21 +1,24 @@
+import os
 from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
 # Constants
 IMAGE_TOKEN = "[IMAGE_TOKEN]"
+os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
 # Model initialization
 model = "OpenGVLab/InternVL2-26B-AWQ"
 pipe = pipeline(
     model,
     backend_config=TurbomindEngineConfig(
         model_format="awq",
-        # tp=2,
-        tp=4,
-        # device_ids=[0, 1],
-        session_len=12864,
+        tp=2,
+        # tp=4,
+        session_len=2048,  # 4096, 8192, 16384, 32768
         max_batch_size=1,
-        cache_max_entry_count=0.05,
-        cache_block_seq_len=32768,
-        quant_policy=4
-    )
+        cache_max_entry_count=0.1,  # 0.05
+        cache_block_seq_len=4096,  # 8192, 16384, 32768
+        quant_policy=4,
+        # precision="fp16",
+    ),
+    # log_level='DEBUG'
 )
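Setting CUDA_VISIBLE_DEVICES here, before the pipeline is constructed, is what lets the tp=2 tensor-parallel configuration take effect: the variable is only honored if it is set before the CUDA runtime initializes, which is presumably why the assignment moved out of ui.py (see the ui.py hunk below). A quick smoke test of the reconfigured pipeline might look like the following sketch; the prompt and token budget are arbitrary choices, not part of this commit.

# Sanity check for the 2-GPU Turbomind pipeline defined above (illustrative only).
from lmdeploy import GenerationConfig
from pipeline_setup import pipe

gen_config = GenerationConfig(max_new_tokens=128, temperature=0.7)
response = pipe("Give a one-sentence description of tensor parallelism.", gen_config=gen_config)
print(response.text)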

ui.py (3 changed lines)
View File

@@ -1,4 +1,3 @@
-import os
 import asyncio
 import gradio as gr
 from gradio_image_prompter import ImagePrompter
@@ -6,8 +5,6 @@ from endpoints.text import text_query
 from endpoints.image import image_query
 from endpoints.video import video_query
-os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
 import torch
 print("Available GPUs:", torch.cuda.device_count())
 print("Visible Devices:", [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])