optimized on 2 gpus

parent 5c00be4edd
commit 7b80572f09

BIN  __pycache__/locustfile.cpython-312.pyc  (new file; binary file not shown)
endpoints/image.py
@@ -8,44 +8,44 @@ from PIL import Image
 from pipeline_setup import pipe, IMAGE_TOKEN
 from utils.image_processing import encode_image_base64
 
-# async def image_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process an image with the user's query.
-#     """
-#     try:
-#         if file.content_type not in ["image/jpeg", "image/png"]:
-#             return JSONResponse({"query": question, "error": "Unsupported file type."})
+async def image_query(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process an image with the user's query.
+    """
+    try:
+        if file.content_type not in ["image/jpeg", "image/png"]:
+            return JSONResponse({"query": question, "error": "Unsupported file type."})
 
-#         image_data = await file.read()
-#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
-#         encoded_image_base64 = encode_image_base64(image)
+        image_data = await file.read()
+        image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
+        encoded_image_base64 = encode_image_base64(image)
 
-#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-#         response = await asyncio.to_thread(pipe, (question, image))
-#         return JSONResponse({"query": question, "response": response.text})
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
+        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+        response = await asyncio.to_thread(pipe, (question, image))
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})
 
 
 # import mimetypes
-async def image_query(image: np.ndarray, question: str):
-    """
-    API endpoint to process an image (as numpy array) with the user's query.
-    """
-    try:
-        # Convert the numpy array to a PIL Image
-        image = Image.fromarray(image).convert("RGB").resize((512, 512))
+# async def image_query(image: np.ndarray, question: str):
+#     """
+#     API endpoint to process an image (as numpy array) with the user's query.
+#     """
+#     try:
+#         # Convert the numpy array to a PIL Image
+#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
 
-        # Encode the image to base64 (optional, if needed by your pipeline)
-        buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
-        encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+#         # Encode the image to base64 (optional, if needed by your pipeline)
+#         buffered = io.BytesIO()
+#         image.save(buffered, format="JPEG")
+#         encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
-        # Prepare the query with the image token
-        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+#         # Prepare the query with the image token
+#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
 
-        # Query the model
-        response = await asyncio.to_thread(pipe, (question, image))
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}
+#         # Query the model
+#         response = await asyncio.to_thread(pipe, (question, image))
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}
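Note: a minimal client sketch for the re-enabled UploadFile endpoint. The /api/image route and the test file name are assumptions; only the /api/video registration is visible in the main.py diff below, and the port follows this commit's change to 8002.

    # Hedged sketch: exercise the re-enabled image endpoint.
    # "/api/image" and "test_image.jpg" are assumptions, not confirmed by this diff.
    import requests

    with open("test_image.jpg", "rb") as f:
        resp = requests.post(
            "http://localhost:8002/api/image",
            files={"file": ("test_image.jpg", f, "image/jpeg")},
            data={"question": "What is in this image?"},
        )
    print(resp.json())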
endpoints/text.py
@@ -1,24 +1,25 @@
+import asyncio
 from fastapi import Form
 from fastapi.responses import JSONResponse
 from asyncio import to_thread
 from pipeline_setup import pipe
 
+async def text_query(question: str = Form(...)):
+    """
+    API endpoint to process text input with the user's query.
+    """
+    try:
+        response = await asyncio.to_thread(pipe, question)
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})
 
 # async def text_query(question: str = Form(...)):
 #     """
 #     API endpoint to process text input with the user's query.
 #     """
 #     try:
 #         response = await to_thread(pipe, question)
-#         return JSONResponse({"query": question, "response": response.text})
+#         return {"query": question, "response": response.text}
 #     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
+#         return {"query": question, "error": str(e)}
 
-async def text_query(question: str = Form(...)):
-    """
-    API endpoint to process text input with the user's query.
-    """
-    try:
-        response = await to_thread(pipe, question)
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}
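Note: a matching sketch for the new JSONResponse-based text endpoint. The /api/text route name is an assumption; the handler reads the question from form data (Form(...)), not a JSON body.

    # Hedged sketch: call the text endpoint with form data.
    # "/api/text" is an assumed route name.
    import requests

    resp = requests.post(
        "http://localhost:8002/api/text",
        data={"question": "What is the capital of France?"},
    )
    print(resp.json())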
endpoints/video.py
@@ -4,140 +4,39 @@ from pipeline_setup import pipe
 from utils.image_processing import encode_image_base64
 from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
 from utils.audio_transcription import transcribe_audio
+import time
 import asyncio
 import mimetypes
 from concurrent.futures import ThreadPoolExecutor
 
-# async def video_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process a video file with the user's query.
-#     """
-#     try:
-#         print("Processing video...")
-
-#         # Validate file type
-#         if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
-#             return JSONResponse({"query": question, "error": "Unsupported video file type."})
-
-#         # Start overall timer
-#         overall_start_time = time.time()
-
-#         # Save the uploaded video to a temporary file
-#         print("Reading video...")
-#         video_data = await file.read()
-#         temp_video_path = "/tmp/temp_video.mp4"
-#         with open(temp_video_path, "wb") as temp_video_file:
-#             temp_video_file.write(video_data)
-#         print(f"Temp video saved to: {temp_video_path}")
-
-#         # Record the time after reading the video
-#         video_reading_time = time.time()
-
-#         # Split the video into segments
-#         print("Splitting video...")
-#         segments = split_video_into_segments(temp_video_path, segment_duration=30)
-#         print(f"Video split into {len(segments)} segments.")
-
-#         aggregated_responses = []
-#         segment_timings = []
-
-#         for i, segment_path in enumerate(segments):
-#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
-
-#             # Start timing for the segment
-#             segment_start_time = time.time()
-
-#             # Extract key frames
-#             frame_start_time = time.time()
-#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
-#             frame_time = time.time()
-
-#             # Extract audio and transcribe
-#             audio_start_time = time.time()
-#             audio_path = extract_audio_from_video(segment_path)
-#             transcribed_text = transcribe_audio(audio_path)
-#             audio_time = time.time()
-
-#             # Combine transcribed text with the query
-#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
-
-#             # Prepare content for the pipeline
-#             question_with_frames = ""
-#             for j, img in enumerate(imgs):
-#                 question_with_frames += f"Frame{j+1}: {{IMAGE_TOKEN}}\n"
-#             question_with_frames += combined_query
-
-#             content = [{"type": "text", "text": question_with_frames}]
-#             for img in imgs:
-#                 content.append({
-#                     "type": "image_url",
-#                     "image_url": {
-#                         "max_dynamic_patch": 1,
-#                         "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"
-#                     }
-#                 })
-
-#             # Query the model
-#             inference_start_time = time.time()
-#             messages = [dict(role="user", content=content)]
-#             response = await asyncio.to_thread(pipe, messages)
-#             inference_time = time.time()
-
-#             # Aggregate response
-#             aggregated_responses.append(response.text)
-
-#             # Calculate timing for the segment
-#             segment_timings.append({
-#                 "segment_index": i + 1,
-#                 "segment_processing_time": inference_time - segment_start_time,
-#                 "frame_extraction_time": frame_time - frame_start_time,
-#                 "audio_extraction_time": audio_time - audio_start_time,
-#                 "model_inference_time": inference_time - inference_start_time
-#             })
-
-#             print(f"transcription: {transcribed_text}")
-#             # print(f"content: {content}")
-
-#         overall_end_time = time.time()
-
-#         # Aggregate total timings
-#         total_timings = {
-#             "video_reading_time": video_reading_time - overall_start_time,
-#             "total_segments": len(segments),
-#             "total_processing_time": overall_end_time - overall_start_time,
-#             "segment_details": segment_timings
-#         }
-
-#         return JSONResponse({
-#             "question": question,
-#             "responses": aggregated_responses,
-#             "timings": total_timings,
-#         })
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
-
-
-async def video_query(video_path: str, question: str):
+async def video_query(file: UploadFile, question: str = Form(...)):
     """
     API endpoint to process a video file with the user's query.
     """
     try:
         print("Processing video...")
 
-        if not video_path or not isinstance(video_path, str):
-            return {"query": question, "error": "No video file provided or invalid file input."}
-
-        # Determine the file type using the file extension
-        file_type, _ = mimetypes.guess_type(video_path)
-        if file_type is None or not file_type.startswith("video/"):
-            return {"query": question, "error": "Unsupported video file type."}
-
-        # Log the video path
-        print(f"Video path: {video_path}")
+        # Validate file type
+        if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
+            return JSONResponse({"query": question, "error": "Unsupported video file type."})
+
+        # Start overall timer
+        overall_start_time = time.time()
+
+        # Save the uploaded video to a temporary file
+        print("Reading video...")
+        video_data = await file.read()
+        temp_video_path = "/tmp/temp_video.mp4"
+        with open(temp_video_path, "wb") as temp_video_file:
+            temp_video_file.write(video_data)
+        print(f"Temp video saved to: {temp_video_path}")
+
+        # Record the time after reading the video
+        video_reading_time = time.time()
 
         # Split the video into segments
         print("Splitting video...")
-        segments = split_video_into_segments(video_path, segment_duration=30)
+        segments = split_video_into_segments(temp_video_path, segment_duration=30)
         print(f"Video split into {len(segments)} segments.")
 
         aggregated_responses = []
@@ -146,12 +45,19 @@ async def video_query(video_path: str, question: str):
         for i, segment_path in enumerate(segments):
             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
 
+            # Start timing for the segment
+            segment_start_time = time.time()
+
             # Extract key frames
+            frame_start_time = time.time()
             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+            frame_time = time.time()
 
             # Extract audio and transcribe
+            audio_start_time = time.time()
             audio_path = extract_audio_from_video(segment_path)
             transcribed_text = transcribe_audio(audio_path)
+            audio_time = time.time()
 
             # Combine transcribed text with the query
             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
@@ -173,15 +79,110 @@ async def video_query(video_path: str, question: str):
                 })
 
             # Query the model
+            inference_start_time = time.time()
             messages = [dict(role="user", content=content)]
             response = await asyncio.to_thread(pipe, messages)
+            inference_time = time.time()
 
             # Aggregate response
             aggregated_responses.append(response.text)
 
-        return {
+            # Calculate timing for the segment
+            segment_timings.append({
+                "segment_index": i + 1,
+                "segment_processing_time": inference_time - segment_start_time,
+                "frame_extraction_time": frame_time - frame_start_time,
+                "audio_extraction_time": audio_time - audio_start_time,
+                "model_inference_time": inference_time - inference_start_time
+            })
+
+            print(f"transcription: {transcribed_text}")
+            # print(f"content: {content}")
+
+        overall_end_time = time.time()
+
+        # Aggregate total timings
+        total_timings = {
+            "video_reading_time": video_reading_time - overall_start_time,
+            "total_segments": len(segments),
+            "total_processing_time": overall_end_time - overall_start_time,
+            "segment_details": segment_timings
+        }
+
+        return JSONResponse({
             "question": question,
             "responses": aggregated_responses,
-        }
+            "timings": total_timings,
+        })
     except Exception as e:
-        return {"query": question, "error": str(e)}
+        return JSONResponse({"query": question, "error": str(e)})
+
+
+# async def video_query(video_path: str, question: str):
+#     """
+#     API endpoint to process a video file with the user's query.
+#     """
+#     try:
+#         print("Processing video...")
+
+#         if not video_path or not isinstance(video_path, str):
+#             return {"query": question, "error": "No video file provided or invalid file input."}
+
+#         # Determine the file type using the file extension
+#         file_type, _ = mimetypes.guess_type(video_path)
+#         if file_type is None or not file_type.startswith("video/"):
+#             return {"query": question, "error": "Unsupported video file type."}
+
+#         # Log the video path
+#         print(f"Video path: {video_path}")
+
+#         # Split the video into segments
+#         print("Splitting video...")
+#         segments = split_video_into_segments(video_path, segment_duration=30)
+#         print(f"Video split into {len(segments)} segments.")
+
+#         aggregated_responses = []
+#         segment_timings = []
+
+#         for i, segment_path in enumerate(segments):
+#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+
+#             # Extract key frames
+#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+
+#             # Extract audio and transcribe
+#             audio_path = extract_audio_from_video(segment_path)
+#             transcribed_text = transcribe_audio(audio_path)
+
+#             # Combine transcribed text with the query
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+
+#             # Prepare content for the pipeline
+#             question_with_frames = ""
+#             for j, img in enumerate(imgs):
+#                 question_with_frames += f"Frame{j+1}: {{IMAGE_TOKEN}}\n"
+#             question_with_frames += combined_query
+
+#             content = [{"type": "text", "text": question_with_frames}]
+#             for img in imgs:
+#                 content.append({
+#                     "type": "image_url",
+#                     "image_url": {
+#                         "max_dynamic_patch": 1,
+#                         "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"
+#                     }
+#                 })
+
+#             # Query the model
+#             messages = [dict(role="user", content=content)]
+#             response = await asyncio.to_thread(pipe, messages)
+
+#             # Aggregate response
+#             aggregated_responses.append(response.text)
+
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}
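Note: /api/video is the one route confirmed by the main.py diff below, so the new UploadFile signature can be exercised with a sketch like this ("test_video.mp4" is a hypothetical local clip).

    # Hedged sketch: POST a local clip to the /api/video route from main.py.
    import requests

    with open("test_video.mp4", "rb") as f:
        resp = requests.post(
            "http://localhost:8002/api/video",
            files={"file": ("test_video.mp4", f, "video/mp4")},
            data={"question": "What is happening in this video?"},
        )
    print(resp.json())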
locustfile.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+from locust import HttpUser, task, between
+
+class MultiModalUser(HttpUser):
+    wait_time = between(1, 3)
+
+    @task
+    def text_query(self):
+        self.client.post("/api/predict", json={"input": "What is the capital of France?"})
+
+    # @task
+    # def image_query(self):
+    #     with open("test_image.jpg", "rb") as f:
+    #         self.client.post("/api/image_predict", files={"file": f}, data={"question": "What is in this image?"})
+
+    # @task
+    # def video_query(self):
+    #     with open("test_video.mp4", "rb") as f:
+    #         self.client.post("/api/video_predict", files={"file": f}, data={"question": "What is happening in this video?"})
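Note: this load test would typically be driven with something like `locust -f locustfile.py --host http://localhost:8002` (host and port assumed from the main.py change below). The /api/predict path it targets does not appear among the routes visible in this diff, so it may need to be aligned with the actual registrations in main.py.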
main.py
@@ -12,7 +12,7 @@ app.post("/api/video")(video_query)
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=True)
+    uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True)
 
 # python main.py
 # uvicorn main:app --reload
multi-user.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+import requests
+import threading
+
+def send_request(user_id):
+    url = "http://localhost:8002"
+    data = {"input": f"Test input from user {user_id}"}
+    response = requests.post(url, json=data)
+    print(f"User {user_id} response: {response.text}")
+
+def main(num_users):
+    threads = []
+    for i in range(num_users):
+        thread = threading.Thread(target=send_request, args=(i,))
+        threads.append(thread)
+        thread.start()
+
+    for thread in threads:
+        thread.join()
+
+if __name__ == "__main__":
+    num_users = 2
+    main(num_users)
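Note: run with `python multi-user.py`; each thread issues a single POST and main() joins them all before exiting. The script posts to the server root rather than one of the /api/* routes shown in this diff, so the URL may need adjusting to a mounted endpoint.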
pipeline_setup.py
@@ -1,21 +1,24 @@
+import os
 from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
 
-# Constants
 IMAGE_TOKEN = "[IMAGE_TOKEN]"
 
+os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+
 # Model initialization
 model = "OpenGVLab/InternVL2-26B-AWQ"
 pipe = pipeline(
     model,
     backend_config=TurbomindEngineConfig(
         model_format="awq",
-        # tp=2,
-        tp=4,
-        # device_ids=[0, 1],
-        session_len=12864,
+        tp=2,
+        # tp=4,
+        session_len=2048,  # 4096, 8192, 16384, 32768
         max_batch_size=1,
-        cache_max_entry_count=0.05,
-        cache_block_seq_len=32768,
-        quant_policy=4
-    )
+        cache_max_entry_count=0.1,  # 0.05
+        cache_block_seq_len=4096,  # 8192, 16384, 32768
+        quant_policy=4,
+        # precision="fp16",
+    ),
+    # log_level='DEBUG'
 )
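Note: the commit title ("optimized on 2 gpus") comes down to this file: CUDA_VISIBLE_DEVICES pins GPUs 0 and 1, and tp=2 shards the TurboMind engine across them. A minimal sanity check, mirroring the prints already in ui.py, might look like:

    # Hedged sketch: confirm both GPUs are visible before pipeline() builds the
    # tp=2 (two-way tensor parallel) engine. The env var must be set before
    # torch/lmdeploy initialize CUDA for it to take effect.
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

    import torch
    assert torch.cuda.device_count() == 2, "tp=2 expects two visible GPUs"
    print([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])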
ui.py
@@ -1,4 +1,3 @@
-import os
 import asyncio
 import gradio as gr
 from gradio_image_prompter import ImagePrompter
@@ -6,8 +5,6 @@ from endpoints.text import text_query
 from endpoints.image import image_query
 from endpoints.video import video_query
 
-os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
-
 import torch
 print("Available GPUs:", torch.cuda.device_count())
 print("Visible Devices:", [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])