updated celery cpu gpu optimization

Commit 546609a5b2 (parent 2c51c14bc6)
Author: Zixiao Wang, 2025-03-22 20:54:10 +08:00
31 changed files with 2937 additions and 388 deletions

.gitignore (vendored): 4 changes

@@ -13,4 +13,6 @@
 __pycache__/
 endpoints/__pycache__/
 flagged/
+*.dat

10 binary files changed (contents not shown).

celery_app.py (deleted)

@@ -1,25 +0,0 @@
# from celery import Celery
# celery_app = Celery(
# "tasks",
# broker="redis://localhost:6379/0", # Redis as broker
# backend="redis://localhost:6379/0", # Redis for storing results
# )
# celery_app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
from celery import Celery
celery_app = Celery(
"tasks",
broker="redis://localhost:6379/0",
backend="redis://localhost:6379/0",
include=["tasks"] # ✅ Prevents import issues
)
celery_app.conf.task_routes = {
"tasks.*": {"queue": "default"},
}

celery_debug.py (new file): 214 lines

@@ -0,0 +1,214 @@
import os
import time
import random
from tasks import app
print(app.control.ping())
print(app.conf.result_backend)
print(app.conf.broker_url)
# task = celery.send_task("tasks.text_query_task", args=["What is string?"])
# from celery.result import AsyncResult
# result = AsyncResult(task.id)
# print(result.get(timeout=1000)) # Should return "Celery is working!"
# print(celery.tasks)
# print(result.state) # Possible states: PENDING, STARTED, SUCCESS, FAILURE
# print(result.get()) # Get result if completed
# # Check status
# result = AsyncResult(task.id)
# print(result.status)
# print(result.result) # If it failed, this will contain an error message
from celery import chain
from tasks import text_query_task, image_query_task, video_query_task, preprocess_video, inference_video
from celery.result import AsyncResult
from concurrent.futures import ThreadPoolExecutor
import time
# Check Celery connectivity
print("Celery Ping Response:", app.control.ping())
# Print Celery configuration details
print("Backend:", app.conf.result_backend)
print("Broker:", app.conf.broker_url)
# Define the number of concurrent tasks
NUM_TASKS = 4
delay_seconds = 0
file_paths = ["../video/1.mp4" for _ in range(NUM_TASKS)]
# video_folder = "../video"
# video_files = [f for f in os.listdir(video_folder) if f.endswith(('.mp4', '.avi', '.mov', '.mkv'))]
# video_files = ['1.2.mp4', '1.mp4', '3.mp4', '4.mp4', '5.mp4']
# print(video_files)
# file_paths = [os.path.join(video_folder, random.choice(video_files)) for _ in range(NUM_TASKS)]
# print(file_paths)
# profile_folder = "../profile"
# image_files = [f for f in os.listdir(profile_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
# file_paths = [os.path.join(profile_folder, random.choice(image_files)) for _ in range(NUM_TASKS)]
# questions = [f"What is AI? {i}" for i in range(NUM_TASKS)]
# questions = [
# f"Extract the following information from this image and return the result in JSON format:\n"
# f"- Name: <name>\n"
# f"- ID: <id>\n"
# f"- Profile Picture: <url>\n"
# f"- Follower Count: <count>\n"
# f"- Likes Count: <count>\n"
# f"- Bio: <bio>\n"
# f"- Following Count: <count>\n"
# f"- External Links: <links>\n"
# f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
# for i in range(NUM_TASKS)
# ]
questions = [
f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
f"- Product Name: <name>\n"
f"- Category: <category>\n"
f"- Styles or Variants: <styles/variants>\n"
f"- Highlights: <highlights>\n"
f"- Promotional Details: <promotional_details>\n"
f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
for i in range(NUM_TASKS)
]
# questions = [
# "Generate a screenplay based on the uploaded video, incorporating detailed elements such as dialogue, scene descriptions, and character actions. The screenplay should follow a structured format with the following components:\n"
# "1. **Scene Descriptions**: Provide a detailed visual setting for each scene, describing the background, lighting, and overall atmosphere.\n"
# "2. **Character Introductions**: Identify key characters, their appearance, and any notable traits or expressions.\n"
# "3. **Dialogue**: Transcribe or adapt spoken lines from the video into screenplay format, ensuring natural flow and emphasis on key moments.\n"
# "4. **Actions & Expressions**: Capture non-verbal cues, body language, and interactions between characters or with objects in the scene.\n"
# "5. **Product Integrations**: If the video features product recommendations, weave them naturally into the script, highlighting their name, category, features, and promotional details as part of the dialogue or narration.\n"
# "6. **Narrative Flow**: Ensure the screenplay has a coherent progression, with clear transitions between scenes, maintaining engagement and pacing similar to the videos tone and style.\n\n"
# "Format the output as a properly structured screenplay:\n"
# "- Scene headers (INT./EXT. - LOCATION - TIME OF DAY)\n"
# "- Character names in uppercase\n"
# "- Dialogue centered\n"
# "- Actions and descriptions formatted accordingly"
# ]
# def submit_task(question, index): # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel.
# """ Submits a Celery task with increasing delay """
# countdown_time = index * delay_seconds # Dynamic delay
# task = text_query_task.apply_async(args=[question], countdown=countdown_time)
# print("Running celery_debug...")
# # task = text_query_task.delay(question)
# print(f"Task {index} scheduled with {countdown_time}s delay.")
# return task.id
# def submit_task(file_path, question, index):
# """ Submits a Celery task with increasing delay """
# countdown_time = index * delay_seconds # Dynamic delay
# task = image_query_task.apply_async(args=[file_path, question], countdown=countdown_time)
# print(f"Task {index} scheduled with {countdown_time}s delay.")
# return task.id
# def submit_task(file_path, question, index):
# """ Submits a video query task with increasing delay """
# countdown_time = index * delay_seconds # Dynamic delay
# task = video_query_task.apply_async(args=[file_path, question], countdown=countdown_time)
# print(f"Task {index} scheduled with {countdown_time}s delay for file {file_path}.")
# return task.id
# def submit_task(file_path, question, index):
# # countdown_time = index * delay_seconds
# countdown_time = delay_seconds
# # Chain preprocessing and inference tasks, inference only after preprocess done
# task_chain = chain(
# preprocess_video.s(file_path, question),
# inference_video.s()
# ).apply_async(countdown=countdown_time)
# print(f"Task {index} scheduled with {countdown_time}s delay for file {file_path}.")
# return task_chain.id
from celery import chord, group
from tasks import preprocess_video, inference_video
def submit_task(file_paths, questions, batch_size=4):
task_ids = []
for i in range(0, len(file_paths), batch_size):
# Get the current batch of file paths and questions
batch_file_paths = file_paths[i:i + batch_size]
batch_questions = questions[i:i + batch_size]
# Create preprocessing tasks for the current batch
preprocessing_tasks = [
preprocess_video.s(file_path, question)
for file_path, question in zip(batch_file_paths, batch_questions)
]
# Submit the batch as a chord
chord_task = chord(preprocessing_tasks)(inference_video.s())
task_ids.append(chord_task.id)
print(f"Batch {i // batch_size + 1} submitted with task ID: {chord_task.id}")
return task_ids
# def submit_task(file_path, question, index):
# preprocess_task = preprocess_video.apply_async(
# args=[file_path, question],
# queue="preprocess_queue"
# )
# print(f"Task {index} preprocessing scheduled for file {file_path}.")
# # Add a callback to submit inference task after preprocessing is done
# preprocess_task.then(
# inference_video.s().set(queue="inference_queue")
# )
# print(f"Task {index} inference will be scheduled after preprocessing.")
# return preprocess_task.id
start_time = time.time()
print(f"\nSubmitting {NUM_TASKS} tasks concurrently...")
task_ids = []
# from tasks import add
# result = add.delay(questions)
# print(result)
# print(f"Task ID: {result.id}")
# try:
# task_result = result.get(timeout=5)
# print(f"Task Result: {task_result}")
# except Exception as e:
# print(f"Task not ready or failed: {e}")
# task_ids.append(result.id)
# with ThreadPoolExecutor(max_workers=10) as executor:
# # resultID = executor.map(submit_task, questions, range(NUM_TASKS))
# resultID = executor.map(submit_task, file_paths, questions, range(NUM_TASKS))
# task_ids.extend(resultID)
task_ids = submit_task(file_paths, questions)
print("\nAll tasks submitted!")
print(task_ids)
# Monitor Task Statuses
print("\nChecking Task Results...")
for task_id in task_ids:
async_result = AsyncResult(task_id, app=app)
while async_result.status not in ["SUCCESS", "FAILURE"]:
print(f"Task {task_id} status: {async_result.status}")
time.sleep(1)
# Fetch final result
print(f"Task {task_id} final status: {async_result.status}")
if async_result.status == "SUCCESS":
print(f"Result: {async_result.get()}")
print("\nAll tasks completed.")
end_time = time.time()
print(f"Total time taken: {end_time - start_time:.2f} seconds.")

celery_worker.py (new file): 89 lines

@@ -0,0 +1,89 @@
import torch.multiprocessing as mp
mp.set_start_method("fork", force=True)
# from celery import Celery
# celery_app = Celery(
# "tasks",
# broker="redis://localhost:6379/0", # Redis as broker
# backend="redis://localhost:6379/0", # Redis for storing results
# )
# celery_app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
from celery import Celery
# app = Celery(
# "tasks",
# broker="redis://localhost:6379/0",
# backend="redis://localhost:6379/0",
# include=["tasks"]
# )
# app = Celery(
# "tasks",
# broker="pyamqp://guest@localhost//",
# backend="rpc://",
# include=["tasks"]
# )
app = Celery(
"celery_worker",
broker="pyamqp://guest@localhost//",
backend="rpc://",
)
# celery = Celery(
# "tasks",
# broker="pyamqp://guest@localhost//",
# backend="rpc://"
# )
app.conf.task_routes = {
"tasks.*": {"queue": "default"},
}
app.conf.worker_prefetch_multiplier = 1
app.conf.task_acks_late = True
# from celery import Celery
# from kombu import Queue
# celery = Celery(
# "tasks",
# broker="redis://localhost:6379/0",
# backend="redis://localhost:6379/0",
# include=["tasks"]
# )
# # Define task queues properly
# celery.conf.task_queues = (
# Queue("high_priority"),
# Queue("default"),
# Queue("low_priority"),
# )
# # Define task routing
# celery.conf.task_routes = {
# "tasks.text_query_task": {"queue": "high_priority"},
# "tasks.image_query_task": {"queue": "default"},
# "tasks.video_query_task": {"queue": "low_priority"},
# }
# # Define task rate limits
# celery.conf.task_annotations = {
# "tasks.text_query_task": {"rate_limit": "10/m"},
# "tasks.image_query_task": {"rate_limit": "5/m"},
# "tasks.video_query_task": {"rate_limit": "3/m"},
# }
# # Define task retries
# celery.conf.task_acks_late = True # Ensure task is only removed from queue when fully processed
# celery.conf.worker_prefetch_multiplier = 1 # Avoid one worker taking too many tasks at once
# # Define task time limits
# celery.conf.task_time_limit = 60 # 60 seconds max execution time
# celery.conf.task_soft_time_limit = 50 # Warn at 50 seconds
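
A minimal sketch of starting a worker for this app from Python, assuming the file above is importable as celery_worker and the RabbitMQ broker is reachable; the same flags could be passed to the celery CLI instead.

# run_worker.py (hypothetical helper, not part of this commit)
from celery_worker import app

if __name__ == "__main__":
    # -Q default matches the task_routes above; prefetch and acks_late come from app.conf
    app.worker_main(argv=["worker", "--loglevel=INFO", "-Q", "default", "--concurrency=1"])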

connect_rabbitmq.py (new file): 10 lines

@@ -0,0 +1,10 @@
from kombu import Connection
rabbitmq_url = "amqp://guest:guest@localhost//"
with Connection(rabbitmq_url) as conn:
try:
conn.connect()
print("RabbitMQ connection successful!")
except Exception as e:
print(f"Failed to connect: {e}")

endpoints/image.py

@@ -8,43 +8,64 @@ from PIL import Image
 from pipeline_setup import pipe, IMAGE_TOKEN
 from utils.image_processing import encode_image_base64

-# async def image_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process an image with the user's query.
-#     """
-#     try:
-#         if file.content_type not in ["image/jpeg", "image/png"]:
-#             return JSONResponse({"query": question, "error": "Unsupported file type."})
-#         image_data = await file.read()
-#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
-#         encoded_image_base64 = encode_image_base64(image)
-#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-#         response = await asyncio.to_thread(pipe, (question, image))
-#         return JSONResponse({"query": question, "response": response.text})
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
-
-async def image_query(image: np.ndarray, question: str):
-    """
-    API endpoint to process an image (as numpy array) with the user's query.
-    """
-    try:
-        # Convert the numpy array to a PIL Image
-        image = Image.fromarray(image).convert("RGB").resize((512, 512))
-        # Encode the image to base64 (optional, if needed by your pipeline)
-        buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
-        encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        # Prepare the query with the image token
-        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-        # Query the model
-        response = await asyncio.to_thread(pipe, (question, image))
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}

+async def image_query(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process an image with the user's query.
+    """
+    try:
+        if file.content_type not in ["image/jpeg", "image/png"]:
+            return JSONResponse({"query": question, "error": "Unsupported file type."})
+        image_data = await file.read()
+        image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
+        encoded_image_base64 = encode_image_base64(image)
+        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+        response = await asyncio.to_thread(pipe, (question, image))
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})

+# async def image_query(image: np.ndarray, question: str):
+#     """
+#     API endpoint to process an image (as numpy array) with the user's query.
+#     """
+#     try:
+#         # Convert the numpy array to a PIL Image
+#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
+#         # Encode the image to base64 (optional, if needed by your pipeline)
+#         buffered = io.BytesIO()
+#         image.save(buffered, format="JPEG")
+#         encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+#         # Prepare the query with the image token
+#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+#         # Query the model
+#         response = await asyncio.to_thread(pipe, (question, image))
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# def image_query(image_path: str, question: str):
+#     try:
+#         print("image_path in image_query...")
+#         with open(image_path, "rb") as file:
+#             image_data = file.read()
+#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
+#         encoded_image_base64 = encode_image_base64(image)
+#         # Prepare the question with the image token
+#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+#         # Run model inference (blocking call, but can be async)
+#         response = pipe((question_with_image_token, image))
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

endpoints/text.py

@@ -4,23 +4,39 @@ from fastapi.responses import JSONResponse
 from asyncio import to_thread
 from pipeline_setup import pipe

-# async def text_query(question: str = Form(...)):
-#     """
-#     API endpoint to process text input with the user's query.
-#     """
-#     try:
-#         response = await to_thread(pipe, question)
-#         return JSONResponse({"query": question, "response": response.text})
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
-
-async def text_query(question: str = Form(...)):
-    """
-    API endpoint to process text input with the user's query.
-    """
-    try:
-        response = await to_thread(pipe, question)
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}

+# api
+async def text_query(question: str = Form(...)):
+    """
+    API endpoint to process text input with the user's query.
+    """
+    try:
+        print("starting text querying...")
+        response = await to_thread(pipe, question)
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})

+# gradio
+# async def text_query(question: str = Form(...)):
+#     """
+#     API endpoint to process text input with the user's query.
+#     """
+#     try:
+#         print("Processing in text.py...")
+#         response = await to_thread(pipe, question)
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# celery
+# def text_query(question: str = Form(...)):
+#     """
+#     API endpoint to process text input with the user's query.
+#     """
+#     print("Testing....")
+#     try:
+#         print("Processing in text.py...")
+#         response = pipe(question)  # Call pipe synchronously
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

endpoints/video.py

@@ -4,39 +4,155 @@ from pipeline_setup import pipe
 from utils.image_processing import encode_image_base64
 from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
 from utils.audio_transcription import transcribe_audio
+import os
+import torch
+import json
 import time
 import asyncio
 import mimetypes
 from concurrent.futures import ThreadPoolExecutor

+def save_checkpoint(video_id, checkpoint_data):
+    checkpoint_path = f"/tmp/{video_id}_progress.json"
+    with open(checkpoint_path, "w") as f:
+        json.dump(checkpoint_data, f)

+def load_checkpoint(video_id):
+    checkpoint_path = f"/tmp/{video_id}_progress.json"
+    if os.path.exists(checkpoint_path):
+        with open(checkpoint_path, "r") as f:
+            return json.load(f)
+    return None

 # async def video_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process a video file with the user's query.
-#     """
-#     try:
-#         print("Processing video...")
-#         # Validate file type
-#         if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
-#             return JSONResponse({"query": question, "error": "Unsupported video file type."})
-#         # Start overall timer
-#         overall_start_time = time.time()
-#         # Save the uploaded video to a temporary file
-#         print("Reading video...")
-#         video_data = await file.read()
-#         temp_video_path = "/tmp/temp_video.mp4"
-#         with open(temp_video_path, "wb") as temp_video_file:
-#             temp_video_file.write(video_data)
-#         print(f"Temp video saved to: {temp_video_path}")
-#         # Record the time after reading the video
-#         video_reading_time = time.time()
-#         # Split the video into segments
-#         print("Splitting video...")
-#         segments = split_video_into_segments(temp_video_path, segment_duration=30)
-#         print(f"Video split into {len(segments)} segments.")
-#         aggregated_responses = []
+#     try:
+#         print("Processing video...")
+#         if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
+#             return JSONResponse({"query": question, "error": "Unsupported video file type."})
+#         overall_start_time = time.time()
+#         video_data = await file.read()
+#         video_id = str(hash(video_data))  # Unique ID for checkpointing
+#         temp_video_path = f"/tmp/{video_id}.mp4"
+#         with open(temp_video_path, "wb") as temp_video_file:
+#             temp_video_file.write(video_data)
+#         video_reading_time = time.time()
+#         segments = split_video_into_segments(temp_video_path, segment_duration=30)
+#         checkpoint = load_checkpoint(video_id) or {}
+#         aggregated_responses = checkpoint.get("responses", [])
+#         segment_timings = checkpoint.get("timings", [])
+#         completed_segments = set(checkpoint.get("completed_segments", []))
+#         preprocessed_segments = set(checkpoint.get("preprocessed_segments", []))
+#         inference_completed_segments = set(checkpoint.get("inference_completed_segments", []))
+#         for i, segment_path in enumerate(segments):
+#             if i in completed_segments:
+#                 print(f"Skipping already processed segment {i+1}")
+#                 continue
+#             segment_start_time = time.time()
+#             if i not in preprocessed_segments:
+#                 frame_start_time = time.time()
+#                 imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#                 frame_time = time.time()
+#                 audio_start_time = time.time()
+#                 audio_path = extract_audio_from_video(segment_path)
+#                 transcribed_text = transcribe_audio(audio_path)
+#                 audio_time = time.time()
+#                 preprocessed_segments.add(i)
+#                 save_checkpoint(video_id, {
+#                     "responses": aggregated_responses,
+#                     "timings": segment_timings,
+#                     "completed_segments": list(completed_segments),
+#                     "preprocessed_segments": list(preprocessed_segments),
+#                     "inference_completed_segments": list(inference_completed_segments)
+#                 })
+#             if i not in inference_completed_segments:
+#                 combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#                 question_with_frames = "".join([f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j, _ in enumerate(imgs)])
+#                 question_with_frames += combined_query
+#                 content = [{"type": "text", "text": question_with_frames}] + [
+#                     {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"}}
+#                     for img in imgs
+#                 ]
+#                 inference_start_time = time.time()
+#                 messages = [dict(role="user", content=content)]
+#                 response = await asyncio.to_thread(pipe, messages)
+#                 inference_time = time.time()
+#                 aggregated_responses.append(response.text)
+#                 inference_completed_segments.add(i)
+#                 save_checkpoint(video_id, {
+#                     "responses": aggregated_responses,
+#                     "timings": segment_timings,
+#                     "completed_segments": list(completed_segments),
+#                     "preprocessed_segments": list(preprocessed_segments),
+#                     "inference_completed_segments": list(inference_completed_segments)
+#                 })
+#             segment_timings.append({
+#                 "segment_index": i + 1,
+#                 "segment_processing_time": inference_time - segment_start_time,
+#                 "frame_extraction_time": frame_time - frame_start_time,
+#                 "audio_extraction_time": audio_time - audio_start_time,
+#                 "model_inference_time": inference_time - inference_start_time
+#             })
+#             completed_segments.add(i)
+#             save_checkpoint(video_id, {
+#                 "responses": aggregated_responses,
+#                 "timings": segment_timings,
+#                 "completed_segments": list(completed_segments),
+#                 "preprocessed_segments": list(preprocessed_segments),
+#                 "inference_completed_segments": list(inference_completed_segments)
+#             })
+#         return JSONResponse({
+#             "question": question,
+#             "responses": aggregated_responses,
+#             "timings": {
+#                 "video_reading_time": video_reading_time - overall_start_time,
+#                 "total_segments": len(segments),
+#                 "total_processing_time": time.time() - overall_start_time,
+#                 "segment_details": segment_timings
+#             }
+#         })
+#     except Exception as e:
+#         return JSONResponse({"query": question, "error": str(e)})

+# async def video_query(video_path: str, question: str):
+#     """
+#     API endpoint to process a video file with the user's query.
+#     """
+#     try:
+#         print("Processing video...")
+#         if not video_path or not isinstance(video_path, str):
+#             return {"query": question, "error": "No video file provided or invalid file input."}
+#         # Determine the file type using the file extension
+#         file_type, _ = mimetypes.guess_type(video_path)
+#         if file_type is None or not file_type.startswith("video/"):
+#             return {"query": question, "error": "Unsupported video file type."}
+#         # Log the video path
+#         print(f"Video path: {video_path}")
+#         # Split the video into segments
+#         print("Splitting video...")
+#         segments = split_video_into_segments(video_path, segment_duration=30)
+#         print(f"Video split into {len(segments)} segments.")
+#         aggregated_responses = []

@@ -45,19 +161,12 @@ from concurrent.futures import ThreadPoolExecutor
-#         for i, segment_path in enumerate(segments):
-#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
-#             # Start timing for the segment
-#             segment_start_time = time.time()
-#             # Extract key frames
-#             frame_start_time = time.time()
-#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
-#             frame_time = time.time()
-#             # Extract audio and transcribe
-#             audio_start_time = time.time()
-#             audio_path = extract_audio_from_video(segment_path)
-#             transcribed_text = transcribe_audio(audio_path)
-#             audio_time = time.time()
-#             # Combine transcribed text with the query
-#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#         for i, segment_path in enumerate(segments):
+#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+#             # Extract key frames
+#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#             # Extract audio and transcribe
+#             audio_path = extract_audio_from_video(segment_path)
+#             transcribed_text = transcribe_audio(audio_path)
+#             # Combine transcribed text with the query
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"

@@ -79,110 +188,130 @@
-#             })
-#             # Query the model
-#             inference_start_time = time.time()
-#             messages = [dict(role="user", content=content)]
-#             response = await asyncio.to_thread(pipe, messages)
-#             inference_time = time.time()
-#             # Aggregate response
-#             aggregated_responses.append(response.text)
-#             # Calculate timing for the segment
-#             segment_timings.append({
-#                 "segment_index": i + 1,
-#                 "segment_processing_time": inference_time - segment_start_time,
-#                 "frame_extraction_time": frame_time - frame_start_time,
-#                 "audio_extraction_time": audio_time - audio_start_time,
-#                 "model_inference_time": inference_time - inference_start_time
-#             })
-#             print(f"transcription: {transcribed_text}")
-#             # print(f"content: {content}")
-#         overall_end_time = time.time()
-#         # Aggregate total timings
-#         total_timings = {
-#             "video_reading_time": video_reading_time - overall_start_time,
-#             "total_segments": len(segments),
-#             "total_processing_time": overall_end_time - overall_start_time,
-#             "segment_details": segment_timings
-#         }
-#         return JSONResponse({
-#             "question": question,
-#             "responses": aggregated_responses,
-#             "timings": total_timings,
-#         })
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})

-async def video_query(video_path: str, question: str):
-    """
-    API endpoint to process a video file with the user's query.
-    """
-    try:
-        print("Processing video...")
-        if not video_path or not isinstance(video_path, str):
-            return {"query": question, "error": "No video file provided or invalid file input."}
-        # Determine the file type using the file extension
-        file_type, _ = mimetypes.guess_type(video_path)
-        if file_type is None or not file_type.startswith("video/"):
-            return {"query": question, "error": "Unsupported video file type."}
-        # Log the video path
-        print(f"Video path: {video_path}")
-        # Split the video into segments
-        print("Splitting video...")
-        segments = split_video_into_segments(video_path, segment_duration=30)
-        print(f"Video split into {len(segments)} segments.")
-        aggregated_responses = []
-        segment_timings = []
-        for i, segment_path in enumerate(segments):
-            print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
-            # Extract key frames
-            imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
-            # Extract audio and transcribe
-            audio_path = extract_audio_from_video(segment_path)
-            transcribed_text = transcribe_audio(audio_path)
-            # Combine transcribed text with the query
-            combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
-            # Prepare content for the pipeline
-            question_with_frames = ""
-            for j, img in enumerate(imgs):
-                question_with_frames += f"Frame{j+1}: {{IMAGE_TOKEN}}\n"
-            question_with_frames += combined_query
-            content = [{"type": "text", "text": question_with_frames}]
-            for img in imgs:
-                content.append({
-                    "type": "image_url",
-                    "image_url": {
-                        "max_dynamic_patch": 1,
-                        "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"
-                    }
-                })
-            # Query the model
-            messages = [dict(role="user", content=content)]
-            response = await asyncio.to_thread(pipe, messages)
-            # Aggregate response
-            aggregated_responses.append(response.text)
-        return {
-            "question": question,
-            "responses": aggregated_responses,
-        }
-    except Exception as e:
-        return {"query": question, "error": str(e)}

+#             })
+#             # Query the model
+#             messages = [dict(role="user", content=content)]
+#             response = await asyncio.to_thread(pipe, messages)
+#             # Aggregate response
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# def video_query(video_path: str, question: str):
+#     """
+#     Processes a video file using the model.
+#     Reads the video from disk, extracts key frames, transcribes audio, and queries the model.
+#     """
+#     try:
+#         print("Processing video...")
+#         if not os.path.exists(video_path):
+#             return {"query": question, "error": "Video file not found."}
+#         # Determine the file type
+#         file_type, _ = mimetypes.guess_type(video_path)
+#         if file_type is None or not file_type.startswith("video/"):
+#             return {"query": question, "error": "Unsupported video file type."}
+#         # Split video into segments
+#         print("Splitting video...")
+#         segments = split_video_into_segments(video_path, segment_duration=30)
+#         print(f"Video split into {len(segments)} segments.")
+#         aggregated_responses = []
+#         segment_timings = []
+#         for i, segment_path in enumerate(segments):
+#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+#             # Extract key frames
+#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#             # Extract audio and transcribe
+#             audio_path = extract_audio_from_video(segment_path)
+#             transcribed_text = transcribe_audio(audio_path)
+#             # Combine transcribed text with the query
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#             # Prepare content for the pipeline
+#             question_with_frames = "".join([f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(imgs))])
+#             question_with_frames += combined_query
+#             content = [{"type": "text", "text": question_with_frames}] + [
+#                 {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"}}
+#                 for img in imgs
+#             ]
+#             # Query the model
+#             messages = [dict(role="user", content=content)]
+#             response = pipe(messages)
+#             # Aggregate response
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# def run_video_inference(preprocessed_data):
+#     """
+#     **Inference Step (Runs on GPU)**
+#     - Takes preprocessed data (key frames + transcribed audio).
+#     - Constructs a query for the model.
+#     - Runs inference on the GPU.
+#     - Returns the aggregated responses.
+#     """
+#     import torch
+#     torch.cuda.empty_cache()  # Free up GPU memory before inference
+#     try:
+#         print("Starting video inference...")
+#         question = preprocessed_data["question"]
+#         segments = preprocessed_data["segments"]
+#         aggregated_responses = []
+#         for segment in segments:
+#             segment_index = segment["segment_index"]
+#             transcribed_text = segment["transcription"]
+#             encoded_imgs = segment["encoded_images"]
+#             print(f"Running inference on segment {segment_index + 1}...")
+#             # Prepare query content
+#             question_with_frames = "".join(
+#                 [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(encoded_imgs))]
+#             )
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#             question_with_frames += combined_query
+#             content = [{"type": "text", "text": question_with_frames}] + [
+#                 {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}}
+#                 for img in encoded_imgs
+#             ]
+#             # Query the model (GPU-heavy operation)
+#             messages = [dict(role="user", content=content)]
+#             response = pipe(messages)
+#             # Collect responses
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

main.py: 160 changes

@@ -1,83 +1,105 @@
-# from fastapi import FastAPI, Form, UploadFile
-# from fastapi.responses import JSONResponse
-# import asyncio
-
-# app = FastAPI()
-
-# @app.post("/api/text")
-# async def text_query_endpoint(question: str = Form(...)):
-#     """
-#     API endpoint to process text input with the user's query.
-#     """
-#     from endpoints.text import text_query
-#     return await text_query(question=question)
-
-# @app.post("/api/image")
-# async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process an image with the user's query.
-#     """
-#     from endpoints.image import image_query
-#     return await image_query(file=file, question=question)
-
-# @app.post("/api/video")
-# async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process a video file with the user's query.
-#     """
-#     from endpoints.video import video_query
-#     return await video_query(file=file, question=question)
-
-# if __name__ == "__main__":
-#     import uvicorn
-#     uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True, loop="uvloop")
-
-from fastapi import FastAPI, Form, UploadFile
-from fastapi.responses import JSONResponse
-import shutil
-import uuid
-from tasks import text_query_task, image_query_task, video_query_task
-
-app = FastAPI()
-
-# @app.post("/api/text")
-# async def text_query_endpoint(question: str = Form(...)):
-#     task = text_query_task.apply_async(args=[question])
-#     return JSONResponse({"task_id": task.id})
-
-@app.post("/api/text")
-async def text_query_endpoint(question: str = Form(...)):
-    print(f"Received request: {question}")
-    task = text_query_task.apply_async(args=[question])
-    print(f"Task sent: {task.id}")
-    return JSONResponse({"task_id": task.id})
-
-@app.post("/api/image")
-async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
-    file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
-    with open(file_path, "wb") as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    task = image_query_task.apply_async(args=[file_path, question])
-    return JSONResponse({"task_id": task.id})
-
-@app.post("/api/video")
-async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
-    file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
-    with open(file_path, "wb") as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    task = video_query_task.apply_async(args=[file_path, question])
-    return JSONResponse({"task_id": task.id})
-
-@app.get("/api/task/{task_id}")
-async def get_task_result(task_id: str):
-    from celery.result import AsyncResult
-    result = AsyncResult(task_id)
-    if result.ready():
-        return JSONResponse({"status": "completed", "result": result.result})
-    return JSONResponse({"status": "pending"})

+from fastapi import FastAPI, Form, UploadFile
+from fastapi.responses import JSONResponse
+import asyncio
+
+app = FastAPI()
+
+@app.post("/api/text")
+async def text_query_endpoint(question: str = Form(...)):
+    """
+    API endpoint to process text input with the user's query.
+    """
+    from endpoints.text import text_query
+    return await text_query(question=question)
+
+@app.post("/api/image")
+async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process an image with the user's query.
+    """
+    from endpoints.image import image_query
+    return await image_query(file=file, question=question)
+
+@app.post("/api/video")
+async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process a video file with the user's query.
+    """
+    from endpoints.video import video_query
+    return await video_query(file=file, question=question)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True, loop="uvloop")

+# from fastapi import FastAPI, Form, UploadFile
+# from fastapi.responses import JSONResponse
+# import shutil
+# import uuid
+# from celery import chain
+# # from tasks import text_query_task, image_query_task, video_query_task
+# from tasks import text_query_task, image_query_task, preprocess_video, inference_video
+# app = FastAPI()
+# @app.get("/")
+# def read_root():
+#     return {"message": "FastAPI with Celery & RabbitMQ"}
+# @app.post("/api/text")
+# async def text_query_endpoint(question: str = Form(...)):
+#     print(f"Received request: {question}")
+#     task = text_query_task.apply_async(args=[question])
+#     print(f"Task sent: {task.id}")
+#     return JSONResponse({"task_id": task.id})
+# @app.post("/api/image")
+# async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
+#     file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
+#     with open(file_path, "wb") as buffer:
+#         shutil.copyfileobj(file.file, buffer)
+#     task = image_query_task.apply_async(args=[file_path, question])
+#     return JSONResponse({"task_id": task.id})
+# # @app.post("/api/video")
+# # async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
+# #     file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
+# #     with open(file_path, "wb") as buffer:
+# #         shutil.copyfileobj(file.file, buffer)
+# #     task = video_query_task.apply_async(args=[file_path, question])
+# #     return JSONResponse({"task_id": task.id})
+# @app.post("/api/video")
+# async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
+#     # Save the uploaded file to a temporary location
+#     file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
+#     with open(file_path, "wb") as buffer:
+#         shutil.copyfileobj(file.file, buffer)
+#     # Chain the preprocessing and inference tasks
+#     task_chain = chain(
+#         preprocess_video.s(file_path, question),  # Preprocessing task
+#         inference_video.s()  # Inference task
+#     ).apply_async()
+#     return JSONResponse({"task_id": task_chain.id})
+# @app.get("/api/task/{task_id}")
+# async def get_task_result(task_id: str):
+#     from celery.result import AsyncResult
+#     result = AsyncResult(task_id)
+#     if result.ready():
+#         return JSONResponse({"status": "completed", "result": result.result})
+#     return JSONResponse({"status": "pending"})
+# if __name__ == "__main__":
+#     import uvicorn
+#     uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True, loop="uvloop")
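
A minimal client sketch for the active endpoints above, assuming the server is running locally on port 8002; the video path and questions are placeholders.

# client_sketch.py (illustrative only)
import requests

BASE = "http://localhost:8002"

# text query: plain form field
r = requests.post(f"{BASE}/api/text", data={"question": "What is AI?"})
print(r.json())

# video query: multipart upload plus the question as a form field
with open("../video/1.mp4", "rb") as f:
    r = requests.post(
        f"{BASE}/api/video",
        files={"file": ("1.mp4", f, "video/mp4")},
        data={"question": "Summarize the products shown in this video."},
        timeout=3600,
    )
print(r.json())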

monitor.py (new file): 33 lines

@@ -0,0 +1,33 @@
import psutil
import GPUtil
import time
import logging
# Specify the full path for the log file
log_file_path = "resource_usage.log"
# Configure logging
logging.basicConfig(filename=log_file_path, level=logging.INFO)
logging.info("Logging started") # Add this line to confirm logging is working
try:
while True:
# Monitor CPU
cpu_usage = psutil.cpu_percent(interval=1)
memory_usage = psutil.virtual_memory().percent
cpu_message = f"CPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}%"
print(cpu_message) # Print to console
logging.info(cpu_message) # Log to file
# Monitor GPU
GPUs = GPUtil.getGPUs()
for gpu in GPUs:
gpu_message = f"GPU {gpu.id} | Usage: {gpu.load * 100}% | Memory: {gpu.memoryUsed}MB / {gpu.memoryTotal}MB"
print(gpu_message) # Print to console
logging.info(gpu_message) # Log to file
time.sleep(1)
except Exception as e:
error_message = f"An error occurred: {e}"
print(error_message) # Print to console
logging.error(error_message) # Log to file

nohup.out (new file): 38 lines

@@ -0,0 +1,38 @@
INFO: Will watch for changes in these directories: ['/home/ooin/st/tiktok_AI']
INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
INFO: Started reloader process [2653752] using StatReload
INFO: Started server process [2653764]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Will watch for changes in these directories: ['/home/ooin/st/tiktok_AI']
INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
INFO: Started reloader process [2662681] using StatReload
INFO: Started server process [2662696]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: 127.0.0.1:40090 - "GET / HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:53220 - "GET /docs HTTP/1.1" 200 OK
WARNING: StatReload detected changes in 'pipeline_setup.py'. Reloading...
INFO: Shutting down
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.
INFO: Finished server process [2662696]
INFO: Started server process [2876691]
INFO: Waiting for application startup.
INFO: Application startup complete.
WARNING: StatReload detected changes in 'pipeline_setup.py'. Reloading...
INFO: Shutting down
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.
INFO: Finished server process [2876691]
INFO: Started server process [2876710]
INFO: Waiting for application startup.
INFO: Application startup complete.
WARNING: StatReload detected changes in 'model_loader.py'. Reloading...
INFO: Shutting down
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.
INFO: Finished server process [2876710]
INFO: Started server process [2877032]
INFO: Waiting for application startup.
INFO: Application startup complete.

pipeline_setup.py

@@ -7,7 +7,9 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
 # os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

 # Model initialization
-model = "OpenGVLab/InternVL2-26B-AWQ"
+# model = "OpenGVLab/InternVL2-26B-AWQ"
+# model = "OpenGVLab/InternVL2_5-4B-AWQ"
+model = "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
 pipe = pipeline(
     model,
     backend_config=TurbomindEngineConfig(
@@ -16,7 +18,7 @@ pipe = pipeline(
         # tp=4,
         session_len=16384,  # 4096, 8192, 16384, 32768
         max_batch_size=1,
-        cache_max_entry_count=0.2,  # 0.05
+        cache_max_entry_count=0.15,  # 0.05
         cache_block_seq_len=16384,  # 8192, 16384, 32768
         # quant_policy=8,
         # precision="fp16",
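
As I understand lmdeploy's TurbomindEngineConfig, cache_max_entry_count sets the fraction of GPU memory given to the k/v cache, so lowering it from 0.2 to 0.15 leaves more headroom for the Whisper transcription and preprocessing work sharing these GPUs. A rough sketch for eyeballing the remaining headroom after the pipeline loads (assumes torch is installed; not part of this file):

import torch

for idx in range(torch.cuda.device_count()):
    free_b, total_b = torch.cuda.mem_get_info(idx)  # bytes free / total on this device
    print(f"GPU {idx}: {free_b / 1e9:.1f} GB free of {total_b / 1e9:.1f} GB")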

requirements.txt

@@ -1,117 +1,14 @@
-accelerate==1.2.1
-addict==2.4.0
-aiohappyeyeballs==2.4.4
-aiohttp==3.11.11
-aiosignal==1.3.2
-annotated-types==0.7.0
-anyio==4.7.0
-attrs==24.3.0
-bitsandbytes==0.45.0
-certifi==2024.12.14
-charset-normalizer==3.4.0
-click==8.1.8
-cloudpickle==3.1.0
-datasets==3.2.0
-decord==0.6.0
-dill==0.3.8
-diskcache==5.6.3
-distro==1.9.0
-einops==0.8.0
-fastapi==0.115.6
-filelock==3.16.1
-fire==0.7.0
-# flash-attn==2.7.2.post1
-frozenlist==1.5.0
-fsspec==2024.9.0
-h11==0.14.0
-httpcore==1.0.7
-httpx==0.28.1
-huggingface-hub==0.27.0
-idna==3.10
-interegular==0.3.3
-Jinja2==3.1.5
-jiter==0.8.2
-jsonschema==4.23.0
-jsonschema-specifications==2024.10.1
-lark==1.2.2
-llvmlite==0.43.0
-lmdeploy==0.6.4
-markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-mdurl==0.1.2
-mmengine-lite==0.10.5
-modelscope==1.21.0
-mpmath==1.3.0
-multidict==6.1.0
-multiprocess==0.70.16
-nest-asyncio==1.6.0
-networkx==3.4.2
-ninja==1.11.1.3
-numba==0.60.0
-numpy==1.26.4
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==9.1.0.70
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-ml-py==12.560.30
-nvidia-nccl-cu12==2.20.5
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.1.105
-openai==1.58.1
-opencv-python==4.10.0.84
-outlines==0.0.46
-packaging==24.2
-pandas==2.2.3
-peft==0.11.1
-pillow==11.0.0
-platformdirs==4.3.6
-propcache==0.2.1
-protobuf==5.29.2
-psutil==6.1.1
-pyairports==2.1.1
-pyarrow==18.1.0
-pycountry==24.6.1
-pydantic==2.10.4
-pydantic_core==2.27.2
-Pygments==2.18.0
-pynvml==12.0.0
-python-dateutil==2.9.0.post0
-python-multipart==0.0.20
-pytz==2024.2
-PyYAML==6.0.2
-referencing==0.35.1
-regex==2024.11.6
-requests==2.32.3
-rich==13.9.4
-rpds-py==0.22.3
-safetensors==0.4.5
-sentencepiece==0.2.0
-setuptools==75.6.0
-shortuuid==1.0.13
-six==1.17.0
-sniffio==1.3.1
-starlette==0.41.3
-sympy==1.13.3
-termcolor==2.5.0
-tiktoken==0.8.0
-timm==1.0.12
-tokenizers==0.21.0
-# torch==2.4.0
-# torchaudio==2.4.0
-# torchvision==0.19.0
-tqdm==4.67.1
-transformers==4.47.1
-triton==3.0.0
-typing_extensions==4.12.2
-tzdata==2024.2
-urllib3==2.3.0
-uvicorn==0.34.0
-wheel==0.45.1
-xxhash==3.5.0
-yapf==0.43.0
-yarl==1.18.3
+celery==5.4.0
+decord==0.6.0
+fastapi==0.115.8
+gradio==5.17.0
+gradio_image_prompter==0.1.0
+httpx==0.28.1
+lmdeploy==0.7.0.post3
+locustio==0.999
+numpy==2.2.3
+Pillow==11.1.0
+pydub==0.25.1
+torch==2.6.0
+uvicorn==0.34.0
+whisper==1.1.10

resource_usage.log (new file): 1623 lines (diff suppressed because it is too large)

tasks.py: 343 changes

@@ -1,25 +1,330 @@
-import asyncio
-from celery_app import celery_app
-
-@celery_app.task
-def text_query_task(question: str):
-    from endpoints.text import text_query  # Import inside the function to avoid circular import
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    return loop.run_until_complete(text_query(question=question))  # ✅ Correct way to call async functions in Celery
-
-@celery_app.task
-def image_query_task(file_path: str, question: str):
-    from endpoints.image import image_query  # Import inside the function
-    with open(file_path, "rb") as file:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        return loop.run_until_complete(image_query(file=file, question=question))  # ✅ Use event loop
-
-@celery_app.task
-def video_query_task(file_path: str, question: str):
-    from endpoints.video import video_query  # Import inside the function
-    with open(file_path, "rb") as file:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        return loop.run_until_complete(video_query(file=file, question=question))  # ✅ Use event loop

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import torch.multiprocessing as mp
mp.set_start_method("spawn", force=True)
# mp.set_start_method("fork", force=True)
from celery import Celery
import psutil
from pynvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, nvmlDeviceGetUtilizationRates
# from endpoints.video import run_video_inference
# from endpoints.video2 import preprocess_video
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
print(os.path.dirname(os.path.abspath(__file__)))
# celery_app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
app = Celery(
"tasks",
broker="redis://localhost:6379/0",
backend="redis://localhost:6379/0",
# include=["tasks"]
)
# app = Celery(
# "tasks",
# broker="pyamqp://guest@localhost//",
# # backend="rpc://",
# backend="redis://localhost:6379/0",
# )
app.conf.worker_prefetch_multiplier = 1
# app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
# app.conf.task_acks_late = True
# from celery import Celery
# from kombu import Queue
# celery = Celery(
# "tasks",
# broker="redis://localhost:6379/0",
# backend="redis://localhost:6379/0",
# include=["tasks"]
# )
# app.conf.task_routes = {
# 'tasks.preprocess_video': {'queue': 'preprocess_queue'},
# 'tasks.inference_video': {'queue': 'inference_queue'},
# }
app.conf.task_routes = {
'tasks.preprocess_video': {'queue': 'preprocess_queue'},
'tasks.inference_video': {'queue': 'inference_queue'},
}
# # Define task queues properly
# celery.conf.task_queues = (
# Queue("high_priority"),
# Queue("default"),
# Queue("low_priority"),
# )
# # Define task routing
# celery.conf.task_routes = {
# "tasks.text_query_task": {"queue": "high_priority"},
# "tasks.image_query_task": {"queue": "default"},
# "tasks.video_query_task": {"queue": "low_priority"},
# }
# # Define task rate limits
# celery.conf.task_annotations = {
# "tasks.text_query_task": {"rate_limit": "10/m"},
# "tasks.image_query_task": {"rate_limit": "5/m"},
# "tasks.video_query_task": {"rate_limit": "3/m"},
# }
# # Define task retries
# celery.conf.task_acks_late = True # Ensure task is only removed from queue when fully processed
# celery.conf.worker_prefetch_multiplier = 1 # Avoid one worker taking too many tasks at once
# # Define task time limits
# celery.conf.task_time_limit = 60 # 60 seconds max execution time
# celery.conf.task_soft_time_limit = 50 # Warn at 50 seconds
@app.task
def add(x, y):
print("Adding task...")
return x + y
@app.task(name="tasks.text_query_task")
def text_query_task(question: str):
    print("Importing text_query...")
    from endpoints.text import text_query
    print(f"Processing question: {question}")
    return text_query(question)

@app.task(name="tasks.image_query_task")
def image_query_task(file_path: str, question: str):
    try:
        print("Processing in image_query_task...")
        from endpoints.image import image_query
        print("file_path in image_query_task...")
        result = image_query(file_path, question)
        return result
    except Exception as e:
        return {"query": question, "error": str(e)}

@app.task(name="tasks.video_query_task")
def video_query_task(file_path: str, question: str):
    """
    Celery task to process a video query asynchronously.
    Reads the video file from disk and processes it.
    """
    try:
        from endpoints.video import video_query
        result = video_query(file_path, question)
        return result
    except Exception as e:
        return {"query": question, "error": str(e)}
# @celery.task(name="tasks.video_preprocessing_task", priority=5, queue="preprocessing")
# def video_preprocessing_task(file_path: str, question: str):
# return preprocess_video(file_path, question)
# @celery.task(name="tasks.video_query_task", priority=10, queue="inference")
# def video_query_task(preprocessed_data):
# return run_video_inference(preprocessed_data)
# @celery.task(name="tasks.test_task")
# def test_task():
# return "Celery is working!"
import mimetypes
from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
from utils.audio_transcription import transcribe_audio
from pipeline_setup import pipe
from utils.image_processing import encode_image_base64
from concurrent.futures import ThreadPoolExecutor, as_completed
def process_segment(segment_data):
segment_path, segment_idx, total_segments = segment_data
print(f"Processing segment {segment_idx+1}/{total_segments}: {segment_path}")
imgs = extract_motion_key_frames(segment_path, max_frames=20, sigma_multiplier=4)
print(f"length of key frames in segments: {len(imgs)}")
print(f"Segment {segment_idx+1}: extract_motion_key_frames finished.")
audio_path = extract_audio_from_video(segment_path)
print(f"Segment {segment_idx+1}: extract_audio_from_video finished.")
transcribed_text = transcribe_audio(audio_path)
print(f"Segment {segment_idx+1}: transcribe_audio finished.")
return {
"segment_path": segment_path,
"key_frames": [encode_image_base64(img) for img in imgs],
"transcribed_text": transcribed_text
}
@app.task(name="tasks.preprocess_video")
def preprocess_video(video_path, question):
try:
# Monitor CPU usage
# cpu_usage = psutil.cpu_percent(interval=1)
# print(f"CPU Usage during preprocessing: {cpu_usage}%")
print(f"Preprocessing video: {video_path}")
if not os.path.exists(video_path):
return {"query": question, "error": "Video file not found."}
# Determine the file type
file_type, _ = mimetypes.guess_type(video_path)
if file_type is None or not file_type.startswith("video/"):
return {"query": question, "error": "Unsupported video file type."}
print("Splitting video...")
segments = split_video_into_segments(video_path, segment_duration=100)
print(f"segments: {segments}")
print(f"Video split into {len(segments)} segments.")
# Process segments in parallel
processed_segments = []
max_workers = min(len(segments), os.cpu_count() * 2)
print(f"Processing segments with {max_workers} workers...")
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_segment = {
executor.submit(process_segment, (segment_path, idx, len(segments))): idx
for idx, segment_path in enumerate(segments)
}
# Collect results as they complete
segment_results = [None] * len(segments)
for future in as_completed(future_to_segment):
idx = future_to_segment[future]
try:
segment_results[idx] = future.result()
except Exception as e:
print(f"Error processing segment {idx}: {str(e)}")
segment_results[idx] = {
"segment_path": segments[idx],
"error": str(e)
}
print("multithread done!")
processed_segments = [result for result in segment_results if "error" not in result]
return {
"video_path": video_path,
"question": question,
"processed_segments": processed_segments
}
except Exception as e:
return {"query": question, "error": str(e)}
# @app.task(name="tasks.inference_video")
# def inference_video(preprocessed_data):
# try:
# # Monitor GPU usage
# # nvmlInit()
# # device_count = nvmlDeviceGetCount()
# # for i in range(device_count):
# # handle = nvmlDeviceGetHandleByIndex(i)
# # utilization = nvmlDeviceGetUtilizationRates(handle)
# # print(f"GPU {i} Usage during inference: {utilization.gpu}%")
# # print(preprocessed_data)
# video_path = preprocessed_data["video_path"]
# question = preprocessed_data["question"]
# # print(f"question: {question}")
# segments = preprocessed_data["processed_segments"]
# print(f"Running inference on: {video_path}")
# aggregated_responses = []
# for i, segment in enumerate(segments):
# print(f"Inferencing segment {i+1}/{len(segments)}")
# # Prepare input content
# question_with_frames = "".join(
# [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(segment["key_frames"]))]
# )
# question_with_frames += f"Audio Transcript: {segment['transcribed_text']}\n{question}"
# content = [{"type": "text", "text": question_with_frames}] + [
# {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}}
# for img in segment["key_frames"]
# ]
# # Query model
# messages = [dict(role="user", content=content)]
# response = pipe(messages)
# # Aggregate response
# aggregated_responses.append(response.text)
# return {
# "question": question,
# "responses": aggregated_responses,
# }
# except Exception as e:
# return {"query": question, "error": str(e)}
@app.task(name="tasks.inference_video")
def inference_video(preprocessed_results):
"""
Processes a batch of preprocessed videos on the GPU.
"""
try:
print("Running inference on a batch of videos...")
aggregated_results = []
for preprocessed_data in preprocessed_results:
video_path = preprocessed_data["video_path"]
question = preprocessed_data["question"]
segments = preprocessed_data["processed_segments"]
print(f"Inferencing video: {video_path}")
# Run inference on the GPU
aggregated_responses = []
for segment in segments:
# Prepare input for inference
question_with_frames = "".join(
[f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(segment["key_frames"]))]
)
question_with_frames += f"Audio Transcript: {segment['transcribed_text']}\n{question}"
content = [{"type": "text", "text": question_with_frames}] + [
{"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}}
for img in segment["key_frames"]
]
# Query model
messages = [dict(role="user", content=content)]
response = pipe(messages)
# Aggregate response
aggregated_responses.append(response.text)
aggregated_results.append({
"video_path": video_path,
"question": question,
"responses": aggregated_responses
})
return aggregated_results
except Exception as e:
return {"error": str(e)}

test_audio.py (new file): 92 lines

@@ -0,0 +1,92 @@
import torch
import os
from whisper import load_model
from pydub import AudioSegment
def extract_audio_from_video(video_path: str) -> str:
audio = AudioSegment.from_file(video_path)
audio_path = "/tmp/temp_audio_test.wav"
audio.export(audio_path, format="wav")
print("video extracted!")
return audio_path
# def transcribe_audio(audio_path: str) -> str:
# print("Loading model in transcribe_audio...")
# from transformers import WhisperProcessor, WhisperForConditionalGeneration
# import torch
# # Load processor and model from transformers
# processor = WhisperProcessor.from_pretrained("openai/whisper-base")
# model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
# if torch.cuda.device_count() > 1:
# print(f"Using {torch.cuda.device_count()} GPUs!")
# model = torch.nn.DataParallel(model)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
# print("Model loaded successfully.")
# print(audio_path)
# # Load and process the audio file
# import librosa
# audio_input, sr = librosa.load(audio_path, sr=16000)
# input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
# # Generate transcription
# with torch.no_grad():
# if isinstance(model, torch.nn.DataParallel):
# generated_ids = model.module.generate(input_features)
# else:
# generated_ids = model.generate(input_features)
# # Decode the generated tokens to text
# transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# return transcription
def transcribe_audio(audio_path: str) -> str:
print("Loading model in transcribe_audio...")
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
# Load processor and model from transformers
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
if torch.cuda.device_count() > 1:
print(f"Using {torch.cuda.device_count()} GPUs!")
model = torch.nn.DataParallel(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model loaded successfully.")
print(audio_path)
# Load and process the audio file
import librosa
audio_input, sr = librosa.load(audio_path, sr=16000)
input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
# Generate transcription
with torch.no_grad():
if isinstance(model, torch.nn.DataParallel):
generated_ids = model.module.generate(input_features)
else:
generated_ids = model.generate(input_features)
# Decode the generated tokens to text
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
return transcription
if __name__ == "__main__":
extract_audio_from_video("../video/1.mp4")
audio_file = "/tmp/temp_audio_test.wav"
for i in range(3):
print(f"\nTranscription attempt {i + 1}:")
transcription = transcribe_audio(audio_file)
print("Transcription:")
print(transcription)

3 binary files changed (contents not shown).

utils/audio_transcription.py

@@ -1,5 +1,6 @@
+import os
+import torch
 from pydub import AudioSegment
-from whisper import load_model

 def extract_audio_from_video(video_path: str) -> str:
     audio = AudioSegment.from_file(video_path)
@@ -7,7 +8,85 @@ def extract_audio_from_video(video_path: str) -> str:
     audio.export(audio_path, format="wav")
     return audio_path

+# def transcribe_audio(audio_path: str) -> str:
+#     print("Loading model in transcribe_audio...")
+#     from whisper import load_model
+#     model = load_model("base", device="cpu")
+#     # model = load_model("base")
+#     print("Model loaded successfully.")
+#     print(f"Model is running on: {next(model.parameters()).device}")
+#     print("Model loaded successfully on CPU.")
+#     result = model.transcribe(audio_path)
+#     print(result)
+#     return result["text"]

+# def transcribe_audio(audio_path: str) -> str:
+#     print("Loading model in transcribe_audio...")
+#     os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+#     from whisper import load_model
+#     model = load_model("base")
+#     if torch.cuda.device_count() > 1:
+#         print(f"Using {torch.cuda.device_count()} GPUs!")
+#         model = torch.nn.DataParallel(model)
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     model.to(device)
+#     print("Model loaded successfully.")
+#     print(audio_path)
+#     # Access the underlying model if using DataParallel
+#     if isinstance(model, torch.nn.DataParallel):
+#         result = model.module.transcribe(audio_path)
+#     else:
+#         result = model.transcribe(audio_path)
+#     print(result)
+#     return result["text"]

-def transcribe_audio(audio_path: str) -> str:
-    model = load_model("base")
-    result = model.transcribe(audio_path)
-    return result["text"]
+def transcribe_audio(audio_path: str) -> str:
+    print("Loading model in transcribe_audio...")
+    from transformers import WhisperProcessor, WhisperForConditionalGeneration
+    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+    if torch.cuda.device_count() > 1:
+        print(f"Using {torch.cuda.device_count()} GPUs!")
+        model = torch.nn.DataParallel(model)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    print("Model loaded successfully.")
+    print(audio_path)
+    # Load and process the audio file
+    import librosa
+    audio_input, sr = librosa.load(audio_path, sr=16000)
+    input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
+    # Generate transcription
+    with torch.no_grad():
+        if isinstance(model, torch.nn.DataParallel):
+            generated_ids = model.module.generate(input_features)
+        else:
+            generated_ids = model.generate(input_features)
+    # Decode the generated tokens to text
+    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return transcription

+# audio_path = "/tmp/temp_audio.wav"
+# num_iterations = 5
+# import time
+# start_time = time.time()
+# for i in range(num_iterations):
+#     print(f"Processing iteration {i+1}...")
+#     transcription = transcribe_audio(audio_path)
+#     print(f"Transcription (iteration {i+1}): {transcription}")
+#     end_time = time.time()
+#     elapsed_time = end_time - start_time
+#     print(f"Time taken for iteration {i+1}: {elapsed_time:.2f} seconds\n")

utils/video_processing.py

@@ -25,7 +25,8 @@ def split_video_into_segments(video_path, segment_duration=30):
     for start_time in range(0, int(total_duration), segment_duration):
         segment_file = os.path.join(output_dir, f"segment_{start_time}.mp4")
         command = [
-            "ffmpeg", "-i", video_path,
+            "ffmpeg", "-y",
+            "-i", video_path,
             "-ss", str(start_time),
             "-t", str(segment_duration),
             "-c", "copy", segment_file
@@ -33,6 +34,7 @@ def split_video_into_segments(video_path, segment_duration=30):
         subprocess.run(command, check=True)
         segments.append(segment_file)
+    print(f"segments: \n", segments)
     return segments

 def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1):