updated celery cpu gpu optimization

Commit 546609a5b2 (parent 2c51c14bc6)
Author: Zixiao Wang, 2025-03-22 20:54:10 +08:00
31 changed files with 2937 additions and 388 deletions

.gitignore (vendored): 4 changes

@@ -13,4 +13,6 @@
 __pycache__/
 endpoints/__pycache__/
 flagged/
+*.dat

10 binary files changed (contents not shown).

celery_app.py (deleted)

@@ -1,25 +0,0 @@
# from celery import Celery
# celery_app = Celery(
# "tasks",
# broker="redis://localhost:6379/0", # Redis as broker
# backend="redis://localhost:6379/0", # Redis for storing results
# )
# celery_app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
from celery import Celery
celery_app = Celery(
"tasks",
broker="redis://localhost:6379/0",
backend="redis://localhost:6379/0",
include=["tasks"] # ✅ Prevents import issues
)
celery_app.conf.task_routes = {
"tasks.*": {"queue": "default"},
}

celery_debug.py (new file): 214 lines

@@ -0,0 +1,214 @@
import os
import time
import random
from tasks import app
print(app.control.ping())
print(app.conf.result_backend)
print(app.conf.broker_url)
# task = celery.send_task("tasks.text_query_task", args=["What is string?"])
# from celery.result import AsyncResult
# result = AsyncResult(task.id)
# print(result.get(timeout=1000)) # Should return "Celery is working!"
# print(celery.tasks)
# print(result.state) # Possible states: PENDING, STARTED, SUCCESS, FAILURE
# print(result.get()) # Get result if completed
# # Check status
# result = AsyncResult(task.id)
# print(result.status)
# print(result.result) # If it failed, this will contain an error message
from celery import chain
from tasks import text_query_task, image_query_task, video_query_task, preprocess_video, inference_video
from celery.result import AsyncResult
from concurrent.futures import ThreadPoolExecutor
import time
# Check Celery connectivity
print("Celery Ping Response:", app.control.ping())
# Print Celery configuration details
print("Backend:", app.conf.result_backend)
print("Broker:", app.conf.broker_url)
# Define the number of concurrent tasks
NUM_TASKS = 4
delay_seconds = 0
file_paths = ["../video/1.mp4" for _ in range(NUM_TASKS)]
# video_folder = "../video"
# video_files = [f for f in os.listdir(video_folder) if f.endswith(('.mp4', '.avi', '.mov', '.mkv'))]
# video_files = ['1.2.mp4', '1.mp4', '3.mp4', '4.mp4', '5.mp4']
# print(video_files)
# file_paths = [os.path.join(video_folder, random.choice(video_files)) for _ in range(NUM_TASKS)]
# print(file_paths)
# profile_folder = "../profile"
# image_files = [f for f in os.listdir(profile_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
# file_paths = [os.path.join(profile_folder, random.choice(image_files)) for _ in range(NUM_TASKS)]
# questions = [f"What is AI? {i}" for i in range(NUM_TASKS)]
# questions = [
# f"Extract the following information from this image and return the result in JSON format:\n"
# f"- Name: <name>\n"
# f"- ID: <id>\n"
# f"- Profile Picture: <url>\n"
# f"- Follower Count: <count>\n"
# f"- Likes Count: <count>\n"
# f"- Bio: <bio>\n"
# f"- Following Count: <count>\n"
# f"- External Links: <links>\n"
# f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
# for i in range(NUM_TASKS)
# ]
questions = [
f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
f"- Product Name: <name>\n"
f"- Category: <category>\n"
f"- Styles or Variants: <styles/variants>\n"
f"- Highlights: <highlights>\n"
f"- Promotional Details: <promotional_details>\n"
f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
for i in range(NUM_TASKS)
]
# questions = [
# "Generate a screenplay based on the uploaded video, incorporating detailed elements such as dialogue, scene descriptions, and character actions. The screenplay should follow a structured format with the following components:\n"
# "1. **Scene Descriptions**: Provide a detailed visual setting for each scene, describing the background, lighting, and overall atmosphere.\n"
# "2. **Character Introductions**: Identify key characters, their appearance, and any notable traits or expressions.\n"
# "3. **Dialogue**: Transcribe or adapt spoken lines from the video into screenplay format, ensuring natural flow and emphasis on key moments.\n"
# "4. **Actions & Expressions**: Capture non-verbal cues, body language, and interactions between characters or with objects in the scene.\n"
# "5. **Product Integrations**: If the video features product recommendations, weave them naturally into the script, highlighting their name, category, features, and promotional details as part of the dialogue or narration.\n"
# "6. **Narrative Flow**: Ensure the screenplay has a coherent progression, with clear transitions between scenes, maintaining engagement and pacing similar to the videos tone and style.\n\n"
# "Format the output as a properly structured screenplay:\n"
# "- Scene headers (INT./EXT. - LOCATION - TIME OF DAY)\n"
# "- Character names in uppercase\n"
# "- Dialogue centered\n"
# "- Actions and descriptions formatted accordingly"
# ]
# def submit_task(question, index): # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel.
# """ Submits a Celery task with increasing delay """
# countdown_time = index * delay_seconds # Dynamic delay
# task = text_query_task.apply_async(args=[question], countdown=countdown_time)
# print("Running celery_debug...")
# # task = text_query_task.delay(question)
# print(f"Task {index} scheduled with {countdown_time}s delay.")
# return task.id
# def submit_task(file_path, question, index):
# """ Submits a Celery task with increasing delay """
# countdown_time = index * delay_seconds # Dynamic delay
# task = image_query_task.apply_async(args=[file_path, question], countdown=countdown_time)
# print(f"Task {index} scheduled with {countdown_time}s delay.")
# return task.id
# def submit_task(file_path, question, index):
# """ Submits a video query task with increasing delay """
# countdown_time = index * delay_seconds # Dynamic delay
# task = video_query_task.apply_async(args=[file_path, question], countdown=countdown_time)
# print(f"Task {index} scheduled with {countdown_time}s delay for file {file_path}.")
# return task.id
# def submit_task(file_path, question, index):
# # countdown_time = index * delay_seconds
# countdown_time = delay_seconds
# # Chain preprocessing and inference tasks, inference only after preprocess done
# task_chain = chain(
# preprocess_video.s(file_path, question),
# inference_video.s()
# ).apply_async(countdown=countdown_time)
# print(f"Task {index} scheduled with {countdown_time}s delay for file {file_path}.")
# return task_chain.id
from celery import chord, group
from tasks import preprocess_video, inference_video
def submit_task(file_paths, questions, batch_size=4):
task_ids = []
for i in range(0, len(file_paths), batch_size):
# Get the current batch of file paths and questions
batch_file_paths = file_paths[i:i + batch_size]
batch_questions = questions[i:i + batch_size]
# Create preprocessing tasks for the current batch
preprocessing_tasks = [
preprocess_video.s(file_path, question)
for file_path, question in zip(batch_file_paths, batch_questions)
]
# Submit the batch as a chord
chord_task = chord(preprocessing_tasks)(inference_video.s())
task_ids.append(chord_task.id)
print(f"Batch {i // batch_size + 1} submitted with task ID: {chord_task.id}")
return task_ids
# def submit_task(file_path, question, index):
# preprocess_task = preprocess_video.apply_async(
# args=[file_path, question],
# queue="preprocess_queue"
# )
# print(f"Task {index} preprocessing scheduled for file {file_path}.")
# # Add a callback to submit inference task after preprocessing is done
# preprocess_task.then(
# inference_video.s().set(queue="inference_queue")
# )
# print(f"Task {index} inference will be scheduled after preprocessing.")
# return preprocess_task.id
start_time = time.time()
print(f"\nSubmitting {NUM_TASKS} tasks concurrently...")
task_ids = []
# from tasks import add
# result = add.delay(questions)
# print(result)
# print(f"Task ID: {result.id}")
# try:
# task_result = result.get(timeout=5)
# print(f"Task Result: {task_result}")
# except Exception as e:
# print(f"Task not ready or failed: {e}")
# task_ids.append(result.id)
# with ThreadPoolExecutor(max_workers=10) as executor:
# # resultID = executor.map(submit_task, questions, range(NUM_TASKS))
# resultID = executor.map(submit_task, file_paths, questions, range(NUM_TASKS))
# task_ids.extend(resultID)
task_ids = submit_task(file_paths, questions)
print("\nAll tasks submitted!")
print(task_ids)
# Monitor Task Statuses
print("\nChecking Task Results...")
for task_id in task_ids:
async_result = AsyncResult(task_id, app=app)
while async_result.status not in ["SUCCESS", "FAILURE"]:
print(f"Task {task_id} status: {async_result.status}")
time.sleep(1)
# Fetch final result
print(f"Task {task_id} final status: {async_result.status}")
if async_result.status == "SUCCESS":
print(f"Result: {async_result.get()}")
print("\nAll tasks completed.")
end_time = time.time()
print(f"Total time taken: {end_time - start_time:.2f} seconds.")

celery_worker.py (new file): 89 lines

@@ -0,0 +1,89 @@
import torch.multiprocessing as mp
mp.set_start_method("fork", force=True)
# from celery import Celery
# celery_app = Celery(
# "tasks",
# broker="redis://localhost:6379/0", # Redis as broker
# backend="redis://localhost:6379/0", # Redis for storing results
# )
# celery_app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
from celery import Celery
# app = Celery(
# "tasks",
# broker="redis://localhost:6379/0",
# backend="redis://localhost:6379/0",
# include=["tasks"]
# )
# app = Celery(
# "tasks",
# broker="pyamqp://guest@localhost//",
# backend="rpc://",
# include=["tasks"]
# )
app = Celery(
"celery_worker",
broker="pyamqp://guest@localhost//",
backend="rpc://",
)
# celery = Celery(
# "tasks",
# broker="pyamqp://guest@localhost//",
# backend="rpc://"
# )
app.conf.task_routes = {
"tasks.*": {"queue": "default"},
}
app.conf.worker_prefetch_multiplier = 1
app.conf.task_acks_late = True
# from celery import Celery
# from kombu import Queue
# celery = Celery(
# "tasks",
# broker="redis://localhost:6379/0",
# backend="redis://localhost:6379/0",
# include=["tasks"]
# )
# # Define task queues properly
# celery.conf.task_queues = (
# Queue("high_priority"),
# Queue("default"),
# Queue("low_priority"),
# )
# # Define task routing
# celery.conf.task_routes = {
# "tasks.text_query_task": {"queue": "high_priority"},
# "tasks.image_query_task": {"queue": "default"},
# "tasks.video_query_task": {"queue": "low_priority"},
# }
# # Define task rate limits
# celery.conf.task_annotations = {
# "tasks.text_query_task": {"rate_limit": "10/m"},
# "tasks.image_query_task": {"rate_limit": "5/m"},
# "tasks.video_query_task": {"rate_limit": "3/m"},
# }
# # Define task retries
# celery.conf.task_acks_late = True # Ensure task is only removed from queue when fully processed
# celery.conf.worker_prefetch_multiplier = 1 # Avoid one worker taking too many tasks at once
# # Define task time limits
# celery.conf.task_time_limit = 60 # 60 seconds max execution time
# celery.conf.task_soft_time_limit = 50 # Warn at 50 seconds
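
A minimal sketch of starting a worker for this app from Python, assuming the file above is importable as celery_worker and the RabbitMQ broker is reachable; the same flags could be passed to the celery CLI instead.

# run_worker.py (hypothetical helper, not part of this commit)
from celery_worker import app

if __name__ == "__main__":
    # -Q default matches the task_routes above; prefetch and acks_late come from app.conf
    app.worker_main(argv=["worker", "--loglevel=INFO", "-Q", "default", "--concurrency=1"])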

connect_rabbitmq.py (new file): 10 lines

@@ -0,0 +1,10 @@
from kombu import Connection
rabbitmq_url = "amqp://guest:guest@localhost//"
with Connection(rabbitmq_url) as conn:
try:
conn.connect()
print("RabbitMQ connection successful!")
except Exception as e:
print(f"Failed to connect: {e}")

endpoints/image.py

@@ -8,43 +8,64 @@ from PIL import Image
 from pipeline_setup import pipe, IMAGE_TOKEN
 from utils.image_processing import encode_image_base64

-# async def image_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process an image with the user's query.
-#     """
-#     try:
-#         if file.content_type not in ["image/jpeg", "image/png"]:
-#             return JSONResponse({"query": question, "error": "Unsupported file type."})
-#         image_data = await file.read()
-#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
-#         encoded_image_base64 = encode_image_base64(image)
-#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-#         response = await asyncio.to_thread(pipe, (question, image))
-#         return JSONResponse({"query": question, "response": response.text})
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
-
-async def image_query(image: np.ndarray, question: str):
-    """
-    API endpoint to process an image (as numpy array) with the user's query.
-    """
-    try:
-        # Convert the numpy array to a PIL Image
-        image = Image.fromarray(image).convert("RGB").resize((512, 512))
-        # Encode the image to base64 (optional, if needed by your pipeline)
-        buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
-        encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        # Prepare the query with the image token
-        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
-        # Query the model
-        response = await asyncio.to_thread(pipe, (question, image))
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}

+async def image_query(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process an image with the user's query.
+    """
+    try:
+        if file.content_type not in ["image/jpeg", "image/png"]:
+            return JSONResponse({"query": question, "error": "Unsupported file type."})
+        image_data = await file.read()
+        image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
+        encoded_image_base64 = encode_image_base64(image)
+        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+        response = await asyncio.to_thread(pipe, (question, image))
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})

+# async def image_query(image: np.ndarray, question: str):
+#     """
+#     API endpoint to process an image (as numpy array) with the user's query.
+#     """
+#     try:
+#         # Convert the numpy array to a PIL Image
+#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
+#         # Encode the image to base64 (optional, if needed by your pipeline)
+#         buffered = io.BytesIO()
+#         image.save(buffered, format="JPEG")
+#         encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+#         # Prepare the query with the image token
+#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+#         # Query the model
+#         response = await asyncio.to_thread(pipe, (question, image))
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# def image_query(image_path: str, question: str):
+#     try:
+#         print("image_path in image_query...")
+#         with open(image_path, "rb") as file:
+#             image_data = file.read()
+#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
+#         encoded_image_base64 = encode_image_base64(image)
+#         # Prepare the question with the image token
+#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
+#         # Run model inference (blocking call, but can be async)
+#         response = pipe((question_with_image_token, image))
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

endpoints/text.py

@@ -4,23 +4,39 @@ from fastapi.responses import JSONResponse
 from asyncio import to_thread
 from pipeline_setup import pipe

-# async def text_query(question: str = Form(...)):
-#     """
-#     API endpoint to process text input with the user's query.
-#     """
-#     try:
-#         response = await to_thread(pipe, question)
-#         return JSONResponse({"query": question, "response": response.text})
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})
-
-async def text_query(question: str = Form(...)):
-    """
-    API endpoint to process text input with the user's query.
-    """
-    try:
-        response = await to_thread(pipe, question)
-        return {"query": question, "response": response.text}
-    except Exception as e:
-        return {"query": question, "error": str(e)}

+# api
+async def text_query(question: str = Form(...)):
+    """
+    API endpoint to process text input with the user's query.
+    """
+    try:
+        print("starting text querying...")
+        response = await to_thread(pipe, question)
+        return JSONResponse({"query": question, "response": response.text})
+    except Exception as e:
+        return JSONResponse({"query": question, "error": str(e)})

+# gradio
+# async def text_query(question: str = Form(...)):
+#     """
+#     API endpoint to process text input with the user's query.
+#     """
+#     try:
+#         print("Processing in text.py...")
+#         response = await to_thread(pipe, question)
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# celery
+# def text_query(question: str = Form(...)):
+#     """
+#     API endpoint to process text input with the user's query.
+#     """
+#     print("Testing....")
+#     try:
+#         print("Processing in text.py...")
+#         response = pipe(question)  # Call pipe synchronously
+#         return {"query": question, "response": response.text}
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

endpoints/video.py

@@ -4,39 +4,155 @@ from pipeline_setup import pipe
 from utils.image_processing import encode_image_base64
 from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
 from utils.audio_transcription import transcribe_audio
+import os
+import torch
+import json
 import time
 import asyncio
 import mimetypes
 from concurrent.futures import ThreadPoolExecutor

+def save_checkpoint(video_id, checkpoint_data):
+    checkpoint_path = f"/tmp/{video_id}_progress.json"
+    with open(checkpoint_path, "w") as f:
+        json.dump(checkpoint_data, f)

+def load_checkpoint(video_id):
+    checkpoint_path = f"/tmp/{video_id}_progress.json"
+    if os.path.exists(checkpoint_path):
+        with open(checkpoint_path, "r") as f:
+            return json.load(f)
+    return None

 # async def video_query(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process a video file with the user's query.
-#     """
-#     try:
-#         print("Processing video...")
-#         # Validate file type
-#         if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
-#             return JSONResponse({"query": question, "error": "Unsupported video file type."})
-#         # Start overall timer
-#         overall_start_time = time.time()
-#         # Save the uploaded video to a temporary file
-#         print("Reading video...")
-#         video_data = await file.read()
-#         temp_video_path = "/tmp/temp_video.mp4"
-#         with open(temp_video_path, "wb") as temp_video_file:
-#             temp_video_file.write(video_data)
-#         print(f"Temp video saved to: {temp_video_path}")
-#         # Record the time after reading the video
-#         video_reading_time = time.time()
-#         # Split the video into segments
-#         print("Splitting video...")
-#         segments = split_video_into_segments(temp_video_path, segment_duration=30)
-#         print(f"Video split into {len(segments)} segments.")
-#         aggregated_responses = []
+#     try:
+#         print("Processing video...")
+#         if file.content_type not in ["video/mp4", "video/avi", "video/mkv"]:
+#             return JSONResponse({"query": question, "error": "Unsupported video file type."})
+#         overall_start_time = time.time()
+#         video_data = await file.read()
+#         video_id = str(hash(video_data))  # Unique ID for checkpointing
+#         temp_video_path = f"/tmp/{video_id}.mp4"
+#         with open(temp_video_path, "wb") as temp_video_file:
+#             temp_video_file.write(video_data)
+#         video_reading_time = time.time()
+#         segments = split_video_into_segments(temp_video_path, segment_duration=30)
+#         checkpoint = load_checkpoint(video_id) or {}
+#         aggregated_responses = checkpoint.get("responses", [])
+#         segment_timings = checkpoint.get("timings", [])
+#         completed_segments = set(checkpoint.get("completed_segments", []))
+#         preprocessed_segments = set(checkpoint.get("preprocessed_segments", []))
+#         inference_completed_segments = set(checkpoint.get("inference_completed_segments", []))
+#         for i, segment_path in enumerate(segments):
+#             if i in completed_segments:
+#                 print(f"Skipping already processed segment {i+1}")
+#                 continue
+#             segment_start_time = time.time()
+#             if i not in preprocessed_segments:
+#                 frame_start_time = time.time()
+#                 imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#                 frame_time = time.time()
+#                 audio_start_time = time.time()
+#                 audio_path = extract_audio_from_video(segment_path)
+#                 transcribed_text = transcribe_audio(audio_path)
+#                 audio_time = time.time()
+#                 preprocessed_segments.add(i)
+#                 save_checkpoint(video_id, {
+#                     "responses": aggregated_responses,
+#                     "timings": segment_timings,
+#                     "completed_segments": list(completed_segments),
+#                     "preprocessed_segments": list(preprocessed_segments),
+#                     "inference_completed_segments": list(inference_completed_segments)
+#                 })
+#             if i not in inference_completed_segments:
+#                 combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#                 question_with_frames = "".join([f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j, _ in enumerate(imgs)])
+#                 question_with_frames += combined_query
+#                 content = [{"type": "text", "text": question_with_frames}] + [
+#                     {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"}}
+#                     for img in imgs
+#                 ]
+#                 inference_start_time = time.time()
+#                 messages = [dict(role="user", content=content)]
+#                 response = await asyncio.to_thread(pipe, messages)
+#                 inference_time = time.time()
+#                 aggregated_responses.append(response.text)
+#                 inference_completed_segments.add(i)
+#                 save_checkpoint(video_id, {
+#                     "responses": aggregated_responses,
+#                     "timings": segment_timings,
+#                     "completed_segments": list(completed_segments),
+#                     "preprocessed_segments": list(preprocessed_segments),
+#                     "inference_completed_segments": list(inference_completed_segments)
+#                 })
+#             segment_timings.append({
+#                 "segment_index": i + 1,
+#                 "segment_processing_time": inference_time - segment_start_time,
+#                 "frame_extraction_time": frame_time - frame_start_time,
+#                 "audio_extraction_time": audio_time - audio_start_time,
+#                 "model_inference_time": inference_time - inference_start_time
+#             })
+#             completed_segments.add(i)
+#             save_checkpoint(video_id, {
+#                 "responses": aggregated_responses,
+#                 "timings": segment_timings,
+#                 "completed_segments": list(completed_segments),
+#                 "preprocessed_segments": list(preprocessed_segments),
+#                 "inference_completed_segments": list(inference_completed_segments)
+#             })
+#         return JSONResponse({
+#             "question": question,
+#             "responses": aggregated_responses,
+#             "timings": {
+#                 "video_reading_time": video_reading_time - overall_start_time,
+#                 "total_segments": len(segments),
+#                 "total_processing_time": time.time() - overall_start_time,
+#                 "segment_details": segment_timings
+#             }
+#         })
+#     except Exception as e:
+#         return JSONResponse({"query": question, "error": str(e)})

+# async def video_query(video_path: str, question: str):
+#     """
+#     API endpoint to process a video file with the user's query.
+#     """
+#     try:
+#         print("Processing video...")
+#         if not video_path or not isinstance(video_path, str):
+#             return {"query": question, "error": "No video file provided or invalid file input."}
+#         # Determine the file type using the file extension
+#         file_type, _ = mimetypes.guess_type(video_path)
+#         if file_type is None or not file_type.startswith("video/"):
+#             return {"query": question, "error": "Unsupported video file type."}
+#         # Log the video path
+#         print(f"Video path: {video_path}")
+#         # Split the video into segments
+#         print("Splitting video...")
+#         segments = split_video_into_segments(video_path, segment_duration=30)
+#         print(f"Video split into {len(segments)} segments.")
+#         aggregated_responses = []

@@ -45,19 +161,12 @@ from concurrent.futures import ThreadPoolExecutor
-#         for i, segment_path in enumerate(segments):
-#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
-#             # Start timing for the segment
-#             segment_start_time = time.time()
-#             # Extract key frames
-#             frame_start_time = time.time()
-#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
-#             frame_time = time.time()
-#             # Extract audio and transcribe
-#             audio_start_time = time.time()
-#             audio_path = extract_audio_from_video(segment_path)
-#             transcribed_text = transcribe_audio(audio_path)
-#             audio_time = time.time()
-#             # Combine transcribed text with the query
-#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#         for i, segment_path in enumerate(segments):
+#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+#             # Extract key frames
+#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#             # Extract audio and transcribe
+#             audio_path = extract_audio_from_video(segment_path)
+#             transcribed_text = transcribe_audio(audio_path)
+#             # Combine transcribed text with the query
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"

@@ -79,110 +188,130 @@
-#             })
-#             # Query the model
-#             inference_start_time = time.time()
-#             messages = [dict(role="user", content=content)]
-#             response = await asyncio.to_thread(pipe, messages)
-#             inference_time = time.time()
-#             # Aggregate response
-#             aggregated_responses.append(response.text)
-#             # Calculate timing for the segment
-#             segment_timings.append({
-#                 "segment_index": i + 1,
-#                 "segment_processing_time": inference_time - segment_start_time,
-#                 "frame_extraction_time": frame_time - frame_start_time,
-#                 "audio_extraction_time": audio_time - audio_start_time,
-#                 "model_inference_time": inference_time - inference_start_time
-#             })
-#             print(f"transcription: {transcribed_text}")
-#             # print(f"content: {content}")
-#         overall_end_time = time.time()
-#         # Aggregate total timings
-#         total_timings = {
-#             "video_reading_time": video_reading_time - overall_start_time,
-#             "total_segments": len(segments),
-#             "total_processing_time": overall_end_time - overall_start_time,
-#             "segment_details": segment_timings
-#         }
-#         return JSONResponse({
-#             "question": question,
-#             "responses": aggregated_responses,
-#             "timings": total_timings,
-#         })
-#     except Exception as e:
-#         return JSONResponse({"query": question, "error": str(e)})

-async def video_query(video_path: str, question: str):
-    """
-    API endpoint to process a video file with the user's query.
-    """
-    try:
-        print("Processing video...")
-        if not video_path or not isinstance(video_path, str):
-            return {"query": question, "error": "No video file provided or invalid file input."}
-        # Determine the file type using the file extension
-        file_type, _ = mimetypes.guess_type(video_path)
-        if file_type is None or not file_type.startswith("video/"):
-            return {"query": question, "error": "Unsupported video file type."}
-        # Log the video path
-        print(f"Video path: {video_path}")
-        # Split the video into segments
-        print("Splitting video...")
-        segments = split_video_into_segments(video_path, segment_duration=30)
-        print(f"Video split into {len(segments)} segments.")
-        aggregated_responses = []
-        segment_timings = []
-        for i, segment_path in enumerate(segments):
-            print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
-            # Extract key frames
-            imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
-            # Extract audio and transcribe
-            audio_path = extract_audio_from_video(segment_path)
-            transcribed_text = transcribe_audio(audio_path)
-            # Combine transcribed text with the query
-            combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
-            # Prepare content for the pipeline
-            question_with_frames = ""
-            for j, img in enumerate(imgs):
-                question_with_frames += f"Frame{j+1}: {{IMAGE_TOKEN}}\n"
-            question_with_frames += combined_query
-            content = [{"type": "text", "text": question_with_frames}]
-            for img in imgs:
-                content.append({
-                    "type": "image_url",
-                    "image_url": {
-                        "max_dynamic_patch": 1,
-                        "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"
-                    }
-                })
-            # Query the model
-            messages = [dict(role="user", content=content)]
-            response = await asyncio.to_thread(pipe, messages)
-            # Aggregate response
-            aggregated_responses.append(response.text)
-        return {
-            "question": question,
-            "responses": aggregated_responses,
-        }
-    except Exception as e:
-        return {"query": question, "error": str(e)}

+#             })
+#             # Query the model
+#             messages = [dict(role="user", content=content)]
+#             response = await asyncio.to_thread(pipe, messages)
+#             # Aggregate response
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# def video_query(video_path: str, question: str):
+#     """
+#     Processes a video file using the model.
+#     Reads the video from disk, extracts key frames, transcribes audio, and queries the model.
+#     """
+#     try:
+#         print("Processing video...")
+#         if not os.path.exists(video_path):
+#             return {"query": question, "error": "Video file not found."}
+#         # Determine the file type
+#         file_type, _ = mimetypes.guess_type(video_path)
+#         if file_type is None or not file_type.startswith("video/"):
+#             return {"query": question, "error": "Unsupported video file type."}
+#         # Split video into segments
+#         print("Splitting video...")
+#         segments = split_video_into_segments(video_path, segment_duration=30)
+#         print(f"Video split into {len(segments)} segments.")
+#         aggregated_responses = []
+#         segment_timings = []
+#         for i, segment_path in enumerate(segments):
+#             print(f"Processing segment {i+1}/{len(segments)}: {segment_path}")
+#             # Extract key frames
+#             imgs = extract_motion_key_frames(segment_path, max_frames=50, sigma_multiplier=2)
+#             # Extract audio and transcribe
+#             audio_path = extract_audio_from_video(segment_path)
+#             transcribed_text = transcribe_audio(audio_path)
+#             # Combine transcribed text with the query
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#             # Prepare content for the pipeline
+#             question_with_frames = "".join([f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(imgs))])
+#             question_with_frames += combined_query
+#             content = [{"type": "text", "text": question_with_frames}] + [
+#                 {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{encode_image_base64(img)}"}}
+#                 for img in imgs
+#             ]
+#             # Query the model
+#             messages = [dict(role="user", content=content)]
+#             response = pipe(messages)
+#             # Aggregate response
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

+# def run_video_inference(preprocessed_data):
+#     """
+#     **Inference Step (Runs on GPU)**
+#     - Takes preprocessed data (key frames + transcribed audio).
+#     - Constructs a query for the model.
+#     - Runs inference on the GPU.
+#     - Returns the aggregated responses.
+#     """
+#     import torch
+#     torch.cuda.empty_cache()  # Free up GPU memory before inference
+#     try:
+#         print("Starting video inference...")
+#         question = preprocessed_data["question"]
+#         segments = preprocessed_data["segments"]
+#         aggregated_responses = []
+#         for segment in segments:
+#             segment_index = segment["segment_index"]
+#             transcribed_text = segment["transcription"]
+#             encoded_imgs = segment["encoded_images"]
+#             print(f"Running inference on segment {segment_index + 1}...")
+#             # Prepare query content
+#             question_with_frames = "".join(
+#                 [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(encoded_imgs))]
+#             )
+#             combined_query = f"Audio Transcript: {transcribed_text}\n{question}"
+#             question_with_frames += combined_query
+#             content = [{"type": "text", "text": question_with_frames}] + [
+#                 {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}}
+#                 for img in encoded_imgs
+#             ]
+#             # Query the model (GPU-heavy operation)
+#             messages = [dict(role="user", content=content)]
+#             response = pipe(messages)
+#             # Collect responses
+#             aggregated_responses.append(response.text)
+#         return {
+#             "question": question,
+#             "responses": aggregated_responses,
+#         }
+#     except Exception as e:
+#         return {"query": question, "error": str(e)}

main.py: 160 changes

@@ -1,83 +1,105 @@
-# from fastapi import FastAPI, Form, UploadFile
-# from fastapi.responses import JSONResponse
-# import asyncio
-
-# app = FastAPI()
-
-# @app.post("/api/text")
-# async def text_query_endpoint(question: str = Form(...)):
-#     """
-#     API endpoint to process text input with the user's query.
-#     """
-#     from endpoints.text import text_query
-#     return await text_query(question=question)
-
-# @app.post("/api/image")
-# async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process an image with the user's query.
-#     """
-#     from endpoints.image import image_query
-#     return await image_query(file=file, question=question)
-
-# @app.post("/api/video")
-# async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
-#     """
-#     API endpoint to process a video file with the user's query.
-#     """
-#     from endpoints.video import video_query
-#     return await video_query(file=file, question=question)
-
-# if __name__ == "__main__":
-#     import uvicorn
-#     uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True, loop="uvloop")
-
-from fastapi import FastAPI, Form, UploadFile
-from fastapi.responses import JSONResponse
-import shutil
-import uuid
-from tasks import text_query_task, image_query_task, video_query_task
-
-app = FastAPI()
-
-# @app.post("/api/text")
-# async def text_query_endpoint(question: str = Form(...)):
-#     task = text_query_task.apply_async(args=[question])
-#     return JSONResponse({"task_id": task.id})
-
-@app.post("/api/text")
-async def text_query_endpoint(question: str = Form(...)):
-    print(f"Received request: {question}")
-    task = text_query_task.apply_async(args=[question])
-    print(f"Task sent: {task.id}")
-    return JSONResponse({"task_id": task.id})
-
-@app.post("/api/image")
-async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
-    file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
-    with open(file_path, "wb") as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    task = image_query_task.apply_async(args=[file_path, question])
-    return JSONResponse({"task_id": task.id})
-
-@app.post("/api/video")
-async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
-    file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
-    with open(file_path, "wb") as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    task = video_query_task.apply_async(args=[file_path, question])
-    return JSONResponse({"task_id": task.id})
-
-@app.get("/api/task/{task_id}")
-async def get_task_result(task_id: str):
-    from celery.result import AsyncResult
-    result = AsyncResult(task_id)
-    if result.ready():
-        return JSONResponse({"status": "completed", "result": result.result})
-    return JSONResponse({"status": "pending"})

+from fastapi import FastAPI, Form, UploadFile
+from fastapi.responses import JSONResponse
+import asyncio
+
+app = FastAPI()
+
+@app.post("/api/text")
+async def text_query_endpoint(question: str = Form(...)):
+    """
+    API endpoint to process text input with the user's query.
+    """
+    from endpoints.text import text_query
+    return await text_query(question=question)
+
+@app.post("/api/image")
+async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process an image with the user's query.
+    """
+    from endpoints.image import image_query
+    return await image_query(file=file, question=question)
+
+@app.post("/api/video")
+async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
+    """
+    API endpoint to process a video file with the user's query.
+    """
+    from endpoints.video import video_query
+    return await video_query(file=file, question=question)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True, loop="uvloop")

+# from fastapi import FastAPI, Form, UploadFile
+# from fastapi.responses import JSONResponse
+# import shutil
+# import uuid
+# from celery import chain
+# # from tasks import text_query_task, image_query_task, video_query_task
+# from tasks import text_query_task, image_query_task, preprocess_video, inference_video
+# app = FastAPI()
+# @app.get("/")
+# def read_root():
+#     return {"message": "FastAPI with Celery & RabbitMQ"}
+# @app.post("/api/text")
+# async def text_query_endpoint(question: str = Form(...)):
+#     print(f"Received request: {question}")
+#     task = text_query_task.apply_async(args=[question])
+#     print(f"Task sent: {task.id}")
+#     return JSONResponse({"task_id": task.id})
+# @app.post("/api/image")
+# async def image_query_endpoint(file: UploadFile, question: str = Form(...)):
+#     file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
+#     with open(file_path, "wb") as buffer:
+#         shutil.copyfileobj(file.file, buffer)
+#     task = image_query_task.apply_async(args=[file_path, question])
+#     return JSONResponse({"task_id": task.id})
+# # @app.post("/api/video")
+# # async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
+# #     file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
+# #     with open(file_path, "wb") as buffer:
+# #         shutil.copyfileobj(file.file, buffer)
+# #     task = video_query_task.apply_async(args=[file_path, question])
+# #     return JSONResponse({"task_id": task.id})
+# @app.post("/api/video")
+# async def video_query_endpoint(file: UploadFile, question: str = Form(...)):
+#     # Save the uploaded file to a temporary location
+#     file_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
+#     with open(file_path, "wb") as buffer:
+#         shutil.copyfileobj(file.file, buffer)
+#     # Chain the preprocessing and inference tasks
+#     task_chain = chain(
+#         preprocess_video.s(file_path, question),  # Preprocessing task
+#         inference_video.s()  # Inference task
+#     ).apply_async()
+#     return JSONResponse({"task_id": task_chain.id})
+# @app.get("/api/task/{task_id}")
+# async def get_task_result(task_id: str):
+#     from celery.result import AsyncResult
+#     result = AsyncResult(task_id)
+#     if result.ready():
+#         return JSONResponse({"status": "completed", "result": result.result})
+#     return JSONResponse({"status": "pending"})
+# if __name__ == "__main__":
+#     import uvicorn
+#     uvicorn.run("main:app", host="0.0.0.0", port=8002, reload=True, loop="uvloop")
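
A minimal client sketch for the active endpoints above, assuming the server is running locally on port 8002; the video path and questions are placeholders.

# client_sketch.py (illustrative only)
import requests

BASE = "http://localhost:8002"

# text query: plain form field
r = requests.post(f"{BASE}/api/text", data={"question": "What is AI?"})
print(r.json())

# video query: multipart upload plus the question as a form field
with open("../video/1.mp4", "rb") as f:
    r = requests.post(
        f"{BASE}/api/video",
        files={"file": ("1.mp4", f, "video/mp4")},
        data={"question": "Summarize the products shown in this video."},
        timeout=3600,
    )
print(r.json())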

monitor.py (new file): 33 lines

@@ -0,0 +1,33 @@
import psutil
import GPUtil
import time
import logging
# Specify the full path for the log file
log_file_path = "resource_usage.log"
# Configure logging
logging.basicConfig(filename=log_file_path, level=logging.INFO)
logging.info("Logging started") # Add this line to confirm logging is working
try:
while True:
# Monitor CPU
cpu_usage = psutil.cpu_percent(interval=1)
memory_usage = psutil.virtual_memory().percent
cpu_message = f"CPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}%"
print(cpu_message) # Print to console
logging.info(cpu_message) # Log to file
# Monitor GPU
GPUs = GPUtil.getGPUs()
for gpu in GPUs:
gpu_message = f"GPU {gpu.id} | Usage: {gpu.load * 100}% | Memory: {gpu.memoryUsed}MB / {gpu.memoryTotal}MB"
print(gpu_message) # Print to console
logging.info(gpu_message) # Log to file
time.sleep(1)
except Exception as e:
error_message = f"An error occurred: {e}"
print(error_message) # Print to console
logging.error(error_message) # Log to file

nohup.out (new file): 38 lines

@@ -0,0 +1,38 @@
INFO: Will watch for changes in these directories: ['/home/ooin/st/tiktok_AI']
INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
INFO: Started reloader process [2653752] using StatReload
INFO: Started server process [2653764]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Will watch for changes in these directories: ['/home/ooin/st/tiktok_AI']
INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
INFO: Started reloader process [2662681] using StatReload
INFO: Started server process [2662696]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: 127.0.0.1:40090 - "GET / HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:53220 - "GET /docs HTTP/1.1" 200 OK
WARNING: StatReload detected changes in 'pipeline_setup.py'. Reloading...
INFO: Shutting down
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.
INFO: Finished server process [2662696]
INFO: Started server process [2876691]
INFO: Waiting for application startup.
INFO: Application startup complete.
WARNING: StatReload detected changes in 'pipeline_setup.py'. Reloading...
INFO: Shutting down
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.
INFO: Finished server process [2876691]
INFO: Started server process [2876710]
INFO: Waiting for application startup.
INFO: Application startup complete.
WARNING: StatReload detected changes in 'model_loader.py'. Reloading...
INFO: Shutting down
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.
INFO: Finished server process [2876710]
INFO: Started server process [2877032]
INFO: Waiting for application startup.
INFO: Application startup complete.

pipeline_setup.py

@@ -7,7 +7,9 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
 # os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

 # Model initialization
-model = "OpenGVLab/InternVL2-26B-AWQ"
+# model = "OpenGVLab/InternVL2-26B-AWQ"
+# model = "OpenGVLab/InternVL2_5-4B-AWQ"
+model = "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
 pipe = pipeline(
     model,
     backend_config=TurbomindEngineConfig(
@@ -16,7 +18,7 @@ pipe = pipeline(
         # tp=4,
         session_len=16384,  # 4096, 8192, 16384, 32768
         max_batch_size=1,
-        cache_max_entry_count=0.2,  # 0.05
+        cache_max_entry_count=0.15,  # 0.05
         cache_block_seq_len=16384,  # 8192, 16384, 32768
         # quant_policy=8,
         # precision="fp16",
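
As I understand lmdeploy's TurbomindEngineConfig, cache_max_entry_count sets the fraction of GPU memory given to the k/v cache, so lowering it from 0.2 to 0.15 leaves more headroom for the Whisper transcription and preprocessing work sharing these GPUs. A rough sketch for eyeballing the remaining headroom after the pipeline loads (assumes torch is installed; not part of this file):

import torch

for idx in range(torch.cuda.device_count()):
    free_b, total_b = torch.cuda.mem_get_info(idx)  # bytes free / total on this device
    print(f"GPU {idx}: {free_b / 1e9:.1f} GB free of {total_b / 1e9:.1f} GB")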

requirements.txt

@@ -1,117 +1,14 @@
-accelerate==1.2.1
-addict==2.4.0
-aiohappyeyeballs==2.4.4
-aiohttp==3.11.11
-aiosignal==1.3.2
-annotated-types==0.7.0
-anyio==4.7.0
-attrs==24.3.0
-bitsandbytes==0.45.0
-certifi==2024.12.14
-charset-normalizer==3.4.0
-click==8.1.8
-cloudpickle==3.1.0
-datasets==3.2.0
-decord==0.6.0
-dill==0.3.8
-diskcache==5.6.3
-distro==1.9.0
-einops==0.8.0
-fastapi==0.115.6
-filelock==3.16.1
-fire==0.7.0
-# flash-attn==2.7.2.post1
-frozenlist==1.5.0
-fsspec==2024.9.0
-h11==0.14.0
-httpcore==1.0.7
-httpx==0.28.1
-huggingface-hub==0.27.0
-idna==3.10
-interegular==0.3.3
-Jinja2==3.1.5
-jiter==0.8.2
-jsonschema==4.23.0
-jsonschema-specifications==2024.10.1
-lark==1.2.2
-llvmlite==0.43.0
-lmdeploy==0.6.4
-markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-mdurl==0.1.2
-mmengine-lite==0.10.5
-modelscope==1.21.0
-mpmath==1.3.0
-multidict==6.1.0
-multiprocess==0.70.16
-nest-asyncio==1.6.0
-networkx==3.4.2
-ninja==1.11.1.3
-numba==0.60.0
-numpy==1.26.4
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==9.1.0.70
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-ml-py==12.560.30
-nvidia-nccl-cu12==2.20.5
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.1.105
-openai==1.58.1
-opencv-python==4.10.0.84
-outlines==0.0.46
-packaging==24.2
-pandas==2.2.3
-peft==0.11.1
-pillow==11.0.0
-platformdirs==4.3.6
-propcache==0.2.1
-protobuf==5.29.2
-psutil==6.1.1
-pyairports==2.1.1
-pyarrow==18.1.0
-pycountry==24.6.1
-pydantic==2.10.4
-pydantic_core==2.27.2
-Pygments==2.18.0
-pynvml==12.0.0
-python-dateutil==2.9.0.post0
-python-multipart==0.0.20
-pytz==2024.2
-PyYAML==6.0.2
-referencing==0.35.1
-regex==2024.11.6
-requests==2.32.3
-rich==13.9.4
-rpds-py==0.22.3
-safetensors==0.4.5
-sentencepiece==0.2.0
-setuptools==75.6.0
-shortuuid==1.0.13
-six==1.17.0
-sniffio==1.3.1
-starlette==0.41.3
-sympy==1.13.3
-termcolor==2.5.0
-tiktoken==0.8.0
-timm==1.0.12
-tokenizers==0.21.0
-# torch==2.4.0
-# torchaudio==2.4.0
-# torchvision==0.19.0
-tqdm==4.67.1
-transformers==4.47.1
-triton==3.0.0
-typing_extensions==4.12.2
-tzdata==2024.2
-urllib3==2.3.0
-uvicorn==0.34.0
-wheel==0.45.1
-xxhash==3.5.0
-yapf==0.43.0
-yarl==1.18.3
+celery==5.4.0
+decord==0.6.0
+fastapi==0.115.8
+gradio==5.17.0
+gradio_image_prompter==0.1.0
+httpx==0.28.1
+lmdeploy==0.7.0.post3
+locustio==0.999
+numpy==2.2.3
+Pillow==11.1.0
+pydub==0.25.1
+torch==2.6.0
+uvicorn==0.34.0
+whisper==1.1.10

resource_usage.log (new file): 1623 lines (diff suppressed because it is too large)

tasks.py: 343 changes

@@ -1,25 +1,330 @@
-import asyncio
-from celery_app import celery_app
-
-@celery_app.task
-def text_query_task(question: str):
-    from endpoints.text import text_query  # Import inside the function to avoid circular import
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    return loop.run_until_complete(text_query(question=question))  # ✅ Correct way to call async functions in Celery
-
-@celery_app.task
-def image_query_task(file_path: str, question: str):
-    from endpoints.image import image_query  # Import inside the function
-    with open(file_path, "rb") as file:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        return loop.run_until_complete(image_query(file=file, question=question))  # ✅ Use event loop
-
-@celery_app.task
-def video_query_task(file_path: str, question: str):
-    from endpoints.video import video_query  # Import inside the function
-    with open(file_path, "rb") as file:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        return loop.run_until_complete(video_query(file=file, question=question))  # ✅ Use event loop

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import torch.multiprocessing as mp
mp.set_start_method("spawn", force=True)
# mp.set_start_method("fork", force=True)
from celery import Celery
import psutil
from pynvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, nvmlDeviceGetUtilizationRates
# from endpoints.video import run_video_inference
# from endpoints.video2 import preprocess_video
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
print(os.path.dirname(os.path.abspath(__file__)))
# celery_app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
app = Celery(
"tasks",
broker="redis://localhost:6379/0",
backend="redis://localhost:6379/0",
# include=["tasks"]
)
# app = Celery(
# "tasks",
# broker="pyamqp://guest@localhost//",
# # backend="rpc://",
# backend="redis://localhost:6379/0",
# )
app.conf.worker_prefetch_multiplier = 1
# app.conf.task_routes = {
# "tasks.*": {"queue": "default"},
# }
# app.conf.task_acks_late = True
# from celery import Celery
# from kombu import Queue
# celery = Celery(
# "tasks",
# broker="redis://localhost:6379/0",
# backend="redis://localhost:6379/0",
# include=["tasks"]
# )
# app.conf.task_routes = {
# 'tasks.preprocess_video': {'queue': 'preprocess_queue'},
# 'tasks.inference_video': {'queue': 'inference_queue'},
# }
app.conf.task_routes = {
'tasks.preprocess_video': {'queue': 'preprocess_queue'},
'tasks.inference_video': {'queue': 'inference_queue'},
}
# # Define task queues properly
# celery.conf.task_queues = (
# Queue("high_priority"),
# Queue("default"),
# Queue("low_priority"),
# )
# # Define task routing
# celery.conf.task_routes = {
# "tasks.text_query_task": {"queue": "high_priority"},
# "tasks.image_query_task": {"queue": "default"},
# "tasks.video_query_task": {"queue": "low_priority"},
# }
# # Define task rate limits
# celery.conf.task_annotations = {
# "tasks.text_query_task": {"rate_limit": "10/m"},
# "tasks.image_query_task": {"rate_limit": "5/m"},
# "tasks.video_query_task": {"rate_limit": "3/m"},
# }
# # Define task retries
# celery.conf.task_acks_late = True # Ensure task is only removed from queue when fully processed
# celery.conf.worker_prefetch_multiplier = 1 # Avoid one worker taking too many tasks at once
# # Define task time limits
# celery.conf.task_time_limit = 60 # 60 seconds max execution time
# celery.conf.task_soft_time_limit = 50 # Warn at 50 seconds
@app.task
def add(x, y):
print("Adding task...")
return x + y
@app.task(name="tasks.text_query_task")
def text_query_task(question: str):
    print("Importing text_query...")
    from endpoints.text import text_query
    print(f"Processing question: {question}")
    return text_query(question)

@app.task(name="tasks.image_query_task")
def image_query_task(file_path: str, question: str):
    try:
        print("Processing in image_query_task...")
        from endpoints.image import image_query
        print("file_path in image_query_task...")
        result = image_query(file_path, question)
        return result
    except Exception as e:
        return {"query": question, "error": str(e)}

@app.task(name="tasks.video_query_task")
def video_query_task(file_path: str, question: str):
    """
    Celery task to process a video query asynchronously.
    Reads the video file from disk and processes it.
    """
    try:
        from endpoints.video import video_query
        result = video_query(file_path, question)
        return result
    except Exception as e:
        return {"query": question, "error": str(e)}
# @celery.task(name="tasks.video_preprocessing_task", priority=5, queue="preprocessing")
# def video_preprocessing_task(file_path: str, question: str):
# return preprocess_video(file_path, question)
# @celery.task(name="tasks.video_query_task", priority=10, queue="inference")
# def video_query_task(preprocessed_data):
# return run_video_inference(preprocessed_data)
# @celery.task(name="tasks.test_task")
# def test_task():
# return "Celery is working!"
import mimetypes
from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
from utils.audio_transcription import transcribe_audio
from pipeline_setup import pipe
from utils.image_processing import encode_image_base64
from concurrent.futures import ThreadPoolExecutor, as_completed
def process_segment(segment_data):
segment_path, segment_idx, total_segments = segment_data
print(f"Processing segment {segment_idx+1}/{total_segments}: {segment_path}")
imgs = extract_motion_key_frames(segment_path, max_frames=20, sigma_multiplier=4)
print(f"length of key frames in segments: {len(imgs)}")
print(f"Segment {segment_idx+1}: extract_motion_key_frames finished.")
audio_path = extract_audio_from_video(segment_path)
print(f"Segment {segment_idx+1}: extract_audio_from_video finished.")
transcribed_text = transcribe_audio(audio_path)
print(f"Segment {segment_idx+1}: transcribe_audio finished.")
return {
"segment_path": segment_path,
"key_frames": [encode_image_base64(img) for img in imgs],
"transcribed_text": transcribed_text
}
@app.task(name="tasks.preprocess_video")
def preprocess_video(video_path, question):
try:
# Monitor CPU usage
# cpu_usage = psutil.cpu_percent(interval=1)
# print(f"CPU Usage during preprocessing: {cpu_usage}%")
print(f"Preprocessing video: {video_path}")
if not os.path.exists(video_path):
return {"query": question, "error": "Video file not found."}
# Determine the file type
file_type, _ = mimetypes.guess_type(video_path)
if file_type is None or not file_type.startswith("video/"):
return {"query": question, "error": "Unsupported video file type."}
print("Splitting video...")
segments = split_video_into_segments(video_path, segment_duration=100)
print(f"segments: {segments}")
print(f"Video split into {len(segments)} segments.")
# Process segments in parallel
processed_segments = []
max_workers = min(len(segments), os.cpu_count() * 2)
print(f"Processing segments with {max_workers} workers...")
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_segment = {
executor.submit(process_segment, (segment_path, idx, len(segments))): idx
for idx, segment_path in enumerate(segments)
}
# Collect results as they complete
segment_results = [None] * len(segments)
for future in as_completed(future_to_segment):
idx = future_to_segment[future]
try:
segment_results[idx] = future.result()
except Exception as e:
print(f"Error processing segment {idx}: {str(e)}")
segment_results[idx] = {
"segment_path": segments[idx],
"error": str(e)
}
print("multithread done!")
processed_segments = [result for result in segment_results if "error" not in result]
return {
"video_path": video_path,
"question": question,
"processed_segments": processed_segments
}
except Exception as e:
return {"query": question, "error": str(e)}
# @app.task(name="tasks.inference_video")
# def inference_video(preprocessed_data):
# try:
# # Monitor GPU usage
# # nvmlInit()
# # device_count = nvmlDeviceGetCount()
# # for i in range(device_count):
# # handle = nvmlDeviceGetHandleByIndex(i)
# # utilization = nvmlDeviceGetUtilizationRates(handle)
# # print(f"GPU {i} Usage during inference: {utilization.gpu}%")
# # print(preprocessed_data)
# video_path = preprocessed_data["video_path"]
# question = preprocessed_data["question"]
# # print(f"question: {question}")
# segments = preprocessed_data["processed_segments"]
# print(f"Running inference on: {video_path}")
# aggregated_responses = []
# for i, segment in enumerate(segments):
# print(f"Inferencing segment {i+1}/{len(segments)}")
# # Prepare input content
# question_with_frames = "".join(
# [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(segment["key_frames"]))]
# )
# question_with_frames += f"Audio Transcript: {segment['transcribed_text']}\n{question}"
# content = [{"type": "text", "text": question_with_frames}] + [
# {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}}
# for img in segment["key_frames"]
# ]
# # Query model
# messages = [dict(role="user", content=content)]
# response = pipe(messages)
# # Aggregate response
# aggregated_responses.append(response.text)
# return {
# "question": question,
# "responses": aggregated_responses,
# }
# except Exception as e:
# return {"query": question, "error": str(e)}
@app.task(name="tasks.inference_video")
def inference_video(preprocessed_results):
"""
Processes a batch of preprocessed videos on the GPU.
"""
try:
print("Running inference on a batch of videos...")
aggregated_results = []
for preprocessed_data in preprocessed_results:
video_path = preprocessed_data["video_path"]
question = preprocessed_data["question"]
segments = preprocessed_data["processed_segments"]
print(f"Inferencing video: {video_path}")
# Run inference on the GPU
aggregated_responses = []
for segment in segments:
# Prepare input for inference
question_with_frames = "".join(
[f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(segment["key_frames"]))]
)
question_with_frames += f"Audio Transcript: {segment['transcribed_text']}\n{question}"
content = [{"type": "text", "text": question_with_frames}] + [
{"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}}
for img in segment["key_frames"]
]
# Query model
messages = [dict(role="user", content=content)]
response = pipe(messages)
# Aggregate response
aggregated_responses.append(response.text)
aggregated_results.append({
"video_path": video_path,
"question": question,
"responses": aggregated_responses
})
return aggregated_results
except Exception as e:
return {"error": str(e)}

test_audio.py (new file): 92 lines

@@ -0,0 +1,92 @@
import torch
import os
from whisper import load_model
from pydub import AudioSegment
def extract_audio_from_video(video_path: str) -> str:
audio = AudioSegment.from_file(video_path)
audio_path = "/tmp/temp_audio_test.wav"
audio.export(audio_path, format="wav")
print("video extracted!")
return audio_path
# def transcribe_audio(audio_path: str) -> str:
# print("Loading model in transcribe_audio...")
# from transformers import WhisperProcessor, WhisperForConditionalGeneration
# import torch
# # Load processor and model from transformers
# processor = WhisperProcessor.from_pretrained("openai/whisper-base")
# model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
# if torch.cuda.device_count() > 1:
# print(f"Using {torch.cuda.device_count()} GPUs!")
# model = torch.nn.DataParallel(model)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
# print("Model loaded successfully.")
# print(audio_path)
# # Load and process the audio file
# import librosa
# audio_input, sr = librosa.load(audio_path, sr=16000)
# input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
# # Generate transcription
# with torch.no_grad():
# if isinstance(model, torch.nn.DataParallel):
# generated_ids = model.module.generate(input_features)
# else:
# generated_ids = model.generate(input_features)
# # Decode the generated tokens to text
# transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# return transcription
def transcribe_audio(audio_path: str) -> str:
print("Loading model in transcribe_audio...")
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
# Load processor and model from transformers
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
if torch.cuda.device_count() > 1:
print(f"Using {torch.cuda.device_count()} GPUs!")
model = torch.nn.DataParallel(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model loaded successfully.")
print(audio_path)
# Load and process the audio file
import librosa
audio_input, sr = librosa.load(audio_path, sr=16000)
input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
# Generate transcription
with torch.no_grad():
if isinstance(model, torch.nn.DataParallel):
generated_ids = model.module.generate(input_features)
else:
generated_ids = model.generate(input_features)
# Decode the generated tokens to text
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
return transcription
if __name__ == "__main__":
extract_audio_from_video("../video/1.mp4")
audio_file = "/tmp/temp_audio_test.wav"
for i in range(3):
print(f"\nTranscription attempt {i + 1}:")
transcription = transcribe_audio(audio_file)
print("Transcription:")
print(transcription)

3 binary files changed (contents not shown).

utils/audio_transcription.py

@@ -1,5 +1,6 @@
+import os
+import torch
 from pydub import AudioSegment
-from whisper import load_model

 def extract_audio_from_video(video_path: str) -> str:
     audio = AudioSegment.from_file(video_path)
@@ -7,7 +8,85 @@ def extract_audio_from_video(video_path: str) -> str:
     audio.export(audio_path, format="wav")
     return audio_path

+# def transcribe_audio(audio_path: str) -> str:
+#     print("Loading model in transcribe_audio...")
+#     from whisper import load_model
+#     model = load_model("base", device="cpu")
+#     # model = load_model("base")
+#     print("Model loaded successfully.")
+#     print(f"Model is running on: {next(model.parameters()).device}")
+#     print("Model loaded successfully on CPU.")
+#     result = model.transcribe(audio_path)
+#     print(result)
+#     return result["text"]

+# def transcribe_audio(audio_path: str) -> str:
+#     print("Loading model in transcribe_audio...")
+#     os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+#     from whisper import load_model
+#     model = load_model("base")
+#     if torch.cuda.device_count() > 1:
+#         print(f"Using {torch.cuda.device_count()} GPUs!")
+#         model = torch.nn.DataParallel(model)
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     model.to(device)
+#     print("Model loaded successfully.")
+#     print(audio_path)
+#     # Access the underlying model if using DataParallel
+#     if isinstance(model, torch.nn.DataParallel):
+#         result = model.module.transcribe(audio_path)
+#     else:
+#         result = model.transcribe(audio_path)
+#     print(result)
+#     return result["text"]

-def transcribe_audio(audio_path: str) -> str:
-    model = load_model("base")
-    result = model.transcribe(audio_path)
-    return result["text"]
+def transcribe_audio(audio_path: str) -> str:
+    print("Loading model in transcribe_audio...")
+    from transformers import WhisperProcessor, WhisperForConditionalGeneration
+    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+    if torch.cuda.device_count() > 1:
+        print(f"Using {torch.cuda.device_count()} GPUs!")
+        model = torch.nn.DataParallel(model)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    print("Model loaded successfully.")
+    print(audio_path)
+    # Load and process the audio file
+    import librosa
+    audio_input, sr = librosa.load(audio_path, sr=16000)
+    input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
+    # Generate transcription
+    with torch.no_grad():
+        if isinstance(model, torch.nn.DataParallel):
+            generated_ids = model.module.generate(input_features)
+        else:
+            generated_ids = model.generate(input_features)
+    # Decode the generated tokens to text
+    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return transcription

+# audio_path = "/tmp/temp_audio.wav"
+# num_iterations = 5
+# import time
+# start_time = time.time()
+# for i in range(num_iterations):
+#     print(f"Processing iteration {i+1}...")
+#     transcription = transcribe_audio(audio_path)
+#     print(f"Transcription (iteration {i+1}): {transcription}")
+#     end_time = time.time()
+#     elapsed_time = end_time - start_time
+#     print(f"Time taken for iteration {i+1}: {elapsed_time:.2f} seconds\n")

utils/video_processing.py

@@ -25,7 +25,8 @@ def split_video_into_segments(video_path, segment_duration=30):
     for start_time in range(0, int(total_duration), segment_duration):
         segment_file = os.path.join(output_dir, f"segment_{start_time}.mp4")
         command = [
-            "ffmpeg", "-i", video_path,
+            "ffmpeg", "-y",
+            "-i", video_path,
             "-ss", str(start_time),
             "-t", str(segment_duration),
             "-c", "copy", segment_file
@@ -33,6 +34,7 @@ def split_video_into_segments(video_path, segment_duration=30):
         subprocess.run(command, check=True)
         segments.append(segment_file)
+    print(f"segments: \n", segments)
     return segments

 def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1):