diff --git a/README.md b/README.md
index a1df0ab..daf7ae6 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,52 @@
-# api
+# API Usage Guide
+This project supports three ways to start and interact with the API.
+
+## Method 1: Standard API Start
+
+Start the API using the main script:
+
+```
+python main.py
+```
+
+## Method 2: Gradio UI
+
+To use the Gradio UI:
+
+1. In each file under `endpoints/`, switch the active implementation: comment out the variant marked `# api` and uncomment the one marked `# gradio`.
+2. Run the UI script:
+
+```
+python ui.py
+```
+
+## Method 3: Celery Optimization
+
+Use Celery to offload preprocessing and inference to background workers. Adjust the `--concurrency` values below as needed.
+
+### Step 1: Start Redis Server
+
+```
+redis-server
+```
+
+### Step 2: Start Celery Workers
+
+Open two separate terminals and run the following:
+
+**Terminal 1 (Preprocessing Queue):**
+```
+celery -A tasks worker --pool=threads --loglevel=info --concurrency=2 --queues=preprocess_queue
+```
+
+**Terminal 2 (Inference Queue):**
+```
+celery -A tasks worker --pool=threads --loglevel=info --concurrency=3 --queues=inference_queue
+```
+
+### Step 3: Run the Debug Script
+
+```
+python celery_debug.py
+```
diff --git a/__pycache__/main.cpython-311.pyc b/__pycache__/main.cpython-311.pyc
index 8a40201..65bc157 100644
Binary files a/__pycache__/main.cpython-311.pyc and b/__pycache__/main.cpython-311.pyc differ
diff --git a/__pycache__/pipeline_setup.cpython-311.pyc b/__pycache__/pipeline_setup.cpython-311.pyc
index 951a1a0..1706de2 100644
Binary files a/__pycache__/pipeline_setup.cpython-311.pyc and b/__pycache__/pipeline_setup.cpython-311.pyc differ
diff --git a/__pycache__/tasks.cpython-311.pyc b/__pycache__/tasks.cpython-311.pyc
index fdc9f61..0a92ce6 100644
Binary files a/__pycache__/tasks.cpython-311.pyc and b/__pycache__/tasks.cpython-311.pyc differ
diff --git a/celery_debug.py b/celery_debug.py
index 6050ce0..35a4641 100644
--- a/celery_debug.py
+++ b/celery_debug.py
@@ -41,7 +41,7 @@ print("Broker:", app.conf.broker_url)
 # Define the number of concurrent tasks
 NUM_TASKS = 1
 delay_seconds = 0
-file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)]
+file_paths = [f"../video/3.mp4" for _ in range(NUM_TASKS)]
 
 # video_folder = "../video"
 
@@ -71,29 +71,29 @@ file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)]
 #     for i in range(NUM_TASKS)
 # ]
 
-# questions = [
-#     f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
-#     f"- Product Name: \n"
-#     f"- Category: \n"
-#     f"- Styles or Variants: \n"
-#     f"- Highlights: \n"
-#     f"- Promotional Details: \n"
-#     f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
-#     for i in range(NUM_TASKS)
-# ]
-
 questions = [
-    f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n"
-    f"- Scene Descriptions: \n"
-    f"- Character Introductions: \n"
-    f"- Dialogue: \n"
-    f"- Actions & Expressions: \n"
-    f"- Product Integrations: \n"
-    f"- Narrative Flow: \n"
-    f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}"
+    f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
+    f"- Product Name: \n"
+    f"- Category: \n"
+    f"- Styles or Variants: \n"
+    f"- Highlights: \n"
+    f"- Promotional Details: \n"
+    f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
     for i in range(NUM_TASKS)
 ]
 
+# questions = [
+#     f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n"
+#     f"- Scene Descriptions: \n"
+#     f"- Character Introductions: \n"
+#     f"- Dialogue: \n"
+#     f"- Actions & Expressions: \n"
+#     f"- Product Integrations: \n"
+#     f"- Narrative Flow: \n"
+#     f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}"
+#     for i in range(NUM_TASKS)
+# ]
+
 # def submit_task(question, index): # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel.
 #     """ Submits a Celery task with increasing delay """
 #     countdown_time = index * delay_seconds  # Dynamic delay
Task {i}" + f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n" + f"- Product Name: \n" + f"- Category: \n" + f"- Styles or Variants: \n" + f"- Highlights: \n" + f"- Promotional Details: \n" + f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}" for i in range(NUM_TASKS) ] +# questions = [ +# f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n" +# f"- Scene Descriptions: \n" +# f"- Character Introductions: \n" +# f"- Dialogue: \n" +# f"- Actions & Expressions: \n" +# f"- Product Integrations: \n" +# f"- Narrative Flow: \n" +# f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}" +# for i in range(NUM_TASKS) +# ] + # def submit_task(question, index): # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel. # """ Submits a Celery task with increasing delay """ # countdown_time = index * delay_seconds # Dynamic delay diff --git a/endpoints/image.py b/endpoints/image.py index 1f03968..a89a572 100644 --- a/endpoints/image.py +++ b/endpoints/image.py @@ -8,6 +8,7 @@ from PIL import Image from pipeline_setup import pipe, IMAGE_TOKEN from utils.image_processing import encode_image_base64 +# api async def image_query(file: UploadFile, question: str = Form(...)): """ API endpoint to process an image with the user's query. @@ -26,11 +27,8 @@ async def image_query(file: UploadFile, question: str = Form(...)): except Exception as e: return JSONResponse({"query": question, "error": str(e)}) - +# gradio # async def image_query(image: np.ndarray, question: str): -# """ -# API endpoint to process an image (as numpy array) with the user's query. -# """ # try: # # Convert the numpy array to a PIL Image # image = Image.fromarray(image).convert("RGB").resize((512, 512)) @@ -49,7 +47,7 @@ async def image_query(file: UploadFile, question: str = Form(...)): # except Exception as e: # return {"query": question, "error": str(e)} - +# celery # def image_query(image_path: str, question: str): # try: # print("image_path in image_query...") diff --git a/endpoints/video.py b/endpoints/video.py index 7188f8f..85cb103 100644 --- a/endpoints/video.py +++ b/endpoints/video.py @@ -24,6 +24,7 @@ def load_checkpoint(video_id): return json.load(f) return None +# api # async def video_query(file: UploadFile, question: str = Form(...)): # try: # print("Processing video...") @@ -131,7 +132,7 @@ def load_checkpoint(video_id): # except Exception as e: # return JSONResponse({"query": question, "error": str(e)}) - +# gradio # async def video_query(video_path: str, question: str): # """ # API endpoint to process a video file with the user's query. diff --git a/prompt.txt b/prompt.txt new file mode 100644 index 0000000..8165018 --- /dev/null +++ b/prompt.txt @@ -0,0 +1,11 @@ +图片 +"Extract the following information from this image and return the result in JSON format:\n" "- Name: \n" "- ID: \n" "- Profile Picture: \n" "- Follower Count: \n" "- Likes Count: \n" "- Bio: \n" "- Following Count: \n" "- External Links: \n" # "Provide no additional text other than the extracted information." "Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist'." 
diff --git a/prompt.txt b/prompt.txt
new file mode 100644
index 0000000..8165018
--- /dev/null
+++ b/prompt.txt
@@ -0,0 +1,11 @@
+Image
+"Extract the following information from this image and return the result in JSON format:\n" "- Name: \n" "- ID: \n" "- Profile Picture: \n" "- Follower Count: \n" "- Likes Count: \n" "- Bio: \n" "- Following Count: \n" "- External Links: \n" # "Provide no additional text other than the extracted information." "Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist'."
+
+
+Video
+"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:
+1. **Product Name**: The specific name of the product, if mentioned.
+2. **Category**: The specific category of the product (e.g., electronics, skincare, casual wear, etc.).
+3. **Styles or Variants**: Any styles, designs, or variants of the product described (e.g., colors, patterns, sizes, or other distinguishing attributes).
+4. **Highlights**: The unique selling points or notable features emphasized by the anchor (e.g., benefits, quality, or standout features).
+5. **Promotional Details**: Any additional promotional information mentioned, such as discounts, offers, or key features that set the product apart."
\ No newline at end of file
diff --git a/tasks.py b/tasks.py
index 96590dc..a035aa0 100644
--- a/tasks.py
+++ b/tasks.py
@@ -6,8 +6,6 @@ mp.set_start_method("spawn", force=True)
 # mp.set_start_method("fork", force=True)
 
 from celery import Celery
-
-import psutil
 from pynvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, nvmlDeviceGetUtilizationRates
 
 # from endpoints.video import run_video_inference
@@ -54,11 +52,6 @@ app.conf.worker_prefetch_multiplier = 1
 #     include=["tasks"]
 # )
 
-# app.conf.task_routes = {
-#     'tasks.preprocess_video': {'queue': 'preprocess_queue'},
-#     'tasks.inference_video': {'queue': 'inference_queue'},
-# }
-
 app.conf.task_routes = {
     'tasks.preprocess_video': {'queue': 'preprocess_queue'},
     'tasks.inference_video': {'queue': 'inference_queue'},
 }
@@ -93,11 +86,6 @@ app.conf.task_routes = {
 # celery.conf.task_time_limit = 60  # 60 seconds max execution time
 # celery.conf.task_soft_time_limit = 50  # Warn at 50 seconds
 
-@app.task
-def add(x, y):
-    print("Adding task...")
-    return x + y
-
 @app.task(name="tasks.text_query_task")
 def text_query_task(question: str):
     print("Importing text_query...")
@@ -139,11 +127,6 @@ def video_query_task(file_path: str, question: str):
 # def video_query_task(preprocessed_data):
 #     return run_video_inference(preprocessed_data)
 
-
-# @celery.task(name="tasks.test_task")
-# def test_task():
-#     return "Celery is working!"
-
 import mimetypes
@@ -342,7 +325,7 @@ def preprocess_video(video_path, question):
 #     for preprocessed_data in preprocessed_results:
 #         video_path = preprocessed_data["video_path"]
 #         question = preprocessed_data["question"]
-#         segments = preprocessed_data["processed_segments"]
+#         segments = preprocessed_data["processed_data"]
 
 #         print(f"Inferencing video: {video_path}")
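The `task_routes` setting above sends `tasks.preprocess_video` to `preprocess_queue` and `tasks.inference_video` to `inference_queue`, the queues consumed by the two workers started in the README's Method 3. Below is a minimal submission sketch in the spirit of `celery_debug.py`; it assumes Redis and both workers are running, the file path and question are placeholders, and whether preprocessing automatically hands results to the inference queue depends on the (partly commented-out) task implementations in `tasks.py`.

```python
# Hypothetical submission script; not part of this patch.
from tasks import app  # the Celery app configured in tasks.py

file_path = "../video/3.mp4"  # sample path also used by celery_debug.py
question = "Extract the products recommended in the video and return JSON."

# send_task routes by task name, so app.conf.task_routes places this message
# on preprocess_queue, where the --queues=preprocess_queue worker picks it up.
result = app.send_task("tasks.preprocess_video", args=[file_path, question])
print("Submitted task:", result.id)
```

Calling `result.get()` additionally requires a result backend to be configured on the app.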
diff --git a/test_audio.py b/test_audio.py
deleted file mode 100644
index 9ca5572..0000000
--- a/test_audio.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import torch
-import os
-from whisper import load_model
-from pydub import AudioSegment
-
-def extract_audio_from_video(video_path: str) -> str:
-    audio = AudioSegment.from_file(video_path)
-    audio_path = "/tmp/temp_audio_test.wav"
-    audio.export(audio_path, format="wav")
-    print("video extracted!")
-    return audio_path
-
-# def transcribe_audio(audio_path: str) -> str:
-#     print("Loading model in transcribe_audio...")
-#     from transformers import WhisperProcessor, WhisperForConditionalGeneration
-#     import torch
-
-#     # Load processor and model from transformers
-#     processor = WhisperProcessor.from_pretrained("openai/whisper-base")
-#     model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
-
-#     if torch.cuda.device_count() > 1:
-#         print(f"Using {torch.cuda.device_count()} GPUs!")
-#         model = torch.nn.DataParallel(model)
-
-#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#     model.to(device)
-#     print("Model loaded successfully.")
-#     print(audio_path)
-
-#     # Load and process the audio file
-#     import librosa
-#     audio_input, sr = librosa.load(audio_path, sr=16000)
-#     input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
-
-#     # Generate transcription
-#     with torch.no_grad():
-#         if isinstance(model, torch.nn.DataParallel):
-#             generated_ids = model.module.generate(input_features)
-#         else:
-#             generated_ids = model.generate(input_features)
-
-#     # Decode the generated tokens to text
-#     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-#     return transcription
-
-def transcribe_audio(audio_path: str) -> str:
-    print("Loading model in transcribe_audio...")
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
-
-    from transformers import WhisperProcessor, WhisperForConditionalGeneration
-    import torch
-
-    # Load processor and model from transformers
-    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
-    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
-
-    if torch.cuda.device_count() > 1:
-        print(f"Using {torch.cuda.device_count()} GPUs!")
-        model = torch.nn.DataParallel(model)
-
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.to(device)
-    print("Model loaded successfully.")
-    print(audio_path)
-
-    # Load and process the audio file
-    import librosa
-    audio_input, sr = librosa.load(audio_path, sr=16000)
-    input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
-
-    # Generate transcription
-    with torch.no_grad():
-        if isinstance(model, torch.nn.DataParallel):
-            generated_ids = model.module.generate(input_features)
-        else:
-            generated_ids = model.generate(input_features)
-
-    # Decode the generated tokens to text
-    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return transcription
-
-if __name__ == "__main__":
-    extract_audio_from_video("../video/1.mp4")
-
-    audio_file = "/tmp/temp_audio_test.wav"
-
-    for i in range(3):
print(f"\nTranscription attempt {i + 1}:") - transcription = transcribe_audio(audio_file) - print("Transcription:") - print(transcription) \ No newline at end of file diff --git a/utils/__pycache__/audio_transcription.cpython-311.pyc b/utils/__pycache__/audio_transcription.cpython-311.pyc index bfa52b6..24f9da0 100644 Binary files a/utils/__pycache__/audio_transcription.cpython-311.pyc and b/utils/__pycache__/audio_transcription.cpython-311.pyc differ diff --git a/utils/__pycache__/image_processing.cpython-311.pyc b/utils/__pycache__/image_processing.cpython-311.pyc index 12bbf47..7661a0c 100644 Binary files a/utils/__pycache__/image_processing.cpython-311.pyc and b/utils/__pycache__/image_processing.cpython-311.pyc differ diff --git a/utils/__pycache__/video_processing.cpython-311.pyc b/utils/__pycache__/video_processing.cpython-311.pyc index ef171bd..3a06d9f 100644 Binary files a/utils/__pycache__/video_processing.cpython-311.pyc and b/utils/__pycache__/video_processing.cpython-311.pyc differ