updated readme
This commit is contained in:
parent d956ce6678
commit 7fb9de49b7

README.md (52 lines changed)
@@ -1,2 +1,52 @@
-# api
+# API Usage Guide

This project supports multiple methods to start and interact with the API.

## Method 1: Standard API Start

Start the API using the main script:

```
python main.py
```
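
`main.py` itself is not part of this commit's diff. The endpoint signatures shown later (`UploadFile`, `Form(...)`, `JSONResponse`) suggest a FastAPI-style app, so a purely hypothetical sketch of the entry point might look like the following; the module path, route, and port are assumptions, not the repository's actual code.

```
# Hypothetical sketch only -- the real main.py is not shown in this commit.
from fastapi import FastAPI, UploadFile, Form

# Assumed import path; the diff only shows hunks from endpoints/image.py and endpoints/video.py.
from endpoints.image import image_query

app = FastAPI()

@app.post("/image")
async def image_endpoint(file: UploadFile, question: str = Form(...)):
    # Delegate to the endpoint implementation diffed later in this commit.
    return await image_query(file, question)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)  # assumed host/port
```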

## Method 2: Gradio UI

To use the Gradio UI:

1. In the `endpoints/` directory, switch the comments so that the 'gradio' variant of each endpoint is the active one (see the sketch below).
2. Run the UI script:

```
python ui.py
```
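
The "switch the comments" step refers to the `# api`, `# gradio`, and `# celery` markers this commit adds to the endpoint files: each endpoint keeps one variant per mode, and only one is left uncommented. A rough sketch of the toggle in `endpoints/image.py`, with signatures taken from the diff further down (bodies elided, layout approximate):

```
# Sketch of the comment toggle in endpoints/image.py (signatures from this commit's diff).
import numpy as np

# api variant -- active by default, commented out here for the Gradio UI:
# async def image_query(file: UploadFile, question: str = Form(...)):
#     ...

# gradio variant -- uncomment this one (and comment the api variant) before running ui.py:
async def image_query(image: np.ndarray, question: str):
    """Process an image passed as a numpy array with the user's query."""
    ...
```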

## Method 3: Celery Optimization

Use Celery for optimized background task processing. You can adjust the concurrency parameters as needed.

### Step 1: Start Redis Server

```
redis-server
```
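
The Celery app in `tasks.py` must point at this Redis instance as its broker (the debug script prints `app.conf.broker_url` to confirm it). The exact URL is not shown in the diff; a minimal sketch assuming a default local Redis:

```
# Sketch of the broker configuration in tasks.py; the URLs below assume a
# default local Redis and are not taken from the repository.
from celery import Celery

app = Celery(
    "tasks",
    broker="redis://localhost:6379/0",   # assumed broker address
    backend="redis://localhost:6379/1",  # assumed result backend
)
print("Broker:", app.conf.broker_url)
```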

### Step 2: Start Celery Workers

Open two separate terminals and run the following:

**Terminal 1 (Preprocessing Queue):**

```
celery -A tasks worker --pool=threads --loglevel=info --concurrency=2 --queues=preprocess_queue
```

**Terminal 2 (Inference Queue):**

```
celery -A tasks worker --pool=threads --loglevel=info --concurrency=3 --queues=inference_queue
```
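
Each worker only consumes the queue named in its `--queues` flag; `tasks.py` (diffed later in this commit) routes the two task types to those queues, continuing from the `app` object sketched above:

```
# Queue routing from tasks.py in this commit: preprocessing and inference
# tasks are sent to the queues the two workers above listen on.
app.conf.task_routes = {
    'tasks.preprocess_video': {'queue': 'preprocess_queue'},
    'tasks.inference_video': {'queue': 'inference_queue'},
}
```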

### Step 3: Run Debug Script

```
python celery_debug.py
```
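
`celery_debug.py` (also touched in this commit) builds a list of video paths and questions for `NUM_TASKS` tasks and submits them to the workers asynchronously. Stripped down, the pattern looks roughly like this; the exact submission call is commented out in the script, so the `.delay(...)` line is an assumption based on the task names defined in tasks.py:

```
# Simplified sketch of celery_debug.py; the real script also handles
# checkpointing, per-task delays, and result collection.
from tasks import video_query_task  # Celery task defined in tasks.py

NUM_TASKS = 1
file_paths = [f"../video/3.mp4" for _ in range(NUM_TASKS)]
questions = ["<prompt text, e.g. the product-extraction prompt from prompt.txt>"
             for _ in range(NUM_TASKS)]

# .delay() queues each task on the Celery broker; if several workers are
# running, they process the tasks in parallel.
results = [video_query_task.delay(path, q) for path, q in zip(file_paths, questions)]
for r in results:
    print(r.get())  # blocks until the worker finishes the task
```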
@@ -41,7 +41,7 @@ print("Broker:", app.conf.broker_url)
 # Define the number of concurrent tasks
 NUM_TASKS = 1
 delay_seconds = 0
-file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)]
+file_paths = [f"../video/3.mp4" for _ in range(NUM_TASKS)]

 # video_folder = "../video"
@@ -71,29 +71,29 @@ file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)]
 # for i in range(NUM_TASKS)
 # ]

-# questions = [
-# f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
-# f"- Product Name: <name>\n"
-# f"- Category: <category>\n"
-# f"- Styles or Variants: <styles/variants>\n"
-# f"- Highlights: <highlights>\n"
-# f"- Promotional Details: <promotional_details>\n"
-# f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
-# for i in range(NUM_TASKS)
-# ]

 questions = [
-    f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n"
-    f"- Scene Descriptions: <visual setting including background, lighting, atmosphere>\n"
-    f"- Character Introductions: <key characters with appearance and notable traits>\n"
-    f"- Dialogue: <transcribed spoken lines in screenplay format>\n"
-    f"- Actions & Expressions: <non-verbal cues and interactions>\n"
-    f"- Product Integrations: <product names, categories, features if applicable>\n"
-    f"- Narrative Flow: <scene transitions and pacing notes>\n"
-    f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}"
+    f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
+    f"- Product Name: <name>\n"
+    f"- Category: <category>\n"
+    f"- Styles or Variants: <styles/variants>\n"
+    f"- Highlights: <highlights>\n"
+    f"- Promotional Details: <promotional_details>\n"
+    f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
     for i in range(NUM_TASKS)
 ]

+# questions = [
+# f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n"
+# f"- Scene Descriptions: <visual setting including background, lighting, atmosphere>\n"
+# f"- Character Introductions: <key characters with appearance and notable traits>\n"
+# f"- Dialogue: <transcribed spoken lines in screenplay format>\n"
+# f"- Actions & Expressions: <non-verbal cues and interactions>\n"
+# f"- Product Integrations: <product names, categories, features if applicable>\n"
+# f"- Narrative Flow: <scene transitions and pacing notes>\n"
+# f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}"
+# for i in range(NUM_TASKS)
+# ]

 # def submit_task(question, index): # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel.
 #     """ Submits a Celery task with increasing delay """
 #     countdown_time = index * delay_seconds  # Dynamic delay
@@ -8,6 +8,7 @@ from PIL import Image
 from pipeline_setup import pipe, IMAGE_TOKEN
 from utils.image_processing import encode_image_base64

+# api
 async def image_query(file: UploadFile, question: str = Form(...)):
     """
     API endpoint to process an image with the user's query.
@@ -26,11 +27,8 @@ async def image_query(file: UploadFile, question: str = Form(...)):
     except Exception as e:
         return JSONResponse({"query": question, "error": str(e)})

+# gradio
 # async def image_query(image: np.ndarray, question: str):
-#     """
-#     API endpoint to process an image (as numpy array) with the user's query.
-#     """
 #     try:
 #         # Convert the numpy array to a PIL Image
 #         image = Image.fromarray(image).convert("RGB").resize((512, 512))
@@ -49,7 +47,7 @@ async def image_query(file: UploadFile, question: str = Form(...)):
 #     except Exception as e:
 #         return {"query": question, "error": str(e)}

+# celery
 # def image_query(image_path: str, question: str):
 #     try:
 #         print("image_path in image_query...")
@@ -24,6 +24,7 @@ def load_checkpoint(video_id):
            return json.load(f)
    return None

+# api
 # async def video_query(file: UploadFile, question: str = Form(...)):
 #     try:
 #         print("Processing video...")
@@ -131,7 +132,7 @@ def load_checkpoint(video_id):
 #     except Exception as e:
 #         return JSONResponse({"query": question, "error": str(e)})

+# gradio
 # async def video_query(video_path: str, question: str):
 #     """
 #     API endpoint to process a video file with the user's query.
prompt.txt (new file, 11 lines)

@@ -0,0 +1,11 @@
图片 (image)
"Extract the following information from this image and return the result in JSON format:\n" "- Name: <name>\n" "- ID: <id>\n" "- Profile Picture: <url>\n" "- Follower Count: <count>\n" "- Likes Count: <count>\n" "- Bio: <bio>\n" "- Following Count: <count>\n" "- External Links: <links>\n" # "Provide no additional text other than the extracted information." "Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist'."


视频 (video)
“Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:
1. **Product Name**: The specific name of the product, if mentioned.
2. **Category**: The specific category of the product (e.g., electronics, skincare, casual wear, etc.).
3. **Styles or Variants**: Any styles, designs, or variants of the product described (e.g., colors, patterns, sizes, or other distinguishing attributes).
4. **Highlights**: The unique selling points or notable features emphasized by the anchor (e.g., benefits, quality, or standout features).
5. **Promotional Details**: Any additional promotional information mentioned, such as discounts, offers, or key features that set the product apart.”
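
For reference, the video prompt above asks the model to emit JSON shaped roughly like the following; the field names come from the prompt, while the values here are made up purely for illustration.

```
# Illustrative only -- example of the JSON shape requested by the video prompt in prompt.txt.
example_output = {
    "Product Name": "Example hand cream",
    "Category": "skincare",
    "Styles or Variants": ["rose scent", "unscented"],
    "Highlights": "lightweight texture, absorbs quickly",
    "Promotional Details": "buy one get one free during the livestream",
}
```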
tasks.py (19 lines changed)

@@ -6,8 +6,6 @@ mp.set_start_method("spawn", force=True)
 # mp.set_start_method("fork", force=True)

 from celery import Celery

-import psutil
 from pynvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, nvmlDeviceGetUtilizationRates

 # from endpoints.video import run_video_inference
@@ -54,11 +52,6 @@ app.conf.worker_prefetch_multiplier = 1
 # include=["tasks"]
 # )

-# app.conf.task_routes = {
-# 'tasks.preprocess_video': {'queue': 'preprocess_queue'},
-# 'tasks.inference_video': {'queue': 'inference_queue'},
-# }

 app.conf.task_routes = {
     'tasks.preprocess_video': {'queue': 'preprocess_queue'},
     'tasks.inference_video': {'queue': 'inference_queue'},
@@ -93,11 +86,6 @@ app.conf.task_routes = {
 # celery.conf.task_time_limit = 60 # 60 seconds max execution time
 # celery.conf.task_soft_time_limit = 50 # Warn at 50 seconds

-@app.task
-def add(x, y):
-    print("Adding task...")
-    return x + y

 @app.task(name="tasks.text_query_task")
 def text_query_task(question: str):
     print("Importing text_query...")
@@ -140,11 +128,6 @@ def video_query_task(file_path: str, question: str):
 #     return run_video_inference(preprocessed_data)


-# @celery.task(name="tasks.test_task")
-# def test_task():
-#     return "Celery is working!"


 import mimetypes
 from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
@@ -342,7 +325,7 @@ def preprocess_video(video_path, question):
 # for preprocessed_data in preprocessed_results:
 # video_path = preprocessed_data["video_path"]
 # question = preprocessed_data["question"]
-# segments = preprocessed_data["processed_segments"]
+# segments = preprocessed_data["processed_data"]

 # print(f"Inferencing video: {video_path}")
@@ -1,92 +0,0 @@
-import torch
-import os
-from whisper import load_model
-from pydub import AudioSegment
-
-def extract_audio_from_video(video_path: str) -> str:
-    audio = AudioSegment.from_file(video_path)
-    audio_path = "/tmp/temp_audio_test.wav"
-    audio.export(audio_path, format="wav")
-    print("video extracted!")
-    return audio_path
-
-# def transcribe_audio(audio_path: str) -> str:
-#     print("Loading model in transcribe_audio...")
-#     from transformers import WhisperProcessor, WhisperForConditionalGeneration
-#     import torch
-
-#     # Load processor and model from transformers
-#     processor = WhisperProcessor.from_pretrained("openai/whisper-base")
-#     model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
-
-#     if torch.cuda.device_count() > 1:
-#         print(f"Using {torch.cuda.device_count()} GPUs!")
-#         model = torch.nn.DataParallel(model)
-
-#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#     model.to(device)
-#     print("Model loaded successfully.")
-#     print(audio_path)
-
-#     # Load and process the audio file
-#     import librosa
-#     audio_input, sr = librosa.load(audio_path, sr=16000)
-#     input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
-
-#     # Generate transcription
-#     with torch.no_grad():
-#         if isinstance(model, torch.nn.DataParallel):
-#             generated_ids = model.module.generate(input_features)
-#         else:
-#             generated_ids = model.generate(input_features)
-
-#     # Decode the generated tokens to text
-#     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-#     return transcription
-
-def transcribe_audio(audio_path: str) -> str:
-    print("Loading model in transcribe_audio...")
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
-
-    from transformers import WhisperProcessor, WhisperForConditionalGeneration
-    import torch
-
-    # Load processor and model from transformers
-    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
-    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
-
-    if torch.cuda.device_count() > 1:
-        print(f"Using {torch.cuda.device_count()} GPUs!")
-        model = torch.nn.DataParallel(model)
-
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.to(device)
-    print("Model loaded successfully.")
-    print(audio_path)
-
-    # Load and process the audio file
-    import librosa
-    audio_input, sr = librosa.load(audio_path, sr=16000)
-    input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)
-
-    # Generate transcription
-    with torch.no_grad():
-        if isinstance(model, torch.nn.DataParallel):
-            generated_ids = model.module.generate(input_features)
-        else:
-            generated_ids = model.generate(input_features)
-
-    # Decode the generated tokens to text
-    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return transcription
-
-if __name__ == "__main__":
-    extract_audio_from_video("../video/1.mp4")
-
-    audio_file = "/tmp/temp_audio_test.wav"
-
-    for i in range(3):
-        print(f"\nTranscription attempt {i + 1}:")
-        transcription = transcribe_audio(audio_file)
-        print("Transcription:")
-        print(transcription)