updated readme

This commit is contained in:
parent d956ce6678
commit 7fb9de49b7

README.md | 52
@@ -1,2 +1,52 @@
# api
# API Usage Guide

This project supports multiple ways to start and interact with the API.

## Method 1: Standard API Start

Start the API using the main script:

```
python main.py
```

## Method 2: Gradio UI

To use the Gradio UI:

1. In the `endpoints/` directory, switch the active implementation: comment out the handlers under the `# api` marker and uncomment the ones under the `# gradio` marker.
2. Run the UI script:

```
python ui.py
```

## Method 3: Celery Optimization

Use Celery for optimized background task processing. Adjust the `--concurrency` values in the worker commands below as needed.

### Step 1: Start the Redis Server

```
redis-server
```

### Step 2: Start the Celery Workers

Open two separate terminals and run the following:

**Terminal 1 (Preprocessing Queue):**
```
celery -A tasks worker --pool=threads --loglevel=info --concurrency=2 --queues=preprocess_queue
```

**Terminal 2 (Inference Queue):**
```
celery -A tasks worker --pool=threads --loglevel=info --concurrency=3 --queues=inference_queue
```

### Step 3: Run the Debug Script

```
python celery_debug.py
```
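
For a quick sanity check, a task can also be submitted directly from a Python shell instead of running the debug script. The following is only a minimal sketch, assuming `preprocess_video(video_path, question)` in `tasks.py` is registered as a Celery task and routed to `preprocess_queue` via `app.conf.task_routes`; the video path and question text are just examples:

```
# Minimal sketch: requires the Redis broker and the preprocess_queue worker above to be running.
from tasks import preprocess_video

result = preprocess_video.delay("../video/3.mp4", "List the products recommended in this video.")
print(result.id)                # Celery task id assigned by the broker
print(result.get(timeout=600))  # blocks until the worker finishes (or times out)
```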
@@ -41,7 +41,7 @@ print("Broker:", app.conf.broker_url)
# Define the number of concurrent tasks
NUM_TASKS = 1
delay_seconds = 0
file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)]
file_paths = [f"../video/3.mp4" for _ in range(NUM_TASKS)]


# video_folder = "../video"
@@ -71,29 +71,29 @@ file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)]
#     for i in range(NUM_TASKS)
# ]

# questions = [
#     f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
#     f"- Product Name: <name>\n"
#     f"- Category: <category>\n"
#     f"- Styles or Variants: <styles/variants>\n"
#     f"- Highlights: <highlights>\n"
#     f"- Promotional Details: <promotional_details>\n"
#     f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
#     for i in range(NUM_TASKS)
# ]

questions = [
    f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n"
    f"- Scene Descriptions: <visual setting including background, lighting, atmosphere>\n"
    f"- Character Introductions: <key characters with appearance and notable traits>\n"
    f"- Dialogue: <transcribed spoken lines in screenplay format>\n"
    f"- Actions & Expressions: <non-verbal cues and interactions>\n"
    f"- Product Integrations: <product names, categories, features if applicable>\n"
    f"- Narrative Flow: <scene transitions and pacing notes>\n"
    f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}"
    f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n"
    f"- Product Name: <name>\n"
    f"- Category: <category>\n"
    f"- Styles or Variants: <styles/variants>\n"
    f"- Highlights: <highlights>\n"
    f"- Promotional Details: <promotional_details>\n"
    f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}"
    for i in range(NUM_TASKS)
]

# questions = [
#     f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n"
#     f"- Scene Descriptions: <visual setting including background, lighting, atmosphere>\n"
#     f"- Character Introductions: <key characters with appearance and notable traits>\n"
#     f"- Dialogue: <transcribed spoken lines in screenplay format>\n"
#     f"- Actions & Expressions: <non-verbal cues and interactions>\n"
#     f"- Product Integrations: <product names, categories, features if applicable>\n"
#     f"- Narrative Flow: <scene transitions and pacing notes>\n"
#     f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}"
#     for i in range(NUM_TASKS)
# ]

# def submit_task(question, index):  # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel.
#     """ Submits a Celery task with increasing delay """
#     countdown_time = index * delay_seconds  # Dynamic delay
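
The commented-out `submit_task` helper above suggests staggering submissions with a per-task countdown. A hedged sketch of how it might be re-enabled, assuming `video_query_task(file_path, question)` from `tasks.py` is the task being queued (any other registered task would work the same way):

```
# Sketch only, not part of this commit: staggered asynchronous submission.
from tasks import video_query_task

def submit_task(question, index):
    """Submit one Celery task, delaying each subsequent task by index * delay_seconds."""
    countdown_time = index * delay_seconds  # dynamic delay derived from the task index
    return video_query_task.apply_async(
        args=[file_paths[index], question],
        countdown=countdown_time,  # the worker starts the task after this many seconds
    )

results = [submit_task(q, i) for i, q in enumerate(questions)]
print([r.id for r in results])  # inspect task ids; r.get() waits for each result
```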
@@ -8,6 +8,7 @@ from PIL import Image
from pipeline_setup import pipe, IMAGE_TOKEN
from utils.image_processing import encode_image_base64

# api
async def image_query(file: UploadFile, question: str = Form(...)):
    """
    API endpoint to process an image with the user's query.
@@ -26,11 +27,8 @@ async def image_query(file: UploadFile, question: str = Form(...)):
    except Exception as e:
        return JSONResponse({"query": question, "error": str(e)})


# gradio
# async def image_query(image: np.ndarray, question: str):
#     """
#     API endpoint to process an image (as numpy array) with the user's query.
#     """
#     try:
#         # Convert the numpy array to a PIL Image
#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
@@ -49,7 +47,7 @@ async def image_query(file: UploadFile, question: str = Form(...)):
#     except Exception as e:
#         return {"query": question, "error": str(e)}


# celery
# def image_query(image_path: str, question: str):
#     try:
#         print("image_path in image_query...")
@@ -24,6 +24,7 @@ def load_checkpoint(video_id):
            return json.load(f)
    return None

# api
# async def video_query(file: UploadFile, question: str = Form(...)):
#     try:
#         print("Processing video...")
@@ -131,7 +132,7 @@ def load_checkpoint(video_id):
#     except Exception as e:
#         return JSONResponse({"query": question, "error": str(e)})


# gradio
# async def video_query(video_path: str, question: str):
#     """
#     API endpoint to process a video file with the user's query.
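
`load_checkpoint` above reads a per-video JSON checkpoint and returns `None` when nothing has been saved yet. A hypothetical writing counterpart (not in this commit; the checkpoint directory and file naming are assumptions) could look like:

```
# Hypothetical helper, shown only to illustrate the checkpointing pattern.
import json
import os

CHECKPOINT_DIR = "checkpoints"  # assumed location; the real path is defined elsewhere

def save_checkpoint(video_id, data):
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)
    with open(os.path.join(CHECKPOINT_DIR, f"{video_id}.json"), "w") as f:
        json.dump(data, f)
```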
prompt.txt | 11 (new file)
@@ -0,0 +1,11 @@
Image
"Extract the following information from this image and return the result in JSON format:\n" "- Name: <name>\n" "- ID: <id>\n" "- Profile Picture: <url>\n" "- Follower Count: <count>\n" "- Likes Count: <count>\n" "- Bio: <bio>\n" "- Following Count: <count>\n" "- External Links: <links>\n" # "Provide no additional text other than the extracted information." "Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist'."


Video
"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:
1. **Product Name**: The specific name of the product, if mentioned.
2. **Category**: The specific category of the product (e.g., electronics, skincare, casual wear, etc.).
3. **Styles or Variants**: Any styles, designs, or variants of the product described (e.g., colors, patterns, sizes, or other distinguishing attributes).
4. **Highlights**: The unique selling points or notable features emphasized by the anchor (e.g., benefits, quality, or standout features).
5. **Promotional Details**: Any additional promotional information mentioned, such as discounts, offers, or key features that set the product apart."
tasks.py | 19
@@ -6,8 +6,6 @@ mp.set_start_method("spawn", force=True)
# mp.set_start_method("fork", force=True)

from celery import Celery

import psutil
from pynvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, nvmlDeviceGetUtilizationRates

# from endpoints.video import run_video_inference
@@ -54,11 +52,6 @@ app.conf.worker_prefetch_multiplier = 1
#     include=["tasks"]
# )

# app.conf.task_routes = {
#     'tasks.preprocess_video': {'queue': 'preprocess_queue'},
#     'tasks.inference_video': {'queue': 'inference_queue'},
# }

app.conf.task_routes = {
    'tasks.preprocess_video': {'queue': 'preprocess_queue'},
    'tasks.inference_video': {'queue': 'inference_queue'},
@@ -93,11 +86,6 @@ app.conf.task_routes = {
# celery.conf.task_time_limit = 60  # 60 seconds max execution time
# celery.conf.task_soft_time_limit = 50  # Warn at 50 seconds

@app.task
def add(x, y):
    print("Adding task...")
    return x + y

@app.task(name="tasks.text_query_task")
def text_query_task(question: str):
    print("Importing text_query...")
@@ -140,11 +128,6 @@ def video_query_task(file_path: str, question: str):
#     return run_video_inference(preprocessed_data)


# @celery.task(name="tasks.test_task")
# def test_task():
#     return "Celery is working!"


import mimetypes
from utils.video_processing import split_video_into_segments, extract_motion_key_frames, extract_audio_from_video
@@ -342,7 +325,7 @@ def preprocess_video(video_path, question):
#     for preprocessed_data in preprocessed_results:
#         video_path = preprocessed_data["video_path"]
#         question = preprocessed_data["question"]
#         segments = preprocessed_data["processed_segments"]
#         segments = preprocessed_data["processed_data"]

#         print(f"Inferencing video: {video_path}")
@@ -1,92 +0,0 @@
import torch
import os
from whisper import load_model
from pydub import AudioSegment

def extract_audio_from_video(video_path: str) -> str:
    audio = AudioSegment.from_file(video_path)
    audio_path = "/tmp/temp_audio_test.wav"
    audio.export(audio_path, format="wav")
    print("video extracted!")
    return audio_path

# def transcribe_audio(audio_path: str) -> str:
#     print("Loading model in transcribe_audio...")
#     from transformers import WhisperProcessor, WhisperForConditionalGeneration
#     import torch

#     # Load processor and model from transformers
#     processor = WhisperProcessor.from_pretrained("openai/whisper-base")
#     model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

#     if torch.cuda.device_count() > 1:
#         print(f"Using {torch.cuda.device_count()} GPUs!")
#         model = torch.nn.DataParallel(model)

#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)
#     print("Model loaded successfully.")
#     print(audio_path)

#     # Load and process the audio file
#     import librosa
#     audio_input, sr = librosa.load(audio_path, sr=16000)
#     input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)

#     # Generate transcription
#     with torch.no_grad():
#         if isinstance(model, torch.nn.DataParallel):
#             generated_ids = model.module.generate(input_features)
#         else:
#             generated_ids = model.generate(input_features)

#     # Decode the generated tokens to text
#     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
#     return transcription

def transcribe_audio(audio_path: str) -> str:
    print("Loading model in transcribe_audio...")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

    from transformers import WhisperProcessor, WhisperForConditionalGeneration
    import torch

    # Load processor and model from transformers
    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs!")
        model = torch.nn.DataParallel(model)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print("Model loaded successfully.")
    print(audio_path)

    # Load and process the audio file
    import librosa
    audio_input, sr = librosa.load(audio_path, sr=16000)
    input_features = processor(audio_input, sampling_rate=sr, return_tensors="pt").input_features.to(device)

    # Generate transcription
    with torch.no_grad():
        if isinstance(model, torch.nn.DataParallel):
            generated_ids = model.module.generate(input_features)
        else:
            generated_ids = model.generate(input_features)

    # Decode the generated tokens to text
    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return transcription

if __name__ == "__main__":
    extract_audio_from_video("../video/1.mp4")

    audio_file = "/tmp/temp_audio_test.wav"

    for i in range(3):
        print(f"\nTranscription attempt {i + 1}:")
        transcription = transcribe_audio(audio_file)
        print("Transcription:")
        print(transcription)