Tiktok-Talent-Info/endpoints/image.py

from fastapi import UploadFile, Form
from fastapi.responses import JSONResponse
import base64
import io
import asyncio
import numpy as np
from PIL import Image
from pipeline_setup import pipe, IMAGE_TOKEN
from utils.image_processing import encode_image_base64


async def image_query(file: UploadFile, question: str = Form(...)):
    """
    API endpoint to process an uploaded image together with the user's query.
    """
    try:
        if file.content_type not in ["image/jpeg", "image/png"]:
            return JSONResponse({"query": question, "error": "Unsupported file type."})

        # Read the upload and normalize it to a 512x512 RGB image.
        image_data = await file.read()
        image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
        encoded_image_base64 = encode_image_base64(image)  # kept for parity with the variants below; not used here

        # Append the image token so the pipeline knows where the image belongs in the prompt.
        question_with_image_token = f"{question}\n{IMAGE_TOKEN}"

        # Run the blocking pipeline call in a worker thread to avoid stalling the event loop.
        response = await asyncio.to_thread(pipe, (question_with_image_token, image))
        return JSONResponse({"query": question, "response": response.text})
    except Exception as e:
        return JSONResponse({"query": question, "error": str(e)})


# async def image_query(image: np.ndarray, question: str):
#     """
#     API endpoint to process an image (as numpy array) with the user's query.
#     """
#     try:
#         # Convert the numpy array to a PIL Image
#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
#         # Encode the image to base64 (optional, if needed by your pipeline)
#         buffered = io.BytesIO()
#         image.save(buffered, format="JPEG")
#         encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
#         # Prepare the query with the image token
#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
#         # Query the model
#         response = await asyncio.to_thread(pipe, (question, image))
#         return {"query": question, "response": response.text}
#     except Exception as e:
#         return {"query": question, "error": str(e)}

# def image_query(image_path: str, question: str):
#     try:
#         print("image_path in image_query...")
#         with open(image_path, "rb") as file:
#             image_data = file.read()
#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
#         encoded_image_base64 = encode_image_base64(image)
#         # Prepare the question with the image token
#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
#         # Run model inference (blocking call, but can be async)
#         response = pipe((question_with_image_token, image))
#         return {"query": question, "response": response.text}
#     except Exception as e:
#         return {"query": question, "error": str(e)}