import asyncio
import base64
import io
import logging

import numpy as np
from fastapi import Form, UploadFile
from fastapi.responses import JSONResponse
from PIL import Image

from pipeline_setup import IMAGE_TOKEN, pipe
from utils.image_processing import encode_image_base64

logger = logging.getLogger(__name__)

# Upload content types that image_query accepts.
_SUPPORTED_CONTENT_TYPES = ("image/jpeg", "image/png")

# Every uploaded image is resized to this (width, height) before inference.
_TARGET_IMAGE_SIZE = (512, 512)


async def image_query(file: UploadFile, question: str = Form(...)) -> JSONResponse:
    """Answer ``question`` about an uploaded JPEG or PNG image.

    The upload is read fully into memory, decoded with Pillow, converted to
    RGB and resized to 512x512. The ``(question, image)`` pair is then passed
    to ``pipe`` in a worker thread so the blocking call does not run on the
    event loop.

    Args:
        file: The uploaded image. Only the content types ``image/jpeg`` and
            ``image/png`` are accepted.
        question: The user's question, taken from a required form field.

    Returns:
        A ``JSONResponse`` whose body always contains ``"query"`` (the
        question as received) and either ``"response"`` (the ``text``
        attribute of the pipeline result) or ``"error"`` (a message).
        No explicit status code is set on any of the responses, so error
        payloads use the same default status as successful ones; callers
        must check for the ``"error"`` key.

    Raises:
        Nothing derived from ``Exception``: every such failure is logged and
        reported through the ``"error"`` field instead.
    """
    try:
        if file.content_type not in _SUPPORTED_CONTENT_TYPES:
            return JSONResponse({"query": question, "error": "Unsupported file type."})

        image_data = await file.read()

        # convert() returns a new, fully loaded image, so the handle opened
        # on the raw bytes can be released as soon as the block exits.
        with Image.open(io.BytesIO(image_data)) as uploaded:
            image = uploaded.convert("RGB").resize(_TARGET_IMAGE_SIZE)

        # NOTE(review): the prompt is sent without IMAGE_TOKEN, exactly as
        # before; this presumes `pipe` places the image in the prompt itself.
        # Confirm against the pipeline's documentation.
        response = await asyncio.to_thread(pipe, (question, image))
        return JSONResponse({"query": question, "response": response.text})
    except Exception as e:
        # Top-level boundary for the handler: keep the traceback in the logs,
        # since the client only receives the exception message.
        logger.exception("image_query failed")
        return JSONResponse({"query": question, "error": str(e)})


# Disabled alternative variants of image_query, kept for reference.

# async def image_query(image: np.ndarray, question: str):
#     """
#     API endpoint to process an image (as numpy array) with the user's query.
#     """
#     try:
#         # Convert the numpy array to a PIL Image
#         image = Image.fromarray(image).convert("RGB").resize((512, 512))
#
#         # Encode the image to base64 (optional, if needed by your pipeline)
#         buffered = io.BytesIO()
#         image.save(buffered, format="JPEG")
#         encoded_image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
#
#         # Prepare the query with the image token
#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
#
#         # Query the model
#         response = await asyncio.to_thread(pipe, (question, image))
#
#         return {"query": question, "response": response.text}
#     except Exception as e:
#         return {"query": question, "error": str(e)}


# def image_query(image_path: str, question: str):
#     try:
#         print("image_path in image_query...")
#         with open(image_path, "rb") as file:
#             image_data = file.read()
#         image = Image.open(io.BytesIO(image_data)).convert("RGB").resize((512, 512))
#         encoded_image_base64 = encode_image_base64(image)
#
#         # Prepare the question with the image token
#         question_with_image_token = f"{question}\n{IMAGE_TOKEN}"
#
#         # Run model inference (blocking call, but can be async)
#         response = pipe((question_with_image_token, image))
#
#         return {"query": question, "response": response.text}
#     except Exception as e:
#         return {"query": question, "error": str(e)}