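# Gradio demo: similarity search over a Pokemon image dataset.
# A query image is embedded with CLIP and matched against the dataset's
# precomputed embeddings through a FAISS index; the top-k hits are rendered
# as a gallery of (image, caption) pairs.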
import gradio as gr
import spaces
import torch
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
from datasets import load_dataset


dataset = load_dataset("not-lain/embedded-pokemon", split="train")
dataset = dataset.add_faiss_index("embeddings")
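# NOTE: add_faiss_index builds the index in memory at startup, which is fine
# for a small dataset; a larger corpus would likely persist it once with
# save_faiss_index and reload it with load_faiss_index.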


device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = AutoModelForZeroShotImageClassification.from_pretrained(
    "openai/clip-vit-large-patch14", device_map=device
)
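# The query encoder must match whatever model produced the dataset's
# "embeddings" column (assumed here to be this same clip-vit-large-patch14
# checkpoint); mixing embedding spaces would make the distances meaningless.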


@spaces.GPU  # on ZeroGPU Spaces, allocates a GPU for the duration of each call
def search(query, k: int = 4):
    """Embed the query image and return the k most similar dataset entries."""

    # Preprocess the uploaded image and embed it with the CLIP vision tower.
    pixel_values = processor(images=query, return_tensors="pt")["pixel_values"]
    pixel_values = pixel_values.to(device)
    img_emb = model.get_image_features(pixel_values)[0]  # batch of one, keep the single vector
    img_emb = img_emb.cpu().detach().numpy()  # datasets' FAISS search expects a numpy array

    scores, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",  # name of the indexed column to search
        img_emb,  # query embedding compared against the dataset embeddings
        k=k,  # number of nearest neighbours to return
    )

    # Pair each retrieved image with its caption, the format gr.Gallery accepts.
    return [
        [retrieved_examples["image"][i], retrieved_examples["text"][i]]
        for i in range(k)
    ]
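
# Quick local sanity check (assumes a charmander.jpg next to this script and
# that spaces.GPU degrades gracefully outside a ZeroGPU Space):
#   from PIL import Image
#   print(search(Image.open("./charmander.jpg"), k=2))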


demo = gr.Interface(
    search,
    inputs="image",  # only the image is exposed in the UI, so k keeps its default of 4
    outputs="gallery",
    examples=["./charmander.jpg"],
)

demo.launch(debug=True)
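
# The running app can also be queried programmatically with gradio_client.
# The Space id below is hypothetical; recent client versions wrap file inputs
# in gradio_client.handle_file:
#   from gradio_client import Client, handle_file
#   client = Client("user/space-name")  # replace with the actual Space id
#   client.predict(handle_file("./charmander.jpg"), api_name="/predict")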