Spaces:

msabia
/

bioscan-clip-demo

Build error

App Files Files Community

msabia commited on Sep 6, 2024

Commit

e215045

verified ·

1 Parent(s): 4db6d38

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -178

app.py CHANGED Viewed

@@ -8,183 +8,172 @@ import io
 import pickle
 import random
-def greet(name, intensity):
-    return "Hello, " + name + "!" * int(intensity)
-demo = gr.Interface(
-    fn=greet,
-    inputs=["text", "slider"],
-    outputs=["text"],
-)
-demo.launch()
-# def get_image(image1, image2, dataset_image_mask, processid_to_index, idx):
-#     if (idx < 162834):
-#         image_enc_padded = image1[idx].astype(np.uint8)
-#     elif(idx >= 162834):
-#         image_enc_padded = image2[idx-162834].astype(np.uint8)
-#     enc_length = dataset_image_mask[idx]
-#     image_enc = image_enc_padded[:enc_length]
-#     image = Image.open(io.BytesIO(image_enc))
-#     return image
-# def searchEmbeddings(id, mod1, mod2):
-#     # variable and index initialization
-#     original_indx = processid_to_index[id]
-#     dim = 768
-#     num_neighbors = 10
-#     # get index
-#     index = faiss.IndexFlatIP(dim)
-#     if (mod2 == "Image"):
-#         index = faiss.read_index("image_index.index")
-#     elif (mod2 == "DNA"):
-#         index = faiss.read_index("dna_index.index")
-#     # search index
-#     if (mod1 == "Image"):
-#         query = id_to_image_emb_dict[id]
-#     elif(mod1 == "DNA"):
-#         query = id_to_dna_emb_dict[id]
-#     query = query.astype(np.float32)
-#     D, I = index.search(query, num_neighbors)
-#     id_list = []
-#     for indx in I[0]:
-#         id = indx_to_id_dict[indx]
-#         id_list.append(id)
-#     # get images
-#     image0 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, original_indx)
-#     image1 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][0])
-#     image2 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][1])
-#     image3 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][2])
-#     image4 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][3])
-#     image5 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][4])
-#     image6 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][5])
-#     image7 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][6])
-#     image8 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][7])
-#     image9 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][8])
-#     image10 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][9])
-#     # get taxonomic information
-#     s0 = getTax(original_indx)
-#     s1 = getTax(I[0][0])
-#     s2 = getTax(I[0][1])
-#     s3 = getTax(I[0][2])
-#     s4 = getTax(I[0][3])
-#     s5 = getTax(I[0][4])
-#     s6 = getTax(I[0][5])
-#     s7 = getTax(I[0][6])
-#     s8 = getTax(I[0][7])
-#     s9 = getTax(I[0][8])
-#     s10 = getTax(I[0][9])
-#     return id_list, image0, image1, image2, image3, image4, image5, image6, image7, image8, image9, image10, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10
-# def getRandID():
-#     indx = random.randrange(0, 325667)
-#     return indx_to_id_dict[indx], indx
-# def getTax(indx):
-#     s = species[indx]
-#     g = genus[indx]
-#     f = family[indx]
-#     str = "Species: " + s + "\nGenus: " + g + "\nFamily: " + f
-#     return str
-# with gr.Blocks(title="Bioscan-Clip") as demo:
-#     # open general files
-#     with open("dataset_image1.pickle", "rb") as f:
-#         dataset_image1 = pickle.load(f)
-#     with open("dataset_image2.pickle", "rb") as f:
-#         dataset_image2 = pickle.load(f)
-#     with open("dataset_processid_list.pickle", "rb") as f:
-#         dataset_processid_list = pickle.load(f)
-#     with open("dataset_image_mask.pickle", "rb") as f:
-#         dataset_image_mask = pickle.load(f)
-#     with open("processid_to_index.pickle", "rb") as f:
-#         processid_to_index = pickle.load(f)
-#     with open("indx_to_id_dict.pickle", "rb") as f:
-#         indx_to_id_dict = pickle.load(f)
-#     # open image files
-#     with open("id_to_image_emb_dict.pickle", "rb") as f:
-#         id_to_image_emb_dict = pickle.load(f)
-#     # open dna files
-#     with open("id_to_dna_emb_dict.pickle", "rb") as f:
-#         id_to_dna_emb_dict = pickle.load(f)
-#     # open taxonomy files
-#     with open("family.pickle", "rb") as f:
-#         family = [item.decode("utf-8") for item in pickle.load(f)]
-#     with open("genus.pickle", "rb") as f:
-#         genus= [item.decode("utf-8") for item in pickle.load(f)]
-#     with open("species.pickle", "rb") as f:
-#         species = [item.decode("utf-8") for item in pickle.load(f)]
-#     with gr.Column():
-#         process_id = gr.Textbox(label="ID:", info="Enter a sample ID to search for")
-#         process_id_list = gr.Textbox(label="Closest 10 matches:" )
-#         mod1 = gr.Radio(choices=["DNA", "Image"], label="Search From:")
-#         mod2 = gr.Radio(choices=["DNA", "Image"], label="Search To:")
-#         search_btn = gr.Button("Search")
-#         with gr.Row():
-#             with gr.Column():
-#                 image0 = gr.Image(label="Original", height=550)
-#                 tax0 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 rand_id = gr.Textbox(label="Random ID:")
-#                 rand_id_indx = gr.Textbox(label="Index:")
-#                 id_btn = gr.Button("Get Random ID")
-#         with gr.Row():
-#             with gr.Column():
-#                 image1 = gr.Image(label=1)
-#                 tax1 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image2 = gr.Image(label=2)
-#                 tax2 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image3 = gr.Image(label=3)
-#                 tax3 = gr.Textbox(label="Taxonomy")
-#         with gr.Row():
-#             with gr.Column():
-#                 image4 = gr.Image(label=4)
-#                 tax4 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image5 = gr.Image(label=5)
-#                 tax5 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image6 = gr.Image(label=6)
-#                 tax6 = gr.Textbox(label="Taxonomy")
-#         with gr.Row():
-#             with gr.Column():
-#                 image7 = gr.Image(label=7)
-#                 tax7 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image8 = gr.Image(label=8)
-#                 tax8 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image9 = gr.Image(label=9)
-#                 tax9 = gr.Textbox(label="Taxonomy")
-#             with gr.Column():
-#                 image10 = gr.Image(label=10)
-#                 tax10 = gr.Textbox(label="Taxonomy")
-#     id_btn.click(fn=getRandID, inputs=[], outputs=[rand_id, rand_id_indx])
-#     search_btn.click(fn=searchEmbeddings, inputs=[process_id, mod1, mod2],
-#                      outputs=[process_id_list, image0, image1, image2, image3, image4, image5, image6, image7, image8, image9, image10,
-#                               tax0, tax1, tax2, tax3, tax4, tax5, tax6, tax7, tax8, tax9, tax10])
-#     examples = gr.Examples(
-#         examples=[["ABOTH966-22", "DNA", "DNA"],
-#                   ["CRTOB8472-22", "DNA", "Image"],
-#                   ["PLOAD050-20", "Image", "DNA"],
-#                   ["HELAC26711-21", "Image", "Image"]],
-#         inputs=[process_id, mod1, mod2],)
-# demo.launch()

 import pickle
 import random
+def get_image(image1, image2, dataset_image_mask, processid_to_index, idx):
+    if (idx < 162834):
+        image_enc_padded = image1[idx].astype(np.uint8)
+    elif(idx >= 162834):
+        image_enc_padded = image2[idx-162834].astype(np.uint8)
+    enc_length = dataset_image_mask[idx]
+    image_enc = image_enc_padded[:enc_length]
+    image = Image.open(io.BytesIO(image_enc))
+    return image
+def searchEmbeddings(id, mod1, mod2):
+    # variable and index initialization
+    original_indx = processid_to_index[id]
+    dim = 768
+    num_neighbors = 10
+    # get index
+    index = faiss.IndexFlatIP(dim)
+    if (mod2 == "Image"):
+        index = faiss.read_index("image_index.index")
+    elif (mod2 == "DNA"):
+        index = faiss.read_index("dna_index.index")
+    # search index
+    if (mod1 == "Image"):
+        query = id_to_image_emb_dict[id]
+    elif(mod1 == "DNA"):
+        query = id_to_dna_emb_dict[id]
+    query = query.astype(np.float32)
+    D, I = index.search(query, num_neighbors)
+    id_list = []
+    for indx in I[0]:
+        id = indx_to_id_dict[indx]
+        id_list.append(id)
+    # get images
+    image0 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, original_indx)
+    image1 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][0])
+    image2 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][1])
+    image3 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][2])
+    image4 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][3])
+    image5 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][4])
+    image6 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][5])
+    image7 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][6])
+    image8 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][7])
+    image9 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][8])
+    image10 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][9])
+    # get taxonomic information
+    s0 = getTax(original_indx)
+    s1 = getTax(I[0][0])
+    s2 = getTax(I[0][1])
+    s3 = getTax(I[0][2])
+    s4 = getTax(I[0][3])
+    s5 = getTax(I[0][4])
+    s6 = getTax(I[0][5])
+    s7 = getTax(I[0][6])
+    s8 = getTax(I[0][7])
+    s9 = getTax(I[0][8])
+    s10 = getTax(I[0][9])
+    return id_list, image0, image1, image2, image3, image4, image5, image6, image7, image8, image9, image10, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10
+def getRandID():
+    indx = random.randrange(0, 325667)
+    return indx_to_id_dict[indx], indx
+def getTax(indx):
+    s = species[indx]
+    g = genus[indx]
+    f = family[indx]
+    str = "Species: " + s + "\nGenus: " + g + "\nFamily: " + f
+    return str
+with gr.Blocks(title="Bioscan-Clip") as demo:
+    # open general files
+    with open("dataset_image1.pickle", "rb") as f:
+        dataset_image1 = pickle.load(f)
+    with open("dataset_image2.pickle", "rb") as f:
+        dataset_image2 = pickle.load(f)
+    with open("dataset_processid_list.pickle", "rb") as f:
+        dataset_processid_list = pickle.load(f)
+    with open("dataset_image_mask.pickle", "rb") as f:
+        dataset_image_mask = pickle.load(f)
+    with open("processid_to_index.pickle", "rb") as f:
+        processid_to_index = pickle.load(f)
+    with open("indx_to_id_dict.pickle", "rb") as f:
+        indx_to_id_dict = pickle.load(f)
+    # open image files
+    with open("id_to_image_emb_dict.pickle", "rb") as f:
+        id_to_image_emb_dict = pickle.load(f)
+    # open dna files
+    with open("id_to_dna_emb_dict.pickle", "rb") as f:
+        id_to_dna_emb_dict = pickle.load(f)
+    # open taxonomy files
+    with open("family.pickle", "rb") as f:
+        family = [item.decode("utf-8") for item in pickle.load(f)]
+    with open("genus.pickle", "rb") as f:
+        genus= [item.decode("utf-8") for item in pickle.load(f)]
+    with open("species.pickle", "rb") as f:
+        species = [item.decode("utf-8") for item in pickle.load(f)]
+    with gr.Column():
+        process_id = gr.Textbox(label="ID:", info="Enter a sample ID to search for")
+        process_id_list = gr.Textbox(label="Closest 10 matches:" )
+        mod1 = gr.Radio(choices=["DNA", "Image"], label="Search From:")
+        mod2 = gr.Radio(choices=["DNA", "Image"], label="Search To:")
+        search_btn = gr.Button("Search")
+        with gr.Row():
+            with gr.Column():
+                image0 = gr.Image(label="Original", height=550)
+                tax0 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                rand_id = gr.Textbox(label="Random ID:")
+                rand_id_indx = gr.Textbox(label="Index:")
+                id_btn = gr.Button("Get Random ID")
+        with gr.Row():
+            with gr.Column():
+                image1 = gr.Image(label=1)
+                tax1 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image2 = gr.Image(label=2)
+                tax2 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image3 = gr.Image(label=3)
+                tax3 = gr.Textbox(label="Taxonomy")
+        with gr.Row():
+            with gr.Column():
+                image4 = gr.Image(label=4)
+                tax4 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image5 = gr.Image(label=5)
+                tax5 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image6 = gr.Image(label=6)
+                tax6 = gr.Textbox(label="Taxonomy")
+        with gr.Row():
+            with gr.Column():
+                image7 = gr.Image(label=7)
+                tax7 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image8 = gr.Image(label=8)
+                tax8 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image9 = gr.Image(label=9)
+                tax9 = gr.Textbox(label="Taxonomy")
+            with gr.Column():
+                image10 = gr.Image(label=10)
+                tax10 = gr.Textbox(label="Taxonomy")
+    id_btn.click(fn=getRandID, inputs=[], outputs=[rand_id, rand_id_indx])
+    search_btn.click(fn=searchEmbeddings, inputs=[process_id, mod1, mod2],
+                     outputs=[process_id_list, image0, image1, image2, image3, image4, image5, image6, image7, image8, image9, image10,
+                              tax0, tax1, tax2, tax3, tax4, tax5, tax6, tax7, tax8, tax9, tax10])
+    examples = gr.Examples(
+        examples=[["ABOTH966-22", "DNA", "DNA"],
+                  ["CRTOB8472-22", "DNA", "Image"],
+                  ["PLOAD050-20", "Image", "DNA"],
+                  ["HELAC26711-21", "Image", "Image"]],
+        inputs=[process_id, mod1, mod2],)
+demo.launch()