mlukowski committed
Commit 7db7222 · verified · Parent: 8e30203

token-fix (#3)


- update tokens for app (c4634a054c973948c282bc181693556957d52580)
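
In effect, the commit collapses the app's three per-model secrets (let_this_please_work, fineTest, query_intent_test) into one Hugging Face token read from the hf_svc_ctds secret and passed to every from_pretrained call. A minimal sketch of the resulting pattern, using only names that appear in the diff below (the False fallback means downloads are attempted anonymously when the secret is unset):

    import os

    from transformers import AutoTokenizer

    # One shared token for all gated/private Hugging Face downloads.
    hf_TOKEN = os.environ.get("hf_svc_ctds", False)  # False -> anonymous access

    # The same token is reused for every model and tokenizer load.
    tok = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-3.2-3B-Instruct", trust_remote_code=True, token=hf_TOKEN
    )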

Files changed (1): app.py (+53, -80)
app.py CHANGED

@@ -1,23 +1,25 @@
+import json
 import os
 from types import SimpleNamespace
+
 import gradio as gr
-import json
 import pandas as pd
 import spaces
 import torch
-from methods import gdc_api_calls, utilities
-from transformers import AutoTokenizer, BertTokenizer, AutoModelForCausalLM, BertForSequenceClassification
 from guidance import gen as guidance_gen
 from guidance.models import Transformers
-from transformers import set_seed
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BertForSequenceClassification,
+    BertTokenizer,
+    set_seed,
+)

 from methods import gdc_api_calls, utilities

-
 # set up various tokens
-working_llama_token = os.environ.get("let_this_please_work", False)
-hf_TOKEN = os.environ.get("fineTest", False)
-intent_token = os.environ.get("query_intent_test", False)
+hf_TOKEN = os.environ.get("hf_svc_ctds", False)

 EXAMPLE_INPUTS = [
     "What is the co-occurence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?",
@@ -26,7 +28,7 @@ EXAMPLE_INPUTS = [
     "What fraction of cases have simple somatic mutations or copy number variants in ALK in Uterine Carcinosarcoma TCGA-UCS project in the genomic data commons?",
     "How often is microsatellite instability observed in Stomach Adenocarcinoma TCGA-STAD project in the genomic data commons?",
     "How often is the BRAF V600E mutation found in Skin Cutaneous Melanoma TCGA-SKCM project in the genomic data commons?",
-    "What is the co-occurence frequency of IDH1 R132H and TP53 R273C simple somatic mutations in the low grade glioma project TCGA-LGG in the genomic data commons?"
+    "What is the co-occurence frequency of IDH1 R132H and TP53 R273C simple somatic mutations in the low grade glioma project TCGA-LGG in the genomic data commons?",
 ]

 EXAMPLE_LABELS = [
@@ -36,22 +38,20 @@ EXAMPLE_LABELS = [
     "copy number variants or somatic mutations",
     "microsatellite-instability",
     "simple somatic mutation",
-    "combination somatic mutations"
+    "combination somatic mutations",
 ]

 # set up requirements: models and data
 print("getting gdc project information")
 project_mappings = gdc_api_calls.get_gdc_project_ids(start=0, stop=86)

-print('loading intent model and tokenizer')
-model_id = 'uc-ctds/query_intent'
+print("loading intent model and tokenizer")
+model_id = "uc-ctds/query_intent"
 intent_tok = AutoTokenizer.from_pretrained(
-    model_id, trust_remote_code=True,
-    token=intent_token
+    model_id, trust_remote_code=True, token=hf_TOKEN
 )
-intent_model = BertForSequenceClassification.from_pretrained(
-    model_id, token=intent_token)
-intent_model = intent_model.to('cuda').eval()
+intent_model = BertForSequenceClassification.from_pretrained(model_id, token=hf_TOKEN)
+intent_model = intent_model.to("cuda").eval()


 print("loading gdc genes and mutations")
@@ -59,27 +59,15 @@ gdc_genes_mutations = utilities.load_gdc_genes_mutations_hf(hf_TOKEN)

 print("loading llama-3B model and tokenizer")
 model_id = "meta-llama/Llama-3.2-3B-Instruct"
-tok = AutoTokenizer.from_pretrained(
-    model_id, trust_remote_code=True,
-    token=working_llama_token
-)
+tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=hf_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    trust_remote_code=True,
-    token=working_llama_token
+    model_id, torch_dtype=torch.float16, trust_remote_code=True, token=hf_TOKEN
 )
-model = model.to('cuda').eval()
+model = model.to("cuda").eval()


 # execute_api_call
-def execute_api_call(
-    intent,
-    gene_entities,
-    mutation_entities,
-    cancer_entities,
-    query
-):
+def execute_api_call(intent, gene_entities, mutation_entities, cancer_entities, query):
     if intent == "ssm_frequency":
         result, cancer_entities = utilities.get_ssm_frequency(
             gene_entities, mutation_entities, cancer_entities, project_mappings
@@ -123,7 +111,7 @@ def infer_user_intent(query):
         "cnv_and_ssm": 4.0,
     }
     inputs = intent_tok(query, return_tensors="pt", truncation=True, padding=True)
-    inputs = {k: v.to('cuda') for k, v in inputs.items()}
+    inputs = {k: v.to("cuda") for k, v in inputs.items()}
     outputs = intent_model(**inputs)
     probs = torch.nn.functional.softmax(outputs.logits, dim=1)
     predicted_label = torch.argmax(probs, dim=1).item()
@@ -172,11 +160,7 @@ def construct_and_execute_api_call(query):
     print("user intent:\n{}\n".format(intent))
     try:
         api_call_result, cancer_entities = execute_api_call(
-            intent,
-            gene_entities,
-            mutation_entities,
-            cancer_entities,
-            query
+            intent, gene_entities, mutation_entities, cancer_entities, query
         )
         print("api_call_result {}".format(api_call_result))
     except Exception as e:
@@ -195,17 +179,11 @@ def construct_and_execute_api_call(query):
 # generate llama model response
 @spaces.GPU(duration=30)
 def generate_response(modified_query):
-    #set_seed(1042)
+    # set_seed(1042)
     regex = "The final answer is: \d*\.\d*%"
     lm = Transformers(model=model, tokenizer=tok)
     lm += modified_query
-    lm += guidance_gen(
-        "gen_response",
-        n=1,
-        temperature=0,
-        max_tokens=1000,
-        regex=regex
-    )
+    lm += guidance_gen("gen_response", n=1, temperature=0, max_tokens=1000, regex=regex)
     return lm["gen_response"]


@@ -256,10 +234,9 @@ def get_prefinal_response(row):
     return pd.Series([modified_query, prefinal_llama_with_helper_output])


-
 def execute_pipeline(question: str):
-    df = pd.DataFrame({'questions' : [question]})
-    print(f'Question received: {question}')
+    df = pd.DataFrame({"questions": [question]})
+    print(f"Question received: {question}")
     print("starting pipeline")
     print("CUDA available:", torch.cuda.is_available())
     print("CUDA device name:", torch.cuda.get_device_name(0))
@@ -278,9 +255,7 @@ def execute_pipeline(question: str):
     ] = df["questions"].apply(lambda x: batch_test(x))
     df_exploded = df.explode("helper_output", ignore_index=True)
     df_exploded[["modified_prompt", "pre_final_llama_with_helper_output"]] = (
-        df_exploded.apply(
-            lambda x: get_prefinal_response(x), axis=1
-        )
+        df_exploded.apply(lambda x: get_prefinal_response(x), axis=1)
     )
     ### postprocess response
     print("postprocessing response")
@@ -296,31 +271,32 @@ def execute_pipeline(question: str):
             "delta_final",
             "final_response",
         ]
-    ] = df_exploded.apply(
-        lambda x: utilities.postprocess_response(x), axis=1
-    )
+    ] = df_exploded.apply(lambda x: utilities.postprocess_response(x), axis=1)
     final_columns = utilities.get_final_columns()
     result = df_exploded[final_columns]
-    result.rename(columns={
-        'llama_base_output': 'llama-3B baseline output',
-        'modified_prompt': 'Query augmented prompt',
-        'helper_output': 'Processed GDC API result',
-        'ground_truth_stat': 'Ground truth frequency from GDC',
-        'llama_base_stat': 'llama-3B baseline frequency',
-        'delta_llama': 'llama-3B frequency - Ground truth frequency',
-        'final_response': 'Query augmented generation',
-        'intent': 'Intent',
-        'cancer_entities': 'Cancer entities',
-        'gene_entities': 'Gene entities',
-        'mutation_entities': 'Mutation entities',
-        'questions' : 'Question'
-    }, inplace=True)
-    result.index = ['QAG pipeline results'] * len(result)
-    print('completed')
-    print('writing result string now')
+    result.rename(
+        columns={
+            "llama_base_output": "llama-3B baseline output",
+            "modified_prompt": "Query augmented prompt",
+            "helper_output": "Processed GDC API result",
+            "ground_truth_stat": "Ground truth frequency from GDC",
+            "llama_base_stat": "llama-3B baseline frequency",
+            "delta_llama": "llama-3B frequency - Ground truth frequency",
+            "final_response": "Query augmented generation",
+            "intent": "Intent",
+            "cancer_entities": "Cancer entities",
+            "gene_entities": "Gene entities",
+            "mutation_entities": "Mutation entities",
+            "questions": "Question",
+        },
+        inplace=True,
+    )
+    result.index = ["QAG pipeline results"] * len(result)
+    print("completed")
+    print("writing result string now")

     result = result.T.to_dict()
-    print('result {}'.format(result))
+    print("result {}".format(result))

     result_string = ""

@@ -334,7 +310,6 @@ def execute_pipeline(question: str):
     # return json.dumps(result.T.to_dict(), indent=2)


-
 def visible_component(input_text):
     return gr.update(value="WHATEVER")

@@ -349,16 +324,14 @@ with gr.Blocks(title="GDC QAG MCP server") as GDC_QAG_QUERY:

     with gr.Row():
         query_input = gr.Textbox(
-            lines = 3,
+            lines=3,
             label="Search Query",
             placeholder='e.g. "What is the co-occurence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?"',
             info="Required: Enter your search query. Click on Examples to execute example queries. Please retry query if API is unavailable or connection aborts.",
         )

     gr.Examples(
-        examples=EXAMPLE_INPUTS,
-        inputs=query_input,
-        example_labels = EXAMPLE_LABELS
+        examples=EXAMPLE_INPUTS, inputs=query_input, example_labels=EXAMPLE_LABELS
     )

     execute_button = gr.Button("Execute", variant="primary")
@@ -378,4 +351,4 @@


 if __name__ == "__main__":
-    GDC_QAG_QUERY.launch(mcp_server=True, show_api=True)
+    GDC_QAG_QUERY.launch(mcp_server=True, show_api=True)
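
For reference, a minimal sketch of the regex-constrained generation that generate_response performs after this change, mirroring the guidance calls in the diff; model, tok, and modified_query stand for the objects the surrounding app code already provides:

    from guidance import gen as guidance_gen
    from guidance.models import Transformers

    # Wrap the already-loaded HF model and tokenizer for constrained decoding.
    lm = Transformers(model=model, tokenizer=tok)
    lm += modified_query  # the query-augmented prompt built by the pipeline
    # Decoding is constrained to match the regex, so the reply always ends
    # with a percentage of the form "The final answer is: NN.NN%".
    lm += guidance_gen(
        "gen_response", n=1, temperature=0, max_tokens=1000,
        regex=r"The final answer is: \d*\.\d*%",
    )
    answer = lm["gen_response"]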