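# GDC-QAG: a Gradio app (and MCP server) for question answering over the NCI
# Genomic Data Commons. The pipeline extracts gene, mutation and cancer (project)
# entities from a natural language query, classifies the user intent with a
# fine-tuned BERT model, executes the corresponding GDC API call, and uses a
# guidance-constrained Llama-3.2-3B-Instruct model to phrase the descriptive and
# percentage responses.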
import json
import os
import re
import textwrap
from itertools import chain
from types import SimpleNamespace

import gradio as gr
import numpy as np
import pandas as pd
import spaces
import spacy
import torch
from guidance import gen as guidance_gen
from guidance.models import Transformers
from sentence_transformers import SentenceTransformer, util
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BertForSequenceClassification,
    BertTokenizer,
    set_seed,
)

from methods import gdc_api_calls, utilities

# set up various tokens
hf_TOKEN = os.environ.get("hf_svc_ctds", False)

# disable tokenizer parallelism
os.environ["TOKENIZERS_PARALLELISM"] = "false"
EXAMPLE_INPUTS = [
    "What is the co-occurrence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?",
    "What is the co-occurrence frequency of somatic heterozygous deletions in BRCA2 and NF1 in the Kidney Chromophobe TCGA-KICH project in the genomic data commons?",
    "What percentage of ovarian serous cystadenocarcinoma cases have a somatic heterozygous deletion in BRCA1 and simple somatic mutations in BRCA1 in the genomic data commons?",
    "What fraction of cases have simple somatic mutations or copy number variants in ALK in Lung Adenocarcinoma TCGA-LUAD project in the genomic data commons?",
    "How often is microsatellite instability observed in Colon Adenocarcinoma TCGA-COAD project in the genomic data commons?",
    "How often is the BRAF V600E mutation found in Skin Cutaneous Melanoma TCGA-SKCM project in the genomic data commons?",
    "What is the co-occurrence frequency of IDH1 R132H and TP53 R273C simple somatic mutations in the low grade glioma project TCGA-LGG in the genomic data commons?",
    "In Lung Adenocarcinoma TCGA-LUAD project data from the genomic data commons, what is the frequency of ALK amplification?",
]

EXAMPLE_LABELS = [
    "combination homozygous deletions",
    "combination heterozygous deletions",
    "heterozygous deletion and somatic mutations",
    "copy number variants or somatic mutations",
    "microsatellite-instability",
    "simple somatic mutation",
    "combination somatic mutations",
    "single gene amplification",
]
# for natural language gene and intent descriptions
intent_expansion = {
    "cnv_and_ssm": "copy number variants or simple somatic mutations",
    "freq_cnv_loss_or_gain": "copy number variant losses or gains",
    "msi_h_frequency": "microsatellite instability",
    "freq_cnv_loss_or_gain_comb": "copy number variant losses or gains",
    "ssm_frequency": "simple somatic mutations",
    "top_cases_counts_by_gene": "copy number variants or simple somatic mutations",
}
# set up requirements: models and data
print("getting gdc project information")
project_mappings = gdc_api_calls.get_gdc_project_ids(start=0, stop=86)

print("loading intent model and tokenizer")
model_id = "uc-ctds/query_intent"
intent_tok = AutoTokenizer.from_pretrained(
    model_id, trust_remote_code=True, token=hf_TOKEN
)
intent_model = BertForSequenceClassification.from_pretrained(model_id, token=hf_TOKEN)
intent_model = intent_model.to("cuda").eval()

# load sentence transformer model to test cancer embeddings
print("loading sentence transformer model")
st_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
st_model = st_model.to("cuda").eval()

print("loading gdc genes and mutations")
gdc_genes_mutations = utilities.load_gdc_genes_mutations_hf(hf_TOKEN)

print("loading llama-3B model and tokenizer")
model_id = "meta-llama/Llama-3.2-3B-Instruct"
tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=hf_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, trust_remote_code=True, token=hf_TOKEN
)
model = model.to("cuda").eval()

# global init to test guidance speed up
base_lm = Transformers(model=model, tokenizer=tok)
def infer_mutation_entities(gene_entities, query):
    # collect known GDC mutations of the detected genes that are mentioned in the query
    mutation_entities = []
    for g in gene_entities:
        for m in gdc_genes_mutations[g]:
            if m in query:
                mutation_entities.append(m)
    return mutation_entities


def infer_gene_entities_from_query(query):
    entities = []
    # gene recognition with a simple dictionary-based method
    for g in gdc_genes_mutations.keys():
        if g in query.split(" "):
            entities.append(g)
    return entities
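# Illustrative usage of the two lookups above (assumes "BRAF" is a key in
# gdc_genes_mutations and "V600E" is one of its listed mutations):
#   infer_gene_entities_from_query("How often is the BRAF V600E mutation found?")
#   -> ["BRAF"]
#   infer_mutation_entities(["BRAF"], "How often is the BRAF V600E mutation found?")
#   -> ["V600E"]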
def get_project_embeddings():
    project_rows = []
    for k, v in project_mappings.items():
        new_v = [item.replace(",", "") for item in v]
        combined = ",".join([k] + new_v)
        project_rows.append(combined)
    row_embeddings = st_model.encode(project_rows, convert_to_tensor=True, device="cuda")
    return project_rows, row_embeddings.cpu().numpy()


def check_if_project_id_in_query(query):
    # check whether a project key, e.g. TCGA-BRCA, is mentioned in the query
    project_list = project_mappings.keys()
    cancer_entities = [
        potential_ce
        for potential_ce in query.split(" ")
        if potential_ce in project_list
    ]
    return cancer_entities
def proj_id_and_partial_match(query, initial_cancer_entities):
    final_entities = []
    if initial_cancer_entities:
        # check for a match between initial cancer entities and GDC project descriptions,
        # e.g. match "ovarian serous cystadenocarcinoma" to the TCGA-OV project
        for ic in initial_cancer_entities:
            for k, v in project_mappings.items():
                for c in v:
                    if ic in c.lower():
                        final_entities.append(k)
    else:
        # no initial cancer entities; check for a match between query terms
        # and GDC project descriptions
        for term in query.lower().split(" "):
            for k, v in project_mappings.items():
                for c in v:
                    if term in c.lower():
                        final_entities.append(k)
    return list(set(final_entities))
def get_top_k_scores(query, row_embeddings, top_k=20):
    query_embedding = st_model.encode(query, convert_to_tensor=True, device="cuda")
    row_embeddings = torch.from_numpy(row_embeddings).float().to("cuda")
    cosine_scores = util.cos_sim(query_embedding, row_embeddings)
    top_results = torch.topk(cosine_scores, k=top_k)
    # move to CPU and return
    top_results_scores = top_results.values.cpu().tolist()
    top_results_indices = top_results.indices.cpu().tolist()
    return top_results_scores, top_results_indices


def get_top_k_cancer_entities(project_rows, top_results_scores, top_results_indices):
    top_cancer_entities = []
    for idx, score in enumerate(top_results_scores[0]):
        if score > 0.5:
            row_idx = top_results_indices[0][idx]
            print("best row, score: {} {}".format(project_rows[row_idx], score))
            top_cancer_entities.append([project_rows[row_idx], score])
    try:
        top_projects = [sublist[0].split(",")[0] for sublist in top_cancer_entities]
    except Exception:
        top_projects = []
    return top_projects
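# Illustrative usage of the embedding-based match (project row and score are hypothetical):
#   project_rows, row_embeddings = get_project_embeddings()
#   scores, indices = get_top_k_scores("kidney chromophobe", row_embeddings)
#   get_top_k_cancer_entities(project_rows, scores, indices)  # e.g. ["TCGA-KICH"]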
def postprocess_cancer_entities(initial_cancer_entities, query):
    # get project embeddings
    print("loading cancer embeddings")
    project_rows, row_embeddings = get_project_embeddings()
    final_entities = check_if_project_id_in_query(query)
    if final_entities:
        return final_entities
    else:
        if initial_cancer_entities:
            # test 1 (w/ initial entities): query the GDC projects endpoint for a project_id
            gdc_project_match = gdc_api_calls.map_cancer_entities_to_project(
                initial_cancer_entities, project_mappings
            )
            if gdc_project_match.values():
                final_entities = list(gdc_project_match.values())
            if not final_entities:
                # test 2 (w/ initial entities): no result from the GDC projects endpoint,
                # check for matches between query terms and gdc project_mappings
                final_entities = proj_id_and_partial_match(
                    query, initial_cancer_entities
                )
            # try embedding based match
            if not final_entities:
                print("Test embedding based match")
                for i in initial_cancer_entities:
                    top_results_scores, top_results_indices = get_top_k_scores(
                        i, row_embeddings
                    )
                    c_entities = get_top_k_cancer_entities(
                        project_rows, top_results_scores, top_results_indices
                    )
                    final_entities.append(c_entities)
                final_entities = list(chain.from_iterable(final_entities))
        else:
            # test 3 (w/o initial entities): check for matches between query terms
            # and gdc project_mappings
            final_entities = proj_id_and_partial_match(
                query, initial_cancer_entities
            )
    return final_entities
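# Cancer-entity resolution above falls back in this order:
#   1. explicit project IDs mentioned in the query (e.g. TCGA-MESO),
#   2. NER entities mapped through the GDC projects endpoint,
#   3. partial string matches against the GDC project descriptions,
#   4. embedding similarity between each NER entity and project rows (score > 0.5);
# construct_and_execute_api_call defaults to all projects if every step comes up empty.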
def execute_api_call(intent, gene_entities, mutation_entities, cancer_entities, query):
    if intent == "ssm_frequency":
        result, cancer_entities = utilities.get_ssm_frequency(
            gene_entities, mutation_entities, cancer_entities, project_mappings
        )
    elif intent == "top_mutated_genes_by_project":
        result = gdc_api_calls.get_top_mutated_genes_by_project(
            cancer_entities, top_k=10
        )
    elif intent == "most_frequently_mutated_gene":
        result = gdc_api_calls.get_top_mutated_genes_by_project(
            cancer_entities, top_k=1
        )
    elif intent == "freq_cnv_loss_or_gain":
        result, cancer_entities = gdc_api_calls.get_freq_cnv_loss_or_gain(
            gene_entities, cancer_entities, query, cnv_and_ssm_flag=False
        )
    elif intent == "msi_h_frequency":
        result, cancer_entities = gdc_api_calls.get_msi_frequency(cancer_entities)
    elif intent == "cnv_and_ssm":
        result, cancer_entities = utilities.get_freq_of_cnv_and_ssms(
            query, cancer_entities, gene_entities, gdc_genes_mutations
        )
    elif intent == "top_cases_counts_by_gene":
        result, cancer_entities = gdc_api_calls.get_top_cases_counts_by_gene(
            gene_entities, cancer_entities
        )
    elif intent == "project_summary":
        result = gdc_api_calls.get_project_summary(cancer_entities)
    else:
        result = "user intent not recognized, or use case not covered"
    return result, cancer_entities
def construct_modified_query_base_llm(query):
    # note the leading space so the instruction does not run into the query text
    prompt_template = (
        " Only use results from the genomic data commons in your response and "
        "provide frequencies as a percentage. Only report the final response."
    )
    modified_query = query + prompt_template
    return modified_query


def construct_modified_query_percentage(query, gdc_result):
    # pass the api results as a prompt to the query
    prompt_template = (
        " Only report the final response. Ignore all prior knowledge. You must only "
        "respond with the following percentage frequencies in your response, "
        "no other response is allowed: \n"
        + gdc_result
        + "\n"
    )
    modified_query = query + prompt_template
    return modified_query


def construct_modified_query_description(genes, intent):
    modified_query = f"Provide a one line general description about {intent} in genes {genes} in cancer."
    return modified_query
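# For example, with genes "ALK" and the expanded intent
# "copy number variants or simple somatic mutations", the descriptive prompt is:
#   "Provide a one line general description about copy number variants or
#    simple somatic mutations in genes ALK in cancer."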
def infer_user_intent(query):
    intent_labels = {
        "ssm_frequency": 0.0,
        "msi_h_frequency": 1.0,
        "freq_cnv_loss_or_gain": 2.0,
        "top_cases_counts_by_gene": 3.0,
        "cnv_and_ssm": 4.0,
    }
    inputs = intent_tok(query, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    outputs = intent_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    predicted_label = torch.argmax(probs, dim=1).item()
    for k, v in intent_labels.items():
        if v == predicted_label:
            return k


# initial guesses for cancer entities
def return_initial_cancer_entities(query, model):
    nlp = spacy.load(model)
    doc = nlp(query)
    result = doc.ents
    initial_cancer_entities = [e.text for e in result if e.label_ == "DISEASE"]
    return initial_cancer_entities
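# Illustrative intent prediction (the label depends on the fine-tuned classifier):
#   infer_user_intent("How often is microsatellite instability observed in TCGA-COAD?")
#   -> "msi_h_frequency"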
# function to combine entities, intent and API call
def construct_and_execute_api_call(query):
    print(
        "\nStep 1: Starting GDC-QAG on input natural language query:\n{}\n".format(
            query
        )
    )
    # infer entities
    initial_cancer_entities = check_if_project_id_in_query(query)
    if not initial_cancer_entities:
        try:
            initial_cancer_entities = return_initial_cancer_entities(
                query, model="en_ner_bc5cdr_md"
            )
            print("initial cancer entities {}".format(initial_cancer_entities))
        except Exception as e:
            print("unable to guess cancer entities {}".format(str(e)))
            initial_cancer_entities = []
    cancer_entities = postprocess_cancer_entities(
        initial_cancer_entities=initial_cancer_entities, query=query
    )
    # if cancer entities is empty from the above methods, return all projects
    if not cancer_entities:
        cancer_entities = list(project_mappings.keys())
    gene_entities = infer_gene_entities_from_query(query)
    mutation_entities = infer_mutation_entities(
        gene_entities=gene_entities, query=query
    )
    print("\nStep 2: Entity Extraction\n")
    print("gene entities {}".format(gene_entities))
    print("mutation entities {}".format(mutation_entities))
    print("cancer entities {}".format(cancer_entities))
    # infer user intent
    intent = infer_user_intent(query)
    print("\nStep 3: Intent Inference:\n{}\n".format(intent))
    try:
        print("\nStep 4: API call builder for intent {}\n".format(intent))
        api_call_result, cancer_entities = execute_api_call(
            intent, gene_entities, mutation_entities, cancer_entities, query
        )
    except Exception as e:
        print("unable to process query {} {}".format(query, str(e)))
        api_call_result = []
        cancer_entities = []
    return SimpleNamespace(
        gdc_result=api_call_result,
        cancer_entities=cancer_entities,
        intent=intent,
        gene_entities=gene_entities,
        mutation_entities=mutation_entities,
    )
# generate llama model response (percentage), constrained by a regex via guidance
def generate_percentage_response(modified_query):
    # set_seed(1042)
    regex = r"The final response is: \d*\.\d*%"
    lm = base_lm
    lm += modified_query
    lm += guidance_gen("pct_response", n=1, temperature=0, max_tokens=40, regex=regex)
    return lm["pct_response"]


# generate llama model descriptive response
def generate_descriptive_response(modified_query):
    lm = base_lm
    lm += modified_query
    lm += guidance_gen(
        "desc_response",
        n=1,
        temperature=0,
        max_tokens=100,
        regex=r"^[^\n]*[.\S+]$",
    )
    return lm["desc_response"]
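# The regex constraints above force guidance to emit responses of a fixed shape,
# e.g. "The final response is: 12.5%" for the percentage path (value illustrative)
# and a single-line sentence for the descriptive path.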
def batch_test(query):
    modified_query = construct_modified_query_base_llm(query)
    print(f"obtain baseline llama-3B response on modified query: {modified_query}")
    llama_base_output = generate_percentage_response(modified_query)
    print(f"llama-3B baseline response: {llama_base_output}")
    try:
        result = construct_and_execute_api_call(query)
    except Exception as e:
        # unable to compute at this time; return an empty result so the query can be rechecked
        print("unable to process query {} {}".format(query, str(e)))
        result = SimpleNamespace(
            gdc_result=[],
            cancer_entities=[],
            intent=None,
            gene_entities=[],
            mutation_entities=[],
        )
    # if there is not a helper output for each unique cancer entity,
    # log the error to inspect and reprocess the query later
    try:
        assert len(result.gdc_result) == len(result.cancer_entities)
    except Exception:
        msg = (
            "there is not a unique helper output for each unique "
            "cancer entity in {}".format(query)
        )
        print("exception {}".format(msg))
        result.gdc_result = []
        result.cancer_entities = []
    return pd.Series(
        [
            llama_base_output,
            result.gdc_result,
            result.cancer_entities,
            result.intent,
            result.gene_entities,
            result.mutation_entities,
        ]
    )
def get_prefinal_response(row):
    try:
        query = row["questions"]
        gdc_result = row["gdc_result"]
    except Exception as e:
        print("unable to retrieve query or gdc_result from row: {}".format(str(e)))
    print("\nStep 6: Construct LLM prompts (percentage) for llama-3B\n")
    percentage_prompt = construct_modified_query_percentage(query, gdc_result)
    print("\nStep 7: Generate LLM response R (percentage) on query augmented prompts\n")
    percentage_response = generate_percentage_response(percentage_prompt)
    percentage_response = re.sub(
        r"final response", "frequency for your query", percentage_response
    )
    return pd.Series([percentage_prompt, percentage_response])
def postprocess_llm_description(descriptive_response):
    try:
        num_tokens = len(tok.encode(descriptive_response))
        if num_tokens < 100:
            postprocessed_desc_response = descriptive_response
        else:
            # the response likely hit max_tokens; drop the truncated final sentence,
            # splitting on periods that are not part of a percentage such as "12.5%"
            response_list = re.split(r"\.(?!\d+%)", descriptive_response)
            # remove empty elements
            filtered_list = list(filter(None, response_list))
            postprocessed_desc_response = ".".join(filtered_list[:-1])
    except Exception as e:
        print("unable to postprocess LLM gene description {}".format(str(e)))
        postprocessed_desc_response = "unable to postprocess LLM gene description"
    if not postprocessed_desc_response.endswith("."):
        postprocessed_desc_response += "."
    return postprocessed_desc_response
def postprocess_percentage_response(
    gdc_qag_base_stat, gdc_result_percentage, gdc_qag_percentage_response
):
    try:
        # check/confirm that the gdc_qag_base_stat percentage equals gdc_result_percentage;
        # if not, fall back to the percentage reported by the GDC API
        if gdc_qag_base_stat != gdc_result_percentage:
            gdc_qag_base_stat = gdc_result_percentage
            final_gdc_qag_percentage_response = "The frequency for your query is: {}%".format(
                gdc_qag_base_stat
            )
        else:
            final_gdc_qag_percentage_response = gdc_qag_percentage_response
    except Exception as e:
        print("unable to postprocess percentage frequency {}".format(str(e)))
        final_gdc_qag_percentage_response = "unable to postprocess percentage frequency"
    return gdc_qag_base_stat, final_gdc_qag_percentage_response
def postprocess_response(row):
    # three goals:
    # goal 1: check/confirm the results in the gdc-qag percentage response and
    #         return a percentage response for gdc-qag
    # goal 2: postprocess the descriptive response
    # goal 3: return the concatenated final response from gdc-qag
    #         (descriptive response + percentage response)
    pattern = r".*?(\d*\.\d*)%.*?"
    ###### various inputs ###############################
    try:
        # this is the result obtained in GDC-QAG via the API
        gdc_result = row["gdc_result"]
    except Exception as e:
        print("GDC Result not found in gdc_qag output, returning nan {}".format(str(e)))
        gdc_result = np.nan
    try:
        # extract the percentage from gdc_result
        match = re.search(pattern, gdc_result)
        if match:
            gdc_result_percentage = float(match.group(1))
        else:
            gdc_result_percentage = np.nan
            print("no data available in gdc")
    except Exception as e:
        print("unable to extract percentage from gdc result {}".format(str(e)))
        gdc_result_percentage = np.nan
    try:
        # this is the LLM generated response with the frequency, after seeing gdc_result
        gdc_qag_percentage_response = row["percentage_response"]
    except Exception as e:
        print("LLM generated gdc_qag percentage response not found, returning nan {}".format(str(e)))
        gdc_qag_percentage_response = np.nan
    try:
        # extract the gdc_qag percentage from the LLM response
        gdc_qag_base_stat = float(re.search(pattern, gdc_qag_percentage_response).group(1))
    except Exception as e:
        print("unable to extract percentage from gdc_qag percentage response {}".format(str(e)))
        gdc_qag_base_stat = np.nan
    # llama-3B base output
    llama_base_output = row["llama_base_output"]
    try:
        # extract the llama percentage from the llama base output
        llama_base_stat = float(re.search(pattern, llama_base_output).group(1))
    except Exception as e:
        print("unable to extract llama base stat {}".format(str(e)))
        llama_base_stat = np.nan
    ############ postprocess LLM description + percentage ###############
    final_gdc_qag_desc_response = postprocess_llm_description(row["descriptive_response"])
    gdc_qag_base_stat, final_gdc_qag_percentage_response = postprocess_percentage_response(
        gdc_qag_base_stat, gdc_result_percentage, gdc_qag_percentage_response
    )
    final_gdc_qag_response = (
        final_gdc_qag_desc_response + " " + final_gdc_qag_percentage_response
    )
    return pd.Series(
        [
            llama_base_stat,
            gdc_qag_base_stat,
            final_gdc_qag_desc_response,
            final_gdc_qag_percentage_response,
            final_gdc_qag_response,
        ]
    )
def format_error_string():
    error_string = (
        "Error executing the query. Please check out 'Examples' to formulate your "
        "search query. To specify cancer types, refer to the Project Name from the "
        "Genomic Data Commons, e.g. 'breast invasive carcinoma' for breast cancer."
    )
    error_string = f"""
> Query augmented generation error:
> {error_string}
"""
    return error_string


def wrap_output(result_str):
    return "\n".join(textwrap.wrap(result_str, width=80))
def format_result_string(result):
    result_string = f"""
```
Question:
{result['GDC-QAG results']['Question']}
```
```
QAG intermediate outputs:
Gene entities: {result['GDC-QAG results']['Gene entities']}
Mutation entities: {result['GDC-QAG results']['Mutation entities']}
Cancer entities: {result['GDC-QAG results']['Cancer entities']}
Intent: {result['GDC-QAG results']['Intent']}
```
```
QAG final response:
{result['GDC-QAG results']['Query augmented generation']}
```
"""
    print("result_string {}".format(result_string))
    return result_string


def format_result_string_multi(result):
    multi_result = "\n".join(result["response_with_cancer"].astype(str))
    print("multi result {}".format(multi_result))
    # test final response only
    # test adding entities soon after
    result_string = f"""
```
QAG final response:
{multi_result}
```
"""
    print("result_string {}".format(result_string))
    return result_string
def execute_pipeline(question: str):
    df = pd.DataFrame({"questions": [question]})
    print(f"\n\nQuestion received: {question}\n")
    try:
        # queries input file
        df[
            [
                "llama_base_output",
                "gdc_result",
                "cancer_entities",
                "intent",
                "gene_entities",
                "mutation_entities",
            ]
        ] = df["questions"].apply(lambda x: batch_test(x))
        df_exploded = df.explode("gdc_result", ignore_index=True)
        # generate the descriptive response once, based on genes and intent
        print("\nStep 6: Construct LLM prompts (descriptive) for llama-3B\n")
        intent = intent_expansion[df["intent"].iloc[0]]
        genes = ",".join(df["gene_entities"].iloc[0])
        print("intent, genes {} {}".format(intent, genes))
        descriptive_prompt = construct_modified_query_description(genes, intent)
        print("desc prompt {}".format(descriptive_prompt))
        print("\nStep 7: Generate LLM response R (descriptive) on query augmented prompts\n")
        descriptive_response = generate_descriptive_response(descriptive_prompt)
        print("desc response {}".format(descriptive_response))
        if not descriptive_response.endswith("."):
            descriptive_response += "."
        df_exploded[["descriptive_prompt", "descriptive_response"]] = (
            descriptive_prompt,
            descriptive_response,
        )
        df_exploded[["percentage_prompt", "percentage_response"]] = df_exploded.apply(
            lambda x: get_prefinal_response(x), axis=1
        )
        ### postprocess response
        print("\nStep 8: Final check and confirmation\n")
        df_exploded[
            [
                "llama_base_stat",
                "gdc_qag_base_stat",
                "final_gdc_qag_desc_response",
                "final_gdc_qag_percentage_response",
                "final_gdc_qag_response",
            ]
        ] = df_exploded.apply(lambda x: postprocess_response(x), axis=1)
        final_columns = utilities.get_final_columns()
        result = df_exploded[final_columns].copy()
        result.rename(
            columns={
                "llama_base_output": "llama-3B baseline output",
                "descriptive_prompt": "Descriptive prompt",
                "percentage_prompt": "Percentage prompt",
                "gdc_result": "GDC Result",
                "gdc_qag_base_stat": "GDC-QAG frequency",
                "llama_base_stat": "llama-3B baseline frequency",
                "final_gdc_qag_response": "Query augmented generation",
                "intent": "Intent",
                "cancer_entities": "Cancer entities",
                "gene_entities": "Gene entities",
                "mutation_entities": "Mutation entities",
                "questions": "Question",
            },
            inplace=True,
        )
        result.index = ["GDC-QAG results"] * len(result)
        print("completed")
        print("\nWriting result string now\n")
        if result.shape[0] > 1:
            result["response_with_cancer"] = (
                result["Query augmented generation"] + "." + result["GDC Result"]
            )
            print("multi cancer result {}".format(result))
            result_string = format_result_string_multi(result)
        else:
            result = result.T.to_dict()
            result_string = format_result_string(result)
    except Exception as e:
        print("unable to execute pipeline for question {} {}".format(question, str(e)))
        result_string = format_error_string()
    return result_string
def visible_component(input_text):
    return gr.update(value="WHATEVER")


# Create Gradio interface
with gr.Blocks(
    title="GDC QAG MCP server",
    css="""
    #format-textbox label {
        font-size: 25px;
        font-weight: bold;
    }
    #format-textbox input::placeholder {
        font-size: 20px;
    }
    #format-textbox .svelte-1ipelgc {
        font-size: 18px;
    }
    """,
) as GDC_QAG_QUERY:
    gr.Markdown(
        """
# GDC-QAG Service
"""
    )
    with gr.Row():
        query_input = gr.Textbox(
            lines=3,
            label="Please see 'Examples' below to test sample queries. Formulate your search query similarly to the examples. To specify cancer types, refer to the Project Name from the Genomic Data Commons, e.g. 'breast invasive carcinoma' for breast cancer.",
            placeholder='e.g. "What is the co-occurrence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?"',
            info="Required: Enter your query. Please retry the query if the GDC API is unavailable or the connection aborts.",
            elem_id="format-textbox",
        )
    gr.Examples(
        examples=EXAMPLE_INPUTS, inputs=query_input, example_labels=EXAMPLE_LABELS
    )
    execute_button = gr.Button("Execute", variant="primary")
    output = gr.Markdown(
        """
### Query Result
_The result of the query will appear here_
"""
    )
    execute_button.click(
        fn=execute_pipeline,
        inputs=[query_input],
        outputs=output,
    )


if __name__ == "__main__":
    GDC_QAG_QUERY.launch(mcp_server=True, show_api=True)