songs1 committed on
Commit
314ce90
·
1 Parent(s): 62c31e6

add range sliders

Browse files
Files changed (5) hide show
  1. app.py +322 -100
  2. config.yaml +32 -101
  3. requirements.txt +1 -0
  4. schema.py +11 -4
  5. style.css +7 -3
app.py CHANGED
@@ -5,11 +5,13 @@ import gradio as gr
5
  import requests
6
  import spaces
7
  import yaml
 
8
  from guidance import json as gen_json
9
  from guidance.models import Transformers
10
  from schema import GDCCohortSchema
11
  from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
12
 
 
13
  EXAMPLE_INPUTS = [
14
  "bam files for TCGA-BRCA",
15
  "kidney or adrenal gland cancers with alcohol history",
@@ -32,16 +34,71 @@ CARD_2_FIELD = dict(list(zip(CARD_NAMES, CARD_FIELDS)))
32
  CARD_2_VALUES = {
33
  card["name"]: card["values"] for tab in CONFIG["tabs"] for card in tab["cards"]
34
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- tok = AutoTokenizer.from_pretrained(TOKENIZER_NAME, token=AUTH_TOKEN)
37
- model = GPT2LMHeadModel.from_pretrained(MODEL_NAME, token=AUTH_TOKEN)
38
- model = model.to("cuda")
39
- model = model.eval()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
 
42
  # Generate cohort filter JSON from free text
43
  @spaces.GPU(duration=15)
44
  def generate_filter(query):
 
 
 
45
  set_seed(42)
46
  lm = Transformers(
47
  model=model,
@@ -55,8 +112,6 @@ def generate_filter(query):
55
  cohort_filter = lm["cohort"]
56
  cohort_filter = json.dumps(json.loads(cohort_filter), indent=4)
57
 
58
- # DUMMY FILTER
59
- # cohort_filter = json.dumps({"op":"and","content":[{"op":"in","content":{"field":"cases.project.project_id","value":["TCGA-BRCA"]}}, {"op":"in","content":{"field":"cases.project.program.name","value":["TCGA"]}}]})
60
  return cohort_filter
61
 
62
 
@@ -66,58 +121,161 @@ def process_query(query):
66
  cohort_filter_str = generate_filter(query)
67
  cohort_filter = json.loads(cohort_filter_str)
68
 
69
- # Map filter selections to checkboxes
70
- generated_field_2_values = {
71
- op["content"]["field"]: op["content"]["value"]
72
- for op in cohort_filter["content"]
73
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  card_updates = []
75
  for card_name, card_field in zip(CARD_NAMES, CARD_FIELDS):
76
  # Need to update all cards so use all possible cards as ref
77
- updated_values = []
78
- updated_choices = CARD_2_VALUES[card_name] # reset value
79
- possible_values = set(updated_choices)
80
- if card_field in generated_field_2_values:
81
- # check ref against generated
82
- selected_values = generated_field_2_values.pop(card_field)
83
- unmatched_values = []
84
- for selected_value in selected_values:
85
- if selected_value in possible_values:
86
- updated_values.append(selected_value)
87
- else:
88
- # model hallucination?
89
- unmatched_values.append(selected_value)
90
- if len(unmatched_values) > 0:
91
- generated_field_2_values[card_field] = unmatched_values
92
- card_updates.append(
93
- gr.update(
 
94
  choices=updated_choices,
95
  value=updated_values, # will override existing selections
96
- ),
97
- )
98
- # TODO generated_field_2_values will have remaining, unmatched fields or values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  return card_updates + [gr.update(value=cohort_filter_str)]
100
 
101
 
102
  # Update JSON based on checkbox selections
103
- def update_json_from_checkboxes(*selected_filters_per_card):
104
  ops = []
105
  for card_name, selected_filters in zip(CARD_NAMES, selected_filters_per_card):
106
- if len(selected_filters) > 0:
107
- base_values = []
108
- for selected_value in selected_filters:
109
- base_value = get_base_value(selected_value)
110
- base_values.append(base_value)
111
- card_field = CARD_2_FIELD[card_name]
112
- content = {
113
- "field": card_field,
114
- "value": base_values,
115
- }
116
- op = {
117
- "op": "in",
118
- "content": content,
119
- }
120
- ops.append(op)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  cohort_filter = {
122
  "op": "and",
123
  "content": ops,
@@ -128,23 +286,40 @@ def update_json_from_checkboxes(*selected_filters_per_card):
128
 
129
  # Execute GDC API query and prepare checkbox + case counter updates
130
  # Preserve prior selections
131
- def get_bin_counts(cohort_filter: str, *selected_filters_per_card):
132
  card_2_selections = dict(list(zip(CARD_NAMES, selected_filters_per_card)))
133
 
134
  # Execute GDC API query
135
  params = {
136
- "facets": ",".join([x.replace("cases.", "") for x in CARD_FIELDS]),
137
  "pretty": "false",
138
  "format": "JSON",
139
  "size": 0,
140
  }
141
 
142
  if cohort_filter:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  params["filters"] = cohort_filter
144
 
145
  response = requests.get(GDC_CASES_API_ENDPOINT, params=params)
146
  if not response.ok:
147
- raise Exception(f"API error: {response.status_code}")
148
  temp = response.json()
149
 
150
  # Update checkboxes with bin counts
@@ -153,38 +328,52 @@ def get_bin_counts(cohort_filter: str, *selected_filters_per_card):
153
  for card_name in CARD_NAMES:
154
  card_field = CARD_2_FIELD[card_name]
155
  card_field = card_field.replace("cases.", "")
156
- choice_mapping = {}
157
- updated_choices = []
158
- card_counts = {
159
- x["key"]: x["doc_count"] for x in all_counts[card_field]["buckets"]
160
- }
161
- for value_name in CARD_2_VALUES[card_name]:
162
- if value_name in card_counts:
163
- value_str = prepare_value_count(value_name, card_counts[value_name])
164
- choice_mapping[value_name] = (
165
- value_str # track possible choices to use as values
166
- )
167
- updated_choices.append(value_str)
168
-
169
- # Align prior selections with new choices
170
- updated_values = []
171
- for selected_value in card_2_selections[card_name]:
172
- base_value = get_base_value(selected_value)
173
- if base_value not in choice_mapping:
174
- # Re-add choices which now presumably have 0 counts
175
- choice_mapping[base_value] = prepare_value_count(base_value, 0)
176
- updated_values.append(choice_mapping[base_value])
177
-
178
- card_updates.append(
179
- gr.update(
 
 
 
 
180
  choices=updated_choices,
181
  value=updated_values,
182
- ),
183
- )
 
 
 
 
 
 
 
 
 
 
184
 
185
  case_count = temp["data"]["pagination"]["total"]
186
 
187
- return card_updates + [gr.update(value=f"Case Count: {case_count}")]
188
 
189
 
190
  def prepare_value_count(value, count):
@@ -239,7 +428,7 @@ function download_cases(filter_str) {{
239
  a.click();
240
  document.body.removeChild(a);
241
  URL.revokeObjectURL(url);
242
- button.innerHTML = 'Download Cases';
243
  button.disabled = false;
244
  }})
245
  .catch(error => {{
@@ -268,7 +457,7 @@ with gr.Blocks(css_paths="style.css") as demo:
268
  min_width=150,
269
  )
270
  case_download = gr.Button(
271
- value="Download Cases",
272
  min_width=150,
273
  elem_id="download-btn",
274
  )
@@ -280,15 +469,13 @@ with gr.Blocks(css_paths="style.css") as demo:
280
  inputs=text_input,
281
  )
282
  with gr.Column(scale=4):
283
- json_output = gr.Textbox(
284
  label="Cohort Filter JSON",
285
- info="Use the button in the top right corner to copy the cohort filter JSON.",
286
  value=json.dumps({"op": "and", "content": []}, indent=4),
 
287
  interactive=False,
288
- show_copy_button=True,
289
  show_label=True,
290
  container=True,
291
- lines=5,
292
  elem_id="json-output",
293
  )
294
 
@@ -319,12 +506,38 @@ with gr.Blocks(css_paths="style.css") as demo:
319
  tab_containers.append(tab_container)
320
  with gr.Row(elem_classes=["card-group"]):
321
  for card in tab["cards"]:
322
- filter_card = gr.CheckboxGroup(
323
- choices=[],
324
- label=card["name"],
325
- interactive=True,
326
- elem_classes=["filter-card"],
327
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  filter_cards.append(filter_card)
329
 
330
  # Assign tab buttons to toggle visibility
@@ -344,13 +557,15 @@ with gr.Blocks(css_paths="style.css") as demo:
344
 
345
  # Load initial counts on startup
346
  demo.load(
347
- fn=get_bin_counts,
348
  inputs=[gr.State("")] + filter_cards,
349
  outputs=filter_cards + [case_counter],
350
  )
351
 
352
  # Update checkboxes on filter generation
353
- # Also update JSON based on checkboxes (relying on checkbox update to do this fires multiple times)
 
 
354
  # Also this way it shows the model generated JSON
355
  text_input.submit(
356
  fn=process_query,
@@ -358,19 +573,26 @@ with gr.Blocks(css_paths="style.css") as demo:
358
  outputs=filter_cards + [json_output],
359
  )
360
 
361
- # Update JSON based on checkboxes
362
- # Keep checkboxes to user input otherwise will fire multiple times
363
- # Seems like it should be cyclical, Gradio must have some logic to prevent infinite loops
364
  for filter_card in filter_cards:
365
- filter_card.input(
366
- fn=update_json_from_checkboxes,
367
- inputs=filter_cards,
368
- outputs=json_output,
369
- )
 
 
 
 
 
 
 
370
 
371
  # Update checkboxes after executing filter query
372
  json_output.change(
373
- fn=get_bin_counts,
374
  inputs=[json_output] + filter_cards,
375
  outputs=filter_cards + [case_counter],
376
  )
 
5
  import requests
6
  import spaces
7
  import yaml
8
+ from gradio_rangeslider import RangeSlider
9
  from guidance import json as gen_json
10
  from guidance.models import Transformers
11
  from schema import GDCCohortSchema
12
  from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
13
 
14
+ DEBUG = False
15
  EXAMPLE_INPUTS = [
16
  "bam files for TCGA-BRCA",
17
  "kidney or adrenal gland cancers with alcohol history",
 
34
  CARD_2_VALUES = {
35
  card["name"]: card["values"] for tab in CONFIG["tabs"] for card in tab["cards"]
36
  }
37
+ FACETS_STR = ",".join(
38
+ [
39
+ f.replace("cases.", "")
40
+ for f, n in zip(CARD_FIELDS, CARD_NAMES)
41
+ if not isinstance(CARD_2_VALUES[n], dict)
42
+ # ^ skip range facets in bin counts
43
+ ]
44
+ )
45
+
46
+ if not DEBUG:
47
+ tok = AutoTokenizer.from_pretrained(TOKENIZER_NAME, token=AUTH_TOKEN)
48
+ model = GPT2LMHeadModel.from_pretrained(MODEL_NAME, token=AUTH_TOKEN)
49
+ model = model.to("cuda")
50
+ model = model.eval()
51
 
52
+
53
+ DUMMY_FILTER = json.dumps(
54
+ {
55
+ "op": "and",
56
+ "content": [
57
+ {
58
+ "op": "in",
59
+ "content": {
60
+ "field": "cases.project.project_id",
61
+ "value": ["TCGA-BRCA"],
62
+ },
63
+ },
64
+ {
65
+ "op": "in",
66
+ "content": {
67
+ "field": "cases.project.program.name",
68
+ "value": ["TCGA"],
69
+ },
70
+ },
71
+ {
72
+ "op": "and",
73
+ "content": [
74
+ {
75
+ "op": ">=",
76
+ "content": {
77
+ "field": "cases.diagnoses.age_at_diagnosis",
78
+ "value": 7305,
79
+ },
80
+ },
81
+ {
82
+ "op": "<=",
83
+ "content": {
84
+ "field": "cases.diagnoses.age_at_diagnosis",
85
+ "value": 14610,
86
+ },
87
+ },
88
+ ],
89
+ },
90
+ ],
91
+ },
92
+ indent=4,
93
+ )
94
 
95
 
96
  # Generate cohort filter JSON from free text
97
  @spaces.GPU(duration=15)
98
  def generate_filter(query):
99
+ if DEBUG:
100
+ return DUMMY_FILTER
101
+
102
  set_seed(42)
103
  lm = Transformers(
104
  model=model,
 
112
  cohort_filter = lm["cohort"]
113
  cohort_filter = json.dumps(json.loads(cohort_filter), indent=4)
114
 
 
 
115
  return cohort_filter
116
 
117
 
 
121
  cohort_filter_str = generate_filter(query)
122
  cohort_filter = json.loads(cohort_filter_str)
123
 
124
+ # Pre-flatten nested ops for easier mapping in next step
125
+ flattened_ops = []
126
+ for op in cohort_filter["content"]:
127
+ # nested `and` can only be 1 deep based on schema
128
+ if op["op"] == "and":
129
+ flattened_ops.extend(op["content"])
130
+ else:
131
+ flattened_ops.append(op)
132
+
133
+ # Prepare and validate generated filters
134
+ generated_field_2_values = dict()
135
+ for op in flattened_ops:
136
+ assert op["op"] in [
137
+ "in",
138
+ "=",
139
+ "<",
140
+ ">",
141
+ "<=",
142
+ ">=",
143
+ ], f"Unknown handling for op: {op}"
144
+ content = op["content"]
145
+ field, value = content["field"], content["value"]
146
+ # comparators are ints so can convert to g/lte by add/sub 1
147
+ if op["op"] == "<":
148
+ op["op"] = "<="
149
+ value -= 1
150
+ elif op["op"] == ">":
151
+ op["op"] = ">="
152
+ value += 1
153
+ elif op["op"] == "=":
154
+ # convert = to <=,>= ops so it can be filled into card
155
+ flattened_ops.append(
156
+ {
157
+ "op": "<=",
158
+ "content": content,
159
+ }
160
+ )
161
+ flattened_ops.append(
162
+ {
163
+ "op": ">=",
164
+ "content": content,
165
+ }
166
+ )
167
+ continue
168
+
169
+ if op["op"] != "in":
170
+ # comp ops will duplicate name, disambiguate by appending comp
171
+ field += "_" + op["op"]
172
+
173
+ if field in generated_field_2_values:
174
+ raise ValueError(f"{field} is ambiguously duplicated")
175
+ generated_field_2_values[field] = value
176
+
177
+ # Map filter selections to cards
178
  card_updates = []
179
  for card_name, card_field in zip(CARD_NAMES, CARD_FIELDS):
180
  # Need to update all cards so use all possible cards as ref
181
+ default_values = CARD_2_VALUES[card_name]
182
+ if isinstance(default_values, list):
183
+ updated_values = []
184
+ updated_choices = default_values # reset value
185
+ possible_values = set(updated_choices)
186
+ if card_field in generated_field_2_values:
187
+ # check ref against generated
188
+ selected_values = generated_field_2_values.pop(card_field)
189
+ unmatched_values = []
190
+ for selected_value in selected_values:
191
+ if selected_value in possible_values:
192
+ updated_values.append(selected_value)
193
+ else:
194
+ # model hallucination?
195
+ unmatched_values.append(selected_value)
196
+ if len(unmatched_values) > 0:
197
+ generated_field_2_values[card_field] = unmatched_values
198
+ update_obj = gr.update(
199
  choices=updated_choices,
200
  value=updated_values, # will override existing selections
201
+ )
202
+ elif isinstance(default_values, dict):
203
+ # range-slider, maybe other options in the future?
204
+ assert (
205
+ default_values["type"] == "range"
206
+ ), f"Expected range slider for card {card_name}"
207
+ # Need to handle if model outputs flat range or nested range
208
+ card_field_gte = card_field + "_>="
209
+ card_field_lte = card_field + "_<="
210
+ _min = default_values["min"]
211
+ _max = default_values["max"]
212
+ lo = generated_field_2_values.pop(card_field_gte, _min)
213
+ hi = generated_field_2_values.pop(card_field_lte, _max)
214
+ assert (
215
+ lo >= _min
216
+ ), f"Generated lower bound ({lo}) less than minimum allowable value ({_min})"
217
+ assert (
218
+ hi <= _max
219
+ ), f"Generated upper bound ({hi}) greater than maximum allowable value ({_max})"
220
+ update_obj = gr.update(value=(lo, hi))
221
+ else:
222
+ raise ValueError(f"Unknown values for card {card_name}")
223
+ card_updates.append(update_obj)
224
+ # generated_field_2_values will have remaining, unmatched values
225
+ # edit: updated json schema with enumerated fields prevents unmatched fields
226
+ print(f"Unmatched values in model generation: {generated_field_2_values}")
227
  return card_updates + [gr.update(value=cohort_filter_str)]
228
 
229
 
230
  # Update JSON based on checkbox selections
231
+ def update_json_from_cards(*selected_filters_per_card):
232
  ops = []
233
  for card_name, selected_filters in zip(CARD_NAMES, selected_filters_per_card):
234
+ # use the default values to determine card type (checkbox, range, etc)
235
+ default_values = CARD_2_VALUES[card_name]
236
+ if isinstance(default_values, list):
237
+ # checkbox
238
+ if len(selected_filters) > 0:
239
+ base_values = []
240
+ for selected_value in selected_filters:
241
+ base_value = get_base_value(selected_value)
242
+ base_values.append(base_value)
243
+ content = {
244
+ "field": CARD_2_FIELD[card_name],
245
+ "value": base_values,
246
+ }
247
+ op = {
248
+ "op": "in",
249
+ "content": content,
250
+ }
251
+ ops.append(op)
252
+ elif isinstance(default_values, dict):
253
+ # range-slider, maybe other options in the future?
254
+ assert (
255
+ default_values["type"] == "range"
256
+ ), f"Expected range slider for card {card_name}"
257
+ lo, hi = selected_filters
258
+ subops = []
259
+ for val, limit, comp in [
260
+ (lo, default_values["min"], ">="),
261
+ (hi, default_values["max"], "<="),
262
+ ]:
263
+ # only add range filter if not default
264
+ if val == limit:
265
+ continue
266
+ subop = {
267
+ "op": comp,
268
+ "content": {
269
+ "field": CARD_2_FIELD[card_name],
270
+ "value": int(val),
271
+ },
272
+ }
273
+ subops.append(subop)
274
+ if len(subops) > 0:
275
+ ops.append({"op": "and", "content": subops})
276
+ else:
277
+ raise ValueError(f"Unknown values for card {card_name}")
278
+
279
  cohort_filter = {
280
  "op": "and",
281
  "content": ops,
 
286
 
287
  # Execute GDC API query and prepare checkbox + case counter updates
288
  # Preserve prior selections
289
+ def update_cards_with_counts(cohort_filter: str, *selected_filters_per_card):
290
  card_2_selections = dict(list(zip(CARD_NAMES, selected_filters_per_card)))
291
 
292
  # Execute GDC API query
293
  params = {
294
+ "facets": FACETS_STR,
295
  "pretty": "false",
296
  "format": "JSON",
297
  "size": 0,
298
  }
299
 
300
  if cohort_filter:
301
+ # patch for range selectors which use nested `and`
302
+ # seems `facets` and nested `and` don't play well together
303
+ # so flatten direct nested `and` for query execution only
304
+ # this is equivalent since our top-level is always `and`
305
+ # keeping nested `and` for presentation and model generations though
306
+ temp = json.loads(cohort_filter)
307
+ ops = temp["content"]
308
+ new_ops = []
309
+ for op in ops:
310
+ # assumes no deeper than single level nesting
311
+ if op["op"] == "and":
312
+ for subop in op["content"]:
313
+ new_ops.append(subop)
314
+ else:
315
+ new_ops.append(op)
316
+ temp["content"] = new_ops
317
+ cohort_filter = json.dumps(temp)
318
  params["filters"] = cohort_filter
319
 
320
  response = requests.get(GDC_CASES_API_ENDPOINT, params=params)
321
  if not response.ok:
322
+ raise Exception(f"API error: {response.status_code}\n{response.json()}")
323
  temp = response.json()
324
 
325
  # Update checkboxes with bin counts
 
328
  for card_name in CARD_NAMES:
329
  card_field = CARD_2_FIELD[card_name]
330
  card_field = card_field.replace("cases.", "")
331
+ card_values = CARD_2_VALUES[card_name]
332
+ if isinstance(card_values, list):
333
+ # value checkboxes
334
+ choice_mapping = {}
335
+ updated_choices = []
336
+ card_counts = {
337
+ x["key"]: x["doc_count"] for x in all_counts[card_field]["buckets"]
338
+ }
339
+ for value_name in card_values:
340
+ if value_name in card_counts:
341
+ value_str = prepare_value_count(
342
+ value_name,
343
+ card_counts[value_name],
344
+ )
345
+ # track possible choices to use as values
346
+ choice_mapping[value_name] = value_str
347
+ updated_choices.append(value_str)
348
+
349
+ # Align prior selections with new choices
350
+ updated_values = []
351
+ for selected_value in card_2_selections[card_name]:
352
+ base_value = get_base_value(selected_value)
353
+ if base_value not in choice_mapping:
354
+ # Re-add choices which now presumably have 0 counts
355
+ choice_mapping[base_value] = prepare_value_count(base_value, 0)
356
+ updated_values.append(choice_mapping[base_value])
357
+
358
+ update_obj = gr.update(
359
  choices=updated_choices,
360
  value=updated_values,
361
+ )
362
+ elif isinstance(card_values, dict):
363
+ # range-slider, maybe other options in the future?
364
+ assert (
365
+ card_values["type"] == "range"
366
+ ), f"Expected range slider for card {card_name}"
367
+ # for range slider, nothing to actually do!
368
+ update_obj = gr.update()
369
+ else:
370
+ raise ValueError(f"Unknown values for card {card_name}")
371
+
372
+ card_updates.append(update_obj)
373
 
374
  case_count = temp["data"]["pagination"]["total"]
375
 
376
+ return card_updates + [gr.update(value=f"{case_count} Cases")]
377
 
378
 
379
  def prepare_value_count(value, count):
 
428
  a.click();
429
  document.body.removeChild(a);
430
  URL.revokeObjectURL(url);
431
+ button.innerHTML = 'Export to GDC';
432
  button.disabled = false;
433
  }})
434
  .catch(error => {{
 
457
  min_width=150,
458
  )
459
  case_download = gr.Button(
460
+ value="Export to GDC",
461
  min_width=150,
462
  elem_id="download-btn",
463
  )
 
469
  inputs=text_input,
470
  )
471
  with gr.Column(scale=4):
472
+ json_output = gr.Code(
473
  label="Cohort Filter JSON",
 
474
  value=json.dumps({"op": "and", "content": []}, indent=4),
475
+ language="json",
476
  interactive=False,
 
477
  show_label=True,
478
  container=True,
 
479
  elem_id="json-output",
480
  )
481
 
 
506
  tab_containers.append(tab_container)
507
  with gr.Row(elem_classes=["card-group"]):
508
  for card in tab["cards"]:
509
+ if isinstance(card["values"], list):
510
+ filter_card = gr.CheckboxGroup(
511
+ choices=[],
512
+ label=card["name"],
513
+ interactive=True,
514
+ elem_classes=["filter-card"],
515
+ )
516
+ else:
517
+ # values is a dictionary and defines some meta options
518
+ metaopts = card["values"]
519
+ assert (
520
+ "type" in metaopts
521
+ and metaopts["type"] == "range"
522
+ and all(
523
+ k in metaopts
524
+ for k in [
525
+ "min",
526
+ "max",
527
+ ]
528
+ )
529
+ ), f"Unknown meta options for {card['name']}"
530
+ info = "Inclusive range"
531
+ if "unit" in metaopts:
532
+ info += f", units in {metaopts['unit']}"
533
+ filter_card = RangeSlider(
534
+ label=card["name"],
535
+ info=info,
536
+ minimum=metaopts["min"],
537
+ maximum=metaopts["max"],
538
+ step=1, # assume integer
539
+ )
540
+
541
  filter_cards.append(filter_card)
542
 
543
  # Assign tab buttons to toggle visibility
 
557
 
558
  # Load initial counts on startup
559
  demo.load(
560
+ fn=update_cards_with_counts,
561
  inputs=[gr.State("")] + filter_cards,
562
  outputs=filter_cards + [case_counter],
563
  )
564
 
565
  # Update checkboxes on filter generation
566
+ # Also update JSON based on checkboxes
567
+ # - relying on checkbox update to do this fires multiple times
568
+ # - also propagates new model selections after json is updated
569
  # Also this way it shows the model generated JSON
570
  text_input.submit(
571
  fn=process_query,
 
573
  outputs=filter_cards + [json_output],
574
  )
575
 
576
+ # Update JSON based on cards
577
+ # Keep user `input` event listener (vs `change`) otherwise will fire multiple times
578
+ # Seems like otherwise it should be cyclical, Gradio must have some logic to prevent infinite loops
579
  for filter_card in filter_cards:
580
+ if isinstance(filter_card, RangeSlider):
581
+ filter_card.release(
582
+ fn=update_json_from_cards,
583
+ inputs=filter_cards,
584
+ outputs=json_output,
585
+ )
586
+ else:
587
+ filter_card.input(
588
+ fn=update_json_from_cards,
589
+ inputs=filter_cards,
590
+ outputs=json_output,
591
+ )
592
 
593
  # Update checkboxes after executing filter query
594
  json_output.change(
595
+ fn=update_cards_with_counts,
596
  inputs=[json_output] + filter_cards,
597
  outputs=filter_cards + [case_counter],
598
  )
config.yaml CHANGED
@@ -670,34 +670,13 @@ tabs:
670
  - not hispanic or latino
671
  - not reported
672
  - unknown
673
- # - name: Age at Diagnosis
674
- # field: cases.diagnoses.age_at_diagnosis
675
- # values:
676
- # type: range+checkboxes
677
- # min: 0
678
- # max: 32872
679
- # units: days
680
- # min_label: From
681
- # max_label: To
682
- # popular:
683
- # - label: "\u2265 18,263 to < 21,915 days"
684
- # min: 18263
685
- # max: 21915
686
- # - label: "\u2265 14,610 to < 18,263 days"
687
- # min: 14610
688
- # max: 18263
689
- # - label: "\u2265 10,958 to < 14,610 days"
690
- # min: 10958
691
- # max: 14610
692
- # - label: "\u2265 7,305 to < 10,958 days"
693
- # min: 7305
694
- # max: 10958
695
- # - label: "\u2265 3,653 to < 7,305 days"
696
- # min: 3653
697
- # max: 7305
698
- # - label: "\u2265 0 to < 3,653 days"
699
- # min: 0
700
- # max: 3653
701
  - name: Vital Status
702
  field: cases.demographic.vital_status
703
  values:
@@ -1013,34 +992,12 @@ tabs:
1013
  - 9989/3
1014
  - not reported
1015
  - unknown
1016
- # - name: Year Of Diagnosis
1017
- # field: cases.diagnoses.year_of_diagnosis
1018
- # values:
1019
- # type: range+checkboxes
1020
- # min: 1900
1021
- # max: 2050
1022
- # units: years
1023
- # min_label: From
1024
- # max_label: To
1025
- # popular:
1026
- # - label: "\u2265 2020 to < 2030"
1027
- # min: 2020
1028
- # max: 2030
1029
- # - label: "\u2265 2010 to < 2020"
1030
- # min: 2010
1031
- # max: 2020
1032
- # - label: "\u2265 2000 to < 2010"
1033
- # min: 2000
1034
- # max: 2010
1035
- # - label: "\u2265 1990 to < 2000"
1036
- # min: 1990
1037
- # max: 2000
1038
- # - label: "\u2265 1980 to < 1990"
1039
- # min: 1980
1040
- # max: 1990
1041
- # - label: "\u2265 1970 to < 1980"
1042
- # min: 1970
1043
- # max: 1980
1044
  - name: Site Of Resection Or Biopsy
1045
  field: cases.diagnoses.site_of_resection_or_biopsy
1046
  values:
@@ -1889,51 +1846,25 @@ tabs:
1889
  - not reported
1890
  - smoking history not documented
1891
  - unknown
1892
- # - name: Cigarettes Per Day
1893
- # field: cases.exposures.cigarettes_per_day
1894
- # values:
1895
- # type: range
1896
- # min: 0
1897
- # max: 99999
1898
- # unit: days
1899
- # min_label: From
1900
- # max_label: To
1901
- # - name: Pack Years Smoked
1902
- # field: cases.exposures.pack_years_smoked
1903
- # values:
1904
- # type: range
1905
- # min: 0
1906
- # max: 999999
1907
- # unit: years
1908
- # min_label: From
1909
- # max_label: To
1910
- # - name: Tobacco Smoking Onset Year
1911
- # field: cases.exposures.tobacco_smoking_onset_year
1912
- # values:
1913
- # type: range+checkboxes
1914
- # min: 1950
1915
- # max: 2050
1916
- # min_label: From
1917
- # max_label: To
1918
- # popular:
1919
- # - label: '>= 1970 to < 1980 yrs'
1920
- # min: 1970
1921
- # max: 1980
1922
- # - label: '>= 1960 to < 1970 yrs'
1923
- # min: 1960
1924
- # max: 1970
1925
- # - label: '>= 1950 to < 1960 yrs'
1926
- # min: 1950
1927
- # max: 1960
1928
- # - label: '>= 1940 to < 1950 yrs'
1929
- # min: 1940
1930
- # max: 1950
1931
- # - label: '>= 1930 to < 1940 yrs'
1932
- # min: 1930
1933
- # max: 1940
1934
- # - label: '>= 1920 to < 1930 yrs'
1935
- # min: 1920
1936
- # max: 1930
1937
  - name: Biospecimen
1938
  cards:
1939
  - name: Tissue Type
 
670
  - not hispanic or latino
671
  - not reported
672
  - unknown
673
+ - name: Age at Diagnosis
674
+ field: cases.diagnoses.age_at_diagnosis
675
+ values:
676
+ type: range
677
+ min: 0
678
+ max: 32872
679
+ unit: days
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  - name: Vital Status
681
  field: cases.demographic.vital_status
682
  values:
 
992
  - 9989/3
993
  - not reported
994
  - unknown
995
+ - name: Year Of Diagnosis
996
+ field: cases.diagnoses.year_of_diagnosis
997
+ values:
998
+ type: range
999
+ min: 1900
1000
+ max: 2050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1001
  - name: Site Of Resection Or Biopsy
1002
  field: cases.diagnoses.site_of_resection_or_biopsy
1003
  values:
 
1846
  - not reported
1847
  - smoking history not documented
1848
  - unknown
1849
+ - name: Cigarettes Per Day
1850
+ field: cases.exposures.cigarettes_per_day
1851
+ values:
1852
+ type: range
1853
+ min: 0
1854
+ max: 99999
1855
+ - name: Pack Years Smoked
1856
+ field: cases.exposures.pack_years_smoked
1857
+ values:
1858
+ type: range
1859
+ min: 0
1860
+ max: 999999
1861
+ unit: pack years
1862
+ - name: Tobacco Smoking Onset Year
1863
+ field: cases.exposures.tobacco_smoking_onset_year
1864
+ values:
1865
+ type: range
1866
+ min: 1950
1867
+ max: 2050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1868
  - name: Biospecimen
1869
  cards:
1870
  - name: Tissue Type
requirements.txt CHANGED
@@ -2,4 +2,5 @@ torch==2.5.1
2
  transformers==4.50.0
3
  gradio==5.35.0
4
  guidance==0.2.4
 
5
  spaces
 
2
  transformers==4.50.0
3
  gradio==5.35.0
4
  guidance==0.2.4
5
+ gradio_rangeslider
6
  spaces
schema.py CHANGED
@@ -7,6 +7,9 @@ from pydantic import BaseModel, Field, StringConstraints
7
 
8
  class InnerOp(Enum):
9
  IN = "in"
 
 
 
10
  EQ = "="
11
  LT = "<"
12
  GT = ">"
@@ -77,9 +80,13 @@ class InnerStrContent(BaseModel):
77
 
78
  class Inner(BaseModel):
79
  op: InnerOp
 
 
 
 
 
80
  content: (
81
- InnerStrContent
82
- | InnerAgeDxContent
83
  | InnerYearDxContent
84
  | InnerCigDayContent
85
  | InnerPackYrContent
@@ -89,9 +96,9 @@ class Inner(BaseModel):
89
 
90
  class Middle(BaseModel):
91
  op: MiddleOp
92
- content: list[Inner]
93
 
94
 
95
  class GDCCohortSchema(BaseModel):
96
  op: OuterOp
97
- content: list[Inner | Middle]
 
7
 
8
  class InnerOp(Enum):
9
  IN = "in"
10
+
11
+
12
+ class InnerRangeOp(Enum):
13
  EQ = "="
14
  LT = "<"
15
  GT = ">"
 
80
 
81
  class Inner(BaseModel):
82
  op: InnerOp
83
+ content: InnerStrContent
84
+
85
+
86
+ class InnerRange(BaseModel):
87
+ op: InnerRangeOp
88
  content: (
89
+ InnerAgeDxContent
 
90
  | InnerYearDxContent
91
  | InnerCigDayContent
92
  | InnerPackYrContent
 
96
 
97
  class Middle(BaseModel):
98
  op: MiddleOp
99
+ content: list[Inner | InnerRange]
100
 
101
 
102
  class GDCCohortSchema(BaseModel):
103
  op: OuterOp
104
+ content: list[Middle | Inner | InnerRange]
style.css CHANGED
@@ -19,10 +19,14 @@
19
  font-size: calc(var(--block-title-text-size) + 2px);
20
  }
21
 
22
- #json-output textarea {
23
  height: 96px !important;
24
  }
25
 
 
 
 
 
26
  .card-group, .card-group > div {
27
  background-color: transparent;
28
  border: 0px;
@@ -77,7 +81,7 @@
77
  border: 0;
78
  }
79
 
80
- .filter-card::-webkit-scrollbar, #json-output textarea::-webkit-scrollbar {
81
  -webkit-appearance: none;
82
  width: 7px;
83
  }
@@ -85,7 +89,7 @@
85
  border-radius: 4px;
86
  background-color: rgba(150,150,150,.5);
87
  -webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
88
- }
89
 
90
  @keyframes spin {
91
  to {
 
19
  font-size: calc(var(--block-title-text-size) + 2px);
20
  }
21
 
22
+ #json-output {
23
  height: 96px !important;
24
  }
25
 
26
+ #json-output > div:last-of-type {
27
+ height: 80% !important;
28
+ }
29
+
30
  .card-group, .card-group > div {
31
  background-color: transparent;
32
  border: 0px;
 
81
  border: 0;
82
  }
83
 
84
+ /* .filter-card::-webkit-scrollbar, #json-output textarea::-webkit-scrollbar {
85
  -webkit-appearance: none;
86
  width: 7px;
87
  }
 
89
  border-radius: 4px;
90
  background-color: rgba(150,150,150,.5);
91
  -webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
92
+ } */
93
 
94
  @keyframes spin {
95
  to {