Spaces:

uc-ctds
/

GDC-Cohort-Copilot

Running on Zero

App Files Files Community

songs1 committed on Jul 11, 2025

Commit

314ce90

1 Parent(s): 62c31e6

add range sliders

Browse files

Files changed (5) hide show

app.py +322 -100
config.yaml +32 -101
requirements.txt +1 -0
schema.py +11 -4
style.css +7 -3

app.py CHANGED Viewed

@@ -5,11 +5,13 @@ import gradio as gr
 import requests
 import spaces
 import yaml
 from guidance import json as gen_json
 from guidance.models import Transformers
 from schema import GDCCohortSchema
 from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
 EXAMPLE_INPUTS = [
     "bam files for TCGA-BRCA",
     "kidney or adrenal gland cancers with alcohol history",
@@ -32,16 +34,71 @@ CARD_2_FIELD = dict(list(zip(CARD_NAMES, CARD_FIELDS)))
 CARD_2_VALUES = {
     card["name"]: card["values"] for tab in CONFIG["tabs"] for card in tab["cards"]
 }
-tok = AutoTokenizer.from_pretrained(TOKENIZER_NAME, token=AUTH_TOKEN)
-model = GPT2LMHeadModel.from_pretrained(MODEL_NAME, token=AUTH_TOKEN)
-model = model.to("cuda")
-model = model.eval()
 # Generate cohort filter JSON from free text
 @spaces.GPU(duration=15)
 def generate_filter(query):
     set_seed(42)
     lm = Transformers(
         model=model,
@@ -55,8 +112,6 @@ def generate_filter(query):
     cohort_filter = lm["cohort"]
     cohort_filter = json.dumps(json.loads(cohort_filter), indent=4)
-    # DUMMY FILTER
-    # cohort_filter = json.dumps({"op":"and","content":[{"op":"in","content":{"field":"cases.project.project_id","value":["TCGA-BRCA"]}}, {"op":"in","content":{"field":"cases.project.program.name","value":["TCGA"]}}]})
     return cohort_filter
@@ -66,58 +121,161 @@ def process_query(query):
     cohort_filter_str = generate_filter(query)
     cohort_filter = json.loads(cohort_filter_str)
-    # Map filter selections to checkboxes
-    generated_field_2_values = {
-        op["content"]["field"]: op["content"]["value"]
-        for op in cohort_filter["content"]
-    }
     card_updates = []
     for card_name, card_field in zip(CARD_NAMES, CARD_FIELDS):
         # Need to update all cards so use all possible cards as ref
-        updated_values = []
-        updated_choices = CARD_2_VALUES[card_name]  # reset value
-        possible_values = set(updated_choices)
-        if card_field in generated_field_2_values:
-            # check ref against generated
-            selected_values = generated_field_2_values.pop(card_field)
-            unmatched_values = []
-            for selected_value in selected_values:
-                if selected_value in possible_values:
-                    updated_values.append(selected_value)
-                else:
-                    # model hallucination?
-                    unmatched_values.append(selected_value)
-            if len(unmatched_values) > 0:
-                generated_field_2_values[card_field] = unmatched_values
-        card_updates.append(
-            gr.update(
                 choices=updated_choices,
                 value=updated_values,  # will override existing selections
-            ),
-        )
-    # TODO generated_field_2_values will have remaining, unmatched fields or values
     return card_updates + [gr.update(value=cohort_filter_str)]
 # Update JSON based on checkbox selections
-def update_json_from_checkboxes(*selected_filters_per_card):
     ops = []
     for card_name, selected_filters in zip(CARD_NAMES, selected_filters_per_card):
-        if len(selected_filters) > 0:
-            base_values = []
-            for selected_value in selected_filters:
-                base_value = get_base_value(selected_value)
-                base_values.append(base_value)
-            card_field = CARD_2_FIELD[card_name]
-            content = {
-                "field": card_field,
-                "value": base_values,
-            }
-            op = {
-                "op": "in",
-                "content": content,
-            }
-            ops.append(op)
     cohort_filter = {
         "op": "and",
         "content": ops,
@@ -128,23 +286,40 @@ def update_json_from_checkboxes(*selected_filters_per_card):
 # Execute GDC API query and prepare checkbox + case counter updates
 # Preserve prior selections
-def get_bin_counts(cohort_filter: str, *selected_filters_per_card):
     card_2_selections = dict(list(zip(CARD_NAMES, selected_filters_per_card)))
     # Execute GDC API query
     params = {
-        "facets": ",".join([x.replace("cases.", "") for x in CARD_FIELDS]),
         "pretty": "false",
         "format": "JSON",
         "size": 0,
     }
     if cohort_filter:
         params["filters"] = cohort_filter
     response = requests.get(GDC_CASES_API_ENDPOINT, params=params)
     if not response.ok:
-        raise Exception(f"API error: {response.status_code}")
     temp = response.json()
     # Update checkboxes with bin counts
@@ -153,38 +328,52 @@ def get_bin_counts(cohort_filter: str, *selected_filters_per_card):
     for card_name in CARD_NAMES:
         card_field = CARD_2_FIELD[card_name]
         card_field = card_field.replace("cases.", "")
-        choice_mapping = {}
-        updated_choices = []
-        card_counts = {
-            x["key"]: x["doc_count"] for x in all_counts[card_field]["buckets"]
-        }
-        for value_name in CARD_2_VALUES[card_name]:
-            if value_name in card_counts:
-                value_str = prepare_value_count(value_name, card_counts[value_name])
-                choice_mapping[value_name] = (
-                    value_str  # track possible choices to use as values
-                )
-                updated_choices.append(value_str)
-        # Align prior selections with new choices
-        updated_values = []
-        for selected_value in card_2_selections[card_name]:
-            base_value = get_base_value(selected_value)
-            if base_value not in choice_mapping:
-                # Re-add choices which now presumably have 0 counts
-                choice_mapping[base_value] = prepare_value_count(base_value, 0)
-            updated_values.append(choice_mapping[base_value])
-        card_updates.append(
-            gr.update(
                 choices=updated_choices,
                 value=updated_values,
-            ),
-        )
     case_count = temp["data"]["pagination"]["total"]
-    return card_updates + [gr.update(value=f"Case Count: {case_count}")]
 def prepare_value_count(value, count):
@@ -239,7 +428,7 @@ function download_cases(filter_str) {{
         a.click();
         document.body.removeChild(a);
         URL.revokeObjectURL(url);
-        button.innerHTML = 'Download Cases';
         button.disabled = false;
     }})
     .catch(error => {{
@@ -268,7 +457,7 @@ with gr.Blocks(css_paths="style.css") as demo:
                 min_width=150,
             )
             case_download = gr.Button(
-                value="Download Cases",
                 min_width=150,
                 elem_id="download-btn",
             )
@@ -280,15 +469,13 @@ with gr.Blocks(css_paths="style.css") as demo:
                 inputs=text_input,
             )
         with gr.Column(scale=4):
-            json_output = gr.Textbox(
                 label="Cohort Filter JSON",
-                info="Use the button in the top right corner to copy the cohort filter JSON.",
                 value=json.dumps({"op": "and", "content": []}, indent=4),
                 interactive=False,
-                show_copy_button=True,
                 show_label=True,
                 container=True,
-                lines=5,
                 elem_id="json-output",
             )
@@ -319,12 +506,38 @@ with gr.Blocks(css_paths="style.css") as demo:
                 tab_containers.append(tab_container)
                 with gr.Row(elem_classes=["card-group"]):
                     for card in tab["cards"]:
-                        filter_card = gr.CheckboxGroup(
-                            choices=[],
-                            label=card["name"],
-                            interactive=True,
-                            elem_classes=["filter-card"],
-                        )
                         filter_cards.append(filter_card)
     # Assign tab buttons to toggle visibility
@@ -344,13 +557,15 @@ with gr.Blocks(css_paths="style.css") as demo:
     # Load initial counts on startup
     demo.load(
-        fn=get_bin_counts,
         inputs=[gr.State("")] + filter_cards,
         outputs=filter_cards + [case_counter],
     )
     # Update checkboxes on filter generation
-    # Also update JSON based on checkboxes (relying on checkbox update to do this fires multiple times)
     # Also this way it shows the model generated JSON
     text_input.submit(
         fn=process_query,
@@ -358,19 +573,26 @@ with gr.Blocks(css_paths="style.css") as demo:
         outputs=filter_cards + [json_output],
     )
-    # Update JSON based on checkboxes
-    # Keep checkboxes to user input otherwise will fire multiple times
-    # Seems like it should be cyclical, Gradio must have some logic to prevent infinite loops
     for filter_card in filter_cards:
-        filter_card.input(
-            fn=update_json_from_checkboxes,
-            inputs=filter_cards,
-            outputs=json_output,
-        )
     # Update checkboxes after executing filter query
     json_output.change(
-        fn=get_bin_counts,
         inputs=[json_output] + filter_cards,
         outputs=filter_cards + [case_counter],
     )

 import requests
 import spaces
 import yaml
+from gradio_rangeslider import RangeSlider
 from guidance import json as gen_json
 from guidance.models import Transformers
 from schema import GDCCohortSchema
 from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
+DEBUG = False
 EXAMPLE_INPUTS = [
     "bam files for TCGA-BRCA",
     "kidney or adrenal gland cancers with alcohol history",
 CARD_2_VALUES = {
     card["name"]: card["values"] for tab in CONFIG["tabs"] for card in tab["cards"]
 }
+FACETS_STR = ",".join(
+    [
+        f.replace("cases.", "")
+        for f, n in zip(CARD_FIELDS, CARD_NAMES)
+        if not isinstance(CARD_2_VALUES[n], dict)
+        # ^ skip range facets in bin counts
+    ]
+)
+if not DEBUG:
+    tok = AutoTokenizer.from_pretrained(TOKENIZER_NAME, token=AUTH_TOKEN)
+    model = GPT2LMHeadModel.from_pretrained(MODEL_NAME, token=AUTH_TOKEN)
+    model = model.to("cuda")
+    model = model.eval()
+DUMMY_FILTER = json.dumps(
+    {
+        "op": "and",
+        "content": [
+            {
+                "op": "in",
+                "content": {
+                    "field": "cases.project.project_id",
+                    "value": ["TCGA-BRCA"],
+                },
+            },
+            {
+                "op": "in",
+                "content": {
+                    "field": "cases.project.program.name",
+                    "value": ["TCGA"],
+                },
+            },
+            {
+                "op": "and",
+                "content": [
+                    {
+                        "op": ">=",
+                        "content": {
+                            "field": "cases.diagnoses.age_at_diagnosis",
+                            "value": 7305,
+                        },
+                    },
+                    {
+                        "op": "<=",
+                        "content": {
+                            "field": "cases.diagnoses.age_at_diagnosis",
+                            "value": 14610,
+                        },
+                    },
+                ],
+            },
+        ],
+    },
+    indent=4,
+)
 # Generate cohort filter JSON from free text
 @spaces.GPU(duration=15)
 def generate_filter(query):
+    if DEBUG:
+        return DUMMY_FILTER
     set_seed(42)
     lm = Transformers(
         model=model,
     cohort_filter = lm["cohort"]
     cohort_filter = json.dumps(json.loads(cohort_filter), indent=4)
     return cohort_filter
     cohort_filter_str = generate_filter(query)
     cohort_filter = json.loads(cohort_filter_str)
+    # Pre-flatten nested ops for easier mapping in next step
+    flattened_ops = []
+    for op in cohort_filter["content"]:
+        # nested `and` can only be 1 deep based on schema
+        if op["op"] == "and":
+            flattened_ops.extend(op["content"])
+        else:
+            flattened_ops.append(op)
+    # Prepare and validate generated filters
+    generated_field_2_values = dict()
+    for op in flattened_ops:
+        assert op["op"] in [
+            "in",
+            "=",
+            "<",
+            ">",
+            "<=",
+            ">=",
+        ], f"Unknown handling for op: {op}"
+        content = op["content"]
+        field, value = content["field"], content["value"]
+        # comparison values are ints, so strict `<` / `>` can be converted to `<=` / `>=` by subtracting / adding 1
+        if op["op"] == "<":
+            op["op"] = "<="
+            value -= 1
+        elif op["op"] == ">":
+            op["op"] = ">="
+            value += 1
+        elif op["op"] == "=":
+            # convert = to <=,>= ops so it can be filled into card
+            flattened_ops.append(
+                {
+                    "op": "<=",
+                    "content": content,
+                }
+            )
+            flattened_ops.append(
+                {
+                    "op": ">=",
+                    "content": content,
+                }
+            )
+            continue
+        if op["op"] != "in":
+            # comp ops will duplicate name, disambiguate by appending comp
+            field += "_" + op["op"]
+        if field in generated_field_2_values:
+            raise ValueError(f"{field} is ambiguously duplicated")
+        generated_field_2_values[field] = value
+    # Map filter selections to cards
     card_updates = []
     for card_name, card_field in zip(CARD_NAMES, CARD_FIELDS):
         # Need to update all cards so use all possible cards as ref
+        default_values = CARD_2_VALUES[card_name]
+        if isinstance(default_values, list):
+            updated_values = []
+            updated_choices = default_values  # reset value
+            possible_values = set(updated_choices)
+            if card_field in generated_field_2_values:
+                # check ref against generated
+                selected_values = generated_field_2_values.pop(card_field)
+                unmatched_values = []
+                for selected_value in selected_values:
+                    if selected_value in possible_values:
+                        updated_values.append(selected_value)
+                    else:
+                        # model hallucination?
+                        unmatched_values.append(selected_value)
+                if len(unmatched_values) > 0:
+                    generated_field_2_values[card_field] = unmatched_values
+            update_obj = gr.update(
                 choices=updated_choices,
                 value=updated_values,  # will override existing selections
+            )
+        elif isinstance(default_values, dict):
+            # range-slider, maybe other options in the future?
+            assert (
+                default_values["type"] == "range"
+            ), f"Expected range slider for card {card_name}"
+            # Need to handle if model outputs flat range or nested range
+            card_field_gte = card_field + "_>="
+            card_field_lte = card_field + "_<="
+            _min = default_values["min"]
+            _max = default_values["max"]
+            lo = generated_field_2_values.pop(card_field_gte, _min)
+            hi = generated_field_2_values.pop(card_field_lte, _max)
+            assert (
+                lo >= _min
+            ), f"Generated lower bound ({lo}) less than minimum allowable value ({_min})"
+            assert (
+                hi <= _max
+            ), f"Generated upper bound ({hi}) greater than maximum allowable value ({_max})"
+            update_obj = gr.update(value=(lo, hi))
+        else:
+            raise ValueError(f"Unknown values for card {card_name}")
+        card_updates.append(update_obj)
+    # generated_field_2_values will have remaining, unmatched values
+    # edit: updated json schema with enumerated fields prevents unmatched fields
+    print(f"Unmatched values in model generation: {generated_field_2_values}")
     return card_updates + [gr.update(value=cohort_filter_str)]
 # Update JSON based on checkbox selections
+def update_json_from_cards(*selected_filters_per_card):
     ops = []
     for card_name, selected_filters in zip(CARD_NAMES, selected_filters_per_card):
+        # use the default values to determine card type (checkbox, range, etc)
+        default_values = CARD_2_VALUES[card_name]
+        if isinstance(default_values, list):
+            # checkbox
+            if len(selected_filters) > 0:
+                base_values = []
+                for selected_value in selected_filters:
+                    base_value = get_base_value(selected_value)
+                    base_values.append(base_value)
+                content = {
+                    "field": CARD_2_FIELD[card_name],
+                    "value": base_values,
+                }
+                op = {
+                    "op": "in",
+                    "content": content,
+                }
+                ops.append(op)
+        elif isinstance(default_values, dict):
+            # range-slider, maybe other options in the future?
+            assert (
+                default_values["type"] == "range"
+            ), f"Expected range slider for card {card_name}"
+            lo, hi = selected_filters
+            subops = []
+            for val, limit, comp in [
+                (lo, default_values["min"], ">="),
+                (hi, default_values["max"], "<="),
+            ]:
+                # only add range filter if not default
+                if val == limit:
+                    continue
+                subop = {
+                    "op": comp,
+                    "content": {
+                        "field": CARD_2_FIELD[card_name],
+                        "value": int(val),
+                    },
+                }
+                subops.append(subop)
+            if len(subops) > 0:
+                ops.append({"op": "and", "content": subops})
+        else:
+            raise ValueError(f"Unknown values for card {card_name}")
     cohort_filter = {
         "op": "and",
         "content": ops,
 # Execute GDC API query and prepare checkbox + case counter updates
 # Preserve prior selections
+def update_cards_with_counts(cohort_filter: str, *selected_filters_per_card):
     card_2_selections = dict(list(zip(CARD_NAMES, selected_filters_per_card)))
     # Execute GDC API query
     params = {
+        "facets": FACETS_STR,
         "pretty": "false",
         "format": "JSON",
         "size": 0,
     }
     if cohort_filter:
+        # patch for range selectors which use nested `and`
+        # seems `facets` and nested `and` don't play well together
+        # so flatten direct nested `and` for query execution only
+        # this is equivalent since our top-level is always `and`
+        # keeping nested `and` for presentation and model generations though
+        temp = json.loads(cohort_filter)
+        ops = temp["content"]
+        new_ops = []
+        for op in ops:
+            # assumes no deeper than single level nesting
+            if op["op"] == "and":
+                for subop in op["content"]:
+                    new_ops.append(subop)
+            else:
+                new_ops.append(op)
+        temp["content"] = new_ops
+        cohort_filter = json.dumps(temp)
         params["filters"] = cohort_filter
     response = requests.get(GDC_CASES_API_ENDPOINT, params=params)
     if not response.ok:
+        raise Exception(f"API error: {response.status_code}\n{response.json()}")
     temp = response.json()
     # Update checkboxes with bin counts
     for card_name in CARD_NAMES:
         card_field = CARD_2_FIELD[card_name]
         card_field = card_field.replace("cases.", "")
+        card_values = CARD_2_VALUES[card_name]
+        if isinstance(card_values, list):
+            # value checkboxes
+            choice_mapping = {}
+            updated_choices = []
+            card_counts = {
+                x["key"]: x["doc_count"] for x in all_counts[card_field]["buckets"]
+            }
+            for value_name in card_values:
+                if value_name in card_counts:
+                    value_str = prepare_value_count(
+                        value_name,
+                        card_counts[value_name],
+                    )
+                    # track possible choices to use as values
+                    choice_mapping[value_name] = value_str
+                    updated_choices.append(value_str)
+            # Align prior selections with new choices
+            updated_values = []
+            for selected_value in card_2_selections[card_name]:
+                base_value = get_base_value(selected_value)
+                if base_value not in choice_mapping:
+                    # Re-add choices which now presumably have 0 counts
+                    choice_mapping[base_value] = prepare_value_count(base_value, 0)
+                updated_values.append(choice_mapping[base_value])
+            update_obj = gr.update(
                 choices=updated_choices,
                 value=updated_values,
+            )
+        elif isinstance(card_values, dict):
+            # range-slider, maybe other options in the future?
+            assert (
+                card_values["type"] == "range"
+            ), f"Expected range slider for card {card_name}"
+            # for range slider, nothing to actually do!
+            update_obj = gr.update()
+        else:
+            raise ValueError(f"Unknown values for card {card_name}")
+        card_updates.append(update_obj)
     case_count = temp["data"]["pagination"]["total"]
+    return card_updates + [gr.update(value=f"{case_count} Cases")]
 def prepare_value_count(value, count):
         a.click();
         document.body.removeChild(a);
         URL.revokeObjectURL(url);
+        button.innerHTML = 'Export to GDC';
         button.disabled = false;
     }})
     .catch(error => {{
                 min_width=150,
             )
             case_download = gr.Button(
+                value="Export to GDC",
                 min_width=150,
                 elem_id="download-btn",
             )
                 inputs=text_input,
             )
         with gr.Column(scale=4):
+            json_output = gr.Code(
                 label="Cohort Filter JSON",
                 value=json.dumps({"op": "and", "content": []}, indent=4),
+                language="json",
                 interactive=False,
                 show_label=True,
                 container=True,
                 elem_id="json-output",
             )
                 tab_containers.append(tab_container)
                 with gr.Row(elem_classes=["card-group"]):
                     for card in tab["cards"]:
+                        if isinstance(card["values"], list):
+                            filter_card = gr.CheckboxGroup(
+                                choices=[],
+                                label=card["name"],
+                                interactive=True,
+                                elem_classes=["filter-card"],
+                            )
+                        else:
+                            # values is a dictionary and defines some meta options
+                            metaopts = card["values"]
+                            assert (
+                                "type" in metaopts
+                                and metaopts["type"] == "range"
+                                and all(
+                                    k in metaopts
+                                    for k in [
+                                        "min",
+                                        "max",
+                                    ]
+                                )
+                            ), f"Unknown meta options for {card['name']}"
+                            info = "Inclusive range"
+                            if "unit" in metaopts:
+                                info += f", units in {metaopts['unit']}"
+                            filter_card = RangeSlider(
+                                label=card["name"],
+                                info=info,
+                                minimum=metaopts["min"],
+                                maximum=metaopts["max"],
+                                step=1,  # assume integer
+                            )
                         filter_cards.append(filter_card)
     # Assign tab buttons to toggle visibility
     # Load initial counts on startup
     demo.load(
+        fn=update_cards_with_counts,
         inputs=[gr.State("")] + filter_cards,
         outputs=filter_cards + [case_counter],
     )
     # Update checkboxes on filter generation
+    # Also update JSON based on checkboxes
+    # - relying on checkbox update to do this fires multiple times
+    # - also propagates new model selections after json is updated
     # Also this way it shows the model generated JSON
     text_input.submit(
         fn=process_query,
         outputs=filter_cards + [json_output],
     )
+    # Update JSON based on cards
+    # Use the user-driven `input` event listener (vs `change`); otherwise it will fire multiple times
+    # It seems like this should otherwise be cyclical; Gradio must have some logic to prevent infinite loops
     for filter_card in filter_cards:
+        if isinstance(filter_card, RangeSlider):
+            filter_card.release(
+                fn=update_json_from_cards,
+                inputs=filter_cards,
+                outputs=json_output,
+            )
+        else:
+            filter_card.input(
+                fn=update_json_from_cards,
+                inputs=filter_cards,
+                outputs=json_output,
+            )
     # Update checkboxes after executing filter query
     json_output.change(
+        fn=update_cards_with_counts,
         inputs=[json_output] + filter_cards,
         outputs=filter_cards + [case_counter],
     )

config.yaml CHANGED Viewed

@@ -670,34 +670,13 @@ tabs:
     - not hispanic or latino
     - not reported
     - unknown
-  # - name: Age at Diagnosis
-  #   field: cases.diagnoses.age_at_diagnosis
-  #   values:
-  #     type: range+checkboxes
-  #     min: 0
-  #     max: 32872
-  #     units: days
-  #     min_label: From
-  #     max_label: To
-  #     popular:
-  #     - label: "\u2265 18,263 to < 21,915 days"
-  #       min: 18263
-  #       max: 21915
-  #     - label: "\u2265 14,610 to < 18,263 days"
-  #       min: 14610
-  #       max: 18263
-  #     - label: "\u2265 10,958 to < 14,610 days"
-  #       min: 10958
-  #       max: 14610
-  #     - label: "\u2265 7,305 to < 10,958 days"
-  #       min: 7305
-  #       max: 10958
-  #     - label: "\u2265 3,653 to < 7,305 days"
-  #       min: 3653
-  #       max: 7305
-  #     - label: "\u2265 0 to < 3,653 days"
-  #       min: 0
-  #       max: 3653
   - name: Vital Status
     field: cases.demographic.vital_status
     values:
@@ -1013,34 +992,12 @@ tabs:
     - 9989/3
     - not reported
     - unknown
-  # - name: Year Of Diagnosis
-  #   field: cases.diagnoses.year_of_diagnosis
-  #   values:
-  #     type: range+checkboxes
-  #     min: 1900
-  #     max: 2050
-  #     units: years
-  #     min_label: From
-  #     max_label: To
-  #     popular:
-  #     - label: "\u2265 2020 to < 2030"
-  #       min: 2020
-  #       max: 2030
-  #     - label: "\u2265 2010 to < 2020"
-  #       min: 2010
-  #       max: 2020
-  #     - label: "\u2265 2000 to < 2010"
-  #       min: 2000
-  #       max: 2010
-  #     - label: "\u2265 1990 to < 2000"
-  #       min: 1990
-  #       max: 2000
-  #     - label: "\u2265 1980 to < 1990"
-  #       min: 1980
-  #       max: 1990
-  #     - label: "\u2265 1970 to < 1980"
-  #       min: 1970
-  #       max: 1980
   - name: Site Of Resection Or Biopsy
     field: cases.diagnoses.site_of_resection_or_biopsy
     values:
@@ -1889,51 +1846,25 @@ tabs:
     - not reported
     - smoking history not documented
     - unknown
-  # - name: Cigarettes Per Day
-  #   field: cases.exposures.cigarettes_per_day
-  #   values:
-  #     type: range
-  #     min: 0
-  #     max: 99999
-  #     unit: days
-  #     min_label: From
-  #     max_label: To
-  # - name: Pack Years Smoked
-  #   field: cases.exposures.pack_years_smoked
-  #   values:
-  #     type: range
-  #     min: 0
-  #     max: 999999
-  #     unit: years
-  #     min_label: From
-  #     max_label: To
-  # - name: Tobacco Smoking Onset Year
-  #   field: cases.exposures.tobacco_smoking_onset_year
-  #   values:
-  #     type: range+checkboxes
-  #     min: 1950
-  #     max: 2050
-  #     min_label: From
-  #     max_label: To
-  #     popular:
-  #     - label: '>= 1970 to < 1980 yrs'
-  #       min: 1970
-  #       max: 1980
-  #     - label: '>= 1960 to < 1970 yrs'
-  #       min: 1960
-  #       max: 1970
-  #     - label: '>= 1950 to < 1960 yrs'
-  #       min: 1950
-  #       max: 1960
-  #     - label: '>= 1940 to < 1950 yrs'
-  #       min: 1940
-  #       max: 1950
-  #     - label: '>= 1930 to < 1940 yrs'
-  #       min: 1930
-  #       max: 1940
-  #     - label: '>= 1920 to < 1930 yrs'
-  #       min: 1920
-  #       max: 1930
 - name: Biospecimen
   cards:
   - name: Tissue Type

     - not hispanic or latino
     - not reported
     - unknown
+  - name: Age at Diagnosis
+    field: cases.diagnoses.age_at_diagnosis
+    values:
+      type: range
+      min: 0
+      max: 32872
+      unit: days
   - name: Vital Status
     field: cases.demographic.vital_status
     values:
     - 9989/3
     - not reported
     - unknown
+  - name: Year Of Diagnosis
+    field: cases.diagnoses.year_of_diagnosis
+    values:
+      type: range
+      min: 1900
+      max: 2050
   - name: Site Of Resection Or Biopsy
     field: cases.diagnoses.site_of_resection_or_biopsy
     values:
     - not reported
     - smoking history not documented
     - unknown
+  - name: Cigarettes Per Day
+    field: cases.exposures.cigarettes_per_day
+    values:
+      type: range
+      min: 0
+      max: 99999
+  - name: Pack Years Smoked
+    field: cases.exposures.pack_years_smoked
+    values:
+      type: range
+      min: 0
+      max: 999999
+      unit: pack years
+  - name: Tobacco Smoking Onset Year
+    field: cases.exposures.tobacco_smoking_onset_year
+    values:
+      type: range
+      min: 1950
+      max: 2050
 - name: Biospecimen
   cards:
   - name: Tissue Type

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ torch==2.5.1
 transformers==4.50.0
 gradio==5.35.0
 guidance==0.2.4
 spaces

 transformers==4.50.0
 gradio==5.35.0
 guidance==0.2.4
+gradio_rangeslider
 spaces

schema.py CHANGED Viewed

@@ -7,6 +7,9 @@ from pydantic import BaseModel, Field, StringConstraints
 class InnerOp(Enum):
     IN = "in"
     EQ = "="
     LT = "<"
     GT = ">"
@@ -77,9 +80,13 @@ class InnerStrContent(BaseModel):
 class Inner(BaseModel):
     op: InnerOp
     content: (
-        InnerStrContent
-        | InnerAgeDxContent
         | InnerYearDxContent
         | InnerCigDayContent
         | InnerPackYrContent
@@ -89,9 +96,9 @@ class Inner(BaseModel):
 class Middle(BaseModel):
     op: MiddleOp
-    content: list[Inner]
 class GDCCohortSchema(BaseModel):
     op: OuterOp
-    content: list[Inner | Middle]

 class InnerOp(Enum):
     IN = "in"
+class InnerRangeOp(Enum):
     EQ = "="
     LT = "<"
     GT = ">"
 class Inner(BaseModel):
     op: InnerOp
+    content: InnerStrContent
+class InnerRange(BaseModel):
+    op: InnerRangeOp
     content: (
+        InnerAgeDxContent
         | InnerYearDxContent
         | InnerCigDayContent
         | InnerPackYrContent
 class Middle(BaseModel):
     op: MiddleOp
+    content: list[Inner | InnerRange]
 class GDCCohortSchema(BaseModel):
     op: OuterOp
+    content: list[Middle | Inner | InnerRange]

style.css CHANGED Viewed

@@ -19,10 +19,14 @@
     font-size: calc(var(--block-title-text-size) + 2px);
 }
-#json-output textarea {
     height: 96px !important;
 }
 .card-group, .card-group > div {
     background-color: transparent;
     border: 0px;
@@ -77,7 +81,7 @@
     border: 0;
 }
-.filter-card::-webkit-scrollbar, #json-output textarea::-webkit-scrollbar {
     -webkit-appearance: none;
     width: 7px;
 }
@@ -85,7 +89,7 @@
     border-radius: 4px;
     background-color: rgba(150,150,150,.5);
     -webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
-}
 @keyframes spin {
   to {

     font-size: calc(var(--block-title-text-size) + 2px);
 }
+#json-output {
     height: 96px !important;
 }
+#json-output > div:last-of-type {
+    height: 80% !important;
+}
 .card-group, .card-group > div {
     background-color: transparent;
     border: 0px;
     border: 0;
 }
+/* .filter-card::-webkit-scrollbar, #json-output textarea::-webkit-scrollbar {
     -webkit-appearance: none;
     width: 7px;
 }
     border-radius: 4px;
     background-color: rgba(150,150,150,.5);
     -webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
+} */
 @keyframes spin {
   to {