Spaces:
Running on Zero
correct schema
Browse files
app.py
CHANGED
|
@@ -10,6 +10,12 @@ from guidance.models import Transformers
|
|
| 10 |
from schema import GDCCohortSchema
|
| 11 |
from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
GDC_CASES_API_ENDPOINT = "https://api.gdc.cancer.gov/cases"
|
| 14 |
MODEL_NAME = "labadvcomp/gdc-cohort-llm-gpt2-1m-dev"
|
| 15 |
# TOKENIZER_NAME = MODEL_NAME # TODO figure out why tokenizer breaks model??
|
|
@@ -270,11 +276,7 @@ with gr.Blocks(css_paths="style.css") as demo:
|
|
| 270 |
with gr.Row(equal_height=True):
|
| 271 |
with gr.Column(scale=1, min_width=250):
|
| 272 |
gr.Examples(
|
| 273 |
-
examples=[
|
| 274 |
-
"bam files for TCGA-BRCA",
|
| 275 |
-
"kidney or adrenal gland cancers with alcohol history",
|
| 276 |
-
"tumor samples from male patients acute myeloid lymphoma",
|
| 277 |
-
],
|
| 278 |
inputs=text_input,
|
| 279 |
)
|
| 280 |
with gr.Column(scale=4):
|
|
@@ -290,6 +292,13 @@ with gr.Blocks(css_paths="style.css") as demo:
|
|
| 290 |
elem_id="json-output",
|
| 291 |
)
|
| 292 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
with gr.Row():
|
| 294 |
# Tab selectors
|
| 295 |
tab_buttons = []
|
|
|
|
| 10 |
from schema import GDCCohortSchema
|
| 11 |
from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
|
| 12 |
|
| 13 |
+
EXAMPLE_INPUTS = [
|
| 14 |
+
"bam files for TCGA-BRCA",
|
| 15 |
+
"kidney or adrenal gland cancers with alcohol history",
|
| 16 |
+
"tumor samples from male patients with acute myeloid lymphoma",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
GDC_CASES_API_ENDPOINT = "https://api.gdc.cancer.gov/cases"
|
| 20 |
MODEL_NAME = "labadvcomp/gdc-cohort-llm-gpt2-1m-dev"
|
| 21 |
# TOKENIZER_NAME = MODEL_NAME # TODO figure out why tokenizer breaks model??
|
|
|
|
| 276 |
with gr.Row(equal_height=True):
|
| 277 |
with gr.Column(scale=1, min_width=250):
|
| 278 |
gr.Examples(
|
| 279 |
+
examples=EXAMPLE_INPUTS,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
inputs=text_input,
|
| 281 |
)
|
| 282 |
with gr.Column(scale=4):
|
|
|
|
| 292 |
elem_id="json-output",
|
| 293 |
)
|
| 294 |
|
| 295 |
+
with gr.Row():
|
| 296 |
+
gr.Markdown(
|
| 297 |
+
"The generated cohort filter will autopopulate into the filter cards below. "
|
| 298 |
+
"Refine your search using the interactive checkboxes. "
|
| 299 |
+
"Note that many other options can be found by selecting the different tabs on the left."
|
| 300 |
+
)
|
| 301 |
+
|
| 302 |
with gr.Row():
|
| 303 |
# Tab selectors
|
| 304 |
tab_buttons = []
|
schema.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from enum import Enum
|
| 2 |
-
from typing import Annotated, Literal
|
| 3 |
|
|
|
|
| 4 |
from pydantic import BaseModel, Field, StringConstraints
|
| 5 |
|
| 6 |
|
|
@@ -21,11 +22,6 @@ class OuterOp(Enum):
|
|
| 21 |
AND = "and"
|
| 22 |
|
| 23 |
|
| 24 |
-
class InnerStrContent(BaseModel):
|
| 25 |
-
field: Annotated[str, StringConstraints(min_length=1, max_length=64)]
|
| 26 |
-
value: list[Annotated[str, StringConstraints(min_length=1, max_length=128)]]
|
| 27 |
-
|
| 28 |
-
|
| 29 |
class InnerAgeDxContent(BaseModel):
|
| 30 |
field: Literal["cases.diagnoses.age_at_diagnosis"]
|
| 31 |
value: Annotated[int, Field(ge=0, le=32872)]
|
|
@@ -51,6 +47,34 @@ class InnerCigStrtContent(BaseModel):
|
|
| 51 |
value: Annotated[int, Field(ge=1900, le=2050)]
|
| 52 |
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
class Inner(BaseModel):
|
| 55 |
op: InnerOp
|
| 56 |
content: (
|
|
|
|
| 1 |
from enum import Enum
|
| 2 |
+
from typing import Annotated, Literal, get_args, get_type_hints
|
| 3 |
|
| 4 |
+
import yaml
|
| 5 |
from pydantic import BaseModel, Field, StringConstraints
|
| 6 |
|
| 7 |
|
|
|
|
| 22 |
AND = "and"
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
class InnerAgeDxContent(BaseModel):
|
| 26 |
field: Literal["cases.diagnoses.age_at_diagnosis"]
|
| 27 |
value: Annotated[int, Field(ge=0, le=32872)]
|
|
|
|
| 47 |
value: Annotated[int, Field(ge=1900, le=2050)]
|
| 48 |
|
| 49 |
|
| 50 |
+
excluded_values = set()
|
| 51 |
+
for alt_inner in [
|
| 52 |
+
InnerYearDxContent,
|
| 53 |
+
InnerCigDayContent,
|
| 54 |
+
InnerPackYrContent,
|
| 55 |
+
InnerCigStrtContent,
|
| 56 |
+
]:
|
| 57 |
+
field_type = get_type_hints(alt_inner)["field"]
|
| 58 |
+
excluded_values |= set(get_args(field_type))
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
with open("config.yaml", "r") as f:
|
| 62 |
+
CONFIG = yaml.safe_load(f)
|
| 63 |
+
all_other_fields = tuple(
|
| 64 |
+
[
|
| 65 |
+
card["field"]
|
| 66 |
+
for tab in CONFIG["tabs"]
|
| 67 |
+
for card in tab["cards"]
|
| 68 |
+
if card["field"] not in excluded_values
|
| 69 |
+
]
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class InnerStrContent(BaseModel):
|
| 74 |
+
field: Literal[all_other_fields] # type: ignore - runtime literal
|
| 75 |
+
value: list[Annotated[str, StringConstraints(min_length=1, max_length=128)]]
|
| 76 |
+
|
| 77 |
+
|
| 78 |
class Inner(BaseModel):
|
| 79 |
op: InnerOp
|
| 80 |
content: (
|