songs1 committed on
Commit
62c31e6
·
1 Parent(s): ab14f8c

correct schema

Browse files
Files changed (2) hide show
  1. app.py +14 -5
  2. schema.py +30 -6
app.py CHANGED
@@ -10,6 +10,12 @@ from guidance.models import Transformers
10
  from schema import GDCCohortSchema
11
  from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
12
 
 
 
 
 
 
 
13
  GDC_CASES_API_ENDPOINT = "https://api.gdc.cancer.gov/cases"
14
  MODEL_NAME = "labadvcomp/gdc-cohort-llm-gpt2-1m-dev"
15
  # TOKENIZER_NAME = MODEL_NAME # TODO figure out why tokenizer breaks model??
@@ -270,11 +276,7 @@ with gr.Blocks(css_paths="style.css") as demo:
270
  with gr.Row(equal_height=True):
271
  with gr.Column(scale=1, min_width=250):
272
  gr.Examples(
273
- examples=[
274
- "bam files for TCGA-BRCA",
275
- "kidney or adrenal gland cancers with alcohol history",
276
- "tumor samples from male patients acute myeloid lymphoma",
277
- ],
278
  inputs=text_input,
279
  )
280
  with gr.Column(scale=4):
@@ -290,6 +292,13 @@ with gr.Blocks(css_paths="style.css") as demo:
290
  elem_id="json-output",
291
  )
292
 
 
 
 
 
 
 
 
293
  with gr.Row():
294
  # Tab selectors
295
  tab_buttons = []
 
10
  from schema import GDCCohortSchema
11
  from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
12
 
13
+ EXAMPLE_INPUTS = [
14
+ "bam files for TCGA-BRCA",
15
+ "kidney or adrenal gland cancers with alcohol history",
16
+ "tumor samples from male patients with acute myeloid lymphoma",
17
+ ]
18
+
19
  GDC_CASES_API_ENDPOINT = "https://api.gdc.cancer.gov/cases"
20
  MODEL_NAME = "labadvcomp/gdc-cohort-llm-gpt2-1m-dev"
21
  # TOKENIZER_NAME = MODEL_NAME # TODO figure out why tokenizer breaks model??
 
276
  with gr.Row(equal_height=True):
277
  with gr.Column(scale=1, min_width=250):
278
  gr.Examples(
279
+ examples=EXAMPLE_INPUTS,
 
 
 
 
280
  inputs=text_input,
281
  )
282
  with gr.Column(scale=4):
 
292
  elem_id="json-output",
293
  )
294
 
295
+ with gr.Row():
296
+ gr.Markdown(
297
+ "The generated cohort filter will autopopulate into the filter cards below. "
298
+ "Refine your search using the interactive checkboxes. "
299
+ "Note that many other options can be found by selecting the different tabs on the left."
300
+ )
301
+
302
  with gr.Row():
303
  # Tab selectors
304
  tab_buttons = []
schema.py CHANGED
@@ -1,6 +1,7 @@
1
  from enum import Enum
2
- from typing import Annotated, Literal
3
 
 
4
  from pydantic import BaseModel, Field, StringConstraints
5
 
6
 
@@ -21,11 +22,6 @@ class OuterOp(Enum):
21
  AND = "and"
22
 
23
 
24
- class InnerStrContent(BaseModel):
25
- field: Annotated[str, StringConstraints(min_length=1, max_length=64)]
26
- value: list[Annotated[str, StringConstraints(min_length=1, max_length=128)]]
27
-
28
-
29
  class InnerAgeDxContent(BaseModel):
30
  field: Literal["cases.diagnoses.age_at_diagnosis"]
31
  value: Annotated[int, Field(ge=0, le=32872)]
@@ -51,6 +47,34 @@ class InnerCigStrtContent(BaseModel):
51
  value: Annotated[int, Field(ge=1900, le=2050)]
52
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  class Inner(BaseModel):
55
  op: InnerOp
56
  content: (
 
1
  from enum import Enum
2
+ from typing import Annotated, Literal, get_args, get_type_hints
3
 
4
+ import yaml
5
  from pydantic import BaseModel, Field, StringConstraints
6
 
7
 
 
22
  AND = "and"
23
 
24
 
 
 
 
 
 
25
  class InnerAgeDxContent(BaseModel):
26
  field: Literal["cases.diagnoses.age_at_diagnosis"]
27
  value: Annotated[int, Field(ge=0, le=32872)]
 
47
  value: Annotated[int, Field(ge=1900, le=2050)]
48
 
49
 
50
+ excluded_values = set()
51
+ for alt_inner in [
52
+ InnerYearDxContent,
53
+ InnerCigDayContent,
54
+ InnerPackYrContent,
55
+ InnerCigStrtContent,
56
+ ]:
57
+ field_type = get_type_hints(alt_inner)["field"]
58
+ excluded_values |= set(get_args(field_type))
59
+
60
+
61
+ with open("config.yaml", "r") as f:
62
+ CONFIG = yaml.safe_load(f)
63
+ all_other_fields = tuple(
64
+ [
65
+ card["field"]
66
+ for tab in CONFIG["tabs"]
67
+ for card in tab["cards"]
68
+ if card["field"] not in excluded_values
69
+ ]
70
+ )
71
+
72
+
73
+ class InnerStrContent(BaseModel):
74
+ field: Literal[all_other_fields] # type: ignore - runtime literal
75
+ value: list[Annotated[str, StringConstraints(min_length=1, max_length=128)]]
76
+
77
+
78
  class Inner(BaseModel):
79
  op: InnerOp
80
  content: (