---
# HITL-KG Configuration File
# Copy to config.yaml and customize

# Server settings
# NOTE(review): "0.0.0.0" conventionally means "listen on every interface";
# confirm that is intended before exposing this outside a trusted network.
host: "0.0.0.0"
port: 7860
debug: false

# Paths (relative to app root)
data_dir: "./data"
cache_dir: "./data/cache"
sessions_dir: "./data/sessions"

# Session management
session_max_age_hours: 24
session_cleanup_interval_minutes: 5
max_sessions: 1000

# Default language
default_language: "en"
supported_languages:
  - "en"
  - "uk"
  - "ru"
  - "es"
  - "de"
  - "fr"

# Embedding configuration
embedding:
  # Multilingual model supporting 50+ languages
  model_name: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
  cache_dir: "./data/embeddings"
  # NOTE(review): presumably must match the output size of model_name;
  # re-check this value whenever the model is changed.
  dimension: 384
  batch_size: 32
  device: "cpu"  # "cpu", "cuda", or "mps" (Apple Silicon)

# LLM configuration
llm:
  provider: "local"  # "openai" or "local"
  # NOTE(review): "gpt-4o-mini" looks like an OpenAI model name while the
  # provider above is "local" - confirm this pairing is intentional.
  model: "gpt-4o-mini"
  temperature: 0.7
  max_tokens: 2048
  # api_key: ""  # Or set OPENAI_API_KEY environment variable

# Datasets configuration
# The system supports multiple dataset formats: obo, csv, json
datasets:
  # Disease Ontology (DOID)
  - name: "disease_ontology"
    source_type: "obo"
    source_url: "https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/src/ontology/doid.obo"
    entity_category: "disease"
    cache_enabled: true
    cache_max_age_days: 7

  # Symptom Ontology (SYMP)
  - name: "symptom_ontology"
    source_type: "obo"
    source_url: "https://raw.githubusercontent.com/DiseaseOntology/SymptomOntology/main/symp.obo"
    entity_category: "symptom"
    cache_enabled: true
    cache_max_age_days: 7

  # Example: Custom CSV dataset (uncomment to use)
  # - name: "custom_symptoms"
  #   source_type: "csv"
  #   source_path: "./data/custom_symptoms.csv"
  #   entity_category: "symptom"
  #   cache_enabled: false

# Advanced settings
# The example below shows alternative embedding models. Do not uncomment it
# as a second top-level "embedding" key: duplicate keys are invalid YAML and
# most parsers silently keep only the last one. Change model_name in the
# "embedding" section above instead.
# embedding:
#   # For domain-specific embeddings, consider:
#   # - "dmis-lab/biobert-base-cased-v1.2" (biomedical)
#   # - "emilyalsentzer/Bio_ClinicalBERT" (clinical)
#   model_name: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"