1use crate::{ModelConfig, ModelStats, TrainingStats, Triple};
4use scirs2_core::ndarray_ext::Array1;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use uuid::Uuid;
8
/// Kinds of biomedical entities distinguished by this module.
///
/// Every variant maps to a short lowercase label through
/// `BiomedicalEntityType::namespace`; the label is quoted on each variant below.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum BiomedicalEntityType {
    /// Gene; namespace label `"gene"`.
    Gene,
    /// Protein; namespace label `"protein"`.
    Protein,
    /// Disease; namespace label `"disease"`.
    Disease,
    /// Drug; namespace label `"drug"`.
    Drug,
    /// Compound; namespace label `"compound"`.
    Compound,
    /// Pathway; namespace label `"pathway"`.
    Pathway,
    /// Cell; namespace label `"cell"`.
    Cell,
    /// Tissue; namespace label `"tissue"`.
    Tissue,
    /// Organ; namespace label `"organ"`.
    Organ,
    /// Phenotype; namespace label `"phenotype"`.
    Phenotype,
    /// GO term (presumably Gene Ontology); namespace label `"go"`.
    GoTerm,
    /// MeSH term; namespace label `"mesh"`.
    MeshTerm,
    /// SNOMED CT concept; namespace label `"snomed"`.
    SnomedCt,
    /// ICD code; namespace label `"icd"`.
    IcdCode,
}
26
27impl BiomedicalEntityType {
28 pub fn namespace(&self) -> &'static str {
30 match self {
31 BiomedicalEntityType::Gene => "gene",
32 BiomedicalEntityType::Protein => "protein",
33 BiomedicalEntityType::Disease => "disease",
34 BiomedicalEntityType::Drug => "drug",
35 BiomedicalEntityType::Compound => "compound",
36 BiomedicalEntityType::Pathway => "pathway",
37 BiomedicalEntityType::Cell => "cell",
38 BiomedicalEntityType::Tissue => "tissue",
39 BiomedicalEntityType::Organ => "organ",
40 BiomedicalEntityType::Phenotype => "phenotype",
41 BiomedicalEntityType::GoTerm => "go",
42 BiomedicalEntityType::MeshTerm => "mesh",
43 BiomedicalEntityType::SnomedCt => "snomed",
44 BiomedicalEntityType::IcdCode => "icd",
45 }
46 }
47
48 pub fn from_iri(iri: &str) -> Option<Self> {
50 if iri.contains("gene") || iri.contains("HGNC") {
51 Some(BiomedicalEntityType::Gene)
52 } else if iri.contains("protein") || iri.contains("UniProt") {
53 Some(BiomedicalEntityType::Protein)
54 } else if iri.contains("disease") || iri.contains("OMIM") || iri.contains("DOID") {
55 Some(BiomedicalEntityType::Disease)
56 } else if iri.contains("drug") || iri.contains("DrugBank") {
57 Some(BiomedicalEntityType::Drug)
58 } else if iri.contains("compound") || iri.contains("CHEBI") {
59 Some(BiomedicalEntityType::Compound)
60 } else if iri.contains("pathway") || iri.contains("KEGG") || iri.contains("Reactome") {
61 Some(BiomedicalEntityType::Pathway)
62 } else if iri.contains("GO:") {
63 Some(BiomedicalEntityType::GoTerm)
64 } else if iri.contains("MESH") {
65 Some(BiomedicalEntityType::MeshTerm)
66 } else if iri.contains("SNOMED") {
67 Some(BiomedicalEntityType::SnomedCt)
68 } else if iri.contains("ICD") {
69 Some(BiomedicalEntityType::IcdCode)
70 } else {
71 None
72 }
73 }
74}
75
/// Kinds of relations between biomedical entities distinguished by this module.
///
/// Variants are listed in thematic runs: disease links, protein targeting,
/// pathway roles, interactions, metabolism/transport, hierarchy/phenotype and
/// expression.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum BiomedicalRelationType {
    /// Subject causes a disease.
    CausesDisease,
    /// Subject is associated with a disease.
    AssociatedWithDisease,
    /// Subject predisposes to a disease.
    PredisposesToDisease,
    /// Subject targets a protein.
    TargetsProtein,
    /// Subject inhibits a protein.
    InhibitsProtein,
    /// Subject activates a protein.
    ActivatesProtein,
    /// Subject binds to a protein.
    BindsToProtein,
    /// Subject participates in a pathway.
    ParticipatesInPathway,
    /// Subject regulates a pathway.
    RegulatesPathway,
    /// Subject is upstream of a pathway.
    UpstreamOfPathway,
    /// Subject is downstream of a pathway.
    DownstreamOfPathway,
    /// Generic interaction between two entities.
    InteractsWith,
    /// Physical interaction between two entities.
    PhysicallyInteractsWith,
    /// Functional interaction between two entities.
    FunctionallyInteractsWith,
    /// Subject is metabolized by the object.
    MetabolizedBy,
    /// Subject is transported by the object.
    TransportedBy,
    /// Subject catalyzes the object.
    Catalyzes,
    /// Subject is a subtype of the object.
    IsASubtypeOf,
    /// Subject is part of the object.
    PartOf,
    /// Subject has the given phenotype.
    HasPhenotype,
    /// Subject is expressed in the object.
    ExpressedIn,
    /// Subject is overexpressed.
    Overexpressed,
    /// Subject is underexpressed.
    Underexpressed,
}
110
111impl BiomedicalRelationType {
112 pub fn from_iri(iri: &str) -> Option<Self> {
114 match iri.to_lowercase().as_str() {
115 s if s.contains("causes") => Some(BiomedicalRelationType::CausesDisease),
116 s if s.contains("associated_with") => {
117 Some(BiomedicalRelationType::AssociatedWithDisease)
118 }
119 s if s.contains("targets") => Some(BiomedicalRelationType::TargetsProtein),
120 s if s.contains("inhibits") => Some(BiomedicalRelationType::InhibitsProtein),
121 s if s.contains("activates") => Some(BiomedicalRelationType::ActivatesProtein),
122 s if s.contains("binds") => Some(BiomedicalRelationType::BindsToProtein),
123 s if s.contains("participates") => Some(BiomedicalRelationType::ParticipatesInPathway),
124 s if s.contains("interacts") => Some(BiomedicalRelationType::InteractsWith),
125 s if s.contains("metabolized") => Some(BiomedicalRelationType::MetabolizedBy),
126 s if s.contains("expressed") => Some(BiomedicalRelationType::ExpressedIn),
127 s if s.contains("subtype") => Some(BiomedicalRelationType::IsASubtypeOf),
128 s if s.contains("part_of") => Some(BiomedicalRelationType::PartOf),
129 _ => None,
130 }
131 }
132}
133
/// Configuration for the biomedical embedding model.
///
/// Defaults quoted below come from this type's `Default` implementation.
/// NOTE(review): how each weight and switch is consumed is not visible in this
/// part of the file; the descriptions follow the field names — confirm against
/// the training code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiomedicalEmbeddingConfig {
    /// Base `ModelConfig` from the crate root; defaults to `ModelConfig::default()`.
    pub base_config: ModelConfig,
    /// Weight for gene–disease relations (default `2.0`).
    pub gene_disease_weight: f32,
    /// Weight for drug–target relations (default `1.5`).
    pub drug_target_weight: f32,
    /// Weight for pathway relations (default `1.2`).
    pub pathway_weight: f32,
    /// Weight for protein interaction relations (default `1.0`).
    pub protein_interaction_weight: f32,
    /// Switch for sequence-similarity features (default `true`).
    pub use_sequence_similarity: bool,
    /// Switch for chemical-structure features (default `true`).
    pub use_chemical_structure: bool,
    /// Switch for taxonomy information (default `true`).
    pub use_taxonomy: bool,
    /// Switch for temporal features (default `false`).
    pub use_temporal_features: bool,
    /// Optional species name (default `Some("Homo sapiens")`); presumably
    /// restricts processing to that species — confirm.
    pub species_filter: Option<String>,
}
157
158impl Default for BiomedicalEmbeddingConfig {
159 fn default() -> Self {
160 Self {
161 base_config: ModelConfig::default(),
162 gene_disease_weight: 2.0,
163 drug_target_weight: 1.5,
164 pathway_weight: 1.2,
165 protein_interaction_weight: 1.0,
166 use_sequence_similarity: true,
167 use_chemical_structure: true,
168 use_taxonomy: true,
169 use_temporal_features: false,
170 species_filter: Some("Homo sapiens".to_string()),
171 }
172 }
173}
174
/// State of a biomedical embedding model: its configuration, per-category
/// embedding tables, type lookups, triples, auxiliary features and statistics.
///
/// NOTE(review): the string keys of the maps below are presumably entity or
/// relation identifiers (e.g. IRIs); the code that fills them is not visible
/// in this part of the file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiomedicalEmbedding {
    /// Configuration this model was created with.
    pub config: BiomedicalEmbeddingConfig,
    /// UUID identifying this model.
    pub model_id: Uuid,
    /// Embedding vectors for genes, by string key.
    pub gene_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for proteins, by string key.
    pub protein_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for diseases, by string key.
    pub disease_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for drugs, by string key.
    pub drug_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for compounds, by string key.
    pub compound_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for pathways, by string key.
    pub pathway_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for relations, by string key.
    pub relation_embeddings: HashMap<String, Array1<f32>>,
    /// Entity type recorded for each string key.
    pub entity_types: HashMap<String, BiomedicalEntityType>,
    /// Relation type recorded for each string key.
    pub relation_types: HashMap<String, BiomedicalRelationType>,
    /// Triples held by the model.
    pub triples: Vec<Triple>,
    /// Pairwise biomedical feature scores.
    pub features: BiomedicalFeatures,
    /// Training statistics (crate-level `TrainingStats`).
    pub training_stats: TrainingStats,
    /// Model statistics (crate-level `ModelStats`).
    pub model_stats: ModelStats,
    /// Trained flag; presumably set once training has completed — confirm.
    pub is_trained: bool,
}
202
/// Pairwise feature scores used alongside the embeddings.
///
/// Every table maps a pair of strings to an `f32` score; `Default` yields all
/// tables empty.
/// NOTE(review): the order and meaning of the two strings in each key
/// (e.g. `(gene, disease)`) and the score ranges are inferred from the field
/// names only — confirm against the code that populates them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BiomedicalFeatures {
    /// Gene–disease association scores.
    pub gene_disease_associations: HashMap<(String, String), f32>,
    /// Drug–target affinity scores.
    pub drug_target_affinities: HashMap<(String, String), f32>,
    /// Pathway membership scores.
    pub pathway_memberships: HashMap<(String, String), f32>,
    /// Protein interaction scores.
    pub protein_interactions: HashMap<(String, String), f32>,
    /// Sequence similarity scores.
    pub sequence_similarities: HashMap<(String, String), f32>,
    /// Structure similarity scores.
    pub structure_similarities: HashMap<(String, String), f32>,
    /// Expression correlation scores.
    pub expression_correlations: HashMap<(String, String), f32>,
    /// Tissue expression scores.
    pub tissue_expression: HashMap<(String, String), f32>,
}