oxirs_embed/biomedical_embeddings/
types.rs

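//! Type definitions for biomedical knowledge-graph embeddings: the entity and
//! relation type enums, the embedding configuration, the model state container,
//! and the biomedical feature tables.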
2
3use crate::{ModelConfig, ModelStats, TrainingStats, Triple};
4use scirs2_core::ndarray_ext::Array1;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use uuid::Uuid;
8
/// Categories of biomedical entity distinguished by this module.
///
/// [`BiomedicalEntityType::namespace`] gives every variant a lowercase prefix and
/// [`BiomedicalEntityType::from_iri`] guesses a variant from marker substrings
/// found in an IRI. The per-variant notes list that prefix and those markers.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum BiomedicalEntityType {
    /// Namespace `gene`; IRI markers `gene`, `HGNC`.
    Gene,
    /// Namespace `protein`; IRI markers `protein`, `UniProt`.
    Protein,
    /// Namespace `disease`; IRI markers `disease`, `OMIM`, `DOID`.
    Disease,
    /// Namespace `drug`; IRI markers `drug`, `DrugBank`.
    Drug,
    /// Namespace `compound`; IRI markers `compound`, `CHEBI`.
    Compound,
    /// Namespace `pathway`; IRI markers `pathway`, `KEGG`, `Reactome`.
    Pathway,
    /// Namespace `cell`; `from_iri` has no marker for this variant.
    Cell,
    /// Namespace `tissue`; `from_iri` has no marker for this variant.
    Tissue,
    /// Namespace `organ`; `from_iri` has no marker for this variant.
    Organ,
    /// Namespace `phenotype`; `from_iri` has no marker for this variant.
    Phenotype,
    /// Namespace `go`; IRI marker `GO:`.
    GoTerm,
    /// Namespace `mesh`; IRI marker `MESH`.
    MeshTerm,
    /// Namespace `snomed`; IRI marker `SNOMED`.
    SnomedCt,
    /// Namespace `icd`; IRI marker `ICD`.
    IcdCode,
}
26
27impl BiomedicalEntityType {
28    /// Get the namespace prefix for this entity type
29    pub fn namespace(&self) -> &'static str {
30        match self {
31            BiomedicalEntityType::Gene => "gene",
32            BiomedicalEntityType::Protein => "protein",
33            BiomedicalEntityType::Disease => "disease",
34            BiomedicalEntityType::Drug => "drug",
35            BiomedicalEntityType::Compound => "compound",
36            BiomedicalEntityType::Pathway => "pathway",
37            BiomedicalEntityType::Cell => "cell",
38            BiomedicalEntityType::Tissue => "tissue",
39            BiomedicalEntityType::Organ => "organ",
40            BiomedicalEntityType::Phenotype => "phenotype",
41            BiomedicalEntityType::GoTerm => "go",
42            BiomedicalEntityType::MeshTerm => "mesh",
43            BiomedicalEntityType::SnomedCt => "snomed",
44            BiomedicalEntityType::IcdCode => "icd",
45        }
46    }
47
48    /// Parse entity type from IRI
49    pub fn from_iri(iri: &str) -> Option<Self> {
50        if iri.contains("gene") || iri.contains("HGNC") {
51            Some(BiomedicalEntityType::Gene)
52        } else if iri.contains("protein") || iri.contains("UniProt") {
53            Some(BiomedicalEntityType::Protein)
54        } else if iri.contains("disease") || iri.contains("OMIM") || iri.contains("DOID") {
55            Some(BiomedicalEntityType::Disease)
56        } else if iri.contains("drug") || iri.contains("DrugBank") {
57            Some(BiomedicalEntityType::Drug)
58        } else if iri.contains("compound") || iri.contains("CHEBI") {
59            Some(BiomedicalEntityType::Compound)
60        } else if iri.contains("pathway") || iri.contains("KEGG") || iri.contains("Reactome") {
61            Some(BiomedicalEntityType::Pathway)
62        } else if iri.contains("GO:") {
63            Some(BiomedicalEntityType::GoTerm)
64        } else if iri.contains("MESH") {
65            Some(BiomedicalEntityType::MeshTerm)
66        } else if iri.contains("SNOMED") {
67            Some(BiomedicalEntityType::SnomedCt)
68        } else if iri.contains("ICD") {
69            Some(BiomedicalEntityType::IcdCode)
70        } else {
71            None
72        }
73    }
74}
75
/// Biomedical relation types for specialized handling.
///
/// [`BiomedicalRelationType::from_iri`] maps predicate IRIs onto a subset of
/// these variants by keyword; each variant notes its keyword, or that it has none.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum BiomedicalRelationType {
    // --- Gene-disease associations ---
    /// `from_iri` keyword: `causes`.
    CausesDisease,
    /// `from_iri` keyword: `associated_with`.
    AssociatedWithDisease,
    /// No `from_iri` keyword maps to this variant.
    PredisposesToDisease,
    // --- Drug-target interactions ---
    /// `from_iri` keyword: `targets`.
    TargetsProtein,
    /// `from_iri` keyword: `inhibits`.
    InhibitsProtein,
    /// `from_iri` keyword: `activates`.
    ActivatesProtein,
    /// `from_iri` keyword: `binds`.
    BindsToProtein,
    // --- Pathway relationships ---
    /// `from_iri` keyword: `participates`.
    ParticipatesInPathway,
    /// No `from_iri` keyword maps to this variant.
    RegulatesPathway,
    /// No `from_iri` keyword maps to this variant.
    UpstreamOfPathway,
    /// No `from_iri` keyword maps to this variant.
    DownstreamOfPathway,
    // --- Protein interactions ---
    /// `from_iri` keyword: `interacts`.
    InteractsWith,
    /// No `from_iri` keyword maps to this variant.
    PhysicallyInteractsWith,
    /// No `from_iri` keyword maps to this variant.
    FunctionallyInteractsWith,
    // --- Chemical relationships ---
    /// `from_iri` keyword: `metabolized`.
    MetabolizedBy,
    /// No `from_iri` keyword maps to this variant.
    TransportedBy,
    /// No `from_iri` keyword maps to this variant.
    Catalyzes,
    // --- Hierarchical relationships ---
    /// `from_iri` keyword: `subtype`.
    IsASubtypeOf,
    /// `from_iri` keyword: `part_of`.
    PartOf,
    /// No `from_iri` keyword maps to this variant.
    HasPhenotype,
    // --- Expression relationships ---
    /// `from_iri` keyword: `expressed`.
    ExpressedIn,
    /// No `from_iri` keyword maps to this variant.
    Overexpressed,
    /// No `from_iri` keyword maps to this variant.
    Underexpressed,
}
110
111impl BiomedicalRelationType {
112    /// Parse relation type from predicate IRI
113    pub fn from_iri(iri: &str) -> Option<Self> {
114        match iri.to_lowercase().as_str() {
115            s if s.contains("causes") => Some(BiomedicalRelationType::CausesDisease),
116            s if s.contains("associated_with") => {
117                Some(BiomedicalRelationType::AssociatedWithDisease)
118            }
119            s if s.contains("targets") => Some(BiomedicalRelationType::TargetsProtein),
120            s if s.contains("inhibits") => Some(BiomedicalRelationType::InhibitsProtein),
121            s if s.contains("activates") => Some(BiomedicalRelationType::ActivatesProtein),
122            s if s.contains("binds") => Some(BiomedicalRelationType::BindsToProtein),
123            s if s.contains("participates") => Some(BiomedicalRelationType::ParticipatesInPathway),
124            s if s.contains("interacts") => Some(BiomedicalRelationType::InteractsWith),
125            s if s.contains("metabolized") => Some(BiomedicalRelationType::MetabolizedBy),
126            s if s.contains("expressed") => Some(BiomedicalRelationType::ExpressedIn),
127            s if s.contains("subtype") => Some(BiomedicalRelationType::IsASubtypeOf),
128            s if s.contains("part_of") => Some(BiomedicalRelationType::PartOf),
129            _ => None,
130        }
131    }
132}
133
/// Configuration for biomedical embeddings.
///
/// The `Default` implementation supplies the default values quoted on each
/// field. How the weights and feature switches are consumed is decided by code
/// outside this declaration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiomedicalEmbeddingConfig {
    /// Crate-level [`ModelConfig`] carried alongside the biomedical settings
    /// (default: `ModelConfig::default()`).
    pub base_config: ModelConfig,
    /// Weight for gene-disease associations (default `2.0`).
    pub gene_disease_weight: f32,
    /// Weight for drug-target interactions (default `1.5`).
    pub drug_target_weight: f32,
    /// Weight for pathway relationships (default `1.2`).
    pub pathway_weight: f32,
    /// Weight for protein interactions (default `1.0`).
    pub protein_interaction_weight: f32,
    /// Enable sequence similarity features (default `true`).
    pub use_sequence_similarity: bool,
    /// Enable chemical structure features (default `true`).
    pub use_chemical_structure: bool,
    /// Enable taxonomic hierarchy (default `true`).
    pub use_taxonomy: bool,
    /// Enable temporal relationships (default `false`).
    pub use_temporal_features: bool,
    /// Species filter (e.g., "Homo sapiens", "Mus musculus"); default
    /// `Some("Homo sapiens")`. `None` presumably disables filtering; confirm
    /// against the code that reads this field.
    pub species_filter: Option<String>,
}
157
158impl Default for BiomedicalEmbeddingConfig {
159    fn default() -> Self {
160        Self {
161            base_config: ModelConfig::default(),
162            gene_disease_weight: 2.0,
163            drug_target_weight: 1.5,
164            pathway_weight: 1.2,
165            protein_interaction_weight: 1.0,
166            use_sequence_similarity: true,
167            use_chemical_structure: true,
168            use_taxonomy: true,
169            use_temporal_features: false,
170            species_filter: Some("Homo sapiens".to_string()),
171        }
172    }
173}
174
/// Biomedical knowledge graph embedding model.
///
/// This declaration only defines the stored state: configuration, one
/// embedding table per entity category, a relation embedding table, type
/// lookups, the training triples, feature tables and statistics. The code that
/// fills and trains it is not part of this declaration.
///
/// NOTE(review): the `String` keys of the maps below are presumably entity or
/// relation IRIs; confirm against the code that inserts into them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiomedicalEmbedding {
    /// Configuration for this model.
    pub config: BiomedicalEmbeddingConfig,
    /// UUID identifying this model.
    pub model_id: Uuid,
    // --- Entity embeddings by type: key -> `f32` vector ---
    /// Embedding vectors for genes.
    pub gene_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for proteins.
    pub protein_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for diseases.
    pub disease_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for drugs.
    pub drug_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for compounds.
    pub compound_embeddings: HashMap<String, Array1<f32>>,
    /// Embedding vectors for pathways.
    pub pathway_embeddings: HashMap<String, Array1<f32>>,
    /// Relation embeddings by type
    pub relation_embeddings: HashMap<String, Array1<f32>>,
    /// Entity type mappings
    pub entity_types: HashMap<String, BiomedicalEntityType>,
    /// Relation type mappings
    pub relation_types: HashMap<String, BiomedicalRelationType>,
    /// Training data
    pub triples: Vec<Triple>,
    /// Biomedical-specific features
    pub features: BiomedicalFeatures,
    /// Training statistics (crate-level `TrainingStats`).
    pub training_stats: TrainingStats,
    /// Model statistics (crate-level `ModelStats`).
    pub model_stats: ModelStats,
    /// Whether the model has been trained; presumably set by the training
    /// routine, which is not visible here.
    pub is_trained: bool,
}
202
/// Biomedical-specific features for enhanced embeddings.
///
/// Every table maps a pair of `String` identifiers to an `f32` score. The
/// derived `Default` yields empty tables.
///
/// NOTE(review): the order and meaning of the two identifiers in each key
/// (e.g. gene first, disease second) is not established by this declaration;
/// confirm against the code that populates the tables.
///
/// NOTE(review): tuple map keys are rejected by serialization formats that
/// require string keys (JSON, for instance); confirm which format this struct
/// is serialized with.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BiomedicalFeatures {
    /// Gene-disease association scores
    pub gene_disease_associations: HashMap<(String, String), f32>,
    /// Drug-target binding affinities
    pub drug_target_affinities: HashMap<(String, String), f32>,
    /// Pathway membership scores
    pub pathway_memberships: HashMap<(String, String), f32>,
    /// Protein-protein interaction scores
    pub protein_interactions: HashMap<(String, String), f32>,
    /// Sequence similarity scores
    pub sequence_similarities: HashMap<(String, String), f32>,
    /// Chemical structure similarities
    pub structure_similarities: HashMap<(String, String), f32>,
    /// Expression correlations
    pub expression_correlations: HashMap<(String, String), f32>,
    /// Tissue-specific expression
    pub tissue_expression: HashMap<(String, String), f32>,
}