Skip to main content

ontology_usage/
ontology_usage.rs

1//! Dynamic Ontology Usage Examples
2//!
3//! This example demonstrates how to use the Dynamic Ontology pipeline
4//! for extracting entities from text and normalizing them to an ontology.
5
6#[cfg(feature = "hgnc")]
7use terraphim_types::hgnc::HgncNormalizer;
8use terraphim_types::{CoverageSignal, ExtractedEntity, GroundingMetadata, SchemaSignal};
9
10fn main() {
11    println!("=== Dynamic Ontology Usage Examples\n");
12
13    // Example 1: HGNC Gene Normalization (requires hgnc feature)
14    #[cfg(feature = "hgnc")]
15    {
16        println!("1. HGNC Gene Normalization");
17        println!("---------------------------");
18        example_hgnc_normalization();
19    }
20
21    // Example 2: Coverage Signal
22    println!("\n2. Coverage Signal Calculation");
23    println!("-------------------------------");
24    example_coverage_signal();
25
26    // Example 3: Schema Signal Creation
27    println!("\n3. Schema Signal Creation");
28    println!("-------------------------");
29    example_schema_signal();
30
31    // Example 4: Full Pipeline
32    println!("\n4. Full Extraction Pipeline");
33    println!("-----------------------------");
34    example_full_pipeline();
35}
36
/// Runs a handful of gene symbols through `HgncNormalizer` and prints what
/// each one resolves to.
///
/// The symbols cover an exact match, two aliases, a variant spelling, a
/// second exact match, and finally a symbol used as the "unknown" case.
#[cfg(feature = "hgnc")]
fn example_hgnc_normalization() {
    // Create a new HGNC normalizer with oncology genes
    let hgnc = HgncNormalizer::new();

    // Shared lookup: normalize a symbol and keep only its normalized label.
    let label_for =
        |symbol: &'static str| hgnc.normalize(symbol).map(|gene| gene.normalized_label);

    // Exact match
    println!("  EGFR -> {:?}", label_for("EGFR"));

    // Alias (ERBB1 is an alias for EGFR)
    println!("  ERBB1 (alias) -> {:?}", label_for("ERBB1"));

    // Alias (HER2 is an alias for ERBB2)
    println!("  HER2 (alias) -> {:?}", label_for("HER2"));

    // Fuzzy variant (EGFRvIII is a variant of EGFR)
    println!("  EGFRvIII (fuzzy) -> {:?}", label_for("EGFRvIII"));

    // Second exact match
    println!("  TP53 -> {:?}", label_for("TP53"));

    // Unknown gene: print the raw lookup result rather than just the label
    println!("  XYZ123 (unknown) -> {:?}", hgnc.normalize("XYZ123"));
}
72
73fn example_coverage_signal() {
74    // Create entities with varying grounding using string-based entity_type
75    let entities = vec![
76        ExtractedEntity {
77            entity_type: "cancer_diagnosis".to_string(),
78            raw_value: "non-small cell lung cancer".to_string(),
79            normalized_value: Some("Non-Small Cell Lung Cancer".to_string()),
80            grounding: Some(GroundingMetadata::new(
81                "http://example.org/nsclc".to_string(),
82                "Non-Small Cell Lung Cancer".to_string(),
83                "NCIt".to_string(),
84                0.95,
85                terraphim_types::NormalizationMethod::Exact,
86            )),
87        },
88        ExtractedEntity {
89            entity_type: "drug".to_string(),
90            raw_value: "Osimertinib".to_string(),
91            normalized_value: Some("Osimertinib".to_string()),
92            grounding: Some(GroundingMetadata::new(
93                "http://example.org/osimertinib".to_string(),
94                "Osimertinib".to_string(),
95                "NCIt".to_string(),
96                0.98,
97                terraphim_types::NormalizationMethod::Exact,
98            )),
99        },
100        ExtractedEntity {
101            entity_type: "genomic_variant".to_string(),
102            raw_value: "Unknown mutation".to_string(),
103            normalized_value: None,
104            grounding: None,
105        },
106    ];
107
108    // Calculate categories
109    let categories: Vec<String> = entities
110        .iter()
111        .map(|e| e.normalized_value.clone().unwrap_or(e.raw_value.clone()))
112        .collect();
113
114    // Count matched (entities with grounding)
115    let matched = entities.iter().filter(|e| e.grounding.is_some()).count();
116
117    // Compute coverage with 0.7 threshold
118    let coverage = CoverageSignal::compute(&categories, matched, 0.7);
119
120    println!("  Total categories: {}", coverage.total_categories);
121    println!("  Matched categories: {}", coverage.matched_categories);
122    println!("  Coverage ratio: {:.1}%", coverage.coverage_ratio * 100.0);
123    println!("  Threshold: {:.0}%", coverage.threshold * 100.0);
124    println!("  Needs review: {}", coverage.needs_review);
125}
126
127fn example_schema_signal() {
128    // Create a schema signal from extracted oncology data using string-based types
129    let entities = vec![
130        ExtractedEntity {
131            entity_type: "cancer_diagnosis".to_string(),
132            raw_value: "lung carcinoma".to_string(),
133            normalized_value: Some("Lung Carcinoma".to_string()),
134            grounding: Some(GroundingMetadata::new(
135                "http://example.org/lung_carcinoma".to_string(),
136                "Lung Carcinoma".to_string(),
137                "NCIt".to_string(),
138                0.95,
139                terraphim_types::NormalizationMethod::Exact,
140            )),
141        },
142        ExtractedEntity {
143            entity_type: "genomic_variant".to_string(),
144            raw_value: "EGFR L858R".to_string(),
145            normalized_value: Some("EGFR L858R".to_string()),
146            grounding: None,
147        },
148    ];
149
150    let relationships = vec![];
151
152    let schema_signal = SchemaSignal {
153        entities,
154        relationships,
155        confidence: 0.5,
156    };
157
158    println!("  Entities: {}", schema_signal.entities.len());
159    println!("  Relationships: {}", schema_signal.relationships.len());
160    println!("  Confidence: {:.0}%", schema_signal.confidence * 100.0);
161}
162
/// Formats the label/score pair of a normalization hit for display.
///
/// Returns `"<label> (score: <score>)"` with the score printed to two
/// decimals when both are present, `"<label> (score: n/a)"` when only the
/// label is present, and `"(no normalized label)"` when the label is missing.
/// Never panics.
// Only called from the `hgnc`-gated block in `example_full_pipeline`, so it is
// legitimately unused when that feature is disabled.
#[cfg_attr(not(feature = "hgnc"), allow(dead_code))]
fn describe_normalized<L, S>(label: Option<L>, score: Option<S>) -> String
where
    L: std::fmt::Display,
    S: std::fmt::Display,
{
    match (label, score) {
        (Some(label), Some(score)) => format!("{} (score: {:.2})", label, score),
        (Some(label), None) => format!("{} (score: n/a)", label),
        (None, _) => "(no normalized label)".to_string(),
    }
}

/// Prints a narrated walk-through of the four pipeline steps: extraction,
/// normalization, coverage check, and review.
///
/// Steps 1, 3 and 4 print fixed illustrative text. Step 2 performs a real
/// `HgncNormalizer` lookup for `"EGFR"` when the `hgnc` feature is enabled,
/// and otherwise prints a hint on how to enable it.
fn example_full_pipeline() {
    println!("  Step 1: Extract entities from text");
    println!(
        "    Input: 'Patient with EGFR L858R mutation in lung carcinoma treated with Osimertinib'"
    );
    println!(
        "    -> Extract: EGFR L858R (genomic_variant), lung carcinoma (cancer_diagnosis), Osimertinib (drug)"
    );

    println!("\n  Step 2: Normalize entities to ontology");

    #[cfg(feature = "hgnc")]
    {
        let normalizer = HgncNormalizer::new();
        // Normalize EGFR
        let egfr = normalizer.normalize("EGFR");
        // Format through the helper instead of unwrapping the optional
        // label/score fields, so a hit that lacks either one cannot panic.
        println!(
            "    EGFR -> {}",
            egfr.as_ref()
                .map(|g| describe_normalized(g.normalized_label.as_ref(), g.normalized_score))
                .unwrap_or_else(|| "Not found".to_string())
        );
    }

    #[cfg(not(feature = "hgnc"))]
    {
        println!("    (Enable 'hgnc' feature for gene normalization)");
    }

    // The figures below are fixed illustrative text, not computed values.
    println!("\n  Step 3: Check coverage");
    println!("    2/3 entities grounded = 66.7% coverage");
    println!("    Threshold: 70% -> needs review: true");

    println!("\n  Step 4: Review (if needed)");
    println!("    Review Agent suggests corrections for unmatched entities");

    println!("\n  Result: Grounded knowledge graph with coverage signal");
}