scirs2_text/information_extraction/
relations.rs

1//! Relation extraction and relationship modeling
2
3use super::entities::Entity;
4use crate::error::Result;
5use regex::Regex;
6
7/// Extracted relation between entities
8#[derive(Debug, Clone)]
9pub struct Relation {
10    /// Type of relation (e.g., "works_for", "located_in")
11    pub relation_type: String,
12    /// Subject entity in the relation
13    pub subject: Entity,
14    /// Object entity in the relation
15    pub object: Entity,
16    /// Context text where the relation was found
17    pub context: String,
18    /// Confidence score for the relation extraction (0.0 to 1.0)
19    pub confidence: f64,
20}
21
22/// Relation extractor for finding relationships between entities
23pub struct RelationExtractor {
24    relation_patterns: Vec<(String, Regex)>,
25}
26
27impl Default for RelationExtractor {
28    fn default() -> Self {
29        Self::new()
30    }
31}
32
33impl RelationExtractor {
34    /// Create a new relation extractor
35    pub fn new() -> Self {
36        Self {
37            relation_patterns: Vec::new(),
38        }
39    }
40
41    /// Add a relation pattern
42    pub fn add_relation(&mut self, relationtype: String, pattern: Regex) {
43        self.relation_patterns.push((relationtype, pattern));
44    }
45
46    /// Extract relations from text
47    pub fn extract_relations(&self, text: &str, entities: &[Entity]) -> Result<Vec<Relation>> {
48        let mut relations = Vec::new();
49
50        for (relation_type, pattern) in &self.relation_patterns {
51            for caps in pattern.captures_iter(text) {
52                if let Some(full_match) = caps.get(0) {
53                    // Find entities that might be involved in this relation
54                    let match_start = full_match.start();
55                    let match_end = full_match.end();
56
57                    let involved_entities: Vec<&Entity> = entities
58                        .iter()
59                        .filter(|e| e.start >= match_start && e.end <= match_end)
60                        .collect();
61
62                    if involved_entities.len() >= 2 {
63                        relations.push(Relation {
64                            relation_type: relation_type.clone(),
65                            subject: involved_entities[0].clone(),
66                            object: involved_entities[1].clone(),
67                            context: full_match.as_str().to_string(),
68                            confidence: 0.7,
69                        });
70                    }
71                }
72            }
73        }
74
75        Ok(relations)
76    }
77}
78
79/// Extracted event from text
80#[derive(Debug, Clone)]
81pub struct Event {
82    /// Type or category of the event
83    pub event_type: String,
84    /// Entities participating in the event
85    pub participants: Vec<Entity>,
86    /// Location where the event occurred
87    pub location: Option<Entity>,
88    /// Time when the event occurred
89    pub time: Option<Entity>,
90    /// Description of the event
91    pub description: String,
92    /// Confidence score for the event extraction
93    pub confidence: f64,
94}