Skip to main content

scirs2_text/information_extraction/
temporal.rs

1//! Temporal expression extraction and processing
2
3use super::entities::{Entity, EntityType};
4use crate::error::Result;
5use regex::Regex;
6
7/// Advanced temporal expression extractor
8pub struct TemporalExtractor {
9    patterns: Vec<(String, Regex)>,
10}
11
12impl Default for TemporalExtractor {
13    fn default() -> Self {
14        Self::new()
15    }
16}
17
18impl TemporalExtractor {
19    /// Create new temporal extractor with predefined patterns
20    pub fn new() -> Self {
21        let patterns = vec![
22            // Relative dates
23            (
24                "relative_date".to_string(),
25                Regex::new(r"(?i)\b(?:yesterday|today|tomorrow|last|next|this)\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b").expect("Operation failed")
26            ),
27
28            // Time ranges
29            (
30                "time_range".to_string(),
31                Regex::new(
32                    r"(?i)\b(?:[01]?[0-9]|2[0-3]):[0-5][0-9]\s*-\s*(?:[01]?[0-9]|2[0-3]):[0-5][0-9]\b",
33                )
34                .expect("Operation failed"),
35            ),
36
37            // Durations
38            (
39                "duration".to_string(),
40                Regex::new(
41                    r"(?i)\b(?:\d+)\s+(?:seconds?|minutes?|hours?|days?|weeks?|months?|years?)\b",
42                )
43                .expect("Operation failed"),
44            ),
45
46            // Seasons and holidays
47            (
48                "seasonal".to_string(),
49                Regex::new(r"(?i)\b(?:spring|summer|fall|autumn|winter|christmas|thanksgiving|easter|halloween|new year)\b").expect("Operation failed")
50            ),
51        ];
52
53        Self { patterns }
54    }
55
56    /// Extract temporal expressions from text
57    pub fn extract(&self, text: &str) -> Result<Vec<Entity>> {
58        let mut entities = Vec::new();
59
60        for (pattern_type, pattern) in &self.patterns {
61            for mat in pattern.find_iter(text) {
62                entities.push(Entity {
63                    text: mat.as_str().to_string(),
64                    entity_type: EntityType::Custom(format!("temporal_{pattern_type}")),
65                    start: mat.start(),
66                    end: mat.end(),
67                    confidence: 0.85,
68                });
69            }
70        }
71
72        Ok(entities)
73    }
74}