scirs2_text/information_extraction/
temporal.rs1use super::entities::{Entity, EntityType};
4use crate::error::Result;
5use regex::Regex;
6
7pub struct TemporalExtractor {
9 patterns: Vec<(String, Regex)>,
10}
11
12impl Default for TemporalExtractor {
13 fn default() -> Self {
14 Self::new()
15 }
16}
17
18impl TemporalExtractor {
19 pub fn new() -> Self {
21 let patterns = vec![
22 (
24 "relative_date".to_string(),
25 Regex::new(r"(?i)\b(?:yesterday|today|tomorrow|last|next|this)\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b").expect("Operation failed")
26 ),
27
28 (
30 "time_range".to_string(),
31 Regex::new(
32 r"(?i)\b(?:[01]?[0-9]|2[0-3]):[0-5][0-9]\s*-\s*(?:[01]?[0-9]|2[0-3]):[0-5][0-9]\b",
33 )
34 .expect("Operation failed"),
35 ),
36
37 (
39 "duration".to_string(),
40 Regex::new(
41 r"(?i)\b(?:\d+)\s+(?:seconds?|minutes?|hours?|days?|weeks?|months?|years?)\b",
42 )
43 .expect("Operation failed"),
44 ),
45
46 (
48 "seasonal".to_string(),
49 Regex::new(r"(?i)\b(?:spring|summer|fall|autumn|winter|christmas|thanksgiving|easter|halloween|new year)\b").expect("Operation failed")
50 ),
51 ];
52
53 Self { patterns }
54 }
55
56 pub fn extract(&self, text: &str) -> Result<Vec<Entity>> {
58 let mut entities = Vec::new();
59
60 for (pattern_type, pattern) in &self.patterns {
61 for mat in pattern.find_iter(text) {
62 entities.push(Entity {
63 text: mat.as_str().to_string(),
64 entity_type: EntityType::Custom(format!("temporal_{pattern_type}")),
65 start: mat.start(),
66 end: mat.end(),
67 confidence: 0.85,
68 });
69 }
70 }
71
72 Ok(entities)
73 }
74}