Skip to main content

shodh_memory/query_parsing/
parser_trait.rs

//! Query Parser Trait Definition
//!
//! Defines the [`QueryParser`] interface that all query parsers must implement,
//! together with the data types that describe a parsed query.
4
5use chrono::{DateTime, NaiveDate, Utc};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// Result of parsing a query
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ParsedQuery {
12    /// Original query text
13    pub original: String,
14
15    /// Extracted entities (people, places, things)
16    pub entities: Vec<Entity>,
17
18    /// Extracted events/actions (verbs)
19    pub events: Vec<Event>,
20
21    /// Modifiers (adjectives, descriptors)
22    pub modifiers: Vec<String>,
23
24    /// Temporal information extracted from the query
25    pub temporal: TemporalInfo,
26
27    /// Whether this is an attribute query (asking about a property)
28    pub is_attribute_query: bool,
29
30    /// The attribute being asked about (if is_attribute_query)
31    pub attribute: Option<AttributeQuery>,
32
33    /// Compound terms detected (e.g., "machine learning")
34    pub compounds: Vec<String>,
35
36    /// IC weights for BM25 boosting
37    pub ic_weights: HashMap<String, f32>,
38
39    /// Confidence score (0.0 - 1.0)
40    pub confidence: f32,
41}
42
43/// An extracted entity
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct Entity {
46    /// Original text
47    pub text: String,
48    /// Stemmed form
49    pub stem: String,
50    /// Entity type if detected
51    pub entity_type: EntityType,
52    /// Information content weight
53    pub ic_weight: f32,
54    /// Whether this entity is negated
55    pub negated: bool,
56}
57
58/// Entity type classification
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
60pub enum EntityType {
61    Person,
62    Place,
63    Thing,
64    Event,
65    Time,
66    Unknown,
67}
68
69/// An extracted event/action
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct Event {
72    /// Original text (verb)
73    pub text: String,
74    /// Stemmed form
75    pub stem: String,
76    /// IC weight
77    pub ic_weight: f32,
78}
79
80/// Temporal information extracted from query
81#[derive(Debug, Clone, Default, Serialize, Deserialize)]
82pub struct TemporalInfo {
83    /// Whether the query has temporal intent
84    pub has_temporal_intent: bool,
85
86    /// Type of temporal query
87    pub intent: TemporalIntent,
88
89    /// Relative time references found ("last year", "next month")
90    pub relative_refs: Vec<RelativeTimeRef>,
91
92    /// Resolved absolute dates (if context date provided)
93    pub resolved_dates: Vec<NaiveDate>,
94
95    /// Absolute dates mentioned directly ("May 7, 2023")
96    pub absolute_dates: Vec<NaiveDate>,
97}
98
99/// Type of temporal intent
100#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
101pub enum TemporalIntent {
102    /// "When did X happen?"
103    WhenQuestion,
104    /// "What happened in [time period]?"
105    SpecificTime,
106    /// "Did X happen before/after Y?"
107    Ordering,
108    /// "How long did X take?"
109    Duration,
110    /// No temporal intent
111    #[default]
112    None,
113}
114
115/// A relative time reference
116#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct RelativeTimeRef {
118    /// Original text ("last year", "next month")
119    pub text: String,
120    /// Resolved date (if context available)
121    pub resolved: Option<NaiveDate>,
122    /// Direction (past/future)
123    pub direction: TimeDirection,
124    /// Unit (day, week, month, year)
125    pub unit: TimeUnit,
126    /// Offset amount (1 for "last", 2 for "two weeks ago")
127    pub offset: i32,
128}
129
130/// Direction of time reference
131#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
132pub enum TimeDirection {
133    Past,
134    Future,
135    Current,
136}
137
138/// Time unit
139#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
140pub enum TimeUnit {
141    Day,
142    Week,
143    Month,
144    Year,
145    Unknown,
146}
147
148/// Attribute query details
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct AttributeQuery {
151    /// The entity being queried about
152    pub entity: String,
153    /// The attribute being asked (e.g., "relationship status")
154    pub attribute: String,
155    /// Synonyms for the attribute value
156    pub synonyms: Vec<String>,
157}
158
159/// Query parser trait - implement this for different parsing strategies
160pub trait QueryParser: Send + Sync {
161    /// Parse a query into structured components
162    ///
163    /// # Arguments
164    /// * `query` - The natural language query
165    /// * `context_date` - Optional date for resolving relative time references
166    ///
167    /// # Returns
168    /// Parsed query structure with entities, events, temporal info, etc.
169    fn parse(&self, query: &str, context_date: Option<DateTime<Utc>>) -> ParsedQuery;
170
171    /// Get the parser type name (for logging/debugging)
172    fn name(&self) -> &'static str;
173
174    /// Check if this parser is available/loaded
175    fn is_available(&self) -> bool {
176        true
177    }
178}
179
180impl ParsedQuery {
181    /// Create an empty parsed query
182    pub fn empty(original: &str) -> Self {
183        Self {
184            original: original.to_string(),
185            entities: Vec::new(),
186            events: Vec::new(),
187            modifiers: Vec::new(),
188            temporal: TemporalInfo::default(),
189            is_attribute_query: false,
190            attribute: None,
191            compounds: Vec::new(),
192            ic_weights: HashMap::new(),
193            confidence: 0.0,
194        }
195    }
196
197    /// Get all entity texts
198    pub fn entity_texts(&self) -> Vec<&str> {
199        self.entities.iter().map(|e| e.text.as_str()).collect()
200    }
201
202    /// Get all event stems
203    pub fn event_stems(&self) -> Vec<&str> {
204        self.events.iter().map(|e| e.stem.as_str()).collect()
205    }
206
207    /// Check if query is asking about time
208    pub fn is_temporal_query(&self) -> bool {
209        self.temporal.has_temporal_intent
210    }
211}