#![cfg(feature = "temporal_enrich")]
use chrono::{Datelike, NaiveDate, NaiveDateTime};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
/// Origin of the date that is used as a temporal anchor.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnchorSource {
/// Date supplied by the caller up front
/// (set by `TemporalAnchorTracker::with_document_date`).
DocumentMetadata,
/// Date parsed out of a `Session N (...)`, `[DATE: ...]` or
/// `[SESSION_DATE: ...]` header line.
ExplicitHeader,
/// Date found in running text (ISO-style, month-name or slash format).
InlineDate,
/// Fallback reported by `TemporalAnchorTracker::anchor_info` when an
/// anchor date is present but no source was recorded for it.
Inherited,
}
/// A detected anchor date together with how and where it was found.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalAnchorInfo {
/// The calendar date relative phrases are resolved against.
pub date: NaiveDate,
/// Which kind of pattern (or metadata) produced the date.
pub source: AnchorSource,
/// Heuristic trust score; the detectors in this file use 0.95 for
/// headers, 0.9 for ISO-style dates, 0.85 for month-name dates and 0.7
/// for slash dates and caller-supplied document dates.
pub confidence: f32,
/// The text that matched the date pattern; empty when no text was
/// recorded (e.g. for a caller-supplied document date).
pub original_text: String,
/// Offset passed to `TemporalAnchorTracker::process_line` for the line
/// that contained the date; always 0 when produced by
/// `TemporalAnchorTracker::anchor_info`.
pub char_offset: usize,
}
/// A relative time expression ("last year", "two days ago", ...) found in a
/// text, optionally resolved to a concrete date.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelativePhrase {
/// The phrase exactly as it appears in the text.
pub phrase: String,
/// Start of the regex match within the scanned text (the regex crate
/// reports byte offsets).
pub char_offset: usize,
/// Length of the regex match (in bytes, as reported by the regex crate).
pub length: usize,
/// What the phrase resolves to, or `None` when no anchor was available
/// or the phrase could not be resolved.
pub resolved: Option<ResolvedTemporal>,
}
/// The concrete point or span of time a relative phrase resolves to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ResolvedTemporal {
/// A single calendar day.
Date(NaiveDate),
/// A span of days; `resolve_relative_phrase` produces Monday-to-Sunday
/// weeks where `end` is `start` plus six days.
DateRange { start: NaiveDate, end: NaiveDate },
/// A whole calendar year.
Year(i32),
/// A whole calendar month; `month` is expected to be 1-12
/// (`to_display_string` prints "Unknown" for anything else).
Month { year: i32, month: u32 },
}
impl ResolvedTemporal {
    /// Renders the value for human readers.
    ///
    /// Days are printed as `"<Month> <dd>, <yyyy>"`, ranges as
    /// `"<day> to <day>"`, years as the bare number and months as
    /// `"<Month> <year>"` (`"Unknown <year>"` if the month is not 1-12).
    #[must_use]
    pub fn to_display_string(&self) -> String {
        const DAY_FORMAT: &str = "%B %d, %Y";
        const MONTH_NAMES: [&str; 12] = [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ];

        match *self {
            Self::Date(day) => day.format(DAY_FORMAT).to_string(),
            Self::DateRange { start, end } => {
                let first = start.format(DAY_FORMAT);
                let last = end.format(DAY_FORMAT);
                format!("{first} to {last}")
            }
            Self::Year(year) => year.to_string(),
            Self::Month { year, month } => {
                // Month numbers are 1-based; 0 and anything above 12 have no name.
                let month_name = month
                    .checked_sub(1)
                    .and_then(|index| MONTH_NAMES.get(index as usize))
                    .copied()
                    .unwrap_or("Unknown");
                format!("{month_name} {year}")
            }
        }
    }

    /// Renders the value in a compact machine-friendly form.
    ///
    /// Days are `YYYY-MM-DD`, ranges are `start/end`, years are the bare
    /// number and months are `<year>-<MM>`.
    #[must_use]
    pub fn to_iso_string(&self) -> String {
        const DAY_FORMAT: &str = "%Y-%m-%d";

        match *self {
            Self::Date(day) => day.format(DAY_FORMAT).to_string(),
            Self::DateRange { start, end } => {
                let first = start.format(DAY_FORMAT);
                let last = end.format(DAY_FORMAT);
                format!("{first}/{last}")
            }
            Self::Year(year) => year.to_string(),
            Self::Month { year, month } => format!("{year}-{month:02}"),
        }
    }
}
/// Everything `enrich_chunk` learned about one piece of text.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TemporalEnrichment {
/// The anchor used for this text: one detected in the text itself or,
/// failing that, whatever the tracker already held. `None` if neither
/// exists.
pub anchor: Option<TemporalAnchorInfo>,
/// Relative phrases found in the text, ordered by their start offset.
pub relative_phrases: Vec<RelativePhrase>,
/// Human-readable summary of the resolved phrases; only set when at
/// least one phrase was resolved.
pub context_block: Option<String>,
}
/// Remembers the best anchor date seen so far while text is fed to it line
/// by line, so that later text can be resolved against an earlier date.
#[derive(Debug, Clone)]
pub struct TemporalAnchorTracker {
/// The anchor date currently in effect, if any.
current_anchor: Option<NaiveDate>,
/// Where `current_anchor` came from.
anchor_source: Option<AnchorSource>,
/// Confidence of `current_anchor`; 0.0 while no anchor is set.
anchor_confidence: f32,
/// The matched text that produced `current_anchor`, when there was one.
anchor_text: Option<String>,
}
impl Default for TemporalAnchorTracker {
fn default() -> Self {
Self::new()
}
}
impl TemporalAnchorTracker {
    /// Creates a tracker that has not seen any anchor yet.
    #[must_use]
    pub fn new() -> Self {
        Self {
            current_anchor: None,
            anchor_source: None,
            anchor_confidence: 0.0,
            anchor_text: None,
        }
    }

    /// Creates a tracker pre-seeded with a caller-supplied document date.
    ///
    /// The date is recorded as `AnchorSource::DocumentMetadata` with
    /// confidence 0.7 and no matched text.
    #[must_use]
    pub fn with_document_date(date: NaiveDate) -> Self {
        Self {
            current_anchor: Some(date),
            anchor_source: Some(AnchorSource::DocumentMetadata),
            anchor_confidence: 0.7,
            ..Self::new()
        }
    }

    /// Returns the anchor date currently in effect, if any.
    #[must_use]
    pub fn current_anchor(&self) -> Option<NaiveDate> {
        self.current_anchor
    }

    /// Inspects one line of text and adopts any date in it as the new
    /// anchor when it is preferable to the current one.
    ///
    /// A detected date replaces the current anchor when there is no anchor
    /// yet, when it has a strictly higher confidence, or when it comes from
    /// an explicit header. `char_offset` is stored verbatim in the returned
    /// info.
    ///
    /// Returns the newly adopted anchor, or `None` when the line contained
    /// no date or the date was not adopted.
    pub fn process_line(&mut self, line: &str, char_offset: usize) -> Option<TemporalAnchorInfo> {
        let (date, source, confidence, original_text) = detect_anchor_in_line(line)?;

        let replaces_current = self.current_anchor.is_none()
            || confidence > self.anchor_confidence
            || source == AnchorSource::ExplicitHeader;
        if !replaces_current {
            return None;
        }

        self.current_anchor = Some(date);
        self.anchor_source = Some(source);
        self.anchor_confidence = confidence;
        self.anchor_text = Some(original_text.clone());

        Some(TemporalAnchorInfo {
            date,
            source,
            confidence,
            original_text,
            char_offset,
        })
    }

    /// Describes the anchor currently in effect.
    ///
    /// The source falls back to `AnchorSource::Inherited` and the text to an
    /// empty string when they were not recorded; the offset is always 0.
    /// Returns `None` when no anchor is set.
    #[must_use]
    pub fn anchor_info(&self) -> Option<TemporalAnchorInfo> {
        let date = self.current_anchor?;
        let source = self.anchor_source.unwrap_or(AnchorSource::Inherited);
        let original_text = self.anchor_text.clone().unwrap_or_default();

        Some(TemporalAnchorInfo {
            date,
            source,
            confidence: self.anchor_confidence,
            original_text,
            char_offset: 0,
        })
    }
}
// ---------------------------------------------------------------------------
// Anchor (absolute date) patterns. All are compiled lazily on first use.
// ---------------------------------------------------------------------------

/// Session header such as `Session 5 (May 7, 2023)`; group 1 is the text
/// inside the parentheses. Case-insensitive.
static SESSION_HEADER_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)Session\s+\d+\s*\(([^)]+)\)").expect("valid regex"));
/// Bracketed date header, `[DATE: ...]` or `[SESSION_DATE: ...]`; group 1 is
/// everything between the colon and the closing bracket. Case-insensitive.
static DATE_HEADER_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\[(?:SESSION_)?DATE:\s*([^\]]+)\]").expect("valid regex"));
/// Year-first numeric date with `/` or `-` separators, e.g. `2023-05-07` or
/// `2023/5/7`; groups are year, month, day.
static ISO_DATE_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(\d{4})[/-](\d{1,2})[/-](\d{1,2})").expect("valid regex"));
/// Full month name, day, optional comma, four-digit year, e.g.
/// `May 7, 2023`; groups are month name, day, year. Case-insensitive.
static LONG_DATE_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})").expect("valid regex")
});
/// Abbreviated month name, day, optional comma, four-digit year, e.g.
/// `Sept. 3, 2021`; group 1 is only the three-letter prefix, then day, year.
/// NOTE(review): the `[a-z]*` after the prefix accepts any trailing letters,
/// so words that merely start with a month abbreviation (e.g.
/// "Marvelous 5, 2023") also match - confirm this is acceptable.
static SHORT_DATE_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"(?i)(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+(\d{1,2}),?\s+(\d{4})",
)
.expect("valid regex")
});
/// Slash-separated numeric date, e.g. `5/7/2023` or `5/7/23`;
/// `detect_anchor_in_line` reads the groups as month, day, year.
/// NOTE(review): `\d{2,4}` also admits three-digit years - confirm intended.
static SLASH_DATE_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(\d{1,2})/(\d{1,2})/(\d{2,4})").expect("valid regex"));

// ---------------------------------------------------------------------------
// Relative phrase patterns. All are case-insensitive and word-bounded.
// ---------------------------------------------------------------------------

/// `last year`, `this year`, `next year`.
static RELATIVE_YEAR_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\b(last|this|next)\s+year\b").expect("valid regex"));
/// `last month`, `this month`, `next month`.
static RELATIVE_MONTH_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\b(last|this|next)\s+month\b").expect("valid regex"));
/// `last week`, `this week`, `next week`.
static RELATIVE_WEEK_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\b(last|this|next)\s+week\b").expect("valid regex"));
/// `<count> <unit> ago`, e.g. `two days ago` or `3 months ago`; group 1 is
/// the count (digits, `a`, or `one`..`ten`), group 2 the unit.
static AGO_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)\b(\d+|a|one|two|three|four|five|six|seven|eight|nine|ten)\s+(days?|weeks?|months?|years?)\s+ago\b").expect("valid regex")
});
/// `in <count> <unit>`, e.g. `in a week` or `in 10 years`; same groups as
/// `AGO_PATTERN`.
static IN_FUTURE_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)\bin\s+(\d+|a|one|two|three|four|five|six|seven|eight|nine|ten)\s+(days?|weeks?|months?|years?)\b").expect("valid regex")
});
/// `yesterday`, `today`, `tomorrow`.
static RELATIVE_DAY_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\b(yesterday|today|tomorrow)\b").expect("valid regex"));
/// `last|this|next <weekday>`, e.g. `next Friday`; group 1 is the direction,
/// group 2 the full weekday name.
static RELATIVE_WEEKDAY_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"(?i)\b(last|this|next)\s+(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b",
)
.expect("valid regex")
});
fn detect_anchor_in_line(line: &str) -> Option<(NaiveDate, AnchorSource, f32, String)> {
if let Some(caps) = SESSION_HEADER_PATTERN.captures(line) {
if let Some(date_str) = caps.get(1) {
if let Some(date) = parse_date_string(date_str.as_str()) {
return Some((
date,
AnchorSource::ExplicitHeader,
0.95,
caps[0].to_string(),
));
}
}
}
if let Some(caps) = DATE_HEADER_PATTERN.captures(line) {
if let Some(date_str) = caps.get(1) {
if let Some(date) = parse_date_string(date_str.as_str()) {
return Some((
date,
AnchorSource::ExplicitHeader,
0.95,
caps[0].to_string(),
));
}
}
}
if let Some(caps) = ISO_DATE_PATTERN.captures(line) {
let year: i32 = caps[1].parse().ok()?;
let month: u32 = caps[2].parse().ok()?;
let day: u32 = caps[3].parse().ok()?;
if let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
return Some((date, AnchorSource::InlineDate, 0.9, caps[0].to_string()));
}
}
if let Some(caps) = LONG_DATE_PATTERN.captures(line) {
let month_str = &caps[1];
let day: u32 = caps[2].parse().ok()?;
let year: i32 = caps[3].parse().ok()?;
let month = month_name_to_number(month_str)?;
if let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
return Some((date, AnchorSource::InlineDate, 0.85, caps[0].to_string()));
}
}
if let Some(caps) = SHORT_DATE_PATTERN.captures(line) {
let month_str = &caps[1];
let day: u32 = caps[2].parse().ok()?;
let year: i32 = caps[3].parse().ok()?;
let month = month_name_to_number(month_str)?;
if let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
return Some((date, AnchorSource::InlineDate, 0.85, caps[0].to_string()));
}
}
if let Some(caps) = SLASH_DATE_PATTERN.captures(line) {
let month: u32 = caps[1].parse().ok()?;
let day: u32 = caps[2].parse().ok()?;
let mut year: i32 = caps[3].parse().ok()?;
if year < 100 {
year += if year > 50 { 1900 } else { 2000 };
}
if let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
return Some((date, AnchorSource::InlineDate, 0.7, caps[0].to_string()));
}
}
None
}
/// Parses the free-form date text taken from a header.
///
/// Surrounding whitespace is ignored. Accepted forms, in order:
/// `YYYY-MM-DD`, `YYYY/MM/DD`, `YYYY/MM/DD (Www) HH:MM`, then a full
/// month-name date and finally an abbreviated month-name date found anywhere
/// in the text. Returns `None` when nothing matches or the matched date does
/// not exist.
fn parse_date_string(s: &str) -> Option<NaiveDate> {
    let candidate = s.trim();

    // Plain numeric dates must match the whole string.
    let numeric = ["%Y-%m-%d", "%Y/%m/%d"]
        .iter()
        .find_map(|layout| NaiveDate::parse_from_str(candidate, layout).ok());
    if numeric.is_some() {
        return numeric;
    }

    // Numeric date followed by a weekday and a time of day.
    if let Ok(stamp) = NaiveDateTime::parse_from_str(candidate, "%Y/%m/%d (%a) %H:%M") {
        return Some(stamp.date());
    }

    // Month-name dates: whichever pattern matches first decides the outcome.
    let caps = LONG_DATE_PATTERN
        .captures(candidate)
        .or_else(|| SHORT_DATE_PATTERN.captures(candidate))?;
    let day: u32 = caps[2].parse().ok()?;
    let year: i32 = caps[3].parse().ok()?;
    let month = month_name_to_number(&caps[1])?;
    NaiveDate::from_ymd_opt(year, month, day)
}
/// Maps an English month name to its number (1-12), ignoring case.
///
/// Accepts the full name, its three-letter abbreviation, and `sept`.
/// Returns `None` for anything else.
fn month_name_to_number(name: &str) -> Option<u32> {
    const MONTH_NAMES: [&str; 12] = [
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ];

    let key = name.to_lowercase();
    // The only accepted abbreviation that is not exactly three letters long.
    if key == "sept" {
        return Some(9);
    }

    (1u32..)
        .zip(MONTH_NAMES.iter())
        .find(|&(_, full)| key == *full || key == full[..3])
        .map(|(number, _)| number)
}
/// Converts a count written as a word or as digits into a number.
///
/// `a` and `one` through `ten` are recognised case-insensitively; any other
/// input is parsed as an `i32`. Returns `None` when neither applies.
fn parse_number_word(s: &str) -> Option<i32> {
    const NUMBER_WORDS: [(&str, i32); 11] = [
        ("a", 1),
        ("one", 1),
        ("two", 2),
        ("three", 3),
        ("four", 4),
        ("five", 5),
        ("six", 6),
        ("seven", 7),
        ("eight", 8),
        ("nine", 9),
        ("ten", 10),
    ];

    let normalized = s.to_lowercase();
    NUMBER_WORDS
        .iter()
        .find(|(word, _)| *word == normalized)
        .map(|&(_, value)| value)
        .or_else(|| normalized.parse().ok())
}
#[must_use]
pub fn detect_relative_phrases(text: &str) -> Vec<(String, usize, usize)> {
let mut phrases = Vec::new();
for caps in RELATIVE_YEAR_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
for caps in RELATIVE_MONTH_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
for caps in RELATIVE_WEEK_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
for caps in AGO_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
for caps in IN_FUTURE_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
for caps in RELATIVE_DAY_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
for caps in RELATIVE_WEEKDAY_PATTERN.captures_iter(text) {
let m = caps.get(0).expect("full match");
phrases.push((m.as_str().to_string(), m.start(), m.len()));
}
phrases.sort_by_key(|(_, pos, _)| *pos);
phrases
}
#[must_use]
pub fn resolve_relative_phrase(phrase: &str, anchor: NaiveDate) -> Option<ResolvedTemporal> {
let lower = phrase.to_lowercase();
if lower.contains("last year") {
return Some(ResolvedTemporal::Year(anchor.year() - 1));
}
if lower.contains("this year") {
return Some(ResolvedTemporal::Year(anchor.year()));
}
if lower.contains("next year") {
return Some(ResolvedTemporal::Year(anchor.year() + 1));
}
if lower.contains("last month") {
let (y, m) = if anchor.month() == 1 {
(anchor.year() - 1, 12)
} else {
(anchor.year(), anchor.month() - 1)
};
return Some(ResolvedTemporal::Month { year: y, month: m });
}
if lower.contains("this month") {
return Some(ResolvedTemporal::Month {
year: anchor.year(),
month: anchor.month(),
});
}
if lower.contains("next month") {
let (y, m) = if anchor.month() == 12 {
(anchor.year() + 1, 1)
} else {
(anchor.year(), anchor.month() + 1)
};
return Some(ResolvedTemporal::Month { year: y, month: m });
}
if lower.contains("last week") {
let start =
anchor - chrono::Duration::days(7 + anchor.weekday().num_days_from_monday() as i64);
let end = start + chrono::Duration::days(6);
return Some(ResolvedTemporal::DateRange { start, end });
}
if lower.contains("this week") {
let start = anchor - chrono::Duration::days(anchor.weekday().num_days_from_monday() as i64);
let end = start + chrono::Duration::days(6);
return Some(ResolvedTemporal::DateRange { start, end });
}
if lower.contains("next week") {
let start =
anchor + chrono::Duration::days(7 - anchor.weekday().num_days_from_monday() as i64);
let end = start + chrono::Duration::days(6);
return Some(ResolvedTemporal::DateRange { start, end });
}
if lower == "yesterday" {
return Some(ResolvedTemporal::Date(anchor - chrono::Duration::days(1)));
}
if lower == "today" {
return Some(ResolvedTemporal::Date(anchor));
}
if lower == "tomorrow" {
return Some(ResolvedTemporal::Date(anchor + chrono::Duration::days(1)));
}
if let Some(caps) = AGO_PATTERN.captures(&lower) {
let count = parse_number_word(&caps[1])?;
let unit = &caps[2];
return match unit {
u if u.starts_with("day") => Some(ResolvedTemporal::Date(
anchor - chrono::Duration::days(count as i64),
)),
u if u.starts_with("week") => Some(ResolvedTemporal::Date(
anchor - chrono::Duration::weeks(count as i64),
)),
u if u.starts_with("month") => {
let total_months = anchor.year() * 12 + anchor.month() as i32 - count;
let new_year = (total_months - 1) / 12;
let new_month = ((total_months - 1) % 12 + 1) as u32;
NaiveDate::from_ymd_opt(new_year, new_month, anchor.day().min(28))
.map(ResolvedTemporal::Date)
}
u if u.starts_with("year") => Some(ResolvedTemporal::Year(anchor.year() - count)),
_ => None,
};
}
if let Some(caps) = IN_FUTURE_PATTERN.captures(&lower) {
let count = parse_number_word(&caps[1])?;
let unit = &caps[2];
return match unit {
u if u.starts_with("day") => Some(ResolvedTemporal::Date(
anchor + chrono::Duration::days(count as i64),
)),
u if u.starts_with("week") => Some(ResolvedTemporal::Date(
anchor + chrono::Duration::weeks(count as i64),
)),
u if u.starts_with("month") => {
let total_months = anchor.year() * 12 + anchor.month() as i32 + count;
let new_year = (total_months - 1) / 12;
let new_month = ((total_months - 1) % 12 + 1) as u32;
NaiveDate::from_ymd_opt(new_year, new_month, anchor.day().min(28))
.map(ResolvedTemporal::Date)
}
u if u.starts_with("year") => Some(ResolvedTemporal::Year(anchor.year() + count)),
_ => None,
};
}
if let Some(caps) = RELATIVE_WEEKDAY_PATTERN.captures(&lower) {
let direction = &caps[1];
let weekday_name = &caps[2];
let target_weekday = match weekday_name.to_lowercase().as_str() {
"monday" => chrono::Weekday::Mon,
"tuesday" => chrono::Weekday::Tue,
"wednesday" => chrono::Weekday::Wed,
"thursday" => chrono::Weekday::Thu,
"friday" => chrono::Weekday::Fri,
"saturday" => chrono::Weekday::Sat,
"sunday" => chrono::Weekday::Sun,
_ => return None,
};
let current_weekday = anchor.weekday();
let days_diff = (target_weekday.num_days_from_monday() as i64)
- (current_weekday.num_days_from_monday() as i64);
let result_date = match direction.to_lowercase().as_str() {
"last" => {
let mut offset = days_diff;
if offset >= 0 {
offset -= 7;
}
anchor + chrono::Duration::days(offset)
}
"this" => anchor + chrono::Duration::days(days_diff),
"next" => {
let mut offset = days_diff;
if offset <= 0 {
offset += 7;
}
anchor + chrono::Duration::days(offset)
}
_ => return None,
};
return Some(ResolvedTemporal::Date(result_date));
}
None
}
/// Analyses one chunk of text: updates `tracker` with any anchor dates the
/// chunk contains, finds its relative time phrases and resolves them.
///
/// The anchor used for resolution is the last one adopted while scanning the
/// chunk or, if none was adopted, whatever `tracker` already held. Without
/// any anchor the phrases are still reported, just unresolved.
///
/// `context_block` is only set when at least one phrase was resolved. It has
/// the form `Document date context: <date> Temporal references: - "<phrase>"
/// refers to <value> ...`, all on one line.
///
/// The offset handed to the tracker for each line is the byte offset of the
/// line's first character within `text`, for both `\n` and `\r\n` line
/// endings.
#[must_use]
pub fn enrich_chunk(text: &str, tracker: &mut TemporalAnchorTracker) -> TemporalEnrichment {
    let mut result = TemporalEnrichment::default();

    // Split while keeping the terminators so the running offset stays exact
    // no matter how long each terminator is.
    let mut line_start = 0;
    for segment in text.split_inclusive('\n') {
        let line = match segment.strip_suffix('\n') {
            Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
            None => segment,
        };
        if let Some(anchor_info) = tracker.process_line(line, line_start) {
            result.anchor = Some(anchor_info);
        }
        line_start += segment.len();
    }

    if result.anchor.is_none() {
        result.anchor = tracker.anchor_info();
    }

    let anchor_date = result.anchor.as_ref().map(|info| info.date);
    result.relative_phrases = detect_relative_phrases(text)
        .into_iter()
        .map(|(phrase, char_offset, length)| {
            let resolved = anchor_date.and_then(|date| resolve_relative_phrase(&phrase, date));
            RelativePhrase {
                phrase,
                char_offset,
                length,
                resolved,
            }
        })
        .collect();

    let reference_lines: Vec<String> = result
        .relative_phrases
        .iter()
        .filter_map(|entry| {
            entry.resolved.as_ref().map(|resolved| {
                format!(
                    "- \"{}\" refers to {}",
                    entry.phrase,
                    resolved.to_display_string()
                )
            })
        })
        .collect();

    if !reference_lines.is_empty() {
        let mut context_parts = Vec::with_capacity(reference_lines.len() + 2);
        if let Some(anchor) = &result.anchor {
            context_parts.push(format!(
                "Document date context: {}",
                anchor.date.format("%B %d, %Y")
            ));
        }
        context_parts.push("Temporal references:".to_string());
        context_parts.extend(reference_lines);
        result.context_block = Some(context_parts.join(" "));
    }

    result
}
/// Enriches a whole document in one go.
///
/// `document_date`, when given, seeds the anchor tracker. If any relative
/// phrase could be resolved, the text is returned with a
/// `[Temporal Context: ...]` block appended after a blank line; otherwise an
/// unchanged copy of the text is returned.
#[must_use]
pub fn enrich_document(text: &str, document_date: Option<NaiveDate>) -> String {
    let mut tracker = document_date.map_or_else(
        TemporalAnchorTracker::new,
        TemporalAnchorTracker::with_document_date,
    );

    match enrich_chunk(text, &mut tracker).context_block {
        Some(context) => format!("{text}\n\n[Temporal Context: {context}]"),
        None => text.to_owned(),
    }
}
/// Enriches consecutive chunks of one document with a shared anchor tracker,
/// so an anchor found in an earlier chunk carries over to later ones.
///
/// `document_date`, when given, seeds the tracker. Each result pairs the
/// chunk text (with a `[Temporal Context: ...]` block appended when any
/// phrase was resolved) with the enrichment details for that chunk, in input
/// order.
pub fn enrich_chunks(
    chunks: &[String],
    document_date: Option<NaiveDate>,
) -> Vec<(String, TemporalEnrichment)> {
    let mut tracker = document_date.map_or_else(
        TemporalAnchorTracker::new,
        TemporalAnchorTracker::with_document_date,
    );

    let mut enriched = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        let enrichment = enrich_chunk(chunk, &mut tracker);
        let enriched_text = match enrichment.context_block.as_deref() {
            Some(context) => format!("{chunk}\n\n[Temporal Context: {context}]"),
            None => chunk.clone(),
        };
        enriched.push((enriched_text, enrichment));
    }
    enriched
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a calendar date that is known to exist.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).expect("valid date")
    }

    /// A session header yields an explicit-header anchor with the parsed date.
    #[test]
    fn test_anchor_detection_session_header() {
        let mut tracker = TemporalAnchorTracker::new();

        let info = tracker
            .process_line("=== Session 5 (May 7, 2023) ===", 0)
            .expect("anchor info");

        assert_eq!(info.date, ymd(2023, 5, 7));
        assert_eq!(info.source, AnchorSource::ExplicitHeader);
    }

    /// An ISO date inside a sentence is picked up as an anchor.
    #[test]
    fn test_anchor_detection_iso_date() {
        let mut tracker = TemporalAnchorTracker::new();

        let info = tracker
            .process_line("Event occurred on 2023-05-07 at noon.", 0)
            .expect("anchor info");

        assert_eq!(info.date, ymd(2023, 5, 7));
    }

    /// Phrases are reported in text order with their original casing.
    #[test]
    fn test_relative_phrase_detection() {
        let text = "I did this last year. We'll meet next week. Two days ago was fun.";

        let found: Vec<String> = detect_relative_phrases(text)
            .into_iter()
            .map(|(phrase, _, _)| phrase)
            .collect();

        assert_eq!(found, ["last year", "next week", "Two days ago"]);
    }

    /// "last year" resolves to the year before the anchor's year.
    #[test]
    fn test_resolve_last_year() {
        match resolve_relative_phrase("last year", ymd(2023, 5, 7)) {
            Some(ResolvedTemporal::Year(year)) => assert_eq!(year, 2022),
            _ => panic!("Expected Year resolution"),
        }
    }

    /// "last week" resolves to the Monday-to-Sunday week before the anchor's
    /// week (2023-05-10 is a Wednesday).
    #[test]
    fn test_resolve_last_week() {
        match resolve_relative_phrase("last week", ymd(2023, 5, 10)) {
            Some(ResolvedTemporal::DateRange { start, end }) => {
                assert_eq!(start, ymd(2023, 5, 1));
                assert_eq!(end, ymd(2023, 5, 7));
            }
            _ => panic!("Expected DateRange resolution"),
        }
    }

    /// A chunk with a header and one phrase gets an anchor, a resolved phrase
    /// and a context block mentioning the resolved year.
    #[test]
    fn test_enrich_chunk() {
        let mut tracker = TemporalAnchorTracker::new();
        let text = "=== Session 1 (May 7, 2023) ===\n\nI painted a sunrise last year.";

        let enrichment = enrich_chunk(text, &mut tracker);

        assert!(enrichment.anchor.is_some());
        assert_eq!(enrichment.relative_phrases.len(), 1);
        assert!(enrichment.relative_phrases[0].resolved.is_some());
        let context = enrichment.context_block.as_ref().expect("context");
        assert!(context.contains("2022"));
    }

    /// Whole-document enrichment appends the temporal context block.
    #[test]
    fn test_enrich_document() {
        let text = "Session 1 (May 7, 2023)\n\nMelanie: I painted a sunrise last year.";

        let enriched = enrich_document(text, None);

        assert!(enriched.contains("[Temporal Context:"));
        assert!(enriched.contains("2022"));
    }

    /// An anchor found in the first chunk is used to resolve phrases in the
    /// second chunk, which has no date of its own.
    #[test]
    fn test_anchor_propagation() {
        let chunks = vec![
            "Session 1 (May 7, 2023)\n\nHello!".to_string(),
            "This happened last year.".to_string(),
        ];

        let results = enrich_chunks(&chunks, None);
        let second = &results[1].1;

        assert!(second.anchor.is_some());
        assert!(!second.relative_phrases.is_empty());
        assert!(second.relative_phrases[0].resolved.is_some());
    }
}