1use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc, TimeZone, FixedOffset, Datelike};
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9use wasm_bindgen::prelude::*;
10use lazy_static::lazy_static;
11
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
14pub struct ParsedDate {
15 pub datetime: DateTime<Utc>,
17 pub detected_format: String,
19 pub confidence: f64,
21 pub original_input: String,
23 pub timezone: Option<String>,
25 pub metadata: ParseMetadata,
27}
28
29#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
31pub struct ParseMetadata {
32 pub is_ambiguous: bool,
34 pub alternatives: Vec<DateTime<Utc>>,
36 pub inferred_components: Vec<String>,
38 pub parsing_method: String,
40}
41
42#[derive(Debug, Clone)]
44pub struct ParserConfig {
45 pub prefer_dmy: bool,
47 pub default_year: Option<i32>,
49 pub strict_mode: bool,
51 pub fuzzy_parsing: bool,
53 pub custom_patterns: Vec<String>,
55 pub timezone_strategy: TimezoneStrategy,
57}
58
59#[derive(Debug, Clone, PartialEq)]
61pub enum TimezoneStrategy {
62 AssumeUtc,
64 AssumeLocal,
66 UseDefault(FixedOffset),
68 RequireTimezone,
70}
71
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum ParseError {
75 UnrecognizedFormat(String),
77 AmbiguousDate(String, Vec<String>),
79 InvalidDate(String),
81 MissingTimezone(String),
83 InvalidTimezone(String),
85 ParseError(String),
87}
88
89impl Default for ParserConfig {
90 fn default() -> Self {
91 Self {
92 prefer_dmy: false,
93 default_year: None,
94 strict_mode: false,
95 fuzzy_parsing: true,
96 custom_patterns: Vec::new(),
97 timezone_strategy: TimezoneStrategy::AssumeUtc,
98 }
99 }
100}
101
102lazy_static! {
104 static ref DATE_PATTERNS: Vec<DatePattern> = vec![
105 DatePattern::new(
107 r"(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})(?:\.(\d+))?(?:Z|([+-]\d{2}):?(\d{2}))?",
108 "ISO 8601 DateTime",
109 1.0,
110 ParseMethod::Iso8601
111 ),
112 DatePattern::new(
113 r"(\d{4})-(\d{1,2})-(\d{1,2})",
114 "ISO 8601 Date",
115 0.95,
116 ParseMethod::Iso8601
117 ),
118
119 DatePattern::new(
121 r"(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun),?\s*(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})\s+(\d{1,2}):(\d{1,2}):(\d{1,2})\s*([+-]\d{4}|\w+)",
122 "RFC 2822",
123 0.9,
124 ParseMethod::Rfc2822
125 ),
126
127 DatePattern::new(
129 r"(\d{1,2})/(\d{1,2})/(\d{4})",
130 "US Format MM/DD/YYYY",
131 0.7,
132 ParseMethod::UsFormat
133 ),
134
135 DatePattern::new(
137 r"(\d{1,2})/(\d{1,2})/(\d{4})",
138 "European Format DD/MM/YYYY",
139 0.7,
140 ParseMethod::EuropeanFormat
141 ),
142
143 DatePattern::new(
145 r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})",
146 "Long Format Month DD, YYYY",
147 0.85,
148 ParseMethod::LongFormat
149 ),
150
151 DatePattern::new(
153 r"^(\d{10})$",
154 "Unix Timestamp",
155 0.8,
156 ParseMethod::UnixTimestamp
157 ),
158
159 DatePattern::new(
161 r"^(\d{13})$",
162 "Unix Timestamp (ms)",
163 0.8,
164 ParseMethod::UnixTimestampMs
165 ),
166
167 DatePattern::new(
169 r"(yesterday|today|tomorrow)",
170 "Relative Date",
171 0.9,
172 ParseMethod::Relative
173 ),
174
175 DatePattern::new(
177 r"(\d{1,2})[-/\.](\d{1,2})[-/\.](\d{2,4})",
178 "Flexible Separator Format",
179 0.6,
180 ParseMethod::FlexibleSeparator
181 ),
182 ];
183}
184
185#[derive(Debug, Clone)]
186struct DatePattern {
187 regex: Regex,
188 name: String,
189 confidence: f64,
190 method: ParseMethod,
191}
192
/// Selects the routine used to interpret the captures of a `DatePattern`.
#[derive(Debug, Clone, PartialEq)]
enum ParseMethod {
    /// ISO 8601 date or date-time.
    Iso8601,
    /// RFC 2822 (e-mail style) date-time.
    Rfc2822,
    /// Slash-separated numeric date read as month/day/year.
    UsFormat,
    /// Slash-separated numeric date read as day/month/year.
    EuropeanFormat,
    /// Full month name followed by day and year.
    LongFormat,
    /// Seconds since the Unix epoch.
    UnixTimestamp,
    /// Milliseconds since the Unix epoch.
    UnixTimestampMs,
    /// `yesterday`, `today` or `tomorrow`.
    Relative,
    /// Numeric date with `-`, `/` or `.` separators.
    FlexibleSeparator,
}
205
206impl DatePattern {
207 fn new(pattern: &str, name: &str, confidence: f64, method: ParseMethod) -> Self {
208 Self {
209 regex: Regex::new(pattern).unwrap(),
210 name: name.to_string(),
211 confidence,
212 method,
213 }
214 }
215}
216
217pub struct UniversalDateParser {
219 config: ParserConfig,
220}
221
222impl UniversalDateParser {
223 pub fn new() -> Self {
225 Self {
226 config: ParserConfig::default(),
227 }
228 }
229
230 pub fn with_config(config: ParserConfig) -> Self {
232 Self { config }
233 }
234
235 pub fn parse(&self, input: &str) -> Result<ParsedDate, ParseError> {
237 let input = input.trim();
238
239 if input.is_empty() {
240 return Err(ParseError::ParseError("Empty input".to_string()));
241 }
242
243 for pattern in &self.config.custom_patterns {
245 if let Ok(result) = self.try_custom_pattern(input, pattern) {
246 return Ok(result);
247 }
248 }
249
250 for pattern in DATE_PATTERNS.iter() {
252 if let Some(captures) = pattern.regex.captures(input) {
253 match self.parse_with_pattern(input, pattern, &captures) {
254 Ok(mut result) => {
255 if self.config.strict_mode && result.metadata.is_ambiguous {
257 return Err(ParseError::AmbiguousDate(
258 input.to_string(),
259 result.metadata.alternatives.iter()
260 .map(|dt| dt.to_rfc3339())
261 .collect()
262 ));
263 }
264
265 result.original_input = input.to_string();
266 return Ok(result);
267 }
268 Err(_) => continue,
269 }
270 }
271 }
272
273 if self.config.fuzzy_parsing {
275 if let Ok(result) = self.fuzzy_parse(input) {
276 return Ok(result);
277 }
278 }
279
280 Err(ParseError::UnrecognizedFormat(input.to_string()))
281 }
282
283 pub fn parse_all_possibilities(&self, input: &str) -> Vec<Result<ParsedDate, ParseError>> {
285 let mut results = Vec::new();
286 let input = input.trim();
287
288 for pattern in DATE_PATTERNS.iter() {
289 if let Some(captures) = pattern.regex.captures(input) {
290 let result = self.parse_with_pattern(input, pattern, &captures);
291 results.push(result);
292 }
293 }
294
295 results
296 }
297
298 fn parse_with_pattern(
299 &self,
300 input: &str,
301 pattern: &DatePattern,
302 captures: ®ex::Captures,
303 ) -> Result<ParsedDate, ParseError> {
304 match pattern.method {
305 ParseMethod::Iso8601 => self.parse_iso8601(input, captures, pattern),
306 ParseMethod::Rfc2822 => self.parse_rfc2822(input, captures, pattern),
307 ParseMethod::UsFormat => self.parse_us_format(input, captures, pattern),
308 ParseMethod::EuropeanFormat => self.parse_european_format(input, captures, pattern),
309 ParseMethod::LongFormat => self.parse_long_format(input, captures, pattern),
310 ParseMethod::UnixTimestamp => self.parse_unix_timestamp(input, captures, pattern),
311 ParseMethod::UnixTimestampMs => self.parse_unix_timestamp_ms(input, captures, pattern),
312 ParseMethod::Relative => self.parse_relative(input, captures, pattern),
313 ParseMethod::FlexibleSeparator => self.parse_flexible_separator(input, captures, pattern),
314 }
315 }
316
317 fn parse_iso8601(
318 &self,
319 input: &str,
320 captures: ®ex::Captures,
321 pattern: &DatePattern,
322 ) -> Result<ParsedDate, ParseError> {
323 let year: i32 = captures.get(1).unwrap().as_str().parse()
324 .map_err(|_| ParseError::InvalidDate("Invalid year".to_string()))?;
325 let month: u32 = captures.get(2).unwrap().as_str().parse()
326 .map_err(|_| ParseError::InvalidDate("Invalid month".to_string()))?;
327 let day: u32 = captures.get(3).unwrap().as_str().parse()
328 .map_err(|_| ParseError::InvalidDate("Invalid day".to_string()))?;
329
330 if captures.get(4).is_some() {
331 let hour: u32 = captures.get(4).unwrap().as_str().parse()
333 .map_err(|_| ParseError::InvalidDate("Invalid hour".to_string()))?;
334 let minute: u32 = captures.get(5).unwrap().as_str().parse()
335 .map_err(|_| ParseError::InvalidDate("Invalid minute".to_string()))?;
336 let second: u32 = captures.get(6).unwrap().as_str().parse()
337 .map_err(|_| ParseError::InvalidDate("Invalid second".to_string()))?;
338
339 let naive_dt = NaiveDate::from_ymd_opt(year, month, day)
340 .and_then(|d| d.and_hms_opt(hour, minute, second))
341 .ok_or_else(|| ParseError::InvalidDate("Invalid date components".to_string()))?;
342
343 let datetime = if let Some(tz_match) = captures.get(8) {
345 let tz_str = tz_match.as_str();
347 let offset = parse_timezone_offset(tz_str)?;
348 offset.from_local_datetime(&naive_dt)
349 .single()
350 .ok_or_else(|| ParseError::InvalidTimezone("Ambiguous local time".to_string()))?
351 .with_timezone(&Utc)
352 } else if input.ends_with('Z') {
353 Utc.from_local_datetime(&naive_dt)
355 .single()
356 .ok_or_else(|| ParseError::InvalidDate("Invalid UTC time".to_string()))?
357 } else {
358 self.apply_timezone_strategy(naive_dt)?
360 };
361
362 Ok(ParsedDate {
363 datetime,
364 detected_format: pattern.name.clone(),
365 confidence: pattern.confidence,
366 original_input: input.to_string(),
367 timezone: if input.ends_with('Z') { Some("UTC".to_string()) } else { None },
368 metadata: ParseMetadata {
369 is_ambiguous: false,
370 alternatives: Vec::new(),
371 inferred_components: Vec::new(),
372 parsing_method: "ISO8601".to_string(),
373 },
374 })
375 } else {
376 let naive_date = NaiveDate::from_ymd_opt(year, month, day)
378 .ok_or_else(|| ParseError::InvalidDate("Invalid date components".to_string()))?;
379 let naive_dt = naive_date.and_hms_opt(0, 0, 0).unwrap();
380 let datetime = self.apply_timezone_strategy(naive_dt)?;
381
382 Ok(ParsedDate {
383 datetime,
384 detected_format: pattern.name.clone(),
385 confidence: pattern.confidence,
386 original_input: input.to_string(),
387 timezone: None,
388 metadata: ParseMetadata {
389 is_ambiguous: false,
390 alternatives: Vec::new(),
391 inferred_components: vec!["time".to_string()],
392 parsing_method: "ISO8601".to_string(),
393 },
394 })
395 }
396 }
397
398 fn parse_us_format(
399 &self,
400 input: &str,
401 captures: ®ex::Captures,
402 pattern: &DatePattern,
403 ) -> Result<ParsedDate, ParseError> {
404 let month: u32 = captures.get(1).unwrap().as_str().parse()
405 .map_err(|_| ParseError::InvalidDate("Invalid month".to_string()))?;
406 let day: u32 = captures.get(2).unwrap().as_str().parse()
407 .map_err(|_| ParseError::InvalidDate("Invalid day".to_string()))?;
408 let year: i32 = captures.get(3).unwrap().as_str().parse()
409 .map_err(|_| ParseError::InvalidDate("Invalid year".to_string()))?;
410
411 self.create_date_result(year, month, day, pattern, "US Format", input)
412 }
413
414 fn parse_european_format(
415 &self,
416 input: &str,
417 captures: ®ex::Captures,
418 pattern: &DatePattern,
419 ) -> Result<ParsedDate, ParseError> {
420 let day: u32 = captures.get(1).unwrap().as_str().parse()
421 .map_err(|_| ParseError::InvalidDate("Invalid day".to_string()))?;
422 let month: u32 = captures.get(2).unwrap().as_str().parse()
423 .map_err(|_| ParseError::InvalidDate("Invalid month".to_string()))?;
424 let year: i32 = captures.get(3).unwrap().as_str().parse()
425 .map_err(|_| ParseError::InvalidDate("Invalid year".to_string()))?;
426
427 self.create_date_result(year, month, day, pattern, "European Format", input)
428 }
429
430 fn parse_unix_timestamp(
431 &self,
432 input: &str,
433 captures: ®ex::Captures,
434 pattern: &DatePattern,
435 ) -> Result<ParsedDate, ParseError> {
436 let timestamp: i64 = captures.get(1).unwrap().as_str().parse()
437 .map_err(|_| ParseError::InvalidDate("Invalid timestamp".to_string()))?;
438
439 let datetime = DateTime::from_timestamp(timestamp, 0)
440 .ok_or_else(|| ParseError::InvalidDate("Invalid timestamp value".to_string()))?;
441
442 Ok(ParsedDate {
443 datetime,
444 detected_format: pattern.name.clone(),
445 confidence: pattern.confidence,
446 original_input: input.to_string(),
447 timezone: Some("UTC".to_string()),
448 metadata: ParseMetadata {
449 is_ambiguous: false,
450 alternatives: Vec::new(),
451 inferred_components: Vec::new(),
452 parsing_method: "Unix Timestamp".to_string(),
453 },
454 })
455 }
456
457 fn parse_unix_timestamp_ms(
458 &self,
459 input: &str,
460 captures: ®ex::Captures,
461 pattern: &DatePattern,
462 ) -> Result<ParsedDate, ParseError> {
463 let timestamp_ms: i64 = captures.get(1).unwrap().as_str().parse()
464 .map_err(|_| ParseError::InvalidDate("Invalid timestamp".to_string()))?;
465
466 let secs = timestamp_ms / 1000;
467 let nsecs = ((timestamp_ms % 1000) * 1_000_000) as u32;
468
469 let datetime = DateTime::from_timestamp(secs, nsecs)
470 .ok_or_else(|| ParseError::InvalidDate("Invalid timestamp value".to_string()))?;
471
472 Ok(ParsedDate {
473 datetime,
474 detected_format: pattern.name.clone(),
475 confidence: pattern.confidence,
476 original_input: input.to_string(),
477 timezone: Some("UTC".to_string()),
478 metadata: ParseMetadata {
479 is_ambiguous: false,
480 alternatives: Vec::new(),
481 inferred_components: Vec::new(),
482 parsing_method: "Unix Timestamp (ms)".to_string(),
483 },
484 })
485 }
486
487 fn create_date_result(
488 &self,
489 year: i32,
490 month: u32,
491 day: u32,
492 pattern: &DatePattern,
493 method: &str,
494 input: &str,
495 ) -> Result<ParsedDate, ParseError> {
496 let naive_date = NaiveDate::from_ymd_opt(year, month, day)
497 .ok_or_else(|| ParseError::InvalidDate("Invalid date components".to_string()))?;
498 let naive_dt = naive_date.and_hms_opt(0, 0, 0).unwrap();
499 let datetime = self.apply_timezone_strategy(naive_dt)?;
500
501 let is_ambiguous = day <= 12 && month <= 12 && day != month;
503 let mut alternatives = Vec::new();
504
505 if is_ambiguous && !self.config.prefer_dmy {
506 if let Some(alt_date) = NaiveDate::from_ymd_opt(year, day, month) {
508 if let Some(alt_dt) = alt_date.and_hms_opt(0, 0, 0) {
509 if let Ok(alt_datetime) = self.apply_timezone_strategy(alt_dt) {
510 alternatives.push(alt_datetime);
511 }
512 }
513 }
514 }
515
516 Ok(ParsedDate {
517 datetime,
518 detected_format: pattern.name.clone(),
519 confidence: if is_ambiguous { pattern.confidence * 0.8 } else { pattern.confidence },
520 original_input: input.to_string(),
521 timezone: None,
522 metadata: ParseMetadata {
523 is_ambiguous,
524 alternatives,
525 inferred_components: vec!["time".to_string()],
526 parsing_method: method.to_string(),
527 },
528 })
529 }
530
531 fn apply_timezone_strategy(&self, naive_dt: NaiveDateTime) -> Result<DateTime<Utc>, ParseError> {
532 match &self.config.timezone_strategy {
533 TimezoneStrategy::AssumeUtc => {
534 Ok(Utc.from_local_datetime(&naive_dt)
535 .single()
536 .ok_or_else(|| ParseError::InvalidDate("Invalid UTC time".to_string()))?)
537 }
538 TimezoneStrategy::UseDefault(offset) => {
539 Ok(offset.from_local_datetime(&naive_dt)
540 .single()
541 .ok_or_else(|| ParseError::InvalidTimezone("Ambiguous local time".to_string()))?
542 .with_timezone(&Utc))
543 }
544 TimezoneStrategy::RequireTimezone => {
545 Err(ParseError::MissingTimezone("Timezone required but not specified".to_string()))
546 }
547 TimezoneStrategy::AssumeLocal => {
548 Ok(Utc.from_local_datetime(&naive_dt)
551 .single()
552 .ok_or_else(|| ParseError::InvalidDate("Invalid local time".to_string()))?)
553 }
554 }
555 }
556
557 fn parse_rfc2822(&self, _input: &str, _captures: ®ex::Captures, _pattern: &DatePattern) -> Result<ParsedDate, ParseError> {
559 Err(ParseError::ParseError("RFC2822 parsing not implemented".to_string()))
560 }
561
562 fn parse_long_format(&self, _input: &str, _captures: ®ex::Captures, _pattern: &DatePattern) -> Result<ParsedDate, ParseError> {
563 Err(ParseError::ParseError("Long format parsing not implemented".to_string()))
564 }
565
566 fn parse_relative(&self, _input: &str, _captures: ®ex::Captures, _pattern: &DatePattern) -> Result<ParsedDate, ParseError> {
567 Err(ParseError::ParseError("Relative parsing not implemented".to_string()))
568 }
569
570 fn parse_flexible_separator(&self, _input: &str, _captures: ®ex::Captures, _pattern: &DatePattern) -> Result<ParsedDate, ParseError> {
571 Err(ParseError::ParseError("Flexible separator parsing not implemented".to_string()))
572 }
573
574 fn try_custom_pattern(&self, _input: &str, _pattern: &str) -> Result<ParsedDate, ParseError> {
575 Err(ParseError::ParseError("Custom pattern parsing not implemented".to_string()))
576 }
577
578 fn fuzzy_parse(&self, _input: &str) -> Result<ParsedDate, ParseError> {
579 Err(ParseError::ParseError("Fuzzy parsing not implemented".to_string()))
580 }
581}
582
583impl Default for UniversalDateParser {
584 fn default() -> Self {
585 Self::new()
586 }
587}
588
589fn parse_timezone_offset(tz_str: &str) -> Result<FixedOffset, ParseError> {
590 if tz_str.len() >= 5 {
591 let sign = if tz_str.starts_with('+') { 1 } else { -1 };
592 let hours: i32 = tz_str[1..3].parse()
593 .map_err(|_| ParseError::InvalidTimezone("Invalid timezone hours".to_string()))?;
594 let minutes: i32 = tz_str[3..5].parse()
595 .map_err(|_| ParseError::InvalidTimezone("Invalid timezone minutes".to_string()))?;
596
597 let total_seconds = sign * (hours * 3600 + minutes * 60);
598 FixedOffset::east_opt(total_seconds)
599 .ok_or_else(|| ParseError::InvalidTimezone("Invalid timezone offset".to_string()))
600 } else {
601 Err(ParseError::InvalidTimezone("Invalid timezone format".to_string()))
602 }
603}
604
605#[no_mangle]
607pub extern "C" fn parse_date_c(input: *const std::os::raw::c_char) -> *mut std::os::raw::c_char {
608 if input.is_null() {
609 return std::ptr::null_mut();
610 }
611
612 let c_str = unsafe { std::ffi::CStr::from_ptr(input) };
613 let input_str = match c_str.to_str() {
614 Ok(s) => s,
615 Err(_) => return std::ptr::null_mut(),
616 };
617
618 let parser = UniversalDateParser::new();
619 match parser.parse(input_str) {
620 Ok(result) => {
621 let json = serde_json::to_string(&result).unwrap_or_else(|_| "{}".to_string());
622 let c_string = std::ffi::CString::new(json).unwrap_or_else(|_| std::ffi::CString::new("{}").unwrap());
623 c_string.into_raw()
624 }
625 Err(_) => std::ptr::null_mut(),
626 }
627}
628
/// C entry point: releases a string previously handed out through
/// `CString::into_raw`. A null pointer is ignored.
#[no_mangle]
pub extern "C" fn free_string_c(ptr: *mut std::os::raw::c_char) {
    if ptr.is_null() {
        return;
    }

    // SAFETY: `ptr` is non-null (checked above). The caller must pass a
    // pointer obtained from `CString::into_raw` that has not been freed yet.
    drop(unsafe { std::ffi::CString::from_raw(ptr) });
}
635
636#[wasm_bindgen]
638pub fn parse_date_wasm(input: &str) -> String {
639 let parser = UniversalDateParser::new();
640 match parser.parse(input) {
641 Ok(result) => serde_json::to_string(&result).unwrap_or_else(|_| "{}".to_string()),
642 Err(err) => serde_json::to_string(&err).unwrap_or_else(|_| r#"{"error":"Unknown error"}"#.to_string()),
643 }
644}
645
646#[wasm_bindgen]
647pub fn parse_all_possibilities_wasm(input: &str) -> String {
648 let parser = UniversalDateParser::new();
649 let results = parser.parse_all_possibilities(input);
650 serde_json::to_string(&results).unwrap_or_else(|_| "[]".to_string())
651}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` with a default-configured parser.
    fn parse_default(input: &str) -> Result<ParsedDate, ParseError> {
        UniversalDateParser::new().parse(input)
    }

    #[test]
    fn test_iso8601_parsing() {
        let with_time = parse_default("2023-12-25T15:30:45Z").unwrap();
        assert_eq!(with_time.detected_format, "ISO 8601 DateTime");
        assert!(with_time.confidence > 0.9);

        let date_only = parse_default("2023-12-25").unwrap();
        assert_eq!(date_only.detected_format, "ISO 8601 Date");
    }

    #[test]
    fn test_us_format_parsing() {
        let parsed = parse_default("12/25/2023").unwrap();
        assert_eq!(parsed.detected_format, "US Format MM/DD/YYYY");
        assert_eq!(parsed.datetime.month(), 12);
        assert_eq!(parsed.datetime.day(), 25);
    }

    #[test]
    fn test_unix_timestamp_parsing() {
        let seconds = parse_default("1703520645").unwrap();
        assert_eq!(seconds.detected_format, "Unix Timestamp");

        let millis = parse_default("1703520645000").unwrap();
        assert_eq!(millis.detected_format, "Unix Timestamp (ms)");
    }

    #[test]
    fn test_ambiguous_dates() {
        // Day and month are both <= 12 and differ, so either order is valid.
        let parsed = parse_default("01/02/2023").unwrap();
        assert!(parsed.metadata.is_ambiguous);
        assert!(!parsed.metadata.alternatives.is_empty());
    }

    #[test]
    fn test_invalid_dates() {
        for input in ["", "not a date", "13/32/2023"] {
            assert!(parse_default(input).is_err());
        }
    }

    #[test]
    fn test_strict_mode() {
        let parser = UniversalDateParser::with_config(ParserConfig {
            strict_mode: true,
            ..Default::default()
        });

        // Strict mode refuses to pick one reading of an ambiguous date.
        assert!(parser.parse("01/02/2023").is_err());
    }
}