1use crate::datetime::RosettaDateTime;
9use crate::error::{Result, RosettaError};
10use crate::timezone::{self, TzOffset};
11
12pub(crate) fn try_parse(input: &str, default_tz: &TzOffset) -> Result<RosettaDateTime> {
15 let s = input.trim();
16
17 if let Some(dt) = try_unix_timestamp(s) {
19 return Ok(dt);
20 }
21
22 if let Some(dt) = try_chinese_format(s, default_tz) {
24 return Ok(dt);
25 }
26
27 if let Some(dt) = try_mysql_log(s, default_tz) {
29 return Ok(dt);
30 }
31
32 if let Some(dt) = try_normalized(s, default_tz) {
38 return Ok(dt);
39 }
40
41 Err(RosettaError::ParseError(
42 "No dateparser format matched".into(),
43 ))
44}
45
46fn try_unix_timestamp(input: &str) -> Option<RosettaDateTime> {
49 if input.is_empty() || !input.bytes().all(|b| b.is_ascii_digit()) {
52 return None;
53 }
54
55 let len = input.len();
56 let val: i64 = input.parse().ok()?;
57
58 let seconds = if len <= 10 {
59 val
61 } else if len <= 13 {
62 val / 1_000
64 } else if len <= 19 {
65 val / 1_000_000_000
67 } else {
68 return None;
69 };
70
71 let base = RosettaDateTime::from_components(1970, 1, 1, 0, 0, 0, TzOffset::UTC).ok()?;
72 Some(base.add_seconds(seconds))
73}
74
75fn try_chinese_format(input: &str, default_tz: &TzOffset) -> Option<RosettaDateTime> {
78 if !input.contains('年') {
79 return None;
80 }
81 let s = input
83 .replace(['年', '月'], "-")
84 .replace('日', " ")
85 .replace(['时', '分'], ":")
86 .replace('秒', "");
87
88 parse_ymd_hms(s.trim(), default_tz)
89}
90
91fn try_mysql_log(input: &str, default_tz: &TzOffset) -> Option<RosettaDateTime> {
94 let parts: Vec<&str> = input.splitn(2, ' ').collect();
96 if parts.len() != 2 {
97 return None;
98 }
99 let date_part = parts[0];
100 let time_part = parts[1];
101
102 if date_part.len() != 6 || !date_part.bytes().all(|b| b.is_ascii_digit()) {
103 return None;
104 }
105
106 let yy: i32 = date_part[..2].parse().ok()?;
107 let mm: u8 = date_part[2..4].parse().ok()?;
108 let dd: u8 = date_part[4..6].parse().ok()?;
109 let year = if yy < 70 { 2000 + yy } else { 1900 + yy };
110
111 let (h, min, s) = parse_time_str(time_part)?;
112 RosettaDateTime::from_components(year, mm, dd, h, min, s, *default_tz).ok()
113}
114
115fn try_normalized(input: &str, default_tz: &TzOffset) -> Option<RosettaDateTime> {
118 let postgres_split = split_postgres_offset(input);
122 let (input_for_tz, extra_offset): (&str, Option<TzOffset>) = match &postgres_split {
123 Some((body_str, off)) => (body_str.as_str(), Some(*off)),
124 None => (input, None),
125 };
126
127 let (body, offset) = if let Some(off) = extra_offset {
129 (input_for_tz, off)
130 } else {
131 timezone::extract_trailing_timezone(input_for_tz).unwrap_or((input_for_tz, *default_tz))
132 };
133 let body = body.trim();
134
135 let first_tok: &str = body.split_whitespace().next().unwrap_or("");
137 if month_from_name(first_tok).is_some() {
138 return try_month_name_first(body, &offset);
139 }
140
141 let toks: Vec<&str> = body.split_whitespace().collect();
144 if toks.len() >= 2 {
145 let maybe_day: bool = toks[0].parse::<u8>().is_ok();
146 let maybe_month: bool = month_from_name(toks[1]).is_some();
147 if maybe_day && maybe_month {
148 return try_day_month_year(body, &offset);
149 }
150 }
151
152 if looks_like_time_only(body) {
154 return try_time_only(body, default_tz, &offset);
155 }
156
157 let normalised = normalise_separators(body);
159 parse_ymd_hms(normalised.trim(), &offset)
160}
161
162fn try_month_name_first(body: &str, offset: &TzOffset) -> Option<RosettaDateTime> {
168 let cleaned = body.replace(',', " ").replace(" at ", " ").to_lowercase();
170 let toks: Vec<&str> = cleaned.split_whitespace().collect();
171 if toks.is_empty() {
172 return None;
173 }
174
175 let month = month_from_name(toks[0])?;
176
177 match toks.len() {
178 n if n >= 2 => {
180 let day_str = strip_ordinal(toks[1]);
182 if let Ok(day) = day_str.parse::<u8>() {
183 if n == 2 {
185 let year = default_year();
188 return RosettaDateTime::from_components(year, month, day, 0, 0, 0, *offset)
189 .ok();
190 }
191 let third = toks[2];
194 if let Ok(year) = expand_year(third) {
195 let (h, m, s, _ampm) = parse_time_tokens(&toks[3..]);
197 return RosettaDateTime::from_components(year, month, day, h, m, s, *offset)
198 .ok();
199 } else if third.contains(':') || third.ends_with("am") || third.ends_with("pm") {
200 let (hr, m, s, _am_pm) = parse_time_tokens(&toks[2..]);
202 let year = default_year();
203 return RosettaDateTime::from_components(year, month, day, hr, m, s, *offset)
204 .ok();
205 }
206 }
207 if let Some(day_val) = toks
209 .get(1)
210 .and_then(|t| strip_ordinal(t).parse::<u8>().ok())
211 && let Some(yr_str) = toks.get(2)
212 && let Ok(yr) = expand_year(yr_str)
213 {
214 return RosettaDateTime::from_components(yr, month, day_val, 0, 0, 0, *offset).ok();
215 }
216 None
217 }
218 _ => None,
219 }
220}
221
222fn try_day_month_year(body: &str, offset: &TzOffset) -> Option<RosettaDateTime> {
224 let cleaned = body.replace(',', " ").to_lowercase();
225 let toks: Vec<&str> = cleaned.split_whitespace().collect();
226
227 let day: u8 = toks.first()?.parse().ok()?;
228 let month = month_from_name(toks.get(1)?)?;
229 let year = expand_year(toks.get(2)?).ok()?;
230
231 let (h, m, s, _) = parse_time_tokens(toks.get(3..).unwrap_or(&[]));
232 RosettaDateTime::from_components(year, month, day, h, m, s, *offset).ok()
233}
234
/// Heuristic: does `s` start with a bare clock time such as `10:30` or `9:05pm`?
///
/// Only the first whitespace-separated token is inspected. It must begin with
/// an ASCII digit and, once lower-cased and stripped of trailing `am`/`pm`,
/// contain a `:`, contain neither `-` nor `/`, and have at most two bytes
/// before the first `:`.
fn looks_like_time_only(s: &str) -> bool {
    let token = match s.split_whitespace().next() {
        Some(tok) => tok,
        // Empty or whitespace-only text has no ':' and so can never qualify.
        None => return false,
    };

    if !token.starts_with(|c: char| c.is_ascii_digit()) {
        return false;
    }

    let folded = token.to_lowercase();
    let clock = folded.trim_end_matches("am").trim_end_matches("pm");

    // Date separators mean this is a date (or date-time), not a bare time.
    if clock.contains('-') || clock.contains('/') {
        return false;
    }

    match clock.split_once(':') {
        Some((hour_field, _)) => hour_field.len() <= 2,
        None => false,
    }
}
260
261fn try_time_only(body: &str, _default_tz: &TzOffset, offset: &TzOffset) -> Option<RosettaDateTime> {
262 let toks: Vec<&str> = body.split_whitespace().collect();
263 let (h, m, s, _) = parse_time_tokens(&toks);
264 let today = RosettaDateTime::now_utc();
265 RosettaDateTime::from_components(today.year(), today.month(), today.day(), h, m, s, *offset)
266 .ok()
267}
268
269fn parse_ymd_hms(s: &str, offset: &TzOffset) -> Option<RosettaDateTime> {
274 let mut parts = s.splitn(3, ' ');
276 let date_str = parts.next()?;
277 let time_str = parts.next(); let date_segs: Vec<&str> = date_str.split('-').collect();
281
282 let (year, month, day) = match date_segs.len() {
283 3 => {
284 let (y, m, d) = parse_date_segments(&date_segs)?;
285 (y, m, d)
286 }
287 2 => {
288 let y: i32 = date_segs[0].parse().ok()?;
290 let m: u8 = date_segs[1].parse().ok()?;
291 (y, m, 1u8)
292 }
293 _ => return None,
294 };
295
296 let (h, min, s) = if let Some(ts) = time_str {
297 parse_time_str_with_ampm(ts)?
298 } else {
299 (0, 0, 0)
300 };
301
302 RosettaDateTime::from_components(year, month, day, h, min, s, *offset).ok()
303}
304
305fn parse_date_segments(segs: &[&str]) -> Option<(i32, u8, u8)> {
307 debug_assert_eq!(segs.len(), 3);
308 let a = segs[0];
309 let b = segs[1];
310 let c = segs[2];
311
312 if let Some(m) = month_from_name(b) {
314 let y: i32 = expand_year(a).ok()?;
315 let d: u8 = c.parse().ok()?;
316 return Some((y, m, d));
317 }
318 if let Some(m) = month_from_name(a) {
319 let d: u8 = b.parse().ok()?;
321 let y: i32 = expand_year(c).ok()?;
322 return Some((y, m, d));
323 }
324
325 let ai: i32 = a.parse().ok()?;
326 let bi: i32 = b.parse().ok()?;
327 let ci: i32 = c.parse().ok()?;
328
329 if ai > 31 {
331 return Some((ai, bi as u8, ci as u8));
332 }
333 if ci > 31 && ai <= 12 {
335 return Some((expand_year_i32(ci), ai as u8, bi as u8));
336 }
337 if ci > 31 && ai > 12 {
339 return Some((expand_year_i32(ci), bi as u8, ai as u8));
340 }
341 if ci <= 99 && ai <= 12 {
343 return Some((expand_year_i32(ci), ai as u8, bi as u8));
344 }
345
346 None
347}
348
/// Rewrites `/` and `.` to `-` in the date portion of `s`.
///
/// The date portion is everything before the first space; the remainder
/// (including that space) is copied through untouched so fractional seconds
/// in the time keep their `.`. Without a space the whole string is rewritten.
fn normalise_separators(s: &str) -> String {
    let (date_part, remainder) = match s.find(' ') {
        Some(space_at) => s.split_at(space_at),
        None => (s, ""),
    };

    let mut out = date_part.replace(['/', '.'], "-");
    out.push_str(remainder);
    out
}
366
/// Parses a clock time of the form `h[:m[:s[.fraction]]]` with an optional
/// trailing `am`/`pm` marker (any case, optionally preceded by whitespace).
///
/// Missing minute or second fields default to 0 and a fractional-seconds
/// suffix is discarded. Hour handling depends on the marker:
///
/// * `pm`: hours other than 12 have 12 added (`1pm` → 13, `12pm` → 12);
/// * `am`: hour 12 becomes 0 (`12am` → 0);
/// * no marker: the hour is taken as written, so 24-hour `12:30` stays 12:30.
///
/// Returns `None` when a field is not a valid `u8` or the resulting hour
/// exceeds 23. Minutes and seconds are not range-checked here.
fn parse_time_str(ts: &str) -> Option<(u8, u8, u8)> {
    let lower = ts.to_lowercase();

    // `Some(true)` = PM, `Some(false)` = AM, `None` = no marker (24-hour).
    // AM and "no marker" must stay distinct: only an explicit AM turns hour
    // 12 into 0.
    let meridiem = if lower.ends_with("pm") {
        Some(true)
    } else if lower.ends_with("am") {
        Some(false)
    } else {
        None
    };

    let clock = match meridiem {
        // Drop the two-byte marker; `get` avoids a panic should the cut not
        // fall on a character boundary.
        Some(_) => ts.get(..ts.len().checked_sub(2)?)?.trim_end(),
        None => ts,
    };

    let mut fields = clock.splitn(3, ':');
    let hour_written: u8 = fields.next()?.trim().parse().ok()?;
    let minute: u8 = fields.next().unwrap_or("0").trim().parse().ok()?;
    let second_field = fields.next().unwrap_or("0");
    // Keep only the whole seconds; anything after '.' is ignored.
    let second: u8 = second_field
        .split('.')
        .next()
        .unwrap_or("0")
        .trim()
        .parse()
        .ok()?;

    let hour = match meridiem {
        Some(true) if hour_written != 12 => hour_written.saturating_add(12),
        Some(false) if hour_written == 12 => 0,
        _ => hour_written,
    };

    if hour > 23 {
        return None;
    }
    Some((hour, minute, second))
}
402
403fn parse_time_str_with_ampm(ts: &str) -> Option<(u8, u8, u8)> {
404 let toks: Vec<&str> = ts.split_whitespace().collect();
408 let unified = toks.join("");
409 parse_time_str(&unified)
410}
411
412fn parse_time_tokens(toks: &[&str]) -> (u8, u8, u8, bool) {
415 if toks.is_empty() {
416 return (0, 0, 0, false);
417 }
418 let joined = toks.join(" ");
419 if let Some((h, m, s)) = parse_time_str(&joined) {
420 let lower = joined.to_lowercase();
421 let has = lower.contains("pm") || lower.contains("am");
422 (h, m, s, has)
423 } else {
424 (0, 0, 0, false)
425 }
426}
427
/// Maps an English month name or abbreviation to its number (1–12).
///
/// Matching is case-insensitive and ignores any trailing `.` characters.
/// Three-letter abbreviations, full names and `sept` are recognised; anything
/// else yields `None`.
fn month_from_name(s: &str) -> Option<u8> {
    // Index in this table + 1 is the month number.
    const ALIASES: [&[&str]; 12] = [
        &["jan", "january"],
        &["feb", "february"],
        &["mar", "march"],
        &["apr", "april"],
        &["may"],
        &["jun", "june"],
        &["jul", "july"],
        &["aug", "august"],
        &["sep", "sept", "september"],
        &["oct", "october"],
        &["nov", "november"],
        &["dec", "december"],
    ];

    let key = s.trim_end_matches('.').to_lowercase();
    (1u8..)
        .zip(ALIASES.iter())
        .find(|(_, names)| names.contains(&key.as_str()))
        .map(|(number, _)| number)
}
448
449fn expand_year(s: &str) -> std::result::Result<i32, ()> {
452 let n: i32 = s.parse().map_err(|_| ())?;
453 Ok(expand_year_i32(n))
454}
455
/// Widens a two-digit year: 0–69 become 2000–2069 and 70–99 become 1970–1999.
/// Every other value (including negatives) is returned unchanged.
fn expand_year_i32(n: i32) -> i32 {
    if (0..70).contains(&n) {
        n + 2000
    } else if (70..100).contains(&n) {
        n + 1900
    } else {
        n
    }
}
463
464fn default_year() -> i32 {
465 RosettaDateTime::now_utc().year()
467}
468
/// Strips English ordinal suffixes from the end of `s` (`1st` → `1`).
///
/// The suffixes `st`, `nd`, `rd`, `th` are removed in that fixed order, each
/// one repeatedly, in a single sweep; a suffix uncovered by a later removal is
/// not revisited.
fn strip_ordinal(s: &str) -> &str {
    let mut rest = s;
    for suffix in ["st", "nd", "rd", "th"] {
        rest = rest.trim_end_matches(suffix);
    }
    rest
}
475
476fn split_postgres_offset(input: &str) -> Option<(String, TzOffset)> {
481 let parts: Vec<&str> = input.splitn(2, ' ').collect();
487 if parts.len() != 2 {
488 return None;
489 }
490 let date_part = parts[0];
491 let time_and_offset = parts[1];
492
493 if !date_part.contains('-') {
495 return None;
496 }
497
498 let bytes = time_and_offset.as_bytes();
502 let mut split_pos: Option<usize> = None;
503 for i in 1..bytes.len() {
504 let b = bytes[i];
505 if (b == b'+' || b == b'-') && bytes[i - 1].is_ascii_digit() {
506 split_pos = Some(i);
507 break;
508 }
509 }
510
511 let pos = split_pos?;
512 let time_clean = &time_and_offset[..pos];
513 let offset_str = &time_and_offset[pos..];
514
515 let offset = crate::timezone::parse_timezone(offset_str).ok()?;
516 Some((format!("{} {}", date_part, time_clean), offset))
517}