use once_cell::sync::Lazy;
use pyo3::prelude::*;
use pyo3::IntoPyObjectExt;
use regex::Regex;
use std::collections::HashMap;
/// Unanchored pattern for a UTC offset: a sign, a 1-2 digit hour, an optional
/// colon, and a 2-digit minute. Groups: 1 = sign, 2 = hour, 3 = minute.
pub(crate) static RE_TZ_COLON: Lazy<Regex> =
Lazy::new(|| Regex::new(r"([+-])(\d{1,2}):?(\d{2})").unwrap());
/// Unanchored pattern for a sign followed by exactly four digits.
/// Groups: 1 = sign, 2 = the four digits.
pub(crate) static RE_TZ_4DIGIT: Lazy<Regex> = Lazy::new(|| Regex::new(r"([+-])(\d{4})").unwrap());
/// Pattern for an offset at the very end of the input, preceded by whitespace:
/// a sign, two digits, an optional colon, and two more digits.
/// Group 1 is the offset without the leading whitespace.
pub(crate) static RE_TZ_IN_STRING: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\s+([+-]\d{2}:?\d{2})$").unwrap());
/// Same shape as `RE_TZ_IN_STRING`, but the part after the optional colon may
/// be two to four digits long.
pub(crate) static RE_TZ_IN_STRING_EXTENDED: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\s+([+-]\d{2}:?\d{2,4})$").unwrap());
/// Unanchored pattern for a clock time: a 1-2 digit hour, a colon, a 2-digit
/// minute, and an optional `:SS` seconds part.
/// Groups: 1 = hour, 2 = minute, 3 = second (absent when there is no seconds part).
pub(crate) static RE_TIME_24H: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(\d{1,2}):(\d{2})(?::(\d{2}))?").unwrap());
/// Builds a lookup from English month names to month numbers (1-12).
///
/// Both the three-letter abbreviation and the full name of every month are
/// present as keys. Keys are case-sensitive and capitalised (`"Jan"`,
/// `"January"`). "May" is its own abbreviation, so the map has 23 entries.
pub fn get_month_map() -> HashMap<&'static str, u8> {
    // (abbreviation, full name), in calendar order.
    const MONTH_NAMES: [(&str, &str); 12] = [
        ("Jan", "January"),
        ("Feb", "February"),
        ("Mar", "March"),
        ("Apr", "April"),
        ("May", "May"),
        ("Jun", "June"),
        ("Jul", "July"),
        ("Aug", "August"),
        ("Sep", "September"),
        ("Oct", "October"),
        ("Nov", "November"),
        ("Dec", "December"),
    ];

    let mut lookup = HashMap::with_capacity(MONTH_NAMES.len() * 2);
    for (number, &(short, long)) in (1u8..).zip(MONTH_NAMES.iter()) {
        lookup.insert(short, number);
        lookup.insert(long, number);
    }
    lookup
}
/// Builds a lookup from three-letter English month abbreviations to month
/// numbers (1-12).
///
/// Only the capitalised abbreviations (`"Jan"` … `"Dec"`) are keys; full month
/// names such as `"January"` are not included.
pub fn get_abbreviated_month_map() -> HashMap<&'static str, u8> {
    // Calendar order: the position in this array (starting at 1) is the month number.
    const ABBREVIATIONS: [&str; 12] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ];

    ABBREVIATIONS.iter().copied().zip(1u8..).collect()
}
/// Instantiates the Python-side `FixedTzOffset` class.
///
/// Imports the Python module `formatparse`, looks up its `FixedTzOffset`
/// attribute, and calls it with `(offset_minutes, name)`.
///
/// # Errors
///
/// Propagates any Python error raised by the import, the attribute lookup,
/// the call, or the final conversion to `PyObject`.
pub fn create_fixed_tz(py: Python, offset_minutes: i32, name: &str) -> PyResult<PyObject> {
    py.import("formatparse")?
        .getattr("FixedTzOffset")?
        .call1((offset_minutes, name.to_string()))?
        .into_py_any(py)
}
pub fn parse_timezone(py: Python, tz_str: &str) -> PyResult<PyObject> {
if let Some(caps) = RE_TZ_COLON.captures(tz_str) {
if let (Some(sign_match), Some(hour_match), Some(min_match)) =
(caps.get(1), caps.get(2), caps.get(3))
{
let sign = if sign_match.as_str() == "+" { 1 } else { -1 };
let hour: i32 = hour_match
.as_str()
.parse()
.map_err(|_| PyErr::new::<pyo3::exceptions::PyValueError, _>("Invalid timezone"))?;
let min: i32 = min_match.as_str().parse().unwrap_or(0);
let offset_minutes = sign * (hour * 60 + min);
return create_fixed_tz(py, offset_minutes, "");
}
}
if let Some(caps) = RE_TZ_4DIGIT.captures(tz_str) {
if let (Some(sign_match), Some(tz_match)) = (caps.get(1), caps.get(2)) {
let sign = if sign_match.as_str() == "+" { 1 } else { -1 };
let tz_str = tz_match.as_str();
if tz_str.len() >= 4 {
let hour: i32 = tz_str[..2].parse().map_err(|_| {
PyErr::new::<pyo3::exceptions::PyValueError, _>("Invalid timezone")
})?;
let min: i32 = tz_str[2..4].parse().map_err(|_| {
PyErr::new::<pyo3::exceptions::PyValueError, _>("Invalid timezone")
})?;
let offset_minutes = sign * (hour * 60 + min);
return create_fixed_tz(py, offset_minutes, "");
}
}
}
Ok(py.None())
}
/// Pulls `(hour, minute, second)` out of the first `RE_TIME_24H` match in `text`.
///
/// Returns `Ok(None)` when `text` contains no time. A missing or unparsable
/// seconds field yields `0`.
fn parse_clock_fields(text: &str) -> Result<Option<(u8, u8, u8)>, PyErr> {
    let caps = match RE_TIME_24H.captures(text) {
        Some(caps) => caps,
        None => return Ok(None),
    };
    // Groups 1 and 2 are mandatory in the pattern, so indexing cannot fail on a match.
    let hour: u8 = caps[1]
        .parse()
        .map_err(|_| PyErr::new::<pyo3::exceptions::PyValueError, _>("Invalid hour"))?;
    let minute: u8 = caps[2]
        .parse()
        .map_err(|_| PyErr::new::<pyo3::exceptions::PyValueError, _>("Invalid minute"))?;
    let second: u8 = caps
        .get(3)
        .and_then(|m| m.as_str().parse().ok())
        .unwrap_or(0);
    Ok(Some((hour, minute, second)))
}

/// Parses a clock time with an optional AM/PM marker into 24-hour
/// `(hour, minute, second)`.
///
/// The marker is matched case-insensitively. "AM" is searched for first and
/// "PM" only if no "AM" is present; the time is read from the text that
/// precedes the marker. Without a marker the whole string is treated as a
/// 24-hour time. `12 AM` becomes hour `0`; for PM, `12` is added to every
/// hour except `12`. Seconds are optional and default to `0`.
///
/// Returns `(0, 0, 0)` when no `H:MM` pattern is found.
///
/// # Errors
///
/// Returns a `ValueError` ("Invalid hour" / "Invalid minute") when the matched
/// hour or minute text cannot be parsed as `u8`.
pub fn parse_time_with_ampm(time_str: &str) -> Result<(u8, u8, u8), PyErr> {
    enum Meridiem {
        Am,
        Pm,
    }

    // ASCII-only uppercasing keeps every byte offset identical to `time_str`.
    // Full Unicode uppercasing can change byte lengths (e.g. "ı" -> "I"), which
    // made the index point at the wrong place in, or even inside a character
    // of, the original string.
    let upper = time_str.to_ascii_uppercase();

    let (clock_text, meridiem) = if let Some(idx) = upper.find("AM") {
        (time_str[..idx].trim(), Some(Meridiem::Am))
    } else if let Some(idx) = upper.find("PM") {
        (time_str[..idx].trim(), Some(Meridiem::Pm))
    } else {
        (time_str, None)
    };

    let (hour, minute, second) = match parse_clock_fields(clock_text)? {
        Some(fields) => fields,
        None => return Ok((0, 0, 0)),
    };

    let hour = match meridiem {
        // 12:xx AM is just after midnight.
        Some(Meridiem::Am) if hour == 12 => 0,
        // 12:xx PM is already correct in 24-hour form. The regex caps the hour
        // at two digits, so adding 12 cannot overflow a u8.
        Some(Meridiem::Pm) if hour != 12 => hour + 12,
        _ => hour,
    };

    Ok((hour, minute, second))
}
/// Converts a fractional-seconds digit string into microseconds.
///
/// Delegates to `formatparse_core::parse_microsecond_digits`. Per the unit
/// tests in this file, short inputs are scaled up (`"123"` -> `123000`) and
/// inputs longer than six digits are cut to six (`"1234567"` -> `123456`).
///
/// # Errors
///
/// Any error from the core parser is returned as a Python `ValueError`
/// carrying that error's display text.
pub fn parse_microseconds(micros_str: &str) -> Result<u32, PyErr> {
    match formatparse_core::parse_microsecond_digits(micros_str) {
        Ok(micros) => Ok(micros),
        Err(err) => Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
            err.to_string(),
        )),
    }
}
/// Reads microseconds from an optional regex match.
///
/// Returns `0` when there is no match or when `parse_microseconds` rejects
/// the matched text; otherwise returns the parsed value.
pub fn extract_microseconds(cap: Option<regex::Match>) -> u32 {
    match cap {
        Some(matched) => parse_microseconds(matched.as_str()).unwrap_or(0),
        None => 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `parse_time_with_ampm` succeeds on every input and yields
    /// the paired `(hour, minute, second)`.
    fn assert_times(cases: &[(&str, (u8, u8, u8))]) {
        for &(input, expected) in cases {
            let parsed = parse_time_with_ampm(input).unwrap();
            assert_eq!(parsed, expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_get_month_map() {
        let months = get_month_map();
        for &(name, number) in &[("Jan", 1u8), ("December", 12), ("February", 2)] {
            assert_eq!(months.get(name), Some(&number));
        }
    }

    #[test]
    fn test_get_abbreviated_month_map() {
        let months = get_abbreviated_month_map();
        assert_eq!(months.get("Jan"), Some(&1));
        assert_eq!(months.get("Dec"), Some(&12));
        // Full month names are not part of the abbreviated map.
        assert_eq!(months.get("June"), None);
        assert_eq!(months.get("January"), None);
    }

    #[test]
    fn test_parse_time_with_ampm_am() {
        assert_times(&[
            ("10:30:45 AM", (10, 30, 45)),
            // 12 AM is midnight.
            ("12:00:00 AM", (0, 0, 0)),
            ("11:59:59 AM", (11, 59, 59)),
        ]);
    }

    #[test]
    fn test_parse_time_with_ampm_pm() {
        assert_times(&[
            ("10:30:45 PM", (22, 30, 45)),
            // 12 PM is noon and stays at 12.
            ("12:00:00 PM", (12, 0, 0)),
            ("1:00:00 PM", (13, 0, 0)),
        ]);
    }

    #[test]
    fn test_parse_time_with_ampm_24h() {
        assert_times(&[
            ("10:30:45", (10, 30, 45)),
            ("23:59:59", (23, 59, 59)),
            ("00:00:00", (0, 0, 0)),
        ]);
    }

    #[test]
    fn test_parse_time_with_ampm_no_seconds() {
        assert_times(&[("10:30 AM", (10, 30, 0)), ("10:30 PM", (22, 30, 0))]);
    }

    #[test]
    fn test_parse_microseconds() {
        let cases: [(&str, u32); 4] = [
            ("123456", 123456),
            ("123", 123000),
            ("12", 120000),
            // Digits beyond the sixth are dropped.
            ("1234567", 123456),
        ];
        for &(digits, expected) in cases.iter() {
            assert_eq!(parse_microseconds(digits).unwrap(), expected);
        }
    }

    #[test]
    fn test_extract_microseconds() {
        use regex::Regex;
        let digits = Regex::new(r"(\d+)").unwrap();

        for &(text, expected) in &[("123456", 123456u32), ("123", 123000)] {
            let cap = digits.captures(text).and_then(|c| c.get(1));
            assert_eq!(extract_microseconds(cap), expected);
        }

        // No match at all falls back to zero.
        assert_eq!(extract_microseconds(None), 0);
    }

    #[test]
    fn test_regex_tz_colon() {
        for offset in &["+10:30", "-05:00", "+1:00"] {
            assert!(RE_TZ_COLON.is_match(offset));
        }
    }

    #[test]
    fn test_regex_tz_4digit() {
        for offset in &["+1000", "-0530"] {
            assert!(RE_TZ_4DIGIT.is_match(offset));
        }
    }

    #[test]
    fn test_regex_time_24h() {
        for time in &["10:30", "10:30:45", "23:59:59"] {
            assert!(RE_TIME_24H.is_match(time));
        }
    }
}