pub mod infer;
use chrono::DateTime;
mod patterns;
mod strptime;
pub use patterns::Pattern;
#[cfg(feature = "dtype-time")]
use polars_core::chunked_array::temporal::time_to_time64ns;
use polars_core::prelude::arity::unary_elementwise;
use polars_utils::cache::LruCachedFunc;
use super::*;
#[cfg(feature = "dtype-date")]
use crate::chunkedarray::date::naive_date_to_date;
use crate::prelude::string::strptime::StrpTimeState;
/// Find the first known time format that `convert` accepts for `val`.
///
/// `convert` is invoked as `convert(val, fmt)` for each candidate in
/// `patterns::TIME_H_M_S`, in order. Only whether the call returns `Ok` is
/// inspected; the parsed value itself is discarded.
///
/// Returns `None` when no candidate format parses `val`.
#[cfg(feature = "dtype-time")]
fn time_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
    F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
    // One pass is enough: re-scanning the very same list cannot produce a
    // match that the first scan missed, it only doubles the work on failure.
    patterns::TIME_H_M_S
        .iter()
        .find(|fmt| convert(val, fmt).is_ok())
        .copied()
}
/// Find the first known datetime format that `convert` accepts for `val`.
///
/// Candidates are tried in order: everything in `patterns::DATETIME_Y_M_D`
/// first, then everything in `patterns::DATETIME_D_M_Y`. `convert` is called
/// as `convert(val, fmt)` and only its `Ok`/`Err` status is used.
///
/// Returns `None` when no candidate format parses `val`.
fn datetime_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
    F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
    let candidates = patterns::DATETIME_Y_M_D
        .iter()
        .chain(patterns::DATETIME_D_M_Y);
    for &candidate in candidates {
        if convert(val, candidate).is_ok() {
            return Some(candidate);
        }
    }
    None
}
/// Find the first known date format that `convert` accepts for `val`.
///
/// Candidates are tried in order: everything in `patterns::DATE_Y_M_D`
/// first, then everything in `patterns::DATE_D_M_Y`. `convert` is called as
/// `convert(val, fmt)` and only its `Ok`/`Err` status is used.
///
/// Returns `None` when no candidate format parses `val`.
fn date_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
    F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
    patterns::DATE_Y_M_D
        .iter()
        .chain(patterns::DATE_D_M_Y)
        .find_map(|&candidate| convert(val, candidate).is_ok().then_some(candidate))
}
/// Guess the datetime format of `val`.
///
/// The candidate formats of `datetime_pattern` are first tried with
/// `NaiveDateTime::parse_from_str`; if none matches, the same candidates are
/// tried again with `NaiveDate::parse_from_str`.
///
/// # Errors
/// Returns the `parse_fmt_idk = "datetime"` error when neither attempt finds
/// a matching format.
#[cfg(feature = "dtype-datetime")]
fn sniff_fmt_datetime(val: &str) -> PolarsResult<&'static str> {
    if let Some(fmt) = datetime_pattern(val, NaiveDateTime::parse_from_str) {
        return Ok(fmt);
    }
    match datetime_pattern(val, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "datetime")),
    }
}
/// Guess the date format of `val` by trying the candidates of `date_pattern`
/// with `NaiveDate::parse_from_str`.
///
/// # Errors
/// Returns the `parse_fmt_idk = "date"` error when no candidate matches.
#[cfg(feature = "dtype-date")]
fn sniff_fmt_date(val: &str) -> PolarsResult<&'static str> {
    match date_pattern(val, NaiveDate::parse_from_str) {
        Some(fmt) => Ok(fmt),
        None => Err(polars_err!(parse_fmt_idk = "date")),
    }
}
/// Guess the time format of `val` by trying the candidates of `time_pattern`
/// with `NaiveTime::parse_from_str`.
///
/// # Errors
/// Returns the `parse_fmt_idk = "time"` error when no candidate matches.
#[cfg(feature = "dtype-time")]
fn sniff_fmt_time(val: &str) -> PolarsResult<&'static str> {
    let Some(fmt) = time_pattern(val, NaiveTime::parse_from_str) else {
        return Err(polars_err!(parse_fmt_idk = "time"));
    };
    Ok(fmt)
}
pub trait StringMethods: AsString {
/// Parse every string of the column as a time of day.
///
/// * `fmt` - format passed to `NaiveTime::parse_from_str`. When `None`, the
///   format is sniffed from the first non-null value; a column without any
///   non-null value yields an all-null result of the same name and length.
/// * `use_cache` - cache parse results per distinct input string. Only
///   honored when the column has more than 50 rows.
///
/// Values for which parsing fails yield `None`.
///
/// # Errors
/// Fails when the format has to be sniffed and no known time format matches
/// the first non-null value.
#[cfg(feature = "dtype-time")]
fn as_time(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<TimeChunked> {
    let string_ca = self.as_string();
    let fmt = match fmt {
        Some(explicit) => explicit,
        None => match string_ca.first_non_null() {
            Some(idx) => {
                let sample = string_ca.get(idx).expect("should not be null");
                sniff_fmt_time(sample)?
            },
            None => {
                let all_null =
                    Int64Chunked::full_null(string_ca.name().clone(), string_ca.len());
                return Ok(all_null.into_time());
            },
        },
    };

    let n_rows = string_ca.len();
    let use_cache = use_cache && n_rows > 50;
    // Cache capacity is the (truncated) square root of the row count.
    let cache_size = (n_rows as f64).sqrt() as usize;
    let mut parse_cached = LruCachedFunc::new(
        |s| {
            NaiveTime::parse_from_str(s, fmt)
                .ok()
                .map(|naive_time| time_to_time64ns(&naive_time))
        },
        cache_size,
    );

    let parsed = unary_elementwise(string_ca, |opt_s| parse_cached.eval(opt_s?, use_cache));
    Ok(parsed.with_name(string_ca.name().clone()).into_time())
}
/// Parse a date found anywhere inside each string (non-exact matching).
///
/// For every value, parsing with `NaiveDate::parse_and_remainder` is
/// attempted at the start of the string and then, on failure, again after
/// dropping one leading character at a time. The first successful parse
/// wins and whatever follows the match is ignored. Values where no position
/// parses (including empty strings) yield `None`.
///
/// * `fmt` - format to parse with. When `None`, the format is sniffed from
///   the first non-null value; a column without any non-null value yields an
///   all-null result of the same name and length.
///
/// # Errors
/// Fails when the format has to be sniffed and no known date format matches
/// the first non-null value.
#[cfg(feature = "dtype-date")]
fn as_date_not_exact(&self, fmt: Option<&str>) -> PolarsResult<DateChunked> {
    let string_ca = self.as_string();
    let fmt = match fmt {
        Some(explicit) => explicit,
        None => match string_ca.first_non_null() {
            Some(idx) => {
                let sample = string_ca.get(idx).expect("should not be null");
                sniff_fmt_date(sample)?
            },
            None => {
                let all_null =
                    Int32Chunked::full_null(string_ca.name().clone(), string_ca.len());
                return Ok(all_null.into_date());
            },
        },
    };

    let parsed = unary_elementwise(string_ca, |opt_s| {
        let s = opt_s?;
        // `char_indices` yields the byte offset of every character, i.e.
        // exactly the suffixes reached by repeatedly dropping the first char.
        s.char_indices()
            .find_map(|(start, _)| NaiveDate::parse_and_remainder(&s[start..], fmt).ok())
            .map(|(date, _rest)| naive_date_to_date(date))
    });
    Ok(parsed.with_name(string_ca.name().clone()).into_date())
}
/// Parse a datetime found anywhere inside each string (non-exact matching).
///
/// For every value, parsing is attempted at the start of the string and
/// then, on failure, again after dropping one leading character at a time.
/// The first successful parse wins and whatever follows the match is
/// ignored. Values where no position parses (including empty strings) yield
/// `None`.
///
/// * `fmt` - format to parse with. When `None`, the format is sniffed from
///   the first non-null value; a column without any non-null value yields an
///   all-null result with time zone `tz`.
/// * `tu` - time unit of the produced timestamps.
/// * `tz_aware` - when true, values are parsed with
///   `DateTime::parse_and_remainder` and converted through `naive_utc()`;
///   otherwise `infer::parse_datetime_and_remainder` is used.
/// * `tz` - requested time zone. With the `timezones` feature: a naive parse
///   is passed to `replace_time_zone` (using `_ambiguous` and
///   `NonExistent::Raise`), and a tz-aware parse is tagged with `tz`, or
///   `TimeZone::UTC` when `tz` is `None`. Without that feature the result
///   carries no time zone.
/// * `ensure_matching_tz` - when true, no explicit `fmt` was given, the parse
///   is tz-aware and `tz` is `None`, the call fails with
///   `to_datetime_tz_mismatch`.
///
/// # Errors
/// Fails when format sniffing finds no match, on the time-zone mismatch
/// described above, or when `replace_time_zone` fails.
#[cfg(feature = "dtype-datetime")]
fn as_datetime_not_exact(
    &self,
    fmt: Option<&str>,
    tu: TimeUnit,
    tz_aware: bool,
    tz: Option<&TimeZone>,
    _ambiguous: &StringChunked,
    ensure_matching_tz: bool,
) -> PolarsResult<DatetimeChunked> {
    let string_ca = self.as_string();
    let had_format = fmt.is_some();
    let fmt = match fmt {
        Some(explicit) => explicit,
        None => match string_ca.first_non_null() {
            Some(idx) => {
                let sample = string_ca.get(idx).expect("should not be null");
                sniff_fmt_datetime(sample)?
            },
            None => {
                let all_null =
                    Int64Chunked::full_null(string_ca.name().clone(), string_ca.len());
                return Ok(all_null.into_datetime(tu, tz.cloned()));
            },
        },
    };

    let to_timestamp = match tu {
        TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
        TimeUnit::Microseconds => datetime_to_timestamp_us,
        TimeUnit::Milliseconds => datetime_to_timestamp_ms,
    };

    let ca = unary_elementwise(string_ca, |opt_s| {
        let s = opt_s?;
        // `char_indices` yields the byte offset of every character, i.e.
        // exactly the suffixes reached by repeatedly dropping the first char.
        s.char_indices().find_map(|(start, _)| {
            let candidate = &s[start..];
            if tz_aware {
                DateTime::parse_and_remainder(candidate, fmt)
                    .ok()
                    .map(|(dt, _rest)| to_timestamp(dt.naive_utc()))
            } else {
                infer::parse_datetime_and_remainder(candidate, fmt)
                    .map(|(ndt, _rest)| to_timestamp(ndt))
            }
        })
    })
    .with_name(string_ca.name().clone());

    // A tz-aware parse with a sniffed format and no requested time zone is
    // rejected when the caller asked for matching time zones.
    let tz_mismatch = ensure_matching_tz && !had_format && tz_aware && tz.is_none();
    polars_ensure!(!tz_mismatch, to_datetime_tz_mismatch);

    match (tz_aware, tz) {
        #[cfg(feature = "timezones")]
        (false, Some(tz)) => polars_ops::prelude::replace_time_zone(
            &ca.into_datetime(tu, None),
            Some(tz),
            _ambiguous,
            NonExistent::Raise,
        ),
        #[cfg(feature = "timezones")]
        (true, tz) => Ok(ca.into_datetime(tu, Some(tz.cloned().unwrap_or(TimeZone::UTC)))),
        _ => Ok(ca.into_datetime(tu, None)),
    }
}
/// Parse every string of the column as a date.
///
/// * `fmt` - format to parse with. When `None`, the whole job is delegated to
///   `infer::to_date`. Otherwise the format is first passed through
///   `strptime::compile_fmt`.
/// * `use_cache` - cache parse results per distinct input string. Only
///   honored when the column has more than 50 rows.
///
/// Values for which parsing fails yield `None`.
///
/// # Errors
/// Propagates errors from `infer::to_date` and `strptime::compile_fmt`.
#[cfg(feature = "dtype-date")]
fn as_date(&self, fmt: Option<&str>, use_cache: bool) -> PolarsResult<DateChunked> {
let string_ca = self.as_string();
let fmt = match fmt {
Some(fmt) => fmt,
None => return infer::to_date(string_ca),
};
// Caching is only enabled when requested AND the column has more than 50 rows.
let use_cache = use_cache && string_ca.len() > 50;
let fmt = strptime::compile_fmt(fmt)?;
// When `fmt_len` returns `Some`, the `StrpTimeState` parser is tried first with
// chrono as fallback; otherwise chrono is the only parser.
let ca = if let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) {
let mut strptime_cache = StrpTimeState::default();
let mut convert = LruCachedFunc::new(
|s: &str| {
// NOTE(review): SAFETY — `fmt_len` was computed from this same `fmt` a few lines
// above; presumably that is the invariant `parse` requires. Confirm against the
// safety contract documented on `StrpTimeState::parse`.
match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
// The `StrpTimeState` parser returned `None`: fall back to chrono.
None => NaiveDate::parse_from_str(s, &fmt).ok(),
Some(ndt) => Some(ndt.date()),
}
.map(naive_date_to_date)
},
// Cache capacity: truncated square root of the row count.
(string_ca.len() as f64).sqrt() as usize,
);
unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
} else {
let mut convert = LruCachedFunc::new(
|s| {
let naive_date = NaiveDate::parse_from_str(s, &fmt).ok()?;
Some(naive_date_to_date(naive_date))
},
// Cache capacity: truncated square root of the row count.
(string_ca.len() as f64).sqrt() as usize,
);
unary_elementwise(string_ca, |val| convert.eval(val?, use_cache))
};
Ok(ca.with_name(string_ca.name().clone()).into_date())
}
/// Parse every string of the column as a datetime with time unit `tu`.
///
/// * `fmt` - format to parse with. When `None`, the whole job is delegated to
///   `infer::to_datetime(string_ca, tu, tz, ambiguous, true)`. Otherwise the
///   format is first passed through `strptime::compile_fmt`.
/// * `use_cache` - cache parse results per distinct input string. Only
///   honored when the column has more than 50 rows.
/// * `tz_aware` - when true, values are parsed with `DateTime::parse_from_str`
///   and converted through `naive_utc()`; the result is tagged with `tz`, or
///   `TimeZone::UTC` when `tz` is `None`.
/// * `tz` / `ambiguous` - for a naive parse with the `timezones` feature
///   enabled, a given `tz` is applied through `replace_time_zone` using
///   `ambiguous` and `NonExistent::Raise`. Without that feature, or when `tz`
///   is `None`, the naive result is returned as is.
///
/// Values for which parsing fails yield `None`.
///
/// # Errors
/// Propagates errors from `infer::to_datetime`, `strptime::compile_fmt` and
/// `replace_time_zone`.
///
/// # Panics
/// Panics when `tz_aware` is true and the `timezones` feature is disabled.
#[cfg(feature = "dtype-datetime")]
fn as_datetime(
&self,
fmt: Option<&str>,
tu: TimeUnit,
use_cache: bool,
tz_aware: bool,
tz: Option<&TimeZone>,
ambiguous: &StringChunked,
) -> PolarsResult<DatetimeChunked> {
let string_ca = self.as_string();
let fmt = match fmt {
Some(fmt) => fmt,
None => return infer::to_datetime(string_ca, tu, tz, ambiguous, true),
};
let fmt = strptime::compile_fmt(fmt)?;
// Caching is only enabled when requested AND the column has more than 50 rows.
let use_cache = use_cache && string_ca.len() > 50;
// Conversion from a parsed naive datetime to a timestamp in the requested unit.
let func = match tu {
TimeUnit::Nanoseconds => datetime_to_timestamp_ns,
TimeUnit::Microseconds => datetime_to_timestamp_us,
TimeUnit::Milliseconds => datetime_to_timestamp_ms,
};
if tz_aware {
#[cfg(feature = "timezones")]
{
let mut convert = LruCachedFunc::new(
|s: &str| {
let dt = DateTime::parse_from_str(s, &fmt).ok()?;
// The timestamp is taken from the UTC view of the parsed datetime.
Some(func(dt.naive_utc()))
},
// Cache capacity: truncated square root of the row count.
(string_ca.len() as f64).sqrt() as usize,
);
Ok(
unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
.with_name(string_ca.name().clone())
.into_datetime(tu, Some(tz.cloned().unwrap_or(TimeZone::UTC))),
)
}
#[cfg(not(feature = "timezones"))]
{
panic!("activate 'timezones' feature")
}
} else {
// String-to-timestamp parser for the requested unit, used as the fallback
// (or the only parser) below.
let transform = match tu {
TimeUnit::Nanoseconds => infer::transform_datetime_ns,
TimeUnit::Microseconds => infer::transform_datetime_us,
TimeUnit::Milliseconds => infer::transform_datetime_ms,
};
// When `fmt_len` returns `Some`, the `StrpTimeState` parser is tried first with
// `transform` as fallback; otherwise `transform` is the only parser.
let ca = if let Some(fmt_len) = self::strptime::fmt_len(fmt.as_bytes()) {
let mut strptime_cache = StrpTimeState::default();
let mut convert = LruCachedFunc::new(
|s: &str| {
// NOTE(review): SAFETY — `fmt_len` was computed from this same `fmt` a few lines
// above; presumably that is the invariant `parse` requires. Confirm against the
// safety contract documented on `StrpTimeState::parse`.
match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) }
{
// The `StrpTimeState` parser returned `None`: fall back to `transform`.
None => transform(s, &fmt),
Some(ndt) => Some(func(ndt)),
}
},
// Cache capacity: truncated square root of the row count.
(string_ca.len() as f64).sqrt() as usize,
);
unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
} else {
let mut convert = LruCachedFunc::new(
|s| transform(s, &fmt),
// Cache capacity: truncated square root of the row count.
(string_ca.len() as f64).sqrt() as usize,
);
unary_elementwise(string_ca, |opt_s| convert.eval(opt_s?, use_cache))
};
let dt = ca
.with_name(string_ca.name().clone())
.into_datetime(tu, None);
// With the `timezones` feature a requested zone is applied to the naive result;
// otherwise (or when no zone was requested) the naive result is returned.
match tz {
#[cfg(feature = "timezones")]
Some(tz) => polars_ops::prelude::replace_time_zone(
&dt,
Some(tz),
ambiguous,
NonExistent::Raise,
),
_ => Ok(dt),
}
}
}
}
/// Gives access to a value as a `StringChunked`.
///
/// This is the supertrait of `StringMethods`, whose default methods obtain
/// their input column through `as_string`.
pub trait AsString {
/// Borrow `self` as a `StringChunked`.
fn as_string(&self) -> &StringChunked;
}
// A `StringChunked` is already the requested type, so it hands out itself.
impl AsString for StringChunked {
fn as_string(&self) -> &StringChunked {
self
}
}
// Every method of `StringMethods` has a default body, so the impl is empty.
impl StringMethods for StringChunked {}