use std::{fmt::Debug, str::FromStr};
use crate::{chvalid::XmlCharValid, libxml::schemas_internals::XmlSchemaValType};
use super::{
XmlSchemaVal, XmlSchemaValPrimitives, is_wsp_blank_ch,
primitives::{XmlSchemaValDate, XmlSchemaValDuration},
};
/// Reports whether `year` is acceptable as a year value.
///
/// Every value except `0` is accepted.
#[doc(alias = "VALID_YEAR")]
fn validate_year(year: i64) -> bool {
    !matches!(year, 0)
}
/// Reports whether `month` is a calendar month number, i.e. lies in `1..=12`.
#[doc(alias = "VALID_MONTH")]
fn validate_month(month: u8) -> bool {
    matches!(month, 1..=12)
}
/// Reports whether `day` lies in `1..=31`.
///
/// The month is not taken into account here.
#[doc(alias = "VALID_DAY")]
fn validate_day(day: u8) -> bool {
    matches!(day, 1..=31)
}
/// Reports whether `hour` lies in `0..24`.
#[doc(alias = "VALID_HOUR")]
fn validate_hour(hour: u8) -> bool {
    hour < 24
}
/// Reports whether `min` lies in `0..60`.
#[doc(alias = "VALID_MIN")]
fn validate_minute(min: u8) -> bool {
    min < 60
}
/// Reports whether `sec` lies in the half-open interval `[0.0, 60.0)`.
///
/// `NaN` is never inside the interval, so it is rejected.
#[doc(alias = "VALID_SEC")]
fn validate_second(sec: f64) -> bool {
    let within_one_minute = 0.0..60.0;
    within_one_minute.contains(&sec)
}
/// Reports whether the time fields of `dt` are exactly `24:00:00`.
#[doc(alias = "VALID_END_OF_DAY")]
fn validate_end_of_day(dt: &XmlSchemaValDate) -> bool {
    let at_hour_24 = dt.hour == 24;
    let no_minutes = dt.min == 0;
    let no_seconds = dt.sec == 0.0;
    at_hour_24 && no_minutes && no_seconds
}
/// Reports whether `c` can begin a time-zone suffix: `Z`, `+` or `-`.
#[doc(alias = "IS_TZO_CHAR")]
fn is_tzo_char(c: char) -> bool {
    ['Z', '+', '-'].contains(&c)
}
fn starts_with_tzo_char(s: &str) -> bool {
s.chars().next().is_none_or(is_tzo_char)
}
/// Reports whether the time-zone offset `tzo` lies in `-840..=840`.
#[doc(alias = "VALID_TZO")]
fn validate_tzo(tzo: i16) -> bool {
    matches!(tzo, -840..=840)
}
/// Reports whether the time-of-day and time-zone fields of `dt` are valid.
///
/// The time-zone offset must pass `validate_tzo`, and the time must either be a
/// regular `hh:mm:ss` value (hour, minute and second each in range) or the
/// end-of-day value `24:00:00`.
#[doc(alias = "VALID_TIME")]
fn validate_time(dt: &XmlSchemaValDate) -> bool {
    if !validate_tzo(dt.tzo) {
        return false;
    }
    let regular_time =
        validate_hour(dt.hour) && validate_minute(dt.min) && validate_second(dt.sec);
    regular_time || validate_end_of_day(dt)
}
/// Reports whether `year` is a leap year: divisible by 4, and either not
/// divisible by 100 or divisible by 400.
///
/// Year `0` and negative multiples follow the same arithmetic rule.
#[doc(alias = "IS_LEAP")]
fn is_leap(year: i64) -> bool {
    let divisible_by = |n: i64| year % n == 0;
    // A multiple of 400 is always a multiple of 4, so this is the same predicate
    // as "(multiple of 4 and not of 100) or multiple of 400".
    divisible_by(4) && (!divisible_by(100) || divisible_by(400))
}
/// Number of days in each month of a non-leap year, indexed by `month - 1`.
const DAYS_IN_MONTH: [u8; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
/// Number of days in each month of a leap year, indexed by `month - 1`.
const DAYS_IN_MONTH_LEAP: [u8; 12] = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
/// Reports whether `dt.day` does not exceed the number of days in month `dt.mon`
/// of year `dt.year` (February has 29 days when `is_leap(dt.year)` holds).
///
/// Only the upper bound of the day is checked here. A month outside `1..=12`
/// has no day count, so the function returns `false` for it instead of
/// panicking on an out-of-range table index.
#[doc(alias = "VALID_MDAY")]
fn validate_mday(dt: &XmlSchemaValDate) -> bool {
    let days_per_month = if is_leap(dt.year) {
        &DAYS_IN_MONTH_LEAP
    } else {
        &DAYS_IN_MONTH
    };
    // `checked_sub` guards month 0, `get` guards months above 12.
    usize::from(dt.mon)
        .checked_sub(1)
        .and_then(|index| days_per_month.get(index))
        .is_some_and(|&last_day| dt.day <= last_day)
}
/// Reports whether the year, month and day fields of `dt` form a valid date.
///
/// The year and month are checked first; the day-of-month check only runs
/// once the month is known to be in range.
#[doc(alias = "VALID_DATE")]
fn validate_date(dt: &XmlSchemaValDate) -> bool {
    if !validate_year(dt.year) || !validate_month(dt.mon) {
        return false;
    }
    validate_mday(dt)
}
/// Reports whether `dt` holds both a valid date and a valid time.
#[doc(alias = "VALID_DATETIME")]
fn validate_datetime(dt: &XmlSchemaValDate) -> bool {
    if !validate_date(dt) {
        return false;
    }
    validate_time(dt)
}
/// Consumes exactly two leading ASCII digits from `s` and parses them as `F`.
///
/// On success `s` is advanced past the two digits and the parsed value is
/// returned. `None` is returned, and `s` is left untouched, when `s` does not
/// start with two ASCII digits or when those digits are not a valid `F`
/// (for example `"00"` parsed as a non-zero integer type).
#[doc(alias = "PARSE_2_DIGITS")]
fn parse_2digits<F>(s: &mut &str) -> Option<F>
where
    F: FromStr,
    F::Err: Debug,
{
    let input = *s;
    // `get` yields `None` for inputs shorter than two bytes and for inputs
    // where byte 2 falls inside a multi-byte character.
    let head = input
        .get(..2)
        .filter(|head| head.bytes().all(|b| b.is_ascii_digit()))?;
    // Report a failed conversion as `None` instead of panicking.
    let value = head.parse().ok()?;
    *s = &input[2..];
    Some(value)
}
/// Parses a two-digit integer part, optionally followed by `.` and one or more
/// fraction digits, from the front of `s`.
///
/// On success `s` is advanced past everything consumed. Returns `None`, leaving
/// `s` untouched, when the two leading digits are missing or when a `.` is not
/// followed by at least one digit.
#[doc(alias = "PARSE_FLOAT")]
fn parse_float(s: &mut &str) -> Option<f64> {
    let mut rest = *s;
    let whole = parse_2digits::<u8>(&mut rest)?;
    let mut value = f64::from(whole);
    if let Some(after_dot) = rest.strip_prefix('.') {
        let frac_len = after_dot.bytes().take_while(u8::is_ascii_digit).count();
        if frac_len == 0 {
            return None;
        }
        let (fraction, tail) = after_dot.split_at(frac_len);
        // Add each digit at its decimal weight: 1/10, 1/100, ...
        let mut weight = 1.;
        for digit in fraction.bytes().map(|b| b - b'0') {
            weight /= 10.;
            value += f64::from(digit) * weight;
        }
        rest = tail;
    }
    *s = rest;
    Some(value)
}
/// Parses an unsigned decimal integer of up to 24 significant digits from the
/// front of `s`.
///
/// Leading zeros are skipped. The remaining digits are split, starting from the
/// least significant end, into groups of eight decimal digits and returned as
/// `(low, middle, high)`; missing groups are `0`.
///
/// Returns `None` without touching `s` when `s` does not start with an ASCII
/// digit. When more than 24 significant digits are present, `None` is returned
/// but `s` has already been advanced past the digits.
#[doc(alias = "xmlSchemaParseUInt")]
fn parse_uint(s: &mut &str) -> Option<(u64, u64, u64)> {
    let input = *s;
    match input.as_bytes().first() {
        Some(first) if first.is_ascii_digit() => {}
        _ => return None,
    }
    let significant = input.trim_start_matches('0');
    let digit_count = significant
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    let (digits, tail) = significant.split_at(digit_count);
    // The cursor moves even if the number turns out to be too long.
    *s = tail;
    if digit_count > 24 {
        return None;
    }
    // At most 24 digits, hence at most three 8-digit groups, low group first.
    let mut groups = digits.as_bytes().rchunks(8).map(|group| {
        group
            .iter()
            .fold(0u64, |acc, &digit| acc * 10 + u64::from(digit - b'0'))
    });
    let low = groups.next().unwrap_or(0);
    let middle = groups.next().unwrap_or(0);
    let high = groups.next().unwrap_or(0);
    Some((low, middle, high))
}
/// Parses a year (`-`? followed by four or more digits) from the front of `s`
/// into `dt.year`.
///
/// Rules enforced:
/// * an optional leading `-` marks a negative year; a leading `+` is rejected,
///   because the XML Schema lexical form does not permit a plus sign;
/// * at least four digits are required, and a year written with more than four
///   digits must not start with `0`;
/// * the resulting year must pass `validate_year` and must fit the integer
///   type of `dt.year`.
///
/// On success `dt.year` is set and `s` is advanced past the year. On failure
/// `None` is returned and neither `s` nor `dt` is modified.
#[doc(alias = "_xmlSchemaParseGYear")]
fn parse_gyear(s: &mut &str, dt: &mut XmlSchemaValDate) -> Option<()> {
    let input = *s;
    let (negative, unsigned) = match input.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, input),
    };
    let digit_count = unsigned.bytes().take_while(u8::is_ascii_digit).count();
    let has_leading_zero = unsigned.starts_with('0');
    if digit_count < 4 || (digit_count > 4 && has_leading_zero) {
        return None;
    }
    let (digits, rest) = unsigned.split_at(digit_count);
    // `parse` fails on overflow, which is reported as an invalid year.
    let magnitude: i64 = digits.parse().ok()?;
    let year = if negative { -magnitude } else { magnitude };
    if !validate_year(year) {
        return None;
    }
    // Commit only after every check has passed.
    dt.year = year;
    *s = rest;
    Some(())
}
/// Parses a two-digit month from the front of `s` into `dt.mon`.
///
/// On success `s` is advanced past the two digits. Returns `None`, leaving `s`
/// and `dt` untouched, when the digits are missing or the month is outside
/// the range accepted by `validate_month`.
#[doc(alias = "_xmlSchemaParseGMonth")]
fn parse_gmonth(s: &mut &str, dt: &mut XmlSchemaValDate) -> Option<()> {
    let mut rest = *s;
    let month = parse_2digits::<u8>(&mut rest)?;
    if !validate_month(month) {
        return None;
    }
    dt.mon = month;
    *s = rest;
    Some(())
}
/// Parses a two-digit day from the front of `s` into `dt.day`.
///
/// On success `s` is advanced past the two digits. Returns `None`, leaving `s`
/// and `dt` untouched, when the digits are missing or the day is outside the
/// range accepted by `validate_day`. The month is not consulted here.
#[doc(alias = "_xmlSchemaParseGDay")]
fn parse_gday(s: &mut &str, dt: &mut XmlSchemaValDate) -> Option<()> {
    let mut rest = *s;
    let day = parse_2digits::<u8>(&mut rest)?;
    if !validate_day(day) {
        return None;
    }
    dt.day = day;
    *s = rest;
    Some(())
}
/// Parses `hh:mm:ss` with an optional `.fraction` from the front of `s` into
/// the `hour`, `min` and `sec` fields of `dt`.
///
/// Hour `24` is accepted at this stage; the final `validate_time` check then
/// only lets it through as the end-of-day value `24:00:00`.
///
/// On success `s` is advanced past the time. On failure `None` is returned and
/// `s` is left untouched, but the fields of `dt` that were parsed before the
/// failure may already have been overwritten.
#[doc(alias = "_xmlSchemaParseTime")]
fn parse_time(s: &mut &str, dt: &mut XmlSchemaValDate) -> Option<()> {
    let mut rest = *s;

    let hour = parse_2digits::<u8>(&mut rest)?;
    if hour > 24 {
        return None;
    }
    rest = rest.strip_prefix(':')?;
    // The ':' after the hour confirms that this is a time value.
    dt.hour = hour;

    let minute = parse_2digits::<u8>(&mut rest)?;
    if !validate_minute(minute) {
        return None;
    }
    dt.min = minute;
    rest = rest.strip_prefix(':')?;

    let whole_seconds = parse_2digits::<u8>(&mut rest)?;
    dt.sec = f64::from(whole_seconds);
    if let Some(after_dot) = rest.strip_prefix('.') {
        let frac_len = after_dot.bytes().take_while(u8::is_ascii_digit).count();
        if frac_len == 0 {
            return None;
        }
        let (fraction, tail) = after_dot.split_at(frac_len);
        // Add each digit at its decimal weight: 1/10, 1/100, ...
        let mut weight = 1.;
        for digit in fraction.bytes().map(|b| b - b'0') {
            weight /= 10.;
            dt.sec += f64::from(digit) * weight;
        }
        rest = tail;
    }

    if !validate_time(dt) {
        return None;
    }
    *s = rest;
    Some(())
}
/// Parses an optional time-zone suffix from the front of `s` into `dt.tz_flag`
/// and `dt.tzo`.
///
/// * empty input: no time zone (`tz_flag = 0`, `tzo = 0`), nothing consumed;
/// * `Z`: `tz_flag = 1`, `tzo = 0`, one character consumed;
/// * `+hh:mm` / `-hh:mm`: `tz_flag = 1` and `tzo` is the signed offset
///   `hh * 60 + mm`, which must pass `validate_tzo`;
/// * anything else is an error.
///
/// Returns `None` on error, leaving `s` untouched; `dt.tzo` may already have
/// been overwritten in that case.
#[doc(alias = "_xmlSchemaParseTimeZone")]
fn parse_time_zone(s: &mut &str, dt: &mut XmlSchemaValDate) -> Option<()> {
    let input = *s;
    let Some(&lead) = input.as_bytes().first() else {
        dt.tz_flag = 0;
        dt.tzo = 0;
        return Some(());
    };
    match lead {
        b'Z' => {
            dt.tz_flag = 1;
            dt.tzo = 0;
            *s = &input[1..];
            Some(())
        }
        b'+' | b'-' => {
            let mut rest = &input[1..];
            let hours = parse_2digits::<u8>(&mut rest)?;
            if !validate_hour(hours) {
                return None;
            }
            rest = rest.strip_prefix(':')?;
            dt.tzo = i16::from(hours) * 60;
            let minutes = parse_2digits::<u8>(&mut rest)?;
            if !validate_minute(minutes) {
                return None;
            }
            dt.tzo += i16::from(minutes);
            if lead == b'-' {
                dt.tzo = -dt.tzo;
            }
            if !validate_tzo(dt.tzo) {
                return None;
            }
            dt.tz_flag = 1;
            *s = rest;
            Some(())
        }
        _ => None,
    }
}
/// Parses `date_time` as one of the XML Schema date/time types and builds the
/// corresponding value.
///
/// The lexical shape selects the produced type; each form may be followed by a
/// time zone (`Z` or `±hh:mm`):
///
/// | form                  | type        |
/// |-----------------------|-------------|
/// | `---DD`               | gDay        |
/// | `--MM`                | gMonth      |
/// | `--MM-DD`             | gMonthDay   |
/// | `hh:mm:ss[.f]`        | time        |
/// | `YYYY`                | gYear       |
/// | `YYYY-MM`             | gYearMonth  |
/// | `YYYY-MM-DD`          | date        |
/// | `YYYY-MM-DDThh:mm:ss` | dateTime    |
///
/// # Parameters
/// * `typ` - the type the caller expects. `XmlSchemasUnknown` accepts whichever
///   type the text turns out to be; any other value must match the detected type.
/// * `date_time` - the lexical value.
/// * `collapse` - when `true`, blanks (as defined by `is_wsp_blank_ch`) at both
///   ends of `date_time` are ignored for every form.
///
/// # Returns
/// The parsed value, or `None` when the text is not a valid lexical form, has
/// trailing characters, or does not match `typ`.
#[doc(alias = "xmlSchemaValidateDates")]
pub(crate) fn validate_dates(
    typ: XmlSchemaValType,
    date_time: &str,
    collapse: bool,
) -> Option<XmlSchemaVal> {
    // Every parsing attempt below starts from `input`, so the blanks removed
    // here stay removed when the parser falls back to the date forms.
    let input = if collapse {
        date_time.trim_matches(is_wsp_blank_ch)
    } else {
        date_time
    };
    let mut cur = input;
    if !cur.starts_with(|c: char| c == '-' || c.is_ascii_digit()) {
        return None;
    }

    let mut dt = XmlSchemaValDate::default();
    // Builds the result once a form has been recognized: the whole input must
    // have been consumed and the detected type must be acceptable to the caller.
    let make_val =
        |remain: &str, dt: XmlSchemaValDate, expect: XmlSchemaValType| -> Option<XmlSchemaVal> {
            if !remain.is_empty() || (typ != XmlSchemaValType::XmlSchemasUnknown && typ != expect) {
                return None;
            }
            Some(XmlSchemaVal {
                typ: expect,
                next: None,
                value: XmlSchemaValPrimitives::Date(dt),
            })
        };

    if let Some(rem) = cur.strip_prefix("--") {
        cur = rem;

        // "---DD": gDay.
        if let Some(rem) = cur.strip_prefix('-') {
            if typ == XmlSchemaValType::XmlSchemasGMonth {
                return None;
            }
            cur = rem;
            parse_gday(&mut cur, &mut dt)?;
            if starts_with_tzo_char(cur) && parse_time_zone(&mut cur, &mut dt).is_some() {
                return make_val(cur, dt, XmlSchemaValType::XmlSchemasGDay);
            }
            return None;
        }

        // "--MM...": gMonthDay or gMonth.
        parse_gmonth(&mut cur, &mut dt)?;

        // A '-' after the month either introduces the day of a gMonthDay or a
        // negative time-zone offset. Try gMonthDay first. The "hh" of "-hh:mm"
        // also looks like a valid day, so a ':' right after it means the text
        // was really a time zone.
        if let Some(rem) = cur.strip_prefix('-') {
            let rewind = cur;
            let day_before_attempt = dt.day;
            cur = rem;
            // Year 0 counts as a leap year in `validate_mday`, so "--02-29" is accepted.
            if parse_gday(&mut cur, &mut dt).is_some()
                && !cur.starts_with(':')
                && validate_mday(&dt)
            {
                if starts_with_tzo_char(cur) {
                    parse_time_zone(&mut cur, &mut dt)?;
                    return make_val(cur, dt, XmlSchemaValType::XmlSchemasGMonthDay);
                }
                return None;
            }
            // Not a gMonthDay: rewind the cursor and discard the tentative day
            // so that a gMonth value does not carry the time-zone hour as its day.
            cur = rewind;
            dt.day = day_before_attempt;
        }

        if starts_with_tzo_char(cur) && parse_time_zone(&mut cur, &mut dt).is_some() {
            return make_val(cur, dt, XmlSchemaValType::XmlSchemasGMonth);
        }
        return None;
    }

    // Either a time or a (possibly right-truncated) date. Try time first.
    if cur.starts_with(|c: char| c.is_ascii_digit())
        && parse_time(&mut cur, &mut dt).is_some()
        && starts_with_tzo_char(cur)
        && parse_time_zone(&mut cur, &mut dt).is_some()
    {
        return make_val(cur, dt, XmlSchemaValType::XmlSchemasTime);
    }

    // Fall back to date parsing, restarting from the (collapsed) input.
    cur = input;

    parse_gyear(&mut cur, &mut dt)?;
    if starts_with_tzo_char(cur) && parse_time_zone(&mut cur, &mut dt).is_some() {
        return make_val(cur, dt, XmlSchemaValType::XmlSchemasGYear);
    }

    cur = cur.strip_prefix('-')?;
    parse_gmonth(&mut cur, &mut dt)?;
    if starts_with_tzo_char(cur) && parse_time_zone(&mut cur, &mut dt).is_some() {
        return make_val(cur, dt, XmlSchemaValType::XmlSchemasGYearMonth);
    }

    cur = cur.strip_prefix('-')?;
    parse_gday(&mut cur, &mut dt).filter(|_| validate_date(&dt))?;
    if starts_with_tzo_char(cur) && parse_time_zone(&mut cur, &mut dt).is_some() {
        return make_val(cur, dt, XmlSchemaValType::XmlSchemasDate);
    }

    // Only dateTime is left: the date must be followed by 'T' and a time.
    cur = cur.strip_prefix('T')?;
    parse_time(&mut cur, &mut dt)?;
    parse_time_zone(&mut cur, &mut dt).filter(|_| validate_datetime(&dt))?;
    make_val(cur, dt, XmlSchemaValType::XmlSchemasDatetime)
}
/// Number of seconds in one minute.
const SECS_PER_MIN: i64 = 60;
/// Number of minutes in one hour.
const MINS_PER_HOUR: i64 = 60;
/// Number of hours in one day.
const HOURS_PER_DAY: i64 = 24;
/// Number of seconds in one hour.
const SECS_PER_HOUR: i64 = MINS_PER_HOUR * SECS_PER_MIN;
/// Number of seconds in one day.
const SECS_PER_DAY: i64 = HOURS_PER_DAY * SECS_PER_HOUR;
/// Number of minutes in one day.
const MINS_PER_DAY: i64 = HOURS_PER_DAY * MINS_PER_HOUR;
/// Parses a duration of the form `[-]PnYnMnD[TnHnMn[.f]S]` and builds the
/// corresponding value.
///
/// The result stores the duration in three parts:
/// * `mon` - years * 12 + months;
/// * `day` - days plus every whole day contained in the hour/minute/second items;
/// * `sec` - the remaining seconds, including the fractional part.
///
/// A leading `-` negates all three parts.
///
/// # Parameters
/// * `duration` - the lexical value.
/// * `collapse` - when `true`, blanks (as defined by `is_wsp_blank_ch`) at both
///   ends of `duration` are ignored.
///
/// # Returns
/// `None` when the text is not accepted: missing `P`, no items after `P`, items
/// out of order or repeated, a time item without the `T` marker, an item
/// without digits, a fraction on anything but seconds, or arithmetic overflow.
#[doc(alias = "xmlSchemaValidateDuration")]
pub(crate) fn validate_duration(duration: &str, collapse: bool) -> Option<XmlSchemaVal> {
    // Item designators in the order they may appear; indices 3.. are time items.
    const DESIGNATORS: &[u8] = b"YMDHMS";

    let text = if collapse {
        duration.trim_matches(is_wsp_blank_ch)
    } else {
        duration
    };
    let (negative, text) = match text.strip_prefix('-') {
        Some(unsigned) => (true, unsigned),
        None => (false, text),
    };
    let mut rest = text.strip_prefix('P')?;
    if rest.is_empty() {
        return None;
    }

    let mut dur = XmlSchemaValDuration::default();
    // Index into `DESIGNATORS` of the earliest item that may still appear.
    let mut item = 0usize;
    let mut secs = 0i64;
    let mut sec_frac = 0.0f64;

    while !rest.is_empty() {
        // Text remains although every item has already been seen.
        if item >= DESIGNATORS.len() {
            return None;
        }

        // The 'T' marker must precede the time items and may appear only once.
        match rest.strip_prefix('T') {
            Some(after_marker) if item <= 3 => {
                rest = after_marker;
                item = 3;
            }
            Some(_) => return None,
            None if item == 3 => return None,
            None => {}
        }

        // Integral part.
        let int_len = rest.bytes().take_while(u8::is_ascii_digit).count();
        let (int_digits, after_int) = rest.split_at(int_len);
        rest = after_int;
        let mut has_digits = int_len > 0;
        let amount: i64 = if has_digits {
            int_digits.parse().ok()?
        } else {
            0
        };

        // Fractional part.
        let mut has_frac = false;
        if let Some(after_dot) = rest.strip_prefix('.') {
            has_frac = true;
            let frac_len = after_dot.bytes().take_while(u8::is_ascii_digit).count();
            let (fraction, after_frac) = after_dot.split_at(frac_len);
            let mut weight = 1.0;
            for digit in fraction.bytes().map(|b| b - b'0') {
                weight /= 10.0;
                sec_frac += f64::from(digit) * weight;
            }
            has_digits |= frac_len > 0;
            rest = after_frac;
        }

        // Find which item this number belongs to. Reaching index 3 here means a
        // time item without 'T' (or an invalid character); reaching the end
        // means no designator matched.
        let designator = rest.as_bytes().first().copied();
        while designator != Some(DESIGNATORS[item]) {
            item += 1;
            if item == 3 || item == DESIGNATORS.len() {
                return None;
            }
        }
        rest = &rest[1..];

        if !has_digits || (has_frac && item != 5) {
            return None;
        }

        match item {
            0 => dur.mon = amount.checked_mul(12)?,
            1 => dur.mon = dur.mon.checked_add(amount)?,
            2 => dur.day = amount,
            3 => {
                dur.day = dur.day.checked_add(amount / HOURS_PER_DAY)?;
                secs = (amount % HOURS_PER_DAY) * SECS_PER_HOUR;
            }
            4 => {
                dur.day = dur.day.checked_add(amount / MINS_PER_DAY)?;
                secs += (amount % MINS_PER_DAY) * SECS_PER_MIN;
            }
            5 => {
                dur.day = dur.day.checked_add(amount / SECS_PER_DAY)?;
                secs += amount % SECS_PER_DAY;
            }
            _ => {}
        }

        item += 1;
    }

    // Move whole days accumulated in the seconds counter into the day part.
    dur.day = dur.day.checked_add(secs / SECS_PER_DAY)?;
    dur.sec = (secs % SECS_PER_DAY) as f64 + sec_frac;

    if negative {
        dur.mon = -dur.mon;
        dur.day = -dur.day;
        dur.sec = -dur.sec;
    }

    Some(XmlSchemaVal {
        typ: XmlSchemaValType::XmlSchemasDuration,
        next: None,
        value: XmlSchemaValPrimitives::Duration(dur),
    })
}
/// Reports whether `value` has the shape of a language tag: `-`-separated
/// subtags of 1 to 8 characters each, where the first subtag consists of ASCII
/// letters only and every following subtag of ASCII letters or digits.
///
/// The empty string is rejected.
#[doc(alias = "xmlSchemaCheckLanguageType")]
pub(crate) fn check_language_type(value: &str) -> bool {
    let well_sized = |subtag: &str| matches!(subtag.len(), 1..=8);

    let mut subtags = value.split('-');
    // `split` always yields at least one item; an empty `value` yields one
    // empty subtag, which fails the length check below.
    let Some(primary) = subtags.next() else {
        return false;
    };
    if !well_sized(primary) || !primary.bytes().all(|b| b.is_ascii_alphabetic()) {
        return false;
    }
    subtags.all(|subtag| well_sized(subtag) && subtag.bytes().all(|b| b.is_ascii_alphanumeric()))
}
/// Removes XML blank characters from both ends of `value`.
///
/// Returns `Some(stripped)` only when something was actually removed; when
/// `value` has no leading or trailing blanks the result is `None`.
#[doc(alias = "xmlSchemaStrip")]
fn xml_schema_strip(value: &str) -> Option<&str> {
    let trimmed = value.trim_matches(|c: char| c.is_xml_blank_char());
    if trimmed.len() == value.len() {
        None
    } else {
        Some(trimmed)
    }
}
/// Maps one base64 alphabet character to its numeric value.
///
/// `A`-`Z` map to 0-25, `a`-`z` to 26-51, `0`-`9` to 52-61, `+` to 62 and `/`
/// to 63. The padding character `=` maps to the marker value 64. Any other
/// byte yields `None`.
#[doc(alias = "_xmlSchemaBase64Decode")]
fn base64_decode(ch: u8) -> Option<u8> {
    match ch {
        b'A'..=b'Z' => Some(ch - b'A'),
        b'a'..=b'z' => Some(ch - b'a' + 26),
        b'0'..=b'9' => Some(ch - b'0' + 52),
        b'+' => Some(62),
        b'/' => Some(63),
        b'=' => Some(64),
        _ => None,
    }
}
// Unit tests for the lexical parsers and validators defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// `parse_uint` returns the digits as (low, middle, high) groups of eight
// decimal digits and advances the cursor past the digits it consumed.
#[test]
fn parse_uint_test() {
let mut s = "12345678";
assert_eq!(parse_uint(&mut s), Some((12345678, 0, 0)));
assert_eq!(s, "");
let mut s = "1234567890123456";
assert_eq!(parse_uint(&mut s), Some((90123456, 12345678, 0)));
assert_eq!(s, "");
// 24 digits is the largest accepted length.
let mut s = "123456789012345678901234";
assert_eq!(parse_uint(&mut s), Some((78901234, 90123456, 12345678)));
assert_eq!(s, "");
// Leading zeros are skipped and do not count towards the length.
let mut s = "00000123456";
assert_eq!(parse_uint(&mut s), Some((123456, 0, 0)));
assert_eq!(s, "");
// Parsing stops at the first non-digit.
let mut s = "12345abc12345";
assert_eq!(parse_uint(&mut s), Some((12345, 0, 0)));
assert_eq!(s, "abc12345");
// 25 digits is too long; the cursor is still advanced past the digits.
let mut s = "1234567890123456789012345";
assert_eq!(parse_uint(&mut s), None);
assert_eq!(s, "");
// No leading digit: nothing is consumed.
let mut s = "abcd";
assert_eq!(parse_uint(&mut s), None);
assert_eq!(s, "abcd");
}
// `parse_time` fills the hour, minute and second fields of the date value.
#[test]
fn parse_time_test() {
let mut dt = XmlSchemaValDate::default();
let mut s = "00:00:00";
assert!(parse_time(&mut s, &mut dt).is_some());
assert!(dt.hour == 0 && dt.min == 0 && dt.sec == 0.0);
let mut s = "19:30:45";
assert!(parse_time(&mut s, &mut dt).is_some());
assert!(dt.hour == 19 && dt.min == 30 && dt.sec == 45.0);
// Fractional seconds.
let mut s = "21:15:30.500";
assert!(parse_time(&mut s, &mut dt).is_some());
assert!(dt.hour == 21 && dt.min == 15 && dt.sec == 30.5);
// End-of-day value.
let mut s = "24:00:00.000";
assert!(parse_time(&mut s, &mut dt).is_some());
assert!(dt.hour == 24 && dt.min == 0 && dt.sec == 0.0);
// A trailing time-zone marker does not prevent the time from being parsed.
let mut s = "14:00:00.9Z";
assert!(parse_time(&mut s, &mut dt).is_some());
assert!(dt.hour == 14 && dt.min == 0 && dt.sec == 0.9);
let mut s = "14:00:00.4999Z";
assert!(parse_time(&mut s, &mut dt).is_some());
assert!(dt.hour == 14 && dt.min == 0 && dt.sec == 0.4999);
}
// `parse_gyear` requires at least four digits and rejects a leading zero on
// years written with more than four digits.
#[test]
fn parse_gyear_test() {
let mut dt = XmlSchemaValDate::default();
let mut s = "2025";
assert!(parse_gyear(&mut s, &mut dt).is_some());
assert_eq!(dt.year, 2025);
// Four-digit years may be zero-padded.
let mut s = "0205";
assert!(parse_gyear(&mut s, &mut dt).is_some());
assert_eq!(dt.year, 205);
let mut s = "0025";
assert!(parse_gyear(&mut s, &mut dt).is_some());
assert_eq!(dt.year, 25);
let mut s = "0002";
assert!(parse_gyear(&mut s, &mut dt).is_some());
assert_eq!(dt.year, 2);
// Negative year.
let mut s = "-2025";
assert!(parse_gyear(&mut s, &mut dt).is_some());
assert_eq!(dt.year, -2025);
// More than four digits without a leading zero.
let mut s = "20250205";
assert!(parse_gyear(&mut s, &mut dt).is_some());
assert_eq!(dt.year, 20250205);
// More than four digits with a leading zero.
let mut s = "02025";
assert!(parse_gyear(&mut s, &mut dt).is_none());
// Fewer than four digits.
let mut s = "205";
assert!(parse_gyear(&mut s, &mut dt).is_none());
}
// A full dateTime with fractional seconds and a `Z` time zone is recognized
// as `XmlSchemasDatetime` when the expected type is unknown.
#[test]
fn validate_dates_test() {
use XmlSchemaValPrimitives::*;
use XmlSchemaValType::*;
let s = "2019-03-26T14:00:00.999Z";
let val = validate_dates(XmlSchemasUnknown, s, false);
assert!(val.is_some());
let val = val.unwrap();
assert!(matches!(val.value, Date(_)));
assert_eq!(val.typ, XmlSchemasDatetime);
let Date(date) = val.value else {
unreachable!()
};
assert!(
date.year == 2019
&& date.mon == 3
&& date.day == 26
&& date.hour == 14
&& date.min == 0
&& date.sec == 0.999
&& date.tzo == 0
);
}
// Durations are normalized into months, days and seconds; a leading `-`
// negates all three parts.
#[test]
fn validate_duration_test() {
use XmlSchemaValPrimitives::*;
use XmlSchemaValType::*;
// Parses `s` and compares the normalized parts with the expected ones.
fn check(s: &str, mon: i64, day: i64, sec: f64) {
let val = validate_duration(s, false);
assert!(val.is_some());
let val = val.unwrap();
assert!(matches!(val.value, Duration(_)));
assert_eq!(val.typ, XmlSchemasDuration);
let Duration(dur) = val.value else {
unreachable!()
};
assert_eq!(dur.mon, mon);
assert_eq!(dur.day, day);
assert_eq!(dur.sec, sec);
}
// Positive durations.
check("P1Y1M1DT1H1M1.1S", 13, 1, 3661.1);
check("P40D", 0, 40, 0.0);
check("P1Y1D", 12, 1, 0.0);
check("P3DT4H59M", 0, 3, 4. * 60. * 60. + 59. * 60.);
check("PT2H30M", 0, 0, 2. * 3600. + 30. * 60.);
// `M` before `T` means months, after `T` it means minutes.
check("P1M", 1, 0, 0.);
check("PT1M", 0, 0, 60.);
check("PT0.0021S", 0, 0, 0.0021);
check("PT0S", 0, 0, 0.);
check("P0D", 0, 0, 0.);
// Negative durations.
check("-P1Y1M1DT1H1M1.1S", -13, -1, -3661.1);
check("-P40D", 0, -40, 0.0);
check("-P1Y1D", -12, -1, 0.0);
check("-P3DT4H59M", 0, -3, -(4. * 60. * 60. + 59. * 60.));
check("-PT2H30M", 0, 0, -(2. * 3600. + 30. * 60.));
check("-P1M", -1, 0, 0.);
check("-PT1M", 0, 0, -60.);
check("-PT0.0021S", 0, 0, -0.0021);
check("-PT0S", 0, 0, 0.);
check("-P0D", 0, 0, 0.);
}
}