1use oxrdf::vocab::xsd;
5use oxrdf::{Literal, NamedNodeRef, Term};
6use oxsdatatypes::{
7 Boolean, Date, DateTime, DayTimeDuration, Decimal, Double, Duration, Float, Time,
8 YearMonthDuration,
9};
10use regex::Regex;
11use shifty_algebra::value_type::{Bound, ValueType};
12use std::cmp::Ordering;
13use std::str::FromStr;
14
15const XSD: &str = "http://www.w3.org/2001/XMLSchema#";
16
17pub fn value_type_holds(vt: &ValueType, term: &Term) -> bool {
19 match vt {
20 ValueType::Any => true,
21 ValueType::Datatype(dt) => {
22 matches!(term, Term::Literal(l) if l.datatype() == dt.as_ref() && valid_lexical_form(l))
23 }
24 ValueType::NumericRange { lo, hi } => {
25 lo.as_ref().is_none_or(|b| satisfies_lower(term, b))
26 && hi.as_ref().is_none_or(|b| satisfies_upper(term, b))
27 }
28 ValueType::Length { min, max } => match lexical(term) {
29 Some(s) => {
30 let len = s.chars().count() as u64;
31 min.is_none_or(|m| len >= m) && max.is_none_or(|m| len <= m)
32 }
33 None => false,
34 },
35 ValueType::Pattern { regex, flags } => match lexical(term) {
36 Some(s) => compile(regex, flags).is_some_and(|re| re.is_match(&s)),
37 None => false,
38 },
39 ValueType::LangIn(langs) => match term {
40 Term::Literal(l) => l
41 .language()
42 .is_some_and(|tag| langs.iter().any(|range| lang_matches(tag, range))),
43 _ => false,
44 },
45 ValueType::And(parts) => parts.iter().all(|p| value_type_holds(p, term)),
46 }
47}
48
49fn satisfies_lower(term: &Term, b: &Bound) -> bool {
50 match compare_terms(term, &Term::Literal(b.value.clone())) {
51 Some(Ordering::Greater) => true,
52 Some(Ordering::Equal) => b.inclusive,
53 _ => false,
54 }
55}
56
57fn satisfies_upper(term: &Term, b: &Bound) -> bool {
58 match compare_terms(term, &Term::Literal(b.value.clone())) {
59 Some(Ordering::Less) => true,
60 Some(Ordering::Equal) => b.inclusive,
61 _ => false,
62 }
63}
64
65pub fn compare_terms(a: &Term, b: &Term) -> Option<Ordering> {
70 if let (Term::Literal(la), Term::Literal(lb)) = (a, b) {
71 if let (Some(na), Some(nb)) = (numeric(la), numeric(lb)) {
72 return na.partial_cmp(&nb);
73 }
74 if is_string(la) && is_string(lb) {
75 return Some(la.value().cmp(lb.value()));
76 }
77 let (dta, dtb) = (la.datatype(), lb.datatype());
78 if dta == xsd::DATE_TIME && dtb == xsd::DATE_TIME {
79 let left = DateTime::from_str(la.value()).ok()?;
80 let right = DateTime::from_str(lb.value()).ok()?;
81 return left.partial_cmp(&right);
82 }
83 if dta == xsd::DATE && dtb == xsd::DATE {
84 let left = Date::from_str(la.value()).ok()?;
85 let right = Date::from_str(lb.value()).ok()?;
86 return left.partial_cmp(&right);
87 }
88 if dta == xsd::TIME && dtb == xsd::TIME {
89 let left = Time::from_str(la.value()).ok()?;
90 let right = Time::from_str(lb.value()).ok()?;
91 return left.partial_cmp(&right);
92 }
93 if is_duration_datatype(dta) && is_duration_datatype(dtb) {
94 let left = Duration::from_str(la.value()).ok()?;
97 let right = Duration::from_str(lb.value()).ok()?;
98 return left.partial_cmp(&right);
99 }
100 }
101 None
102}
103
104fn is_duration_datatype(dt: NamedNodeRef<'_>) -> bool {
105 dt == xsd::DURATION || dt == xsd::YEAR_MONTH_DURATION || dt == xsd::DAY_TIME_DURATION
106}
107
108fn valid_lexical_form(literal: &Literal) -> bool {
109 let datatype = literal.datatype();
110 let value = literal.value();
111 if datatype == xsd::BOOLEAN {
112 Boolean::from_str(value).is_ok()
113 } else if datatype == xsd::DECIMAL {
114 Decimal::from_str(value).is_ok()
115 } else if datatype == xsd::FLOAT {
116 Float::from_str(value).is_ok()
117 } else if datatype == xsd::DOUBLE {
118 Double::from_str(value).is_ok()
119 } else if datatype == xsd::DATE_TIME {
120 DateTime::from_str(value).is_ok()
121 } else if datatype == xsd::DATE {
122 Date::from_str(value).is_ok()
123 } else if datatype == xsd::TIME {
124 Time::from_str(value).is_ok()
125 } else if datatype == xsd::DURATION {
126 Duration::from_str(value).is_ok()
127 } else if datatype == xsd::YEAR_MONTH_DURATION {
128 YearMonthDuration::from_str(value).is_ok()
129 } else if datatype == xsd::DAY_TIME_DURATION {
130 DayTimeDuration::from_str(value).is_ok()
131 } else if is_integer_datatype(datatype) {
132 integer_in_datatype_range(value, datatype)
133 } else {
134 true
135 }
136}
137
138fn is_integer_datatype(datatype: NamedNodeRef<'_>) -> bool {
139 matches!(
140 datatype,
141 xsd::INTEGER
142 | xsd::LONG
143 | xsd::INT
144 | xsd::SHORT
145 | xsd::BYTE
146 | xsd::NON_NEGATIVE_INTEGER
147 | xsd::NON_POSITIVE_INTEGER
148 | xsd::NEGATIVE_INTEGER
149 | xsd::POSITIVE_INTEGER
150 | xsd::UNSIGNED_LONG
151 | xsd::UNSIGNED_INT
152 | xsd::UNSIGNED_SHORT
153 | xsd::UNSIGNED_BYTE
154 )
155}
156
157fn integer_in_datatype_range(value: &str, datatype: NamedNodeRef<'_>) -> bool {
158 let value = value.trim();
159 let digits = value.strip_prefix(['+', '-']).unwrap_or(value);
160 if digits.is_empty() || !digits.bytes().all(|byte| byte.is_ascii_digit()) {
161 return false;
162 }
163 if datatype == xsd::INTEGER {
164 return true;
165 }
166 let Ok(number) = value.parse::<i128>() else {
167 return false;
168 };
169 match datatype {
170 xsd::LONG => number >= i64::MIN.into() && number <= i64::MAX.into(),
171 xsd::INT => number >= i32::MIN.into() && number <= i32::MAX.into(),
172 xsd::SHORT => number >= i16::MIN.into() && number <= i16::MAX.into(),
173 xsd::BYTE => number >= i8::MIN.into() && number <= i8::MAX.into(),
174 xsd::NON_NEGATIVE_INTEGER => number >= 0,
175 xsd::NON_POSITIVE_INTEGER => number <= 0,
176 xsd::NEGATIVE_INTEGER => number < 0,
177 xsd::POSITIVE_INTEGER => number > 0,
178 xsd::UNSIGNED_LONG => number >= 0 && number <= u64::MAX.into(),
179 xsd::UNSIGNED_INT => number >= 0 && number <= u32::MAX.into(),
180 xsd::UNSIGNED_SHORT => number >= 0 && number <= u16::MAX.into(),
181 xsd::UNSIGNED_BYTE => number >= 0 && number <= u8::MAX.into(),
182 _ => false,
183 }
184}
185
186fn numeric(l: &Literal) -> Option<f64> {
187 is_numeric_datatype(l)
188 .then(|| l.value().parse::<f64>().ok())
189 .flatten()
190}
191
192fn is_numeric_datatype(l: &Literal) -> bool {
193 let dt = l.datatype().as_str();
194 let Some(local) = dt.strip_prefix(XSD) else {
195 return false;
196 };
197 matches!(
198 local,
199 "integer"
200 | "decimal"
201 | "float"
202 | "double"
203 | "long"
204 | "int"
205 | "short"
206 | "byte"
207 | "nonNegativeInteger"
208 | "nonPositiveInteger"
209 | "negativeInteger"
210 | "positiveInteger"
211 | "unsignedLong"
212 | "unsignedInt"
213 | "unsignedShort"
214 | "unsignedByte"
215 )
216}
217
218fn is_string(l: &Literal) -> bool {
219 l.datatype() == xsd::STRING
220}
221
222fn lexical(term: &Term) -> Option<String> {
225 match term {
226 Term::Literal(l) => Some(l.value().to_string()),
227 Term::NamedNode(n) => Some(n.as_str().to_string()),
228 Term::BlankNode(_) => None,
229 }
230}
231
232fn lang_matches(tag: &str, range: &str) -> bool {
234 range == "*"
235 || tag.eq_ignore_ascii_case(range)
236 || tag
237 .to_ascii_lowercase()
238 .starts_with(&format!("{}-", range.to_ascii_lowercase()))
239}
240
241fn compile(regex: &str, flags: &str) -> Option<Regex> {
243 let active: String = flags.chars().filter(|c| "ismx".contains(*c)).collect();
244 let pattern = if active.is_empty() {
245 regex.to_string()
246 } else {
247 format!("(?{active}){regex}")
248 };
249 Regex::new(&pattern).ok()
250}
251
252#[cfg(test)]
253mod tests {
254 use super::*;
255 use oxrdf::Literal;
256 use std::cmp::Ordering;
257
258 fn lit(value: &str, datatype: &str) -> Term {
259 Term::Literal(Literal::new_typed_literal(
260 value,
261 oxrdf::NamedNode::new(datatype).unwrap(),
262 ))
263 }
264 fn date(v: &str) -> Term {
265 lit(v, "http://www.w3.org/2001/XMLSchema#date")
266 }
267 fn time(v: &str) -> Term {
268 lit(v, "http://www.w3.org/2001/XMLSchema#time")
269 }
270 fn dur(v: &str) -> Term {
271 lit(v, "http://www.w3.org/2001/XMLSchema#duration")
272 }
273 fn ymd(v: &str) -> Term {
274 lit(v, "http://www.w3.org/2001/XMLSchema#yearMonthDuration")
275 }
276 fn dtd(v: &str) -> Term {
277 lit(v, "http://www.w3.org/2001/XMLSchema#dayTimeDuration")
278 }
279
280 #[test]
281 fn date_ordering() {
282 assert_eq!(
283 compare_terms(&date("2020-01-01"), &date("2020-06-01")),
284 Some(Ordering::Less)
285 );
286 assert_eq!(
287 compare_terms(&date("2020-06-01"), &date("2020-01-01")),
288 Some(Ordering::Greater)
289 );
290 assert_eq!(
291 compare_terms(&date("2020-03-15"), &date("2020-03-15")),
292 Some(Ordering::Equal)
293 );
294 }
295
296 #[test]
297 fn time_ordering() {
298 assert_eq!(
299 compare_terms(&time("08:00:00"), &time("17:30:00")),
300 Some(Ordering::Less)
301 );
302 assert_eq!(
303 compare_terms(&time("23:59:59"), &time("00:00:00")),
304 Some(Ordering::Greater)
305 );
306 assert_eq!(
307 compare_terms(&time("12:00:00"), &time("12:00:00")),
308 Some(Ordering::Equal)
309 );
310 }
311
312 #[test]
313 fn duration_ordering() {
314 assert_eq!(
315 compare_terms(&dur("P1Y"), &dur("P2Y")),
316 Some(Ordering::Less)
317 );
318 assert_eq!(
319 compare_terms(&dur("P30D"), &dur("P1D")),
320 Some(Ordering::Greater)
321 );
322 assert_eq!(
323 compare_terms(&dur("P1Y"), &dur("P1Y")),
324 Some(Ordering::Equal)
325 );
326 }
327
328 #[test]
329 fn year_month_duration_ordering() {
330 assert_eq!(
331 compare_terms(&ymd("P1Y"), &ymd("P13M")),
332 Some(Ordering::Less)
333 );
334 assert_eq!(
335 compare_terms(&ymd("P2Y"), &ymd("P1Y")),
336 Some(Ordering::Greater)
337 );
338 }
339
340 #[test]
341 fn day_time_duration_ordering() {
342 assert_eq!(
343 compare_terms(&dtd("PT1H"), &dtd("PT2H")),
344 Some(Ordering::Less)
345 );
346 assert_eq!(
347 compare_terms(&dtd("P2D"), &dtd("P1D")),
348 Some(Ordering::Greater)
349 );
350 }
351
352 #[test]
353 fn genuinely_incomparable_durations() {
354 assert_eq!(compare_terms(&dur("P1M"), &dur("P30D")), None);
356 }
357
358 #[test]
359 fn cross_type_incomparable() {
360 assert_eq!(compare_terms(&date("2020-01-01"), &time("12:00:00")), None);
361 assert_eq!(compare_terms(&date("2020-01-01"), &dur("P1Y")), None);
362 }
363
364 #[test]
365 fn valid_lexical_form_date() {
366 let good = Literal::new_typed_literal(
367 "2020-01-01",
368 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#date").unwrap(),
369 );
370 let bad = Literal::new_typed_literal(
371 "not-a-date",
372 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#date").unwrap(),
373 );
374 assert!(valid_lexical_form(&good));
375 assert!(!valid_lexical_form(&bad));
376 }
377
378 #[test]
379 fn valid_lexical_form_duration() {
380 let good = Literal::new_typed_literal(
381 "P1Y2M3DT4H",
382 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#duration").unwrap(),
383 );
384 let bad = Literal::new_typed_literal(
385 "not-a-duration",
386 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#duration").unwrap(),
387 );
388 assert!(valid_lexical_form(&good));
389 assert!(!valid_lexical_form(&bad));
390 }
391}