1use bigdecimal::BigDecimal;
5use oxrdf::vocab::xsd;
6use oxrdf::{Literal, NamedNodeRef, Term};
7use oxsdatatypes::{
8 Boolean, Date, DateTime, DayTimeDuration, Double, Duration, Float, Time, YearMonthDuration,
9};
10use regex::Regex;
11use shifty_algebra::value_type::{Bound, ValueType};
12use std::cmp::Ordering;
13use std::str::FromStr;
14
15const XSD: &str = "http://www.w3.org/2001/XMLSchema#";
16
17pub fn value_type_holds(vt: &ValueType, term: &Term) -> bool {
19 match vt {
20 ValueType::Any => true,
21 ValueType::Datatype(dt) => {
22 matches!(term, Term::Literal(l) if l.datatype() == dt.as_ref() && valid_lexical_form(l))
23 }
24 ValueType::NumericRange { lo, hi } => {
25 lo.as_ref().is_none_or(|b| satisfies_lower(term, b))
26 && hi.as_ref().is_none_or(|b| satisfies_upper(term, b))
27 }
28 ValueType::Length { min, max } => match lexical(term) {
29 Some(s) => {
30 let len = s.chars().count() as u64;
31 min.is_none_or(|m| len >= m) && max.is_none_or(|m| len <= m)
32 }
33 None => false,
34 },
35 ValueType::Pattern { regex, flags } => match lexical(term) {
36 Some(s) => compile(regex, flags).is_some_and(|re| re.is_match(&s)),
37 None => false,
38 },
39 ValueType::LangIn(langs) => match term {
40 Term::Literal(l) => l
41 .language()
42 .is_some_and(|tag| langs.iter().any(|range| lang_matches(tag, range))),
43 _ => false,
44 },
45 ValueType::And(parts) => parts.iter().all(|p| value_type_holds(p, term)),
46 }
47}
48
49fn satisfies_lower(term: &Term, b: &Bound) -> bool {
50 match compare_terms(term, &Term::Literal(b.value.clone())) {
51 Some(Ordering::Greater) => true,
52 Some(Ordering::Equal) => b.inclusive,
53 _ => false,
54 }
55}
56
57fn satisfies_upper(term: &Term, b: &Bound) -> bool {
58 match compare_terms(term, &Term::Literal(b.value.clone())) {
59 Some(Ordering::Less) => true,
60 Some(Ordering::Equal) => b.inclusive,
61 _ => false,
62 }
63}
64
65pub fn compare_terms(a: &Term, b: &Term) -> Option<Ordering> {
70 if let (Term::Literal(la), Term::Literal(lb)) = (a, b) {
71 if let (Some(da), Some(db)) = (exact_decimal(la), exact_decimal(lb)) {
77 return Some(da.cmp(&db));
78 }
79 if let (Some(na), Some(nb)) = (numeric(la), numeric(lb)) {
80 return na.partial_cmp(&nb);
81 }
82 if is_string(la) && is_string(lb) {
83 return Some(la.value().cmp(lb.value()));
84 }
85 let (dta, dtb) = (la.datatype(), lb.datatype());
86 if dta == xsd::DATE_TIME && dtb == xsd::DATE_TIME {
87 let left = DateTime::from_str(la.value()).ok()?;
88 let right = DateTime::from_str(lb.value()).ok()?;
89 return left.partial_cmp(&right);
90 }
91 if dta == xsd::DATE && dtb == xsd::DATE {
92 let left = Date::from_str(la.value()).ok()?;
93 let right = Date::from_str(lb.value()).ok()?;
94 return left.partial_cmp(&right);
95 }
96 if dta == xsd::TIME && dtb == xsd::TIME {
97 let left = Time::from_str(la.value()).ok()?;
98 let right = Time::from_str(lb.value()).ok()?;
99 return left.partial_cmp(&right);
100 }
101 if is_duration_datatype(dta) && is_duration_datatype(dtb) {
102 let left = Duration::from_str(la.value()).ok()?;
105 let right = Duration::from_str(lb.value()).ok()?;
106 return left.partial_cmp(&right);
107 }
108 }
109 None
110}
111
112fn is_duration_datatype(dt: NamedNodeRef<'_>) -> bool {
113 dt == xsd::DURATION || dt == xsd::YEAR_MONTH_DURATION || dt == xsd::DAY_TIME_DURATION
114}
115
116fn valid_lexical_form(literal: &Literal) -> bool {
117 let datatype = literal.datatype();
118 let value = literal.value();
119 if datatype == xsd::BOOLEAN {
120 Boolean::from_str(value).is_ok()
121 } else if datatype == xsd::DECIMAL {
122 is_decimal_lexical(value)
127 } else if datatype == xsd::FLOAT {
128 Float::from_str(value).is_ok()
129 } else if datatype == xsd::DOUBLE {
130 Double::from_str(value).is_ok()
131 } else if datatype == xsd::DATE_TIME {
132 DateTime::from_str(value).is_ok()
133 } else if datatype == xsd::DATE {
134 Date::from_str(value).is_ok()
135 } else if datatype == xsd::TIME {
136 Time::from_str(value).is_ok()
137 } else if datatype == xsd::DURATION {
138 Duration::from_str(value).is_ok()
139 } else if datatype == xsd::YEAR_MONTH_DURATION {
140 YearMonthDuration::from_str(value).is_ok()
141 } else if datatype == xsd::DAY_TIME_DURATION {
142 DayTimeDuration::from_str(value).is_ok()
143 } else if is_integer_datatype(datatype) {
144 integer_in_datatype_range(value, datatype)
145 } else {
146 true
147 }
148}
149
150fn is_decimal_lexical(value: &str) -> bool {
157 let value = value.trim();
158 let digits = value.strip_prefix(['+', '-']).unwrap_or(value);
159 let mut seen_digit = false;
160 let mut seen_dot = false;
161 for byte in digits.bytes() {
162 match byte {
163 b'0'..=b'9' => seen_digit = true,
164 b'.' if !seen_dot => seen_dot = true,
165 _ => return false,
166 }
167 }
168 seen_digit
169}
170
171fn is_integer_datatype(datatype: NamedNodeRef<'_>) -> bool {
172 matches!(
173 datatype,
174 xsd::INTEGER
175 | xsd::LONG
176 | xsd::INT
177 | xsd::SHORT
178 | xsd::BYTE
179 | xsd::NON_NEGATIVE_INTEGER
180 | xsd::NON_POSITIVE_INTEGER
181 | xsd::NEGATIVE_INTEGER
182 | xsd::POSITIVE_INTEGER
183 | xsd::UNSIGNED_LONG
184 | xsd::UNSIGNED_INT
185 | xsd::UNSIGNED_SHORT
186 | xsd::UNSIGNED_BYTE
187 )
188}
189
190fn integer_in_datatype_range(value: &str, datatype: NamedNodeRef<'_>) -> bool {
191 let value = value.trim();
192 let digits = value.strip_prefix(['+', '-']).unwrap_or(value);
193 if digits.is_empty() || !digits.bytes().all(|byte| byte.is_ascii_digit()) {
194 return false;
195 }
196 if datatype == xsd::INTEGER {
197 return true;
198 }
199 let Ok(number) = value.parse::<i128>() else {
200 return false;
201 };
202 match datatype {
203 xsd::LONG => number >= i64::MIN.into() && number <= i64::MAX.into(),
204 xsd::INT => number >= i32::MIN.into() && number <= i32::MAX.into(),
205 xsd::SHORT => number >= i16::MIN.into() && number <= i16::MAX.into(),
206 xsd::BYTE => number >= i8::MIN.into() && number <= i8::MAX.into(),
207 xsd::NON_NEGATIVE_INTEGER => number >= 0,
208 xsd::NON_POSITIVE_INTEGER => number <= 0,
209 xsd::NEGATIVE_INTEGER => number < 0,
210 xsd::POSITIVE_INTEGER => number > 0,
211 xsd::UNSIGNED_LONG => number >= 0 && number <= u64::MAX.into(),
212 xsd::UNSIGNED_INT => number >= 0 && number <= u32::MAX.into(),
213 xsd::UNSIGNED_SHORT => number >= 0 && number <= u16::MAX.into(),
214 xsd::UNSIGNED_BYTE => number >= 0 && number <= u8::MAX.into(),
215 _ => false,
216 }
217}
218
219fn exact_decimal(l: &Literal) -> Option<BigDecimal> {
226 let dt = l.datatype();
227 if dt == xsd::DECIMAL || is_integer_datatype(dt) {
228 BigDecimal::from_str(l.value().trim()).ok()
229 } else {
230 None
231 }
232}
233
234fn numeric(l: &Literal) -> Option<f64> {
235 is_numeric_datatype(l)
236 .then(|| l.value().parse::<f64>().ok())
237 .flatten()
238}
239
240fn is_numeric_datatype(l: &Literal) -> bool {
241 let dt = l.datatype().as_str();
242 let Some(local) = dt.strip_prefix(XSD) else {
243 return false;
244 };
245 matches!(
246 local,
247 "integer"
248 | "decimal"
249 | "float"
250 | "double"
251 | "long"
252 | "int"
253 | "short"
254 | "byte"
255 | "nonNegativeInteger"
256 | "nonPositiveInteger"
257 | "negativeInteger"
258 | "positiveInteger"
259 | "unsignedLong"
260 | "unsignedInt"
261 | "unsignedShort"
262 | "unsignedByte"
263 )
264}
265
266fn is_string(l: &Literal) -> bool {
267 l.datatype() == xsd::STRING
268}
269
270fn lexical(term: &Term) -> Option<String> {
273 match term {
274 Term::Literal(l) => Some(l.value().to_string()),
275 Term::NamedNode(n) => Some(n.as_str().to_string()),
276 Term::BlankNode(_) => None,
277 }
278}
279
280fn lang_matches(tag: &str, range: &str) -> bool {
282 range == "*"
283 || tag.eq_ignore_ascii_case(range)
284 || tag
285 .to_ascii_lowercase()
286 .starts_with(&format!("{}-", range.to_ascii_lowercase()))
287}
288
289fn compile(regex: &str, flags: &str) -> Option<Regex> {
291 let active: String = flags.chars().filter(|c| "ismx".contains(*c)).collect();
292 let pattern = if active.is_empty() {
293 regex.to_string()
294 } else {
295 format!("(?{active}){regex}")
296 };
297 Regex::new(&pattern).ok()
298}
299
300#[cfg(test)]
301mod tests {
302 use super::*;
303 use oxrdf::Literal;
304 use std::cmp::Ordering;
305
306 fn lit(value: &str, datatype: &str) -> Term {
307 Term::Literal(Literal::new_typed_literal(
308 value,
309 oxrdf::NamedNode::new(datatype).unwrap(),
310 ))
311 }
312 fn date(v: &str) -> Term {
313 lit(v, "http://www.w3.org/2001/XMLSchema#date")
314 }
315 fn time(v: &str) -> Term {
316 lit(v, "http://www.w3.org/2001/XMLSchema#time")
317 }
318 fn dur(v: &str) -> Term {
319 lit(v, "http://www.w3.org/2001/XMLSchema#duration")
320 }
321 fn ymd(v: &str) -> Term {
322 lit(v, "http://www.w3.org/2001/XMLSchema#yearMonthDuration")
323 }
324 fn dtd(v: &str) -> Term {
325 lit(v, "http://www.w3.org/2001/XMLSchema#dayTimeDuration")
326 }
327
328 #[test]
329 fn date_ordering() {
330 assert_eq!(
331 compare_terms(&date("2020-01-01"), &date("2020-06-01")),
332 Some(Ordering::Less)
333 );
334 assert_eq!(
335 compare_terms(&date("2020-06-01"), &date("2020-01-01")),
336 Some(Ordering::Greater)
337 );
338 assert_eq!(
339 compare_terms(&date("2020-03-15"), &date("2020-03-15")),
340 Some(Ordering::Equal)
341 );
342 }
343
344 #[test]
345 fn time_ordering() {
346 assert_eq!(
347 compare_terms(&time("08:00:00"), &time("17:30:00")),
348 Some(Ordering::Less)
349 );
350 assert_eq!(
351 compare_terms(&time("23:59:59"), &time("00:00:00")),
352 Some(Ordering::Greater)
353 );
354 assert_eq!(
355 compare_terms(&time("12:00:00"), &time("12:00:00")),
356 Some(Ordering::Equal)
357 );
358 }
359
360 #[test]
361 fn duration_ordering() {
362 assert_eq!(
363 compare_terms(&dur("P1Y"), &dur("P2Y")),
364 Some(Ordering::Less)
365 );
366 assert_eq!(
367 compare_terms(&dur("P30D"), &dur("P1D")),
368 Some(Ordering::Greater)
369 );
370 assert_eq!(
371 compare_terms(&dur("P1Y"), &dur("P1Y")),
372 Some(Ordering::Equal)
373 );
374 }
375
376 #[test]
377 fn year_month_duration_ordering() {
378 assert_eq!(
379 compare_terms(&ymd("P1Y"), &ymd("P13M")),
380 Some(Ordering::Less)
381 );
382 assert_eq!(
383 compare_terms(&ymd("P2Y"), &ymd("P1Y")),
384 Some(Ordering::Greater)
385 );
386 }
387
388 #[test]
389 fn day_time_duration_ordering() {
390 assert_eq!(
391 compare_terms(&dtd("PT1H"), &dtd("PT2H")),
392 Some(Ordering::Less)
393 );
394 assert_eq!(
395 compare_terms(&dtd("P2D"), &dtd("P1D")),
396 Some(Ordering::Greater)
397 );
398 }
399
400 #[test]
401 fn genuinely_incomparable_durations() {
402 assert_eq!(compare_terms(&dur("P1M"), &dur("P30D")), None);
404 }
405
406 #[test]
407 fn cross_type_incomparable() {
408 assert_eq!(compare_terms(&date("2020-01-01"), &time("12:00:00")), None);
409 assert_eq!(compare_terms(&date("2020-01-01"), &dur("P1Y")), None);
410 }
411
412 fn decimal(v: &str) -> Term {
413 lit(v, "http://www.w3.org/2001/XMLSchema#decimal")
414 }
415
416 #[test]
417 fn high_precision_decimal_is_a_valid_lexical_form() {
418 let dt = ValueType::Datatype(
422 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal").unwrap(),
423 );
424 let term = decimal("0.0004719474432000000000000000000000001");
425 assert!(value_type_holds(&dt, &term));
426 }
427
428 #[test]
429 fn decimal_lexical_forms() {
430 assert!(is_decimal_lexical(
431 "0.0004719474432000000000000000000000001"
432 ));
433 assert!(is_decimal_lexical("123"));
434 assert!(is_decimal_lexical("-1.5"));
435 assert!(is_decimal_lexical("+.5"));
436 assert!(is_decimal_lexical("1."));
437 assert!(is_decimal_lexical(" 42 "));
438 assert!(!is_decimal_lexical("1.5e10"));
440 assert!(!is_decimal_lexical("."));
441 assert!(!is_decimal_lexical("+"));
442 assert!(!is_decimal_lexical("1.2.3"));
443 assert!(!is_decimal_lexical("abc"));
444 }
445
446 #[test]
447 fn high_precision_decimals_compare_exactly() {
448 let a = decimal("0.0004719474432000000000000000000000001");
451 let b = decimal("0.0004719474432000000000000000000000002");
452 assert_eq!(compare_terms(&a, &b), Some(Ordering::Less));
453 assert_eq!(compare_terms(&a, &a.clone()), Some(Ordering::Equal));
454 }
455
456 #[test]
457 fn decimal_vs_double_promotes_to_double() {
458 let double = lit("0.1", "http://www.w3.org/2001/XMLSchema#double");
462 assert_eq!(
463 compare_terms(&decimal("0.1"), &double),
464 Some(Ordering::Equal)
465 );
466 }
467
468 #[test]
469 fn double_comparisons_still_order_and_handle_nan() {
470 let inf = lit("INF", "http://www.w3.org/2001/XMLSchema#double");
471 assert_eq!(
472 compare_terms(&decimal("1000000000"), &inf),
473 Some(Ordering::Less)
474 );
475 let nan = lit("NaN", "http://www.w3.org/2001/XMLSchema#double");
476 assert_eq!(compare_terms(&decimal("1"), &nan), None);
477 }
478
479 #[test]
480 fn large_integers_compare_exactly() {
481 let a = lit(
483 "9007199254740993",
484 "http://www.w3.org/2001/XMLSchema#integer",
485 );
486 let b = lit(
487 "9007199254740992",
488 "http://www.w3.org/2001/XMLSchema#integer",
489 );
490 assert_eq!(compare_terms(&a, &b), Some(Ordering::Greater));
491 }
492
493 #[test]
494 fn valid_lexical_form_date() {
495 let good = Literal::new_typed_literal(
496 "2020-01-01",
497 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#date").unwrap(),
498 );
499 let bad = Literal::new_typed_literal(
500 "not-a-date",
501 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#date").unwrap(),
502 );
503 assert!(valid_lexical_form(&good));
504 assert!(!valid_lexical_form(&bad));
505 }
506
507 #[test]
508 fn valid_lexical_form_duration() {
509 let good = Literal::new_typed_literal(
510 "P1Y2M3DT4H",
511 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#duration").unwrap(),
512 );
513 let bad = Literal::new_typed_literal(
514 "not-a-duration",
515 oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#duration").unwrap(),
516 );
517 assert!(valid_lexical_form(&good));
518 assert!(!valid_lexical_form(&bad));
519 }
520}