Skip to main content

shifty_engine/
value.rs

1//! Evaluation of value types `test(τ)` and the term ordering used by
2//! `sh:lessThan`, ranges, etc. Naive reference semantics.
3
4use oxrdf::vocab::xsd;
5use oxrdf::{Literal, NamedNodeRef, Term};
6use oxsdatatypes::{
7    Boolean, Date, DateTime, DayTimeDuration, Decimal, Double, Duration, Float, Time,
8    YearMonthDuration,
9};
10use regex::Regex;
11use shifty_algebra::value_type::{Bound, ValueType};
12use std::cmp::Ordering;
13use std::str::FromStr;
14
15const XSD: &str = "http://www.w3.org/2001/XMLSchema#";
16
17/// Does `term` satisfy value type `τ`?
18pub fn value_type_holds(vt: &ValueType, term: &Term) -> bool {
19    match vt {
20        ValueType::Any => true,
21        ValueType::Datatype(dt) => {
22            matches!(term, Term::Literal(l) if l.datatype() == dt.as_ref() && valid_lexical_form(l))
23        }
24        ValueType::NumericRange { lo, hi } => {
25            lo.as_ref().is_none_or(|b| satisfies_lower(term, b))
26                && hi.as_ref().is_none_or(|b| satisfies_upper(term, b))
27        }
28        ValueType::Length { min, max } => match lexical(term) {
29            Some(s) => {
30                let len = s.chars().count() as u64;
31                min.is_none_or(|m| len >= m) && max.is_none_or(|m| len <= m)
32            }
33            None => false,
34        },
35        ValueType::Pattern { regex, flags } => match lexical(term) {
36            Some(s) => compile(regex, flags).is_some_and(|re| re.is_match(&s)),
37            None => false,
38        },
39        ValueType::LangIn(langs) => match term {
40            Term::Literal(l) => l
41                .language()
42                .is_some_and(|tag| langs.iter().any(|range| lang_matches(tag, range))),
43            _ => false,
44        },
45        ValueType::And(parts) => parts.iter().all(|p| value_type_holds(p, term)),
46    }
47}
48
49fn satisfies_lower(term: &Term, b: &Bound) -> bool {
50    match compare_terms(term, &Term::Literal(b.value.clone())) {
51        Some(Ordering::Greater) => true,
52        Some(Ordering::Equal) => b.inclusive,
53        _ => false,
54    }
55}
56
57fn satisfies_upper(term: &Term, b: &Bound) -> bool {
58    match compare_terms(term, &Term::Literal(b.value.clone())) {
59        Some(Ordering::Less) => true,
60        Some(Ordering::Equal) => b.inclusive,
61        _ => false,
62    }
63}
64
65/// Partial ordering over terms: numeric literals compare numerically,
66/// string-typed literals lexicographically, and the XSD date/time/duration
67/// family via their `oxsdatatypes` implementations (which correctly model the
68/// partial order for timezone-unaware comparisons and incomparable durations).
69pub fn compare_terms(a: &Term, b: &Term) -> Option<Ordering> {
70    if let (Term::Literal(la), Term::Literal(lb)) = (a, b) {
71        if let (Some(na), Some(nb)) = (numeric(la), numeric(lb)) {
72            return na.partial_cmp(&nb);
73        }
74        if is_string(la) && is_string(lb) {
75            return Some(la.value().cmp(lb.value()));
76        }
77        let (dta, dtb) = (la.datatype(), lb.datatype());
78        if dta == xsd::DATE_TIME && dtb == xsd::DATE_TIME {
79            let left = DateTime::from_str(la.value()).ok()?;
80            let right = DateTime::from_str(lb.value()).ok()?;
81            return left.partial_cmp(&right);
82        }
83        if dta == xsd::DATE && dtb == xsd::DATE {
84            let left = Date::from_str(la.value()).ok()?;
85            let right = Date::from_str(lb.value()).ok()?;
86            return left.partial_cmp(&right);
87        }
88        if dta == xsd::TIME && dtb == xsd::TIME {
89            let left = Time::from_str(la.value()).ok()?;
90            let right = Time::from_str(lb.value()).ok()?;
91            return left.partial_cmp(&right);
92        }
93        if is_duration_datatype(dta) && is_duration_datatype(dtb) {
94            // Parse via Duration for all three subtypes; Duration::partial_cmp
95            // correctly returns None for incomparable year-month vs day-time pairs.
96            let left = Duration::from_str(la.value()).ok()?;
97            let right = Duration::from_str(lb.value()).ok()?;
98            return left.partial_cmp(&right);
99        }
100    }
101    None
102}
103
104fn is_duration_datatype(dt: NamedNodeRef<'_>) -> bool {
105    dt == xsd::DURATION || dt == xsd::YEAR_MONTH_DURATION || dt == xsd::DAY_TIME_DURATION
106}
107
108fn valid_lexical_form(literal: &Literal) -> bool {
109    let datatype = literal.datatype();
110    let value = literal.value();
111    if datatype == xsd::BOOLEAN {
112        Boolean::from_str(value).is_ok()
113    } else if datatype == xsd::DECIMAL {
114        Decimal::from_str(value).is_ok()
115    } else if datatype == xsd::FLOAT {
116        Float::from_str(value).is_ok()
117    } else if datatype == xsd::DOUBLE {
118        Double::from_str(value).is_ok()
119    } else if datatype == xsd::DATE_TIME {
120        DateTime::from_str(value).is_ok()
121    } else if datatype == xsd::DATE {
122        Date::from_str(value).is_ok()
123    } else if datatype == xsd::TIME {
124        Time::from_str(value).is_ok()
125    } else if datatype == xsd::DURATION {
126        Duration::from_str(value).is_ok()
127    } else if datatype == xsd::YEAR_MONTH_DURATION {
128        YearMonthDuration::from_str(value).is_ok()
129    } else if datatype == xsd::DAY_TIME_DURATION {
130        DayTimeDuration::from_str(value).is_ok()
131    } else if is_integer_datatype(datatype) {
132        integer_in_datatype_range(value, datatype)
133    } else {
134        true
135    }
136}
137
138fn is_integer_datatype(datatype: NamedNodeRef<'_>) -> bool {
139    matches!(
140        datatype,
141        xsd::INTEGER
142            | xsd::LONG
143            | xsd::INT
144            | xsd::SHORT
145            | xsd::BYTE
146            | xsd::NON_NEGATIVE_INTEGER
147            | xsd::NON_POSITIVE_INTEGER
148            | xsd::NEGATIVE_INTEGER
149            | xsd::POSITIVE_INTEGER
150            | xsd::UNSIGNED_LONG
151            | xsd::UNSIGNED_INT
152            | xsd::UNSIGNED_SHORT
153            | xsd::UNSIGNED_BYTE
154    )
155}
156
157fn integer_in_datatype_range(value: &str, datatype: NamedNodeRef<'_>) -> bool {
158    let value = value.trim();
159    let digits = value.strip_prefix(['+', '-']).unwrap_or(value);
160    if digits.is_empty() || !digits.bytes().all(|byte| byte.is_ascii_digit()) {
161        return false;
162    }
163    if datatype == xsd::INTEGER {
164        return true;
165    }
166    let Ok(number) = value.parse::<i128>() else {
167        return false;
168    };
169    match datatype {
170        xsd::LONG => number >= i64::MIN.into() && number <= i64::MAX.into(),
171        xsd::INT => number >= i32::MIN.into() && number <= i32::MAX.into(),
172        xsd::SHORT => number >= i16::MIN.into() && number <= i16::MAX.into(),
173        xsd::BYTE => number >= i8::MIN.into() && number <= i8::MAX.into(),
174        xsd::NON_NEGATIVE_INTEGER => number >= 0,
175        xsd::NON_POSITIVE_INTEGER => number <= 0,
176        xsd::NEGATIVE_INTEGER => number < 0,
177        xsd::POSITIVE_INTEGER => number > 0,
178        xsd::UNSIGNED_LONG => number >= 0 && number <= u64::MAX.into(),
179        xsd::UNSIGNED_INT => number >= 0 && number <= u32::MAX.into(),
180        xsd::UNSIGNED_SHORT => number >= 0 && number <= u16::MAX.into(),
181        xsd::UNSIGNED_BYTE => number >= 0 && number <= u8::MAX.into(),
182        _ => false,
183    }
184}
185
186fn numeric(l: &Literal) -> Option<f64> {
187    is_numeric_datatype(l)
188        .then(|| l.value().parse::<f64>().ok())
189        .flatten()
190}
191
192fn is_numeric_datatype(l: &Literal) -> bool {
193    let dt = l.datatype().as_str();
194    let Some(local) = dt.strip_prefix(XSD) else {
195        return false;
196    };
197    matches!(
198        local,
199        "integer"
200            | "decimal"
201            | "float"
202            | "double"
203            | "long"
204            | "int"
205            | "short"
206            | "byte"
207            | "nonNegativeInteger"
208            | "nonPositiveInteger"
209            | "negativeInteger"
210            | "positiveInteger"
211            | "unsignedLong"
212            | "unsignedInt"
213            | "unsignedShort"
214            | "unsignedByte"
215    )
216}
217
218fn is_string(l: &Literal) -> bool {
219    l.datatype() == xsd::STRING
220}
221
222/// The lexical form to which string facets apply: a literal's value or an IRI;
223/// blank nodes have none.
224fn lexical(term: &Term) -> Option<String> {
225    match term {
226        Term::Literal(l) => Some(l.value().to_string()),
227        Term::NamedNode(n) => Some(n.as_str().to_string()),
228        Term::BlankNode(_) => None,
229    }
230}
231
232/// BCP47 basic language-range match (case-insensitive prefix), plus `*`.
233fn lang_matches(tag: &str, range: &str) -> bool {
234    range == "*"
235        || tag.eq_ignore_ascii_case(range)
236        || tag
237            .to_ascii_lowercase()
238            .starts_with(&format!("{}-", range.to_ascii_lowercase()))
239}
240
241/// Compile a SHACL `sh:pattern` with the supported `sh:flags` subset (i,s,m,x).
242fn compile(regex: &str, flags: &str) -> Option<Regex> {
243    let active: String = flags.chars().filter(|c| "ismx".contains(*c)).collect();
244    let pattern = if active.is_empty() {
245        regex.to_string()
246    } else {
247        format!("(?{active}){regex}")
248    };
249    Regex::new(&pattern).ok()
250}
251
252#[cfg(test)]
253mod tests {
254    use super::*;
255    use oxrdf::Literal;
256    use std::cmp::Ordering;
257
258    fn lit(value: &str, datatype: &str) -> Term {
259        Term::Literal(Literal::new_typed_literal(
260            value,
261            oxrdf::NamedNode::new(datatype).unwrap(),
262        ))
263    }
264    fn date(v: &str) -> Term {
265        lit(v, "http://www.w3.org/2001/XMLSchema#date")
266    }
267    fn time(v: &str) -> Term {
268        lit(v, "http://www.w3.org/2001/XMLSchema#time")
269    }
270    fn dur(v: &str) -> Term {
271        lit(v, "http://www.w3.org/2001/XMLSchema#duration")
272    }
273    fn ymd(v: &str) -> Term {
274        lit(v, "http://www.w3.org/2001/XMLSchema#yearMonthDuration")
275    }
276    fn dtd(v: &str) -> Term {
277        lit(v, "http://www.w3.org/2001/XMLSchema#dayTimeDuration")
278    }
279
280    #[test]
281    fn date_ordering() {
282        assert_eq!(
283            compare_terms(&date("2020-01-01"), &date("2020-06-01")),
284            Some(Ordering::Less)
285        );
286        assert_eq!(
287            compare_terms(&date("2020-06-01"), &date("2020-01-01")),
288            Some(Ordering::Greater)
289        );
290        assert_eq!(
291            compare_terms(&date("2020-03-15"), &date("2020-03-15")),
292            Some(Ordering::Equal)
293        );
294    }
295
296    #[test]
297    fn time_ordering() {
298        assert_eq!(
299            compare_terms(&time("08:00:00"), &time("17:30:00")),
300            Some(Ordering::Less)
301        );
302        assert_eq!(
303            compare_terms(&time("23:59:59"), &time("00:00:00")),
304            Some(Ordering::Greater)
305        );
306        assert_eq!(
307            compare_terms(&time("12:00:00"), &time("12:00:00")),
308            Some(Ordering::Equal)
309        );
310    }
311
312    #[test]
313    fn duration_ordering() {
314        assert_eq!(
315            compare_terms(&dur("P1Y"), &dur("P2Y")),
316            Some(Ordering::Less)
317        );
318        assert_eq!(
319            compare_terms(&dur("P30D"), &dur("P1D")),
320            Some(Ordering::Greater)
321        );
322        assert_eq!(
323            compare_terms(&dur("P1Y"), &dur("P1Y")),
324            Some(Ordering::Equal)
325        );
326    }
327
328    #[test]
329    fn year_month_duration_ordering() {
330        assert_eq!(
331            compare_terms(&ymd("P1Y"), &ymd("P13M")),
332            Some(Ordering::Less)
333        );
334        assert_eq!(
335            compare_terms(&ymd("P2Y"), &ymd("P1Y")),
336            Some(Ordering::Greater)
337        );
338    }
339
340    #[test]
341    fn day_time_duration_ordering() {
342        assert_eq!(
343            compare_terms(&dtd("PT1H"), &dtd("PT2H")),
344            Some(Ordering::Less)
345        );
346        assert_eq!(
347            compare_terms(&dtd("P2D"), &dtd("P1D")),
348            Some(Ordering::Greater)
349        );
350    }
351
352    #[test]
353    fn genuinely_incomparable_durations() {
354        // P1M and P30D are incomparable: a month is 28–31 days so no consistent ordering
355        assert_eq!(compare_terms(&dur("P1M"), &dur("P30D")), None);
356    }
357
358    #[test]
359    fn cross_type_incomparable() {
360        assert_eq!(compare_terms(&date("2020-01-01"), &time("12:00:00")), None);
361        assert_eq!(compare_terms(&date("2020-01-01"), &dur("P1Y")), None);
362    }
363
364    #[test]
365    fn valid_lexical_form_date() {
366        let good = Literal::new_typed_literal(
367            "2020-01-01",
368            oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#date").unwrap(),
369        );
370        let bad = Literal::new_typed_literal(
371            "not-a-date",
372            oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#date").unwrap(),
373        );
374        assert!(valid_lexical_form(&good));
375        assert!(!valid_lexical_form(&bad));
376    }
377
378    #[test]
379    fn valid_lexical_form_duration() {
380        let good = Literal::new_typed_literal(
381            "P1Y2M3DT4H",
382            oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#duration").unwrap(),
383        );
384        let bad = Literal::new_typed_literal(
385            "not-a-duration",
386            oxrdf::NamedNode::new("http://www.w3.org/2001/XMLSchema#duration").unwrap(),
387        );
388        assert!(valid_lexical_form(&good));
389        assert!(!valid_lexical_form(&bad));
390    }
391}