Skip to main content

oxgraph_hyper_bcsr/internal/
validation.rs

1//! Layout- and Strict-tier validation for bipartite-CSR section payloads.
2//!
3//! Validation walks the eight section payloads at open time and rejects any
4//! shape that would let a later traversal step out of bounds, observe stale
5//! data, or yield an inconsistent view of the bipartite incidence relation.
6//! The depth of the walk is selected by [`BcsrValidation`].
7
8use oxgraph_layout_util::{OffsetIntegrityIssue, check_offset_section, check_value_range};
9
10use crate::{
11    error::{BcsrError, BcsrRoleSide, BcsrSection},
12    internal::view::BcsrSections,
13    word::{BcsrIndex, BcsrWord},
14};
15
16/// Maps an [`OffsetIntegrityIssue`] from `oxgraph-csr-util` into a typed
17/// [`BcsrError`], stamping the originating [`BcsrSection`] discriminator.
18fn map_offsets_issue<W: BcsrWord>(
19    section: BcsrSection,
20    offsets: &[W],
21    issue: OffsetIntegrityIssue,
22) -> BcsrError {
23    match issue {
24        OffsetIntegrityIssue::Length { expected, actual } => BcsrError::OffsetLength {
25            section,
26            expected,
27            actual,
28        },
29        OffsetIntegrityIssue::FirstNonZero { actual } => BcsrError::FirstOffset { section, actual },
30        OffsetIntegrityIssue::NonMonotonic {
31            index,
32            previous,
33            actual,
34        } => BcsrError::NonMonotonicOffset {
35            section,
36            index,
37            previous,
38            actual,
39        },
40        OffsetIntegrityIssue::FinalMismatch {
41            final_offset,
42            value_len,
43        } => BcsrError::FinalOffset {
44            section,
45            final_offset,
46            value_len,
47        },
48        OffsetIntegrityIssue::UsizeOverflow { index } => {
49            let value = offsets
50                .get(index)
51                .copied()
52                .and_then(|w| w.get().to_usize())
53                .unwrap_or(usize::MAX);
54            BcsrError::UsizeOverflow { value }
55        }
56        _ => BcsrError::UsizeOverflow { value: usize::MAX },
57    }
58}
59
60/// Maps a value-range [`OffsetIntegrityIssue`] for vertex IDs into a
61/// [`BcsrError::VertexOutOfRange`].
62fn map_vertex_value_issue<W: BcsrWord>(
63    section: BcsrSection,
64    values: &[W],
65    issue: OffsetIntegrityIssue,
66) -> BcsrError {
67    match issue {
68        OffsetIntegrityIssue::ValueOutOfRange {
69            index,
70            value,
71            bound,
72        } => BcsrError::VertexOutOfRange {
73            section,
74            index,
75            vertex: value,
76            vertex_count: bound,
77        },
78        OffsetIntegrityIssue::UsizeOverflow { index } => {
79            let value = values
80                .get(index)
81                .copied()
82                .and_then(|w| w.get().to_usize())
83                .unwrap_or(usize::MAX);
84            BcsrError::UsizeOverflow { value }
85        }
86        _ => BcsrError::UsizeOverflow { value: usize::MAX },
87    }
88}
89
90/// Maps a value-range [`OffsetIntegrityIssue`] for hyperedge IDs into a
91/// [`BcsrError::HyperedgeOutOfRange`].
92fn map_hyperedge_value_issue<W: BcsrWord>(
93    section: BcsrSection,
94    values: &[W],
95    issue: OffsetIntegrityIssue,
96) -> BcsrError {
97    match issue {
98        OffsetIntegrityIssue::ValueOutOfRange {
99            index,
100            value,
101            bound,
102        } => BcsrError::HyperedgeOutOfRange {
103            section,
104            index,
105            hyperedge: value,
106            hyperedge_count: bound,
107        },
108        OffsetIntegrityIssue::UsizeOverflow { index } => {
109            let value = values
110                .get(index)
111                .copied()
112                .and_then(|w| w.get().to_usize())
113                .unwrap_or(usize::MAX);
114            BcsrError::UsizeOverflow { value }
115        }
116        _ => BcsrError::UsizeOverflow { value: usize::MAX },
117    }
118}
119
120/// Validation depth applied at view open time.
121///
122/// `Layout` is the cheap default and catches every violation that lets a
123/// downstream traversal walk out of bounds. `Strict` additionally verifies
124/// cross-direction consistency: the hyperedge-major and vertex-major
125/// indexes describe the same set of incidences. `Strict` is required for
126/// end-to-end semantic guarantees on untrusted producers.
127///
128/// # Performance
129///
130/// `perf: unspecified`; this is a metadata enum.
131#[non_exhaustive]
132#[derive(Clone, Copy, Debug, Eq, PartialEq)]
133pub enum BcsrValidation {
134    /// Length, monotonicity, in-range IDs, sorted-and-unique within ranges.
135    /// Cost is `O(P_head + P_tail + P_outgoing + P_incoming)`.
136    Layout,
137    /// Layout plus cross-direction multiset equality.
138    /// Cost is `O((P_head + P_tail) · log d)` where `d` is the maximum
139    /// vertex outgoing or incoming degree.
140    Strict,
141}
142
143/// Counts derived from validated offset arrays.
144///
145/// Returned by [`validate_sections`] so the view can store them without
146/// recomputing.
147///
148/// # Performance
149///
150/// `perf: unspecified`; copying is `O(1)`.
151#[derive(Clone, Copy, Debug)]
152pub(in crate::internal) struct DerivedCounts {
153    /// Number of vertices visible in this view.
154    pub(in crate::internal) vertex_count: usize,
155    /// Number of hyperedges visible in this view.
156    pub(in crate::internal) hyperedge_count: usize,
157    /// Number of outgoing incidences (`P_head == P_outgoing`).
158    pub(in crate::internal) p_outgoing: usize,
159    /// Number of incoming incidences (`P_tail == P_incoming`).
160    pub(in crate::internal) p_incoming: usize,
161    /// Total incidence count (`P_outgoing + P_incoming`). Validation
162    /// guarantees this fits in `usize`.
163    pub(in crate::internal) total_incidences: usize,
164}
165
166/// Validates the eight bipartite-CSR section payloads.
167///
168/// Walks the sections in a deterministic order and returns the derived
169/// vertex / hyperedge / per-direction incidence counts on success.
170///
171/// # Errors
172///
173/// Returns the first [`BcsrError`] encountered during validation.
174///
175/// # Performance
176///
177/// At [`BcsrValidation::Layout`] the cost is
178/// `O(P_head + P_tail + P_outgoing + P_incoming)`. [`BcsrValidation::Strict`]
179/// adds `O((P_head + P_tail) · log d)` for the cross-direction walk.
180pub(in crate::internal) fn validate_sections<OffsetWord, VertexWord, RelationWord>(
181    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
182    level: BcsrValidation,
183) -> Result<DerivedCounts, BcsrError>
184where
185    OffsetWord: BcsrWord,
186    VertexWord: BcsrWord,
187    RelationWord: BcsrWord,
188{
189    let counts = derive_counts(sections)?;
190    validate_all_offsets(sections, counts)?;
191    validate_total_lengths(sections)?;
192    validate_value_ranges(sections, counts)?;
193    validate_within_range_sorted(sections)?;
194    if matches!(level, BcsrValidation::Strict) {
195        validate_cross_direction(sections)?;
196    }
197    Ok(counts)
198}
199
200/// Derives `vertex_count`, `hyperedge_count`, `P_outgoing`, `P_incoming`
201/// after checking offset slice lengths agree pairwise.
202fn derive_counts<OffsetWord, VertexWord, RelationWord>(
203    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
204) -> Result<DerivedCounts, BcsrError>
205where
206    OffsetWord: BcsrWord,
207    VertexWord: BcsrWord,
208    RelationWord: BcsrWord,
209{
210    let head_len = sections.head_offsets.len();
211    let tail_len = sections.tail_offsets.len();
212    if head_len != tail_len {
213        return Err(BcsrError::HyperedgeOffsetLengthMismatch {
214            head_offsets_len: head_len,
215            tail_offsets_len: tail_len,
216        });
217    }
218    let outgoing_len = sections.vertex_outgoing_offsets.len();
219    let incoming_len = sections.vertex_incoming_offsets.len();
220    if outgoing_len != incoming_len {
221        return Err(BcsrError::VertexOffsetLengthMismatch {
222            outgoing_offsets_len: outgoing_len,
223            incoming_offsets_len: incoming_len,
224        });
225    }
226
227    let hyperedge_count = derive_count_from_offsets(head_len, BcsrSection::HeadOffsets)?;
228    let vertex_count = derive_count_from_offsets(outgoing_len, BcsrSection::VertexOutgoingOffsets)?;
229    let p_outgoing = sections.vertex_outgoing_hyperedges.len();
230    let p_incoming = sections.vertex_incoming_hyperedges.len();
231    let total_incidences =
232        p_outgoing
233            .checked_add(p_incoming)
234            .ok_or(BcsrError::TotalIncidenceCountOverflow {
235                p_head: p_outgoing,
236                p_tail: p_incoming,
237            })?;
238
239    Ok(DerivedCounts {
240        vertex_count,
241        hyperedge_count,
242        p_outgoing,
243        p_incoming,
244        total_incidences,
245    })
246}
247
248/// Returns `offsets.len() - 1` after checking length is non-zero.
249const fn derive_count_from_offsets(
250    offsets_len: usize,
251    section: BcsrSection,
252) -> Result<usize, BcsrError> {
253    if offsets_len == 0 {
254        return Err(BcsrError::OffsetLength {
255            section,
256            expected: 1,
257            actual: 0,
258        });
259    }
260    Ok(offsets_len - 1)
261}
262
263/// Validates each of the four offset arrays end-to-end.
264fn validate_all_offsets<OffsetWord, VertexWord, RelationWord>(
265    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
266    counts: DerivedCounts,
267) -> Result<(), BcsrError>
268where
269    OffsetWord: BcsrWord,
270    VertexWord: BcsrWord,
271    RelationWord: BcsrWord,
272{
273    validate_one_offsets(
274        sections.head_offsets,
275        BcsrSection::HeadOffsets,
276        counts.hyperedge_count,
277        sections.head_participants.len(),
278    )?;
279    validate_one_offsets(
280        sections.tail_offsets,
281        BcsrSection::TailOffsets,
282        counts.hyperedge_count,
283        sections.tail_participants.len(),
284    )?;
285    validate_one_offsets(
286        sections.vertex_outgoing_offsets,
287        BcsrSection::VertexOutgoingOffsets,
288        counts.vertex_count,
289        sections.vertex_outgoing_hyperedges.len(),
290    )?;
291    validate_one_offsets(
292        sections.vertex_incoming_offsets,
293        BcsrSection::VertexIncomingOffsets,
294        counts.vertex_count,
295        sections.vertex_incoming_hyperedges.len(),
296    )
297}
298
299/// Validates one offset array: length is `count + 1`, first offset is 0,
300/// monotonic non-decreasing, final offset matches `value_len`. Delegates to
301/// [`oxgraph_layout_util::check_offset_section`] and stamps the [`BcsrSection`]
302/// discriminator on any returned issue.
303fn validate_one_offsets<Word: BcsrWord>(
304    offsets: &[Word],
305    section: BcsrSection,
306    count: usize,
307    value_len: usize,
308) -> Result<(), BcsrError> {
309    if count.checked_add(1).is_none() {
310        return Err(BcsrError::OffsetLengthOverflow { count });
311    }
312    check_offset_section(offsets, count, value_len)
313        .map_err(|issue| map_offsets_issue(section, offsets, issue))
314}
315
316/// Verifies that head/outgoing and tail/incoming totals agree, so the
317/// bipartite index has a single shared incidence count per direction.
318const fn validate_total_lengths<OffsetWord, VertexWord, RelationWord>(
319    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
320) -> Result<(), BcsrError>
321where
322    OffsetWord: BcsrWord,
323    VertexWord: BcsrWord,
324    RelationWord: BcsrWord,
325{
326    if sections.head_participants.len() != sections.vertex_outgoing_hyperedges.len() {
327        return Err(BcsrError::OutgoingTotalMismatch {
328            head_participants_len: sections.head_participants.len(),
329            outgoing_hyperedges_len: sections.vertex_outgoing_hyperedges.len(),
330        });
331    }
332    if sections.tail_participants.len() != sections.vertex_incoming_hyperedges.len() {
333        return Err(BcsrError::IncomingTotalMismatch {
334            tail_participants_len: sections.tail_participants.len(),
335            incoming_hyperedges_len: sections.vertex_incoming_hyperedges.len(),
336        });
337    }
338    Ok(())
339}
340
341/// Verifies vertex IDs and hyperedge IDs in value sections are in range.
342fn validate_value_ranges<OffsetWord, VertexWord, RelationWord>(
343    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
344    counts: DerivedCounts,
345) -> Result<(), BcsrError>
346where
347    OffsetWord: BcsrWord,
348    VertexWord: BcsrWord,
349    RelationWord: BcsrWord,
350{
351    check_vertex_values(
352        sections.head_participants,
353        BcsrSection::HeadParticipants,
354        counts.vertex_count,
355    )?;
356    check_vertex_values(
357        sections.tail_participants,
358        BcsrSection::TailParticipants,
359        counts.vertex_count,
360    )?;
361    check_hyperedge_values(
362        sections.vertex_outgoing_hyperedges,
363        BcsrSection::VertexOutgoingHyperedges,
364        counts.hyperedge_count,
365    )?;
366    check_hyperedge_values(
367        sections.vertex_incoming_hyperedges,
368        BcsrSection::VertexIncomingHyperedges,
369        counts.hyperedge_count,
370    )
371}
372
373/// Returns `Err` if any vertex word is `>= vertex_count`. Delegates to
374/// [`oxgraph_layout_util::check_value_range`] and stamps [`BcsrSection`] on the
375/// returned issue.
376fn check_vertex_values<Word: BcsrWord>(
377    values: &[Word],
378    section: BcsrSection,
379    vertex_count: usize,
380) -> Result<(), BcsrError> {
381    check_value_range(values, vertex_count)
382        .map_err(|issue| map_vertex_value_issue(section, values, issue))
383}
384
385/// Returns `Err` if any hyperedge word is `>= hyperedge_count`. Delegates to
386/// [`oxgraph_layout_util::check_value_range`] and stamps [`BcsrSection`] on the
387/// returned issue.
388fn check_hyperedge_values<Word: BcsrWord>(
389    values: &[Word],
390    section: BcsrSection,
391    hyperedge_count: usize,
392) -> Result<(), BcsrError> {
393    check_value_range(values, hyperedge_count)
394        .map_err(|issue| map_hyperedge_value_issue(section, values, issue))
395}
396
397/// Verifies that values within every per-bucket range are strictly
398/// ascending. Bipartite-CSR uses set semantics inside each range.
399fn validate_within_range_sorted<OffsetWord, VertexWord, RelationWord>(
400    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
401) -> Result<(), BcsrError>
402where
403    OffsetWord: BcsrWord,
404    VertexWord: BcsrWord,
405    RelationWord: BcsrWord,
406{
407    check_strictly_ascending_buckets(
408        sections.head_offsets,
409        sections.head_participants,
410        BcsrSection::HeadParticipants,
411    )?;
412    check_strictly_ascending_buckets(
413        sections.tail_offsets,
414        sections.tail_participants,
415        BcsrSection::TailParticipants,
416    )?;
417    check_strictly_ascending_buckets(
418        sections.vertex_outgoing_offsets,
419        sections.vertex_outgoing_hyperedges,
420        BcsrSection::VertexOutgoingHyperedges,
421    )?;
422    check_strictly_ascending_buckets(
423        sections.vertex_incoming_offsets,
424        sections.vertex_incoming_hyperedges,
425        BcsrSection::VertexIncomingHyperedges,
426    )
427}
428
429/// Checks each `[offsets[i], offsets[i + 1])` bucket of `values` is
430/// strictly ascending.
431fn check_strictly_ascending_buckets<OffsetWord, Word>(
432    offsets: &[OffsetWord],
433    values: &[Word],
434    section: BcsrSection,
435) -> Result<(), BcsrError>
436where
437    OffsetWord: BcsrWord,
438    Word: BcsrWord,
439{
440    if offsets.len() < 2 {
441        return Ok(());
442    }
443    for window in offsets.windows(2) {
444        let start = index_to_usize(window[0].get())?;
445        let end = index_to_usize(window[1].get())?;
446        check_strictly_ascending_range(values, start, end, section)?;
447    }
448    Ok(())
449}
450
451/// Verifies `values[start..end]` is strictly ascending.
452fn check_strictly_ascending_range<Word: BcsrWord>(
453    values: &[Word],
454    start: usize,
455    end: usize,
456    section: BcsrSection,
457) -> Result<(), BcsrError> {
458    if end <= start + 1 {
459        return Ok(());
460    }
461    let mut previous = index_to_usize(values[start].get())?;
462    for relative in 1..(end - start) {
463        let index = start + relative;
464        let actual = index_to_usize(values[index].get())?;
465        if actual <= previous {
466            return Err(BcsrError::NotStrictlyAscending {
467                section,
468                index,
469                previous,
470                actual,
471            });
472        }
473        previous = actual;
474    }
475    Ok(())
476}
477
478/// Verifies that the hyperedge-major and vertex-major indexes describe the
479/// same multiset of incidences (Strict-tier check). Set semantics let the
480/// check use binary search; cost is `O((P_head + P_tail) · log d)`.
481fn validate_cross_direction<OffsetWord, VertexWord, RelationWord>(
482    sections: &BcsrSections<'_, OffsetWord, VertexWord, RelationWord>,
483) -> Result<(), BcsrError>
484where
485    OffsetWord: BcsrWord,
486    VertexWord: BcsrWord,
487    RelationWord: BcsrWord,
488{
489    cross_direction_walk(
490        sections.head_offsets,
491        sections.head_participants,
492        sections.vertex_outgoing_offsets,
493        sections.vertex_outgoing_hyperedges,
494        BcsrRoleSide::Outgoing,
495    )?;
496    cross_direction_walk(
497        sections.tail_offsets,
498        sections.tail_participants,
499        sections.vertex_incoming_offsets,
500        sections.vertex_incoming_hyperedges,
501        BcsrRoleSide::Incoming,
502    )
503}
504
505/// Walks one side of the bipartite index hyperedge-by-hyperedge and confirms
506/// every `(h, v)` pair appears in the matching vertex-major bucket.
507fn cross_direction_walk<OffsetWord, VertexWord, RelationWord>(
508    edge_offsets: &[OffsetWord],
509    edge_values: &[VertexWord],
510    vertex_offsets: &[OffsetWord],
511    vertex_values: &[RelationWord],
512    side: BcsrRoleSide,
513) -> Result<(), BcsrError>
514where
515    OffsetWord: BcsrWord,
516    VertexWord: BcsrWord,
517    RelationWord: BcsrWord,
518{
519    if edge_offsets.len() < 2 {
520        return Ok(());
521    }
522    for hyperedge_index in 0..(edge_offsets.len() - 1) {
523        let start = index_to_usize(edge_offsets[hyperedge_index].get())?;
524        let end = index_to_usize(edge_offsets[hyperedge_index + 1].get())?;
525        let hyperedge = hyperedge_index;
526        cross_direction_check_bucket(CrossDirectionBucket {
527            edge_values,
528            start,
529            end,
530            vertex_offsets,
531            vertex_values,
532            hyperedge,
533            side,
534        })?;
535    }
536    Ok(())
537}
538
539/// Parameter bundle for [`cross_direction_check_bucket`].
540#[derive(Clone, Copy)]
541struct CrossDirectionBucket<'a, OffsetWord, VertexWord, RelationWord> {
542    /// Hyperedge-major value slice (`head_participants` or `tail_participants`).
543    edge_values: &'a [VertexWord],
544    /// Inclusive start of the hyperedge's range inside `edge_values`.
545    start: usize,
546    /// Exclusive end of the hyperedge's range inside `edge_values`.
547    end: usize,
548    /// Vertex-major offset slice on the matching side.
549    vertex_offsets: &'a [OffsetWord],
550    /// Vertex-major hyperedge ID slice on the matching side.
551    vertex_values: &'a [RelationWord],
552    /// Hyperedge ID being checked.
553    hyperedge: usize,
554    /// Which side of the bipartite index this check covers.
555    side: BcsrRoleSide,
556}
557
558/// Confirms every vertex in `edge_values[start..end]` lists `hyperedge`
559/// in its own vertex-major bucket.
560fn cross_direction_check_bucket<OffsetWord, VertexWord, RelationWord>(
561    args: CrossDirectionBucket<'_, OffsetWord, VertexWord, RelationWord>,
562) -> Result<(), BcsrError>
563where
564    OffsetWord: BcsrWord,
565    VertexWord: BcsrWord,
566    RelationWord: BcsrWord,
567{
568    for word in args.edge_values.iter().take(args.end).skip(args.start) {
569        let vertex = index_to_usize(word.get())?;
570        let v_index = vertex;
571        let bucket_start = index_to_usize(args.vertex_offsets[v_index].get())?;
572        let bucket_end = index_to_usize(args.vertex_offsets[v_index + 1].get())?;
573        if !bucket_contains(args.vertex_values, bucket_start, bucket_end, args.hyperedge) {
574            return Err(BcsrError::CrossDirectionMismatch {
575                side: args.side,
576                hyperedge: args.hyperedge,
577                vertex,
578            });
579        }
580    }
581    Ok(())
582}
583
584/// Returns whether `values[start..end]` contains `needle`. Values within the
585/// range are required to be strictly ascending, so binary search is correct.
586fn bucket_contains<Word: BcsrWord>(
587    values: &[Word],
588    start: usize,
589    end: usize,
590    needle: usize,
591) -> bool {
592    let bucket = &values[start..end];
593    bucket
594        .binary_search_by(|word| index_to_usize_validated(word.get()).cmp(&needle))
595        .is_ok()
596}
597
598/// Converts a BCSR index to `usize`, returning a typed error on truncation.
599pub(in crate::internal) fn index_to_usize<Index: BcsrIndex>(
600    value: Index,
601) -> Result<usize, BcsrError> {
602    value
603        .to_usize()
604        .ok_or(BcsrError::UsizeOverflow { value: usize::MAX })
605}
606
607/// Converts a previously validated BCSR index to `usize`.
608///
609/// # Panics
610///
611/// Panics via `unreachable!()` only if validation has been bypassed. All
612/// in-tree callers run after [`validate_sections`], which surfaces truncation
613/// as [`BcsrError::UsizeOverflow`] before any `_validated` call.
614///
615/// # Performance
616///
617/// This function is `O(1)`.
618pub(in crate::internal) fn index_to_usize_validated<Index: BcsrIndex>(value: Index) -> usize {
619    value
620        .to_usize()
621        .unwrap_or_else(|| unreachable!("validated bipartite-CSR index must fit usize"))
622}
623
624/// Converts a previously validated `usize` slot to a BCSR index.
625///
626/// # Panics
627///
628/// Panics via `unreachable!()` only if validation failed to enforce the target
629/// index width.
630///
631/// # Performance
632///
633/// This function is `O(1)`.
634pub(in crate::internal) fn usize_to_index_validated<Index: BcsrIndex>(value: usize) -> Index {
635    Index::from_usize(value).unwrap_or_else(|| unreachable!("validated BCSR slot must fit index"))
636}