Skip to main content

oxirs_core/canon/
algorithm.rs

1//! URDNA2015 / RDNA 2015 core algorithm implementation.
2//!
3//! This module provides a complete, spec-faithful implementation of the W3C
4//! RDF Dataset Normalization Algorithm (URDNA2015) as described in:
5//! <https://www.w3.org/TR/rdf-canon/>
6//!
7//! ## Algorithm Overview
8//!
9//! URDNA2015 assigns deterministic canonical blank node identifiers to every
10//! blank node in an RDF dataset.  The algorithm works in five stages:
11//!
12//! 1. **Collect blank nodes** — enumerate every blank node appearing in any quad.
13//! 2. **Hash First-Degree Quads** — for each blank node, hash the set of quads
14//!    it appears in using `_:a` for the node itself and `_:z` for all other blanks
15//!    (unless they already have a canonical ID).
16//! 3. **Issue simple IDs** — blank nodes with a *unique* first-degree hash
17//!    receive their canonical identifier immediately (`_:c14n0`, `_:c14n1`, …)
18//!    in hash order.
19//! 4. **Hash N-Degree Quads** — for blank nodes that still share a first-degree
20//!    hash, apply the full recursive N-degree hashing algorithm which walks the
21//!    RDF neighbourhood to break ties.
22//! 5. **Emit canonical N-Quads** — replace all blank node identifiers with their
23//!    canonical form, sort the resulting quads lexicographically, and join with
24//!    `\n`.
25//!
26//! ## Reference
27//!
28//! - W3C RDF Canonicalization 1.0: <https://www.w3.org/TR/rdf-canon/>
29//! - URDNA2015 test suite: <https://json-ld.github.io/rdf-dataset-normalization/tests/>
30
31use std::collections::{BTreeMap, HashMap};
32
33use super::{
34    hash::sha256_hex,
35    nquads::{quad_to_nquad, quad_to_nquad_with_placeholders},
36    types::RdfQuad,
37};
38
39// ─── Identifier Issuer ───────────────────────────────────────────────────────
40
41/// Issues monotonically increasing canonical blank node identifiers.
42///
43/// Each call to [`IdentifierIssuer::issue`] either returns the previously
44/// assigned identifier for a blank node or mints a fresh one with the format
45/// `_:c14n<N>`.  The internal counter starts at 0.
46#[derive(Debug, Clone)]
47pub struct IdentifierIssuer {
48    prefix: String,
49    counter: usize,
50    /// Ordered map from original blank node id → canonical id.
51    /// Ordered so that the issuer can be cloned and iterated deterministically.
52    issued: Vec<(String, String)>,
53    /// Fast lookup set.
54    issued_map: HashMap<String, String>,
55}
56
57impl IdentifierIssuer {
58    /// Create a new issuer using `prefix` as the canonical identifier prefix.
59    pub fn new(prefix: impl Into<String>) -> Self {
60        IdentifierIssuer {
61            prefix: prefix.into(),
62            counter: 0,
63            issued: Vec::new(),
64            issued_map: HashMap::new(),
65        }
66    }
67
68    /// Return the canonical identifier that has been (or will be) assigned to
69    /// `original`.  If this is the first call for `original`, a new identifier
70    /// is minted.
71    pub fn issue(&mut self, original: &str) -> String {
72        if let Some(existing) = self.issued_map.get(original) {
73            return existing.clone();
74        }
75        let canonical = format!("{}{}", self.prefix, self.counter);
76        self.counter += 1;
77        self.issued_map
78            .insert(original.to_string(), canonical.clone());
79        self.issued.push((original.to_string(), canonical.clone()));
80        canonical
81    }
82
83    /// Return the canonical identifier for `original` if it has already been
84    /// issued, without minting a new one.
85    pub fn get(&self, original: &str) -> Option<&str> {
86        self.issued_map.get(original).map(String::as_str)
87    }
88
89    /// Return `true` if a canonical identifier has already been issued for
90    /// `original`.
91    pub fn has_issued(&self, original: &str) -> bool {
92        self.issued_map.contains_key(original)
93    }
94
95    /// Return an iterator over `(original, canonical)` pairs in issue order.
96    pub fn issued_pairs(&self) -> impl Iterator<Item = (&str, &str)> {
97        self.issued.iter().map(|(o, c)| (o.as_str(), c.as_str()))
98    }
99
100    /// Return the mapping as a `HashMap<String, String>` for N-Quads serialization.
101    pub fn as_map(&self) -> &HashMap<String, String> {
102        &self.issued_map
103    }
104}
105
106// ─── Canonicalizer ───────────────────────────────────────────────────────────
107
108/// The main URDNA2015 canonicalizer.
109///
110/// Instantiate with [`Canonicalizer::new`] then call
111/// [`Canonicalizer::canonicalize`].  The canonicalizer is reusable but is
112/// stateful during a single canonicalization run — all state is reset at the
113/// beginning of [`Canonicalizer::canonicalize`].
114pub struct Canonicalizer {
115    /// Issues canonical IDs: `_:c14n0`, `_:c14n1`, …
116    issuer: IdentifierIssuer,
117    /// Map from blank node id → list of quads in which the node appears.
118    bnode_to_quads: HashMap<String, Vec<usize>>,
119    /// All quads in the dataset.
120    quads: Vec<RdfQuad>,
121    /// First-degree hash → list of blank node ids with that hash.
122    hash_to_bnodes: BTreeMap<String, Vec<String>>,
123}
124
125impl Canonicalizer {
126    /// Create a new canonicalizer.
127    pub fn new() -> Self {
128        Canonicalizer {
129            issuer: IdentifierIssuer::new("_:c14n"),
130            bnode_to_quads: HashMap::new(),
131            quads: Vec::new(),
132            hash_to_bnodes: BTreeMap::new(),
133        }
134    }
135
136    // ─── Stage 1: Collect blank nodes ─────────────────────────────────────
137
138    /// Populate `bnode_to_quads` from the input quads.
139    fn collect_blank_nodes(&mut self) {
140        for (idx, quad) in self.quads.iter().enumerate() {
141            for bnode_id in quad.blank_nodes() {
142                self.bnode_to_quads
143                    .entry(bnode_id.to_string())
144                    .or_default()
145                    .push(idx);
146            }
147        }
148    }
149
150    // ─── Stage 2: First-Degree Hash ───────────────────────────────────────
151
152    /// Hash the set of quads that contain `bnode`, using placeholder names:
153    /// - `_:a` for the blank node being hashed
154    /// - `_:z` for all other blank nodes (unless they already have a canonical
155    ///   identifier in `issued`)
156    fn hash_first_degree_quads(&self, bnode: &str, issued: &IdentifierIssuer) -> String {
157        let quad_indices = match self.bnode_to_quads.get(bnode) {
158            Some(v) => v,
159            None => return sha256_hex(""),
160        };
161
162        // Serialize each quad in placeholder form, then sort, then hash the join.
163        let mut nquad_lines: Vec<String> = quad_indices
164            .iter()
165            .map(|&idx| quad_to_nquad_with_placeholders(&self.quads[idx], bnode, issued.as_map()))
166            .collect();
167
168        nquad_lines.sort();
169        sha256_hex(&nquad_lines.join(""))
170    }
171
172    // ─── Stage 3: Issue simple IDs ────────────────────────────────────────
173
174    /// For each first-degree hash that belongs to exactly one blank node, issue
175    /// a canonical identifier in sorted-hash order.
176    fn issue_simple_canonical_ids(&mut self) {
177        // Collect hashes with exactly one blank node.
178        let mut simple: Vec<(String, String)> = self
179            .hash_to_bnodes
180            .iter()
181            .filter(|(_, bnodes)| bnodes.len() == 1)
182            .map(|(h, bnodes)| (h.clone(), bnodes[0].clone()))
183            .collect();
184
185        // Sorted by hash (BTreeMap already iterates in sorted order, so this
186        // preserves the spec-required deterministic order).
187        simple.sort_by(|a, b| a.0.cmp(&b.0));
188
189        for (_, bnode) in simple {
190            self.issuer.issue(&bnode);
191        }
192    }
193
194    // ─── Stage 4: N-Degree Hash ───────────────────────────────────────────
195
196    /// Compute the N-degree hash for a blank node, resolving all ambiguous
197    /// blank nodes in its neighbourhood recursively.
198    ///
199    /// This is the most complex part of URDNA2015.  The algorithm:
200    ///
201    /// 1. For each blank node `p` in the same first-degree-hash group, try all
202    ///    permutations of assigning temporary canonical IDs to those nodes and
203    ///    hash the resulting neighbourhood quads.
204    /// 2. Choose the lexicographically smallest hash to break ties.
205    ///
206    /// The `depth` parameter guards against infinite recursion in pathological
207    /// graphs (cycles of blank nodes).
208    fn hash_n_degree_quads(
209        &self,
210        bnode: &str,
211        issuer: &IdentifierIssuer,
212        depth: usize,
213    ) -> (String, IdentifierIssuer) {
214        // Hard limit on recursion depth (spec leaves implementation-defined).
215        const MAX_DEPTH: usize = 64;
216
217        // Build the set of related blank nodes in the RDF neighbourhood.
218        // "Related" means: sharing a quad with `bnode`.
219        let mut hash_to_related: BTreeMap<String, Vec<String>> = BTreeMap::new();
220
221        let quad_indices = match self.bnode_to_quads.get(bnode) {
222            Some(v) => v.clone(),
223            None => return (sha256_hex(""), issuer.clone()),
224        };
225
226        for quad_idx in &quad_indices {
227            let quad = &self.quads[*quad_idx];
228            // Collect every blank node in this quad that is NOT `bnode`.
229            for related_id in quad.blank_nodes() {
230                if related_id == bnode {
231                    continue;
232                }
233                // Hash the related node's first-degree quads to group it.
234                let related_hash = self.hash_first_degree_quads(related_id, issuer);
235                hash_to_related
236                    .entry(related_hash)
237                    .or_default()
238                    .push(related_id.to_string());
239            }
240        }
241
242        // Build a deterministic "data to hash" string.
243        let mut combined = String::new();
244
245        // Append the initial quads of `bnode` with canonical substitution.
246        {
247            let mut nquad_lines: Vec<String> = quad_indices
248                .iter()
249                .map(|&idx| quad_to_nquad(&self.quads[idx], issuer.as_map()))
250                .collect();
251            nquad_lines.sort();
252            combined.push_str(&nquad_lines.join(""));
253        }
254
255        let mut chosen_issuer = issuer.clone();
256
257        // For each group of related nodes (sorted by their first-degree hash),
258        // try all permutations to find the lexicographically smallest hash.
259        for (group_hash, related_group) in &hash_to_related {
260            combined.push_str(group_hash);
261
262            let mut best_path: Option<(String, IdentifierIssuer)> = None;
263
264            // Generate all permutations of the related group.
265            let permutations = permutations_of(related_group);
266
267            for perm in permutations {
268                let mut path_issuer = issuer.clone();
269                let mut path = String::new();
270
271                for related_bnode in &perm {
272                    // Issue a temporary ID if not yet canonical.
273                    if let Some(canonical) = issuer.get(related_bnode) {
274                        path.push_str(canonical);
275                    } else {
276                        // Recursively hash this related node (up to depth limit).
277                        if depth < MAX_DEPTH {
278                            let temp_id = path_issuer.issue(related_bnode);
279                            path.push_str(&temp_id);
280                            let (recursive_hash, _) =
281                                self.hash_n_degree_quads(related_bnode, &path_issuer, depth + 1);
282                            path.push_str(&recursive_hash);
283                        } else {
284                            // At depth limit: just use the first-degree hash.
285                            let fd_hash = self.hash_first_degree_quads(related_bnode, &path_issuer);
286                            path.push_str(&fd_hash);
287                        }
288                    }
289                }
290
291                let candidate_hash = sha256_hex(&path);
292
293                if let Some((ref best_hash, _)) = best_path {
294                    if candidate_hash < *best_hash {
295                        best_path = Some((candidate_hash, path_issuer));
296                    }
297                } else {
298                    best_path = Some((candidate_hash, path_issuer));
299                }
300            }
301
302            if let Some((best_hash, best_issuer)) = best_path {
303                combined.push_str(&best_hash);
304                chosen_issuer = best_issuer;
305            }
306        }
307
308        (sha256_hex(&combined), chosen_issuer)
309    }
310
311    /// Issue canonical identifiers to all blank nodes that still lack one,
312    /// using the N-degree hash to break ties.
313    fn issue_ndegree_canonical_ids(&mut self) {
314        // Collect the groups that still have more than one candidate (or any
315        // unissued node).  Work in sorted-by-hash order for determinism.
316        let unresolved_groups: Vec<(String, Vec<String>)> = self
317            .hash_to_bnodes
318            .iter()
319            .filter(|(_, bnodes)| bnodes.len() > 1)
320            .map(|(h, bnodes)| (h.clone(), bnodes.clone()))
321            .collect();
322
323        for (_, group) in unresolved_groups {
324            // For each blank node in the group, compute an N-degree hash.
325            let mut nd_hashes: Vec<(String, String, IdentifierIssuer)> = group
326                .iter()
327                .map(|bnode| {
328                    let (nd_hash, temp_issuer) =
329                        self.hash_n_degree_quads(bnode, &self.issuer.clone(), 0);
330                    (nd_hash, bnode.clone(), temp_issuer)
331                })
332                .collect();
333
334            // Sort by N-degree hash for deterministic ordering.
335            nd_hashes.sort_by(|a, b| a.0.cmp(&b.0));
336
337            for (_, bnode, _temp_issuer) in &nd_hashes {
338                if !self.issuer.has_issued(bnode) {
339                    self.issuer.issue(bnode);
340                }
341            }
342        }
343    }
344
345    // ─── Stage 5: Emit canonical N-Quads ─────────────────────────────────
346
347    /// Serialize all quads with canonical blank node identifiers, sort them,
348    /// and join with newlines.
349    fn emit_canonical_nquads(&self) -> String {
350        let mapping = self.issuer.as_map();
351        let mut lines: Vec<String> = self
352            .quads
353            .iter()
354            .map(|q| quad_to_nquad(q, mapping))
355            .collect();
356        lines.sort();
357        lines.join("\n")
358    }
359
360    // ─── Public entry point ───────────────────────────────────────────────
361
362    /// Canonicalize a slice of RDF quads according to URDNA2015.
363    ///
364    /// Returns a UTF-8 string containing sorted canonical N-Quads (one per
365    /// line, lines separated by `\n`).  An empty input yields an empty string.
366    pub fn canonicalize(quads: &[RdfQuad]) -> String {
367        let mut c = Canonicalizer::new();
368        c.quads = quads.to_vec();
369
370        if c.quads.is_empty() {
371            return String::new();
372        }
373
374        // Stage 1: collect blank nodes.
375        c.collect_blank_nodes();
376
377        // Stage 2: compute first-degree hashes.
378        let all_bnodes: Vec<String> = c.bnode_to_quads.keys().cloned().collect();
379        let temp_issuer = IdentifierIssuer::new("_:c14n");
380        for bnode in &all_bnodes {
381            let hash = c.hash_first_degree_quads(bnode, &temp_issuer);
382            c.hash_to_bnodes
383                .entry(hash)
384                .or_default()
385                .push(bnode.clone());
386        }
387
388        // Sort each group to ensure determinism before issuing.
389        for group in c.hash_to_bnodes.values_mut() {
390            group.sort();
391        }
392
393        // Stage 3: issue simple canonical IDs.
394        c.issue_simple_canonical_ids();
395
396        // Stage 4: issue N-degree canonical IDs.
397        c.issue_ndegree_canonical_ids();
398
399        // Stage 5: emit.
400        c.emit_canonical_nquads()
401    }
402}
403
404impl Default for Canonicalizer {
405    fn default() -> Self {
406        Self::new()
407    }
408}
409
410// ─── Permutation helpers ─────────────────────────────────────────────────────
411
412/// Generate all permutations of a slice.
413///
414/// Uses Heap's algorithm for efficiency.  The number of permutations is `n!`,
415/// so this should only be called on small slices (in practice blank nodes
416/// sharing the same first-degree hash are rare and few).
417fn permutations_of(items: &[String]) -> Vec<Vec<String>> {
418    let n = items.len();
419    if n == 0 {
420        return vec![vec![]];
421    }
422    if n == 1 {
423        return vec![items.to_vec()];
424    }
425
426    let mut result = Vec::new();
427    let mut arr = items.to_vec();
428    heap_permute(&mut arr, n, &mut result);
429    result
430}
431
432fn heap_permute(arr: &mut Vec<String>, k: usize, out: &mut Vec<Vec<String>>) {
433    if k == 1 {
434        out.push(arr.clone());
435        return;
436    }
437    for i in 0..k {
438        heap_permute(arr, k - 1, out);
439        if k % 2 == 0 {
440            arr.swap(i, k - 1);
441        } else {
442            arr.swap(0, k - 1);
443        }
444    }
445}
446
447// ─── Convenience free function ────────────────────────────────────────────────
448
449/// Canonicalize an RDF dataset (slice of [`RdfQuad`]s) to a canonical N-Quads
450/// string.
451///
452/// This is the primary public entry point for URDNA2015.
453///
454/// ```rust
455/// use oxirs_core::canon::{canonicalize, QuadTerm, RdfQuad};
456///
457/// let quads = vec![RdfQuad::new(
458///     QuadTerm::iri("http://example.org/s"),
459///     QuadTerm::iri("http://example.org/p"),
460///     QuadTerm::iri("http://example.org/o"),
461/// )];
462/// let canonical = canonicalize(&quads);
463/// assert!(canonical.contains("<http://example.org/s>"));
464/// ```
465pub fn canonicalize(quads: &[RdfQuad]) -> String {
466    Canonicalizer::canonicalize(quads)
467}
468
469// ─── Tests ────────────────────────────────────────────────────────────────────
470
471#[cfg(test)]
472mod tests {
473    use super::*;
474    use crate::canon::types::{QuadTerm, RdfQuad};
475    use std::collections::HashSet;
476
477    // ── Helper builders ──────────────────────────────────────────────────
478
479    fn iri_quad(s: &str, p: &str, o: &str) -> RdfQuad {
480        RdfQuad::new(QuadTerm::iri(s), QuadTerm::iri(p), QuadTerm::iri(o))
481    }
482
483    fn blank_iri_quad(bnode: &str, p: &str, o: &str) -> RdfQuad {
484        RdfQuad::new(QuadTerm::blank(bnode), QuadTerm::iri(p), QuadTerm::iri(o))
485    }
486
487    // ── Test 1: Empty graph ──────────────────────────────────────────────
488
489    #[test]
490    fn test_empty_graph_yields_empty_output() {
491        let result = canonicalize(&[]);
492        assert!(result.is_empty(), "expected empty string for empty input");
493    }
494
495    // ── Test 2: Single IRI quad ──────────────────────────────────────────
496
497    #[test]
498    fn test_single_iri_quad() {
499        let quads = vec![iri_quad(
500            "http://example.org/s",
501            "http://example.org/p",
502            "http://example.org/o",
503        )];
504        let result = canonicalize(&quads);
505        assert_eq!(
506            result,
507            "<http://example.org/s> <http://example.org/p> <http://example.org/o> ."
508        );
509    }
510
511    // ── Test 3: Literal with datatype ────────────────────────────────────
512
513    #[test]
514    fn test_literal_with_datatype() {
515        let quads = vec![RdfQuad::new(
516            QuadTerm::iri("http://example.org/s"),
517            QuadTerm::iri("http://example.org/p"),
518            QuadTerm::typed_literal("42", "http://www.w3.org/2001/XMLSchema#integer"),
519        )];
520        let result = canonicalize(&quads);
521        assert!(
522            result.contains("\"42\"^^<http://www.w3.org/2001/XMLSchema#integer>"),
523            "expected typed literal; got: {result}"
524        );
525    }
526
527    // ── Test 4: Literal with language tag ────────────────────────────────
528
529    #[test]
530    fn test_literal_with_language_tag() {
531        let quads = vec![RdfQuad::new(
532            QuadTerm::iri("http://example.org/s"),
533            QuadTerm::iri("http://example.org/p"),
534            QuadTerm::lang_literal("Hello", "en"),
535        )];
536        let result = canonicalize(&quads);
537        assert!(
538            result.contains("\"Hello\"@en"),
539            "expected language-tagged literal; got: {result}"
540        );
541    }
542
543    // ── Test 5: Single blank node → _:c14n0 ──────────────────────────────
544
545    #[test]
546    fn test_single_blank_node_gets_c14n0() {
547        let quads = vec![blank_iri_quad(
548            "b0",
549            "http://example.org/p",
550            "http://example.org/o",
551        )];
552        let result = canonicalize(&quads);
553        assert!(
554            result.contains("_:c14n0"),
555            "expected _:c14n0; got: {result}"
556        );
557        assert!(
558            !result.contains("_:b0"),
559            "original blank node id must not appear in output"
560        );
561    }
562
563    // ── Test 6: Two blank nodes → _:c14n0 and _:c14n1 ───────────────────
564
565    #[test]
566    fn test_two_blank_nodes_get_sequential_ids() {
567        let quads = vec![
568            blank_iri_quad("b0", "http://example.org/type", "http://example.org/A"),
569            blank_iri_quad("b1", "http://example.org/type", "http://example.org/B"),
570        ];
571        let result = canonicalize(&quads);
572        assert!(
573            result.contains("_:c14n0"),
574            "expected _:c14n0; got: {result}"
575        );
576        assert!(
577            result.contains("_:c14n1"),
578            "expected _:c14n1; got: {result}"
579        );
580    }
581
582    // ── Test 7: Same blank node in multiple quads → same canonical ID ─────
583
584    #[test]
585    fn test_same_blank_node_across_quads() {
586        let quads = vec![
587            blank_iri_quad(
588                "alice",
589                "http://example.org/name",
590                "http://example.org/Alice",
591            ),
592            blank_iri_quad(
593                "alice",
594                "http://example.org/knows",
595                "http://example.org/Bob",
596            ),
597        ];
598        let result = canonicalize(&quads);
599        // Both lines should reference the same canonical ID (c14n0).
600        let c14n_count = result.matches("_:c14n0").count();
601        assert_eq!(
602            c14n_count, 2,
603            "_:c14n0 should appear once per quad; got {c14n_count} occurrences:\n{result}"
604        );
605        // Only one canonical ID should exist.
606        assert!(
607            !result.contains("_:c14n1"),
608            "only one blank node exists; _:c14n1 must not appear; got:\n{result}"
609        );
610    }
611
612    // ── Test 8: Named graph quad ──────────────────────────────────────────
613
614    #[test]
615    fn test_named_graph_quad() {
616        let quads = vec![RdfQuad::new_in_graph(
617            QuadTerm::iri("http://example.org/s"),
618            QuadTerm::iri("http://example.org/p"),
619            QuadTerm::iri("http://example.org/o"),
620            QuadTerm::iri("http://example.org/graph1"),
621        )];
622        let result = canonicalize(&quads);
623        assert!(
624            result.contains("<http://example.org/graph1>"),
625            "named graph must appear in output; got: {result}"
626        );
627        assert!(
628            result.ends_with('.') || result.ends_with(". "),
629            "N-Quads line must end with period; got: {result}"
630        );
631    }
632
633    // ── Test 9: Canonical output is sorted lexicographically ─────────────
634
635    #[test]
636    fn test_output_is_sorted_lexicographically() {
637        let quads = vec![
638            iri_quad(
639                "http://z.example.org/s",
640                "http://example.org/p",
641                "http://example.org/o",
642            ),
643            iri_quad(
644                "http://a.example.org/s",
645                "http://example.org/p",
646                "http://example.org/o",
647            ),
648            iri_quad(
649                "http://m.example.org/s",
650                "http://example.org/p",
651                "http://example.org/o",
652            ),
653        ];
654        let result = canonicalize(&quads);
655        let lines: Vec<&str> = result.lines().collect();
656        assert_eq!(lines.len(), 3);
657        // Must be in ascending order.
658        assert!(lines[0] < lines[1], "line 0 must be < line 1");
659        assert!(lines[1] < lines[2], "line 1 must be < line 2");
660        assert!(
661            lines[0].contains("/a.example.org"),
662            "first line must be /a; got: {}",
663            lines[0]
664        );
665        assert!(
666            lines[2].contains("/z.example.org"),
667            "last line must be /z; got: {}",
668            lines[2]
669        );
670    }
671
672    // ── Test 10: Deterministic — two calls with same input → same output ──
673
674    #[test]
675    fn test_canonicalization_is_deterministic() {
676        let quads = vec![
677            blank_iri_quad("x", "http://example.org/p", "http://example.org/o"),
678            blank_iri_quad("y", "http://example.org/q", "http://example.org/o"),
679            RdfQuad::new(
680                QuadTerm::blank("x"),
681                QuadTerm::iri("http://example.org/knows"),
682                QuadTerm::blank("y"),
683            ),
684        ];
685        let first = canonicalize(&quads);
686        let second = canonicalize(&quads);
687        assert_eq!(
688            first, second,
689            "canonicalization must be deterministic across calls"
690        );
691    }
692
693    // ── Test 11: Blank node renamed from _:b0 to _:c14n0 ─────────────────
694
695    #[test]
696    fn test_blank_node_renamed_from_b0() {
697        let quads = vec![blank_iri_quad(
698            "b0",
699            "http://example.org/type",
700            "http://example.org/Thing",
701        )];
702        let result = canonicalize(&quads);
703        assert!(
704            result.contains("_:c14n0"),
705            "blank node must be renamed to _:c14n0; got: {result}"
706        );
707        assert!(
708            !result.contains("_:b0"),
709            "original _:b0 must NOT appear in output; got: {result}"
710        );
711    }
712
713    // ── Test 12: Complex graph — 3 blank nodes all get unique IDs ─────────
714
715    #[test]
716    fn test_three_blank_nodes_unique_ids() {
717        // Three blank nodes connected in a chain: x → y → z
718        let quads = vec![
719            RdfQuad::new(
720                QuadTerm::blank("x"),
721                QuadTerm::iri("http://example.org/next"),
722                QuadTerm::blank("y"),
723            ),
724            RdfQuad::new(
725                QuadTerm::blank("y"),
726                QuadTerm::iri("http://example.org/next"),
727                QuadTerm::blank("z"),
728            ),
729            blank_iri_quad("z", "http://example.org/type", "http://example.org/End"),
730        ];
731        let result = canonicalize(&quads);
732
733        let mut ids: HashSet<String> = HashSet::new();
734        for i in 0..3 {
735            let id = format!("_:c14n{}", i);
736            assert!(
737                result.contains(&id),
738                "expected {id} in output; got:\n{result}"
739            );
740            ids.insert(id);
741        }
742        assert_eq!(ids.len(), 3, "all three canonical IDs must be distinct");
743    }
744
745    // ── Test 13: Blank node in object position ────────────────────────────
746
747    #[test]
748    fn test_blank_node_in_object_position() {
749        let quads = vec![RdfQuad::new(
750            QuadTerm::iri("http://example.org/s"),
751            QuadTerm::iri("http://example.org/p"),
752            QuadTerm::blank("obj_node"),
753        )];
754        let result = canonicalize(&quads);
755        assert!(
756            result.contains("_:c14n0"),
757            "blank object must be canonicalized; got: {result}"
758        );
759    }
760
761    // ── Test 14: Blank node in named graph position ───────────────────────
762
763    #[test]
764    fn test_blank_node_as_named_graph() {
765        let quads = vec![RdfQuad::new_in_graph(
766            QuadTerm::iri("http://example.org/s"),
767            QuadTerm::iri("http://example.org/p"),
768            QuadTerm::iri("http://example.org/o"),
769            QuadTerm::blank("g"),
770        )];
771        let result = canonicalize(&quads);
772        assert!(
773            result.contains("_:c14n0"),
774            "blank graph name must be canonicalized; got: {result}"
775        );
776    }
777
778    // ── Test 15: Permutation issuer ───────────────────────────────────────
779
780    #[test]
781    fn test_identifier_issuer_sequential() {
782        let mut issuer = IdentifierIssuer::new("_:c14n");
783        let id0 = issuer.issue("b0");
784        let id1 = issuer.issue("b1");
785        let id0_again = issuer.issue("b0");
786        assert_eq!(id0, "_:c14n0");
787        assert_eq!(id1, "_:c14n1");
788        assert_eq!(id0_again, "_:c14n0", "same node must get same canonical id");
789    }
790
791    // ── Test 16: Two separate canonicalize calls are independent ──────────
792
793    #[test]
794    fn test_two_calls_are_independent() {
795        let quads_a = vec![blank_iri_quad(
796            "x",
797            "http://example.org/p",
798            "http://example.org/A",
799        )];
800        let quads_b = vec![blank_iri_quad(
801            "x",
802            "http://example.org/p",
803            "http://example.org/A",
804        )];
805        assert_eq!(
806            canonicalize(&quads_a),
807            canonicalize(&quads_b),
808            "identical inputs must produce identical outputs"
809        );
810    }
811}