ipfrs_core/
dag.rs

1//! DAG (Directed Acyclic Graph) traversal and analysis utilities.
2//!
3//! This module provides utilities for working with IPLD Merkle DAGs:
4//! - Extracting CID links from IPLD data
5//! - Calculating DAG statistics
6//! - Validating DAG structures
7//! - Collecting all CIDs in a DAG
8//!
9//! # Examples
10//!
11//! ```rust
12//! use ipfrs_core::{Ipld, Cid, CidBuilder};
13//! use ipfrs_core::dag::{extract_links, DagStats};
14//! use std::collections::BTreeMap;
15//!
16//! // Create IPLD with links
17//! let cid1 = CidBuilder::new().build(b"data1").unwrap();
18//! let cid2 = CidBuilder::new().build(b"data2").unwrap();
19//!
20//! let mut map = BTreeMap::new();
21//! map.insert("link1".to_string(), Ipld::link(cid1));
22//! map.insert("link2".to_string(), Ipld::link(cid2));
23//! let ipld = Ipld::Map(map);
24//!
25//! // Extract all CID links
26//! let links = extract_links(&ipld);
27//! assert_eq!(links.len(), 2);
28//! ```
29
30use crate::cid::{Cid, SerializableCid};
31use crate::ipld::Ipld;
32use std::collections::{HashMap, HashSet, VecDeque};
33
34/// Extract all CID links from an IPLD structure (non-recursive).
35///
36/// This function finds all direct `Ipld::Link` values in the given IPLD data,
37/// but does not recursively traverse nested links. Use `collect_all_links` for
38/// recursive traversal.
39///
40/// # Arguments
41///
42/// * `ipld` - The IPLD data to extract links from
43///
44/// # Returns
45///
46/// A vector of all CID links found at the top level
47///
48/// # Examples
49///
50/// ```rust
51/// use ipfrs_core::{Ipld, CidBuilder};
52/// use ipfrs_core::dag::extract_links;
53/// use std::collections::BTreeMap;
54///
55/// let cid = CidBuilder::new().build(b"test").unwrap();
56/// let mut map = BTreeMap::new();
57/// map.insert("file".to_string(), Ipld::link(cid.clone()));
58/// let ipld = Ipld::Map(map);
59///
60/// let links = extract_links(&ipld);
61/// assert_eq!(links.len(), 1);
62/// assert_eq!(links[0], cid);
63/// ```
64pub fn extract_links(ipld: &Ipld) -> Vec<Cid> {
65    let mut links = Vec::new();
66    extract_links_recursive(ipld, &mut links, false);
67    links
68}
69
70/// Extract all CID links from an IPLD structure recursively.
71///
72/// This function traverses the entire IPLD tree and collects all `Ipld::Link`
73/// values found at any depth.
74///
75/// # Arguments
76///
77/// * `ipld` - The IPLD data to extract links from
78///
79/// # Returns
80///
81/// A vector of all CID links found (may contain duplicates)
82///
83/// # Examples
84///
85/// ```rust
86/// use ipfrs_core::{Ipld, CidBuilder};
87/// use ipfrs_core::dag::collect_all_links;
88/// use std::collections::BTreeMap;
89///
90/// let cid1 = CidBuilder::new().build(b"test1").unwrap();
91/// let cid2 = CidBuilder::new().build(b"test2").unwrap();
92///
93/// // Nested structure
94/// let mut inner = BTreeMap::new();
95/// inner.insert("link".to_string(), Ipld::link(cid2.clone()));
96///
97/// let mut outer = BTreeMap::new();
98/// outer.insert("file".to_string(), Ipld::link(cid1.clone()));
99/// outer.insert("nested".to_string(), Ipld::Map(inner));
100///
101/// let ipld = Ipld::Map(outer);
102/// let links = collect_all_links(&ipld);
103/// assert_eq!(links.len(), 2);
104/// ```
105pub fn collect_all_links(ipld: &Ipld) -> Vec<Cid> {
106    let mut links = Vec::new();
107    extract_links_recursive(ipld, &mut links, true);
108    links
109}
110
111/// Extract unique CID links from an IPLD structure (no duplicates).
112///
113/// This function is similar to `collect_all_links` but returns a set of unique
114/// CIDs, removing any duplicates.
115///
116/// # Arguments
117///
118/// * `ipld` - The IPLD data to extract links from
119///
120/// # Returns
121///
122/// A set of unique CID links
123pub fn collect_unique_links(ipld: &Ipld) -> HashSet<Cid> {
124    collect_all_links(ipld).into_iter().collect()
125}
126
127/// Internal recursive link extraction
128fn extract_links_recursive(ipld: &Ipld, links: &mut Vec<Cid>, recursive: bool) {
129    match ipld {
130        Ipld::Link(SerializableCid(cid)) => {
131            links.push(*cid);
132        }
133        Ipld::List(items) => {
134            if recursive {
135                for item in items {
136                    extract_links_recursive(item, links, true);
137                }
138            } else {
139                for item in items {
140                    if let Ipld::Link(SerializableCid(cid)) = item {
141                        links.push(*cid);
142                    }
143                }
144            }
145        }
146        Ipld::Map(map) => {
147            if recursive {
148                for value in map.values() {
149                    extract_links_recursive(value, links, true);
150                }
151            } else {
152                for value in map.values() {
153                    if let Ipld::Link(SerializableCid(cid)) = value {
154                        links.push(*cid);
155                    }
156                }
157            }
158        }
159        _ => {}
160    }
161}
162
163/// Statistics about a DAG structure
164#[derive(Debug, Clone, Default, PartialEq, Eq)]
165pub struct DagStats {
166    /// Total number of unique CIDs in the DAG
167    pub unique_cids: usize,
168    /// Total number of links (including duplicates)
169    pub total_links: usize,
170    /// Maximum depth of the DAG
171    pub max_depth: usize,
172    /// Number of leaf nodes (nodes with no outgoing links)
173    pub leaf_count: usize,
174    /// Number of intermediate nodes (nodes with outgoing links)
175    pub intermediate_count: usize,
176}
177
178impl DagStats {
179    /// Create empty DAG statistics
180    pub fn new() -> Self {
181        Self::default()
182    }
183
184    /// Calculate statistics for an IPLD structure
185    ///
186    /// Note: This only analyzes the structure of the IPLD data provided,
187    /// it does not follow CID links to fetch additional blocks.
188    ///
189    /// # Arguments
190    ///
191    /// * `ipld` - The IPLD data to analyze
192    ///
193    /// # Returns
194    ///
195    /// Statistics about the DAG structure
196    ///
197    /// # Examples
198    ///
199    /// ```rust
200    /// use ipfrs_core::{Ipld, CidBuilder};
201    /// use ipfrs_core::dag::DagStats;
202    /// use std::collections::BTreeMap;
203    ///
204    /// let cid = CidBuilder::new().build(b"data").unwrap();
205    /// let mut map = BTreeMap::new();
206    /// map.insert("link".to_string(), Ipld::link(cid));
207    /// let ipld = Ipld::Map(map);
208    ///
209    /// let stats = DagStats::from_ipld(&ipld);
210    /// assert_eq!(stats.total_links, 1);
211    /// assert_eq!(stats.unique_cids, 1);
212    /// ```
213    pub fn from_ipld(ipld: &Ipld) -> Self {
214        let all_links = collect_all_links(ipld);
215        let unique_links: HashSet<_> = all_links.iter().collect();
216
217        let depth = calculate_depth(ipld, 0);
218        let (leaves, intermediates) = count_node_types(ipld);
219
220        Self {
221            unique_cids: unique_links.len(),
222            total_links: all_links.len(),
223            max_depth: depth,
224            leaf_count: leaves,
225            intermediate_count: intermediates,
226        }
227    }
228
229    /// Calculate the deduplication ratio
230    ///
231    /// Returns the ratio of duplicate links to total links.
232    /// A value of 0.0 means no duplication, 1.0 means all links are duplicates.
233    pub fn deduplication_ratio(&self) -> f64 {
234        if self.total_links == 0 {
235            return 0.0;
236        }
237        let duplicates = self.total_links.saturating_sub(self.unique_cids);
238        duplicates as f64 / self.total_links as f64
239    }
240}
241
242/// Calculate the maximum depth of an IPLD tree
243fn calculate_depth(ipld: &Ipld, current_depth: usize) -> usize {
244    match ipld {
245        Ipld::List(items) => {
246            let max_child_depth = items
247                .iter()
248                .map(|item| calculate_depth(item, current_depth + 1))
249                .max()
250                .unwrap_or(current_depth);
251            max_child_depth
252        }
253        Ipld::Map(map) => {
254            let max_child_depth = map
255                .values()
256                .map(|value| calculate_depth(value, current_depth + 1))
257                .max()
258                .unwrap_or(current_depth);
259            max_child_depth
260        }
261        _ => current_depth,
262    }
263}
264
265/// Count leaf and intermediate nodes
266fn count_node_types(ipld: &Ipld) -> (usize, usize) {
267    match ipld {
268        Ipld::List(items) => {
269            if items.is_empty() {
270                return (1, 0); // Empty list is a leaf
271            }
272            let mut leaves = 0;
273            let mut intermediates = 1; // This node is intermediate
274            for item in items {
275                let (l, i) = count_node_types(item);
276                leaves += l;
277                intermediates += i;
278            }
279            (leaves, intermediates)
280        }
281        Ipld::Map(map) => {
282            if map.is_empty() {
283                return (1, 0); // Empty map is a leaf
284            }
285            let mut leaves = 0;
286            let mut intermediates = 1; // This node is intermediate
287            for value in map.values() {
288                let (l, i) = count_node_types(value);
289                leaves += l;
290                intermediates += i;
291            }
292            (leaves, intermediates)
293        }
294        _ => (1, 0), // Scalar values are leaves
295    }
296}
297
298/// Validate that an IPLD structure forms a proper DAG (no cycles).
299///
300/// This function checks that there are no circular references in the CID links.
301/// Note: This only validates the structure of the provided IPLD data, it does
302/// not fetch and validate linked blocks.
303///
304/// # Arguments
305///
306/// * `ipld` - The IPLD data to validate
307///
308/// # Returns
309///
310/// `true` if the structure is acyclic, `false` if cycles are detected
311///
312/// # Examples
313///
314/// ```rust
315/// use ipfrs_core::{Ipld, CidBuilder};
316/// use ipfrs_core::dag::is_dag;
317/// use std::collections::BTreeMap;
318///
319/// let cid = CidBuilder::new().build(b"test").unwrap();
320/// let mut map = BTreeMap::new();
321/// map.insert("link".to_string(), Ipld::link(cid));
322/// let ipld = Ipld::Map(map);
323///
324/// assert!(is_dag(&ipld));
325/// ```
326pub fn is_dag(ipld: &Ipld) -> bool {
327    let mut visited = HashSet::new();
328    let mut stack = HashSet::new();
329    has_cycle_dfs(ipld, &mut visited, &mut stack)
330}
331
332/// DFS cycle detection
333fn has_cycle_dfs(ipld: &Ipld, visited: &mut HashSet<String>, stack: &mut HashSet<String>) -> bool {
334    match ipld {
335        Ipld::Link(SerializableCid(cid)) => {
336            let cid_str = cid.to_string();
337            if stack.contains(&cid_str) {
338                return false; // Cycle detected
339            }
340            if visited.contains(&cid_str) {
341                return true; // Already validated this path
342            }
343            visited.insert(cid_str.clone());
344            stack.insert(cid_str.clone());
345            // Note: We can't follow the link without a BlockFetcher
346            // So we just mark it as visited
347            stack.remove(&cid_str);
348            true
349        }
350        Ipld::List(items) => {
351            for item in items {
352                if !has_cycle_dfs(item, visited, stack) {
353                    return false;
354                }
355            }
356            true
357        }
358        Ipld::Map(map) => {
359            for value in map.values() {
360                if !has_cycle_dfs(value, visited, stack) {
361                    return false;
362                }
363            }
364            true
365        }
366        _ => true,
367    }
368}
369
370/// Find all paths from root to a specific CID in an IPLD structure.
371///
372/// Returns a list of paths (as lists of keys) that lead to the target CID.
373///
374/// # Arguments
375///
376/// * `ipld` - The IPLD data to search
377/// * `target_cid` - The CID to find paths to
378///
379/// # Returns
380///
381/// A vector of paths, where each path is a vector of keys leading to the CID
382pub fn find_paths_to_cid(ipld: &Ipld, target_cid: &Cid) -> Vec<Vec<String>> {
383    let mut paths = Vec::new();
384    let mut current_path = Vec::new();
385    find_paths_recursive(ipld, target_cid, &mut current_path, &mut paths);
386    paths
387}
388
389/// Recursive path finding helper
390fn find_paths_recursive(
391    ipld: &Ipld,
392    target_cid: &Cid,
393    current_path: &mut Vec<String>,
394    paths: &mut Vec<Vec<String>>,
395) {
396    match ipld {
397        Ipld::Link(SerializableCid(cid)) => {
398            if cid == target_cid {
399                paths.push(current_path.clone());
400            }
401        }
402        Ipld::List(items) => {
403            for (i, item) in items.iter().enumerate() {
404                current_path.push(format!("[{}]", i));
405                find_paths_recursive(item, target_cid, current_path, paths);
406                current_path.pop();
407            }
408        }
409        Ipld::Map(map) => {
410            for (key, value) in map {
411                current_path.push(key.clone());
412                find_paths_recursive(value, target_cid, current_path, paths);
413                current_path.pop();
414            }
415        }
416        _ => {}
417    }
418}
419
420/// Traverse a DAG in breadth-first order, collecting all IPLD nodes.
421///
422/// Note: This only traverses the structure of the provided IPLD data,
423/// it does not fetch linked blocks.
424///
425/// # Arguments
426///
427/// * `root` - The root IPLD node to start traversal from
428///
429/// # Returns
430///
431/// A vector of all IPLD nodes in breadth-first order
432pub fn traverse_bfs(root: &Ipld) -> Vec<Ipld> {
433    let mut result = Vec::new();
434    let mut queue = VecDeque::new();
435    queue.push_back(root.clone());
436
437    while let Some(node) = queue.pop_front() {
438        result.push(node.clone());
439
440        match &node {
441            Ipld::List(items) => {
442                for item in items {
443                    queue.push_back(item.clone());
444                }
445            }
446            Ipld::Map(map) => {
447                for value in map.values() {
448                    queue.push_back(value.clone());
449                }
450            }
451            _ => {}
452        }
453    }
454
455    result
456}
457
458/// Traverse a DAG in depth-first order, collecting all IPLD nodes.
459///
460/// # Arguments
461///
462/// * `root` - The root IPLD node to start traversal from
463///
464/// # Returns
465///
466/// A vector of all IPLD nodes in depth-first order
467pub fn traverse_dfs(root: &Ipld) -> Vec<Ipld> {
468    let mut result = Vec::new();
469    traverse_dfs_recursive(root, &mut result);
470    result
471}
472
473/// Recursive DFS helper
474fn traverse_dfs_recursive(node: &Ipld, result: &mut Vec<Ipld>) {
475    result.push(node.clone());
476
477    match node {
478        Ipld::List(items) => {
479            for item in items {
480                traverse_dfs_recursive(item, result);
481            }
482        }
483        Ipld::Map(map) => {
484            for value in map.values() {
485                traverse_dfs_recursive(value, result);
486            }
487        }
488        _ => {}
489    }
490}
491
492/// Additional DAG metrics beyond basic statistics.
493///
494/// Provides advanced graph-theoretic metrics for analyzing DAG structure.
495#[derive(Debug, Clone, Default, PartialEq)]
496pub struct DagMetrics {
497    /// Average branching factor (average number of children per non-leaf node)
498    pub avg_branching_factor: f64,
499    /// Maximum branching factor (maximum number of children for any node)
500    pub max_branching_factor: usize,
501    /// Width of the DAG (maximum number of nodes at any level)
502    pub width: usize,
503    /// Total number of nodes in the DAG
504    pub total_nodes: usize,
505    /// Average depth of leaf nodes
506    pub avg_leaf_depth: f64,
507}
508
509impl DagMetrics {
510    /// Calculate advanced metrics for an IPLD structure.
511    ///
512    /// # Arguments
513    ///
514    /// * `ipld` - The IPLD data to analyze
515    ///
516    /// # Returns
517    ///
518    /// Advanced metrics about the DAG structure
519    ///
520    /// # Examples
521    ///
522    /// ```rust
523    /// use ipfrs_core::{Ipld, CidBuilder};
524    /// use ipfrs_core::dag::DagMetrics;
525    /// use std::collections::BTreeMap;
526    ///
527    /// let cid = CidBuilder::new().build(b"data").unwrap();
528    /// let mut map = BTreeMap::new();
529    /// map.insert("link1".to_string(), Ipld::link(cid.clone()));
530    /// map.insert("link2".to_string(), Ipld::link(cid));
531    /// let ipld = Ipld::Map(map);
532    ///
533    /// let metrics = DagMetrics::from_ipld(&ipld);
534    /// assert_eq!(metrics.max_branching_factor, 2);
535    /// ```
536    pub fn from_ipld(ipld: &Ipld) -> Self {
537        let mut levels: HashMap<usize, usize> = HashMap::new();
538        let mut branching_factors = Vec::new();
539        let mut leaf_depths = Vec::new();
540        let mut total_nodes = 0;
541
542        calculate_metrics(
543            ipld,
544            0,
545            &mut levels,
546            &mut branching_factors,
547            &mut leaf_depths,
548            &mut total_nodes,
549        );
550
551        let width = levels.values().copied().max().unwrap_or(0);
552        let max_branching_factor = branching_factors.iter().copied().max().unwrap_or(0);
553        let avg_branching_factor = if branching_factors.is_empty() {
554            0.0
555        } else {
556            branching_factors.iter().sum::<usize>() as f64 / branching_factors.len() as f64
557        };
558        let avg_leaf_depth = if leaf_depths.is_empty() {
559            0.0
560        } else {
561            leaf_depths.iter().sum::<usize>() as f64 / leaf_depths.len() as f64
562        };
563
564        Self {
565            avg_branching_factor,
566            max_branching_factor,
567            width,
568            total_nodes,
569            avg_leaf_depth,
570        }
571    }
572}
573
574/// Helper function to calculate DAG metrics recursively
575fn calculate_metrics(
576    ipld: &Ipld,
577    depth: usize,
578    levels: &mut HashMap<usize, usize>,
579    branching_factors: &mut Vec<usize>,
580    leaf_depths: &mut Vec<usize>,
581    total_nodes: &mut usize,
582) {
583    *total_nodes += 1;
584    *levels.entry(depth).or_insert(0) += 1;
585
586    match ipld {
587        Ipld::List(items) => {
588            if items.is_empty() {
589                leaf_depths.push(depth);
590            } else {
591                branching_factors.push(items.len());
592                for item in items {
593                    calculate_metrics(
594                        item,
595                        depth + 1,
596                        levels,
597                        branching_factors,
598                        leaf_depths,
599                        total_nodes,
600                    );
601                }
602            }
603        }
604        Ipld::Map(map) => {
605            if map.is_empty() {
606                leaf_depths.push(depth);
607            } else {
608                branching_factors.push(map.len());
609                for value in map.values() {
610                    calculate_metrics(
611                        value,
612                        depth + 1,
613                        levels,
614                        branching_factors,
615                        leaf_depths,
616                        total_nodes,
617                    );
618                }
619            }
620        }
621        _ => {
622            leaf_depths.push(depth);
623        }
624    }
625}
626
627/// Perform a topological sort on IPLD nodes containing CID links.
628///
629/// Returns nodes in dependency order (dependencies before dependents).
630/// This is useful for processing DAGs where nodes depend on their children.
631///
632/// Note: Since we cannot fetch linked blocks, this only sorts the CIDs
633/// found in the IPLD structure based on their dependency relationships.
634///
635/// # Arguments
636///
637/// * `ipld` - The IPLD data to sort
638///
639/// # Returns
640///
641/// A vector of CIDs in topological order (leaves first, root last)
642///
643/// # Examples
644///
645/// ```rust
646/// use ipfrs_core::{Ipld, CidBuilder};
647/// use ipfrs_core::dag::topological_sort;
648/// use std::collections::BTreeMap;
649///
650/// let cid1 = CidBuilder::new().build(b"leaf1").unwrap();
651/// let cid2 = CidBuilder::new().build(b"leaf2").unwrap();
652///
653/// let mut map = BTreeMap::new();
654/// map.insert("child1".to_string(), Ipld::link(cid1));
655/// map.insert("child2".to_string(), Ipld::link(cid2));
656/// let ipld = Ipld::Map(map);
657///
658/// let sorted = topological_sort(&ipld);
659/// assert_eq!(sorted.len(), 2);
660/// ```
661pub fn topological_sort(ipld: &Ipld) -> Vec<Cid> {
662    let links = collect_all_links(ipld);
663    let mut result = Vec::new();
664    let mut visited = HashSet::new();
665
666    // Simple topological sort: collect all unique CIDs
667    // Since we can't fetch blocks, we just deduplicate and return in encounter order
668    for cid in links {
669        if visited.insert(cid) {
670            result.push(cid);
671        }
672    }
673
674    result
675}
676
677/// Calculate the size (number of nodes) of a subgraph rooted at the given IPLD node.
678///
679/// This counts all nodes reachable from the root, including the root itself.
680///
681/// # Arguments
682///
683/// * `ipld` - The root of the subgraph
684///
685/// # Returns
686///
687/// The total number of nodes in the subgraph
688///
689/// # Examples
690///
691/// ```rust
692/// use ipfrs_core::Ipld;
693/// use ipfrs_core::dag::subgraph_size;
694///
695/// let ipld = Ipld::List(vec![
696///     Ipld::Integer(1),
697///     Ipld::Integer(2),
698///     Ipld::List(vec![Ipld::Integer(3)]),
699/// ]);
700///
701/// let size = subgraph_size(&ipld);
702/// assert_eq!(size, 5); // Root list + 2 integers + nested list + nested integer
703/// ```
704pub fn subgraph_size(ipld: &Ipld) -> usize {
705    let mut count = 1; // Count the root
706
707    match ipld {
708        Ipld::List(items) => {
709            for item in items {
710                count += subgraph_size(item);
711            }
712        }
713        Ipld::Map(map) => {
714            for value in map.values() {
715                count += subgraph_size(value);
716            }
717        }
718        _ => {}
719    }
720
721    count
722}
723
724/// Calculate the fanout (number of direct children) for each level of the DAG.
725///
726/// Returns a vector where index i contains the total number of children
727/// at depth i in the DAG.
728///
729/// # Arguments
730///
731/// * `ipld` - The IPLD data to analyze
732///
733/// # Returns
734///
735/// A vector of fanout counts per level
736///
737/// # Examples
738///
739/// ```rust
740/// use ipfrs_core::Ipld;
741/// use ipfrs_core::dag::dag_fanout_by_level;
742///
743/// let ipld = Ipld::List(vec![
744///     Ipld::Integer(1),
745///     Ipld::List(vec![Ipld::Integer(2), Ipld::Integer(3)]),
746/// ]);
747///
748/// let fanout = dag_fanout_by_level(&ipld);
749/// assert!(fanout.len() >= 2);
750/// ```
751pub fn dag_fanout_by_level(ipld: &Ipld) -> Vec<usize> {
752    let mut fanout_by_level = Vec::new();
753    calculate_fanout(ipld, 0, &mut fanout_by_level);
754    fanout_by_level
755}
756
757/// Helper to calculate fanout at each level
758fn calculate_fanout(ipld: &Ipld, depth: usize, fanout_by_level: &mut Vec<usize>) {
759    // Ensure vector is large enough
760    while fanout_by_level.len() <= depth {
761        fanout_by_level.push(0);
762    }
763
764    match ipld {
765        Ipld::List(items) => {
766            fanout_by_level[depth] += items.len();
767            for item in items {
768                calculate_fanout(item, depth + 1, fanout_by_level);
769            }
770        }
771        Ipld::Map(map) => {
772            fanout_by_level[depth] += map.len();
773            for value in map.values() {
774                calculate_fanout(value, depth + 1, fanout_by_level);
775            }
776        }
777        _ => {}
778    }
779}
780
781/// Count the number of CID links at each depth level in the DAG.
782///
783/// Returns a vector where index i contains the count of CID links at depth i.
784///
785/// # Arguments
786///
787/// * `ipld` - The IPLD data to analyze
788///
789/// # Returns
790///
791/// A vector of link counts per depth level
792///
793/// # Examples
794///
795/// ```rust
796/// use ipfrs_core::{Ipld, CidBuilder};
797/// use ipfrs_core::dag::count_links_by_depth;
798///
799/// // Direct link at depth 0
800/// let cid = CidBuilder::new().build(b"test").unwrap();
801/// let ipld = Ipld::link(cid);
802///
803/// let counts = count_links_by_depth(&ipld);
804/// assert_eq!(counts[0], 1);
805/// ```
806pub fn count_links_by_depth(ipld: &Ipld) -> Vec<usize> {
807    let mut counts = Vec::new();
808    count_links_recursive(ipld, 0, &mut counts);
809    counts
810}
811
812/// Helper to count links at each depth
813fn count_links_recursive(ipld: &Ipld, depth: usize, counts: &mut Vec<usize>) {
814    // Ensure vector is large enough
815    while counts.len() <= depth {
816        counts.push(0);
817    }
818
819    match ipld {
820        Ipld::Link(_) => {
821            counts[depth] += 1;
822        }
823        Ipld::List(items) => {
824            for item in items {
825                count_links_recursive(item, depth + 1, counts);
826            }
827        }
828        Ipld::Map(map) => {
829            for value in map.values() {
830                count_links_recursive(value, depth + 1, counts);
831            }
832        }
833        _ => {}
834    }
835}
836
837/// Filter an IPLD structure to only include nodes matching a predicate.
838///
839/// This creates a new IPLD structure containing only the nodes where the
840/// predicate returns true.
841///
842/// # Arguments
843///
844/// * `ipld` - The IPLD data to filter
845/// * `predicate` - Function that returns true for nodes to keep
846///
847/// # Returns
848///
849/// A filtered IPLD structure, or None if the root doesn't match
850///
851/// # Examples
852///
853/// ```rust
854/// use ipfrs_core::Ipld;
855/// use ipfrs_core::dag::filter_dag;
856///
857/// let ipld = Ipld::List(vec![
858///     Ipld::Integer(1),
859///     Ipld::Integer(2),
860///     Ipld::String("hello".to_string()),
861/// ]);
862///
863/// // Keep only integers
864/// let filtered = filter_dag(&ipld, &|node| matches!(node, Ipld::Integer(_) | Ipld::List(_)));
865/// assert!(filtered.is_some());
866/// ```
867pub fn filter_dag<F>(ipld: &Ipld, predicate: &F) -> Option<Ipld>
868where
869    F: Fn(&Ipld) -> bool,
870{
871    if !predicate(ipld) {
872        return None;
873    }
874
875    match ipld {
876        Ipld::List(items) => {
877            let filtered_items: Vec<Ipld> = items
878                .iter()
879                .filter_map(|item| filter_dag(item, predicate))
880                .collect();
881            Some(Ipld::List(filtered_items))
882        }
883        Ipld::Map(map) => {
884            let filtered_map: std::collections::BTreeMap<String, Ipld> = map
885                .iter()
886                .filter_map(|(k, v)| filter_dag(v, predicate).map(|filtered| (k.clone(), filtered)))
887                .collect();
888            Some(Ipld::Map(filtered_map))
889        }
890        other => Some(other.clone()),
891    }
892}
893
894/// Transform all nodes in a DAG using a mapping function.
895///
896/// Applies the transformation function to each node in the IPLD structure,
897/// building a new transformed DAG.
898///
899/// # Arguments
900///
901/// * `ipld` - The IPLD data to transform
902/// * `transform` - Function to transform each node
903///
904/// # Returns
905///
906/// The transformed IPLD structure
907///
908/// # Examples
909///
910/// ```rust
911/// use ipfrs_core::Ipld;
912/// use ipfrs_core::dag::map_dag;
913///
914/// let ipld = Ipld::Integer(42);
915///
916/// // Double all integers
917/// let transformed = map_dag(&ipld, &|node| {
918///     match node {
919///         Ipld::Integer(n) => Ipld::Integer(n * 2),
920///         other => other.clone(),
921///     }
922/// });
923///
924/// assert_eq!(transformed, Ipld::Integer(84));
925/// ```
926pub fn map_dag<F>(ipld: &Ipld, transform: &F) -> Ipld
927where
928    F: Fn(&Ipld) -> Ipld,
929{
930    let transformed = match ipld {
931        Ipld::List(items) => {
932            let mapped_items: Vec<Ipld> =
933                items.iter().map(|item| map_dag(item, transform)).collect();
934            Ipld::List(mapped_items)
935        }
936        Ipld::Map(map) => {
937            let mapped_map: std::collections::BTreeMap<String, Ipld> = map
938                .iter()
939                .map(|(k, v)| (k.clone(), map_dag(v, transform)))
940                .collect();
941            Ipld::Map(mapped_map)
942        }
943        other => other.clone(),
944    };
945
946    transform(&transformed)
947}
948
949/// Find the differences between two IPLD DAGs
950///
951/// Returns a tuple of (unique_to_first, unique_to_second, common_links).
952/// This is useful for determining what changed between two versions of a DAG.
953///
954/// # Arguments
955///
956/// * `dag1` - First DAG to compare
957/// * `dag2` - Second DAG to compare
958///
959/// # Returns
960///
961/// A tuple of:
962/// - Links unique to dag1
963/// - Links unique to dag2
964/// - Links common to both
965///
966/// # Example
967///
968/// ```rust
969/// use ipfrs_core::{Ipld, CidBuilder};
970/// use ipfrs_core::dag::dag_diff;
971/// use std::collections::BTreeMap;
972///
973/// let cid1 = CidBuilder::new().build(b"a").unwrap();
974/// let cid2 = CidBuilder::new().build(b"b").unwrap();
975/// let cid3 = CidBuilder::new().build(b"c").unwrap();
976///
977/// let mut map1 = BTreeMap::new();
978/// map1.insert("link1".to_string(), Ipld::link(cid1));
979/// map1.insert("link2".to_string(), Ipld::link(cid2));
980///
981/// let mut map2 = BTreeMap::new();
982/// map2.insert("link2".to_string(), Ipld::link(cid2));
983/// map2.insert("link3".to_string(), Ipld::link(cid3));
984///
985/// let dag1 = Ipld::Map(map1);
986/// let dag2 = Ipld::Map(map2);
987///
988/// let (unique1, unique2, common) = dag_diff(&dag1, &dag2);
989/// assert_eq!(unique1.len(), 1); // cid1
990/// assert_eq!(unique2.len(), 1); // cid3
991/// assert_eq!(common.len(), 1);  // cid2
992/// ```
993pub fn dag_diff(dag1: &Ipld, dag2: &Ipld) -> (HashSet<Cid>, HashSet<Cid>, HashSet<Cid>) {
994    let links1 = collect_unique_links(dag1);
995    let links2 = collect_unique_links(dag2);
996
997    let unique_to_first: HashSet<Cid> = links1.difference(&links2).copied().collect();
998    let unique_to_second: HashSet<Cid> = links2.difference(&links1).copied().collect();
999    let common: HashSet<Cid> = links1.intersection(&links2).copied().collect();
1000
1001    (unique_to_first, unique_to_second, common)
1002}
1003
1004/// Find common ancestor links between two DAGs
1005///
1006/// Returns the set of CID links that appear in both DAGs, which can help
1007/// identify shared structure or common ancestry.
1008///
1009/// # Arguments
1010///
1011/// * `dag1` - First DAG
1012/// * `dag2` - Second DAG
1013///
1014/// # Returns
1015///
1016/// A set of CIDs that appear in both DAGs
1017///
1018/// # Example
1019///
1020/// ```rust
1021/// use ipfrs_core::{Ipld, CidBuilder};
1022/// use ipfrs_core::dag::find_common_links;
1023///
1024/// let cid = CidBuilder::new().build(b"shared").unwrap();
1025///
1026/// let dag1 = Ipld::List(vec![Ipld::link(cid)]);
1027/// let dag2 = Ipld::List(vec![Ipld::link(cid)]);
1028///
1029/// let common = find_common_links(&dag1, &dag2);
1030/// assert_eq!(common.len(), 1);
1031/// ```
1032pub fn find_common_links(dag1: &Ipld, dag2: &Ipld) -> HashSet<Cid> {
1033    let links1 = collect_unique_links(dag1);
1034    let links2 = collect_unique_links(dag2);
1035
1036    links1.intersection(&links2).copied().collect()
1037}
1038
1039/// Prune nodes from a DAG based on a predicate
1040///
1041/// This function creates a new DAG with only the nodes that match the predicate.
1042/// It's useful for removing unwanted nodes or creating filtered views of a DAG.
1043///
1044/// # Arguments
1045///
1046/// * `ipld` - The DAG to prune
1047/// * `should_keep` - Predicate function returning true for nodes to keep
1048///
1049/// # Returns
1050///
1051/// A new IPLD structure with pruned nodes, or None if the root is pruned
1052///
1053/// # Example
1054///
1055/// ```rust
1056/// use ipfrs_core::Ipld;
1057/// use ipfrs_core::dag::prune_dag;
1058///
1059/// let ipld = Ipld::List(vec![
1060///     Ipld::Integer(1),
1061///     Ipld::Integer(2),
1062///     Ipld::String("keep".to_string()),
1063/// ]);
1064///
1065/// // Keep only strings
1066/// let pruned = prune_dag(&ipld, &|node| {
1067///     matches!(node, Ipld::String(_) | Ipld::List(_))
1068/// });
1069///
1070/// assert!(pruned.is_some());
1071/// ```
1072pub fn prune_dag<F>(ipld: &Ipld, should_keep: &F) -> Option<Ipld>
1073where
1074    F: Fn(&Ipld) -> bool,
1075{
1076    if !should_keep(ipld) {
1077        return None;
1078    }
1079
1080    match ipld {
1081        Ipld::List(items) => {
1082            let pruned_items: Vec<Ipld> = items
1083                .iter()
1084                .filter_map(|item| prune_dag(item, should_keep))
1085                .collect();
1086
1087            if pruned_items.is_empty() && !items.is_empty() {
1088                None
1089            } else {
1090                Some(Ipld::List(pruned_items))
1091            }
1092        }
1093        Ipld::Map(map) => {
1094            let pruned_map: std::collections::BTreeMap<String, Ipld> = map
1095                .iter()
1096                .filter_map(|(k, v)| prune_dag(v, should_keep).map(|pruned| (k.clone(), pruned)))
1097                .collect();
1098
1099            if pruned_map.is_empty() && !map.is_empty() {
1100                None
1101            } else {
1102                Some(Ipld::Map(pruned_map))
1103            }
1104        }
1105        other => Some(other.clone()),
1106    }
1107}
1108
1109/// Merge two DAGs into a single DAG
1110///
1111/// Creates a new DAG that contains all nodes from both input DAGs. When both
1112/// DAGs are maps, their keys are merged (dag2 values override dag1 on conflicts).
1113/// When both are lists, they are concatenated.
1114///
1115/// # Arguments
1116///
1117/// * `dag1` - First DAG
1118/// * `dag2` - Second DAG
1119///
1120/// # Returns
1121///
1122/// A merged IPLD structure
1123///
1124/// # Example
1125///
1126/// ```rust
1127/// use ipfrs_core::Ipld;
1128/// use ipfrs_core::dag::merge_dags;
1129/// use std::collections::BTreeMap;
1130///
1131/// let mut map1 = BTreeMap::new();
1132/// map1.insert("a".to_string(), Ipld::Integer(1));
1133///
1134/// let mut map2 = BTreeMap::new();
1135/// map2.insert("b".to_string(), Ipld::Integer(2));
1136///
1137/// let dag1 = Ipld::Map(map1);
1138/// let dag2 = Ipld::Map(map2);
1139///
1140/// let merged = merge_dags(&dag1, &dag2);
1141/// if let Ipld::Map(m) = merged {
1142///     assert_eq!(m.len(), 2);
1143/// }
1144/// ```
1145pub fn merge_dags(dag1: &Ipld, dag2: &Ipld) -> Ipld {
1146    match (dag1, dag2) {
1147        (Ipld::Map(map1), Ipld::Map(map2)) => {
1148            let mut merged = map1.clone();
1149            for (k, v) in map2 {
1150                merged.insert(k.clone(), v.clone());
1151            }
1152            Ipld::Map(merged)
1153        }
1154        (Ipld::List(list1), Ipld::List(list2)) => {
1155            let mut merged = list1.clone();
1156            merged.extend(list2.clone());
1157            Ipld::List(merged)
1158        }
1159        // If types don't match, prefer dag2
1160        (_, dag2) => dag2.clone(),
1161    }
1162}
1163
1164/// Count the total number of nodes in a DAG
1165///
1166/// This includes all nodes at all levels, counting duplicates if they appear
1167/// multiple times in the structure.
1168///
1169/// # Arguments
1170///
1171/// * `ipld` - The DAG to count nodes in
1172///
1173/// # Returns
1174///
1175/// The total number of nodes (including the root)
1176///
1177/// # Example
1178///
1179/// ```rust
1180/// use ipfrs_core::Ipld;
1181/// use ipfrs_core::dag::count_nodes;
1182///
1183/// let ipld = Ipld::List(vec![
1184///     Ipld::Integer(1),
1185///     Ipld::Integer(2),
1186///     Ipld::List(vec![Ipld::Integer(3)]),
1187/// ]);
1188///
1189/// assert_eq!(count_nodes(&ipld), 5); // List + 2 ints + inner list + 1 int
1190/// ```
1191pub fn count_nodes(ipld: &Ipld) -> usize {
1192    match ipld {
1193        Ipld::List(items) => 1 + items.iter().map(count_nodes).sum::<usize>(),
1194        Ipld::Map(map) => 1 + map.values().map(count_nodes).sum::<usize>(),
1195        _ => 1,
1196    }
1197}
1198
1199/// Get the maximum depth of a DAG
1200///
1201/// Returns the length of the longest path from the root to a leaf node.
1202///
1203/// # Arguments
1204///
1205/// * `ipld` - The DAG to measure
1206///
1207/// # Returns
1208///
1209/// The maximum depth (0 for leaf nodes, 1+ for containers)
1210///
1211/// # Example
1212///
1213/// ```rust
1214/// use ipfrs_core::Ipld;
1215/// use ipfrs_core::dag::dag_depth;
1216///
1217/// let ipld = Ipld::List(vec![
1218///     Ipld::Integer(1),
1219///     Ipld::List(vec![
1220///         Ipld::Integer(2),
1221///         Ipld::List(vec![Ipld::Integer(3)]),
1222///     ]),
1223/// ]);
1224///
1225/// assert_eq!(dag_depth(&ipld), 4);
1226/// ```
1227pub fn dag_depth(ipld: &Ipld) -> usize {
1228    match ipld {
1229        Ipld::List(items) => {
1230            if items.is_empty() {
1231                1
1232            } else {
1233                1 + items.iter().map(dag_depth).max().unwrap_or(0)
1234            }
1235        }
1236        Ipld::Map(map) => {
1237            if map.is_empty() {
1238                1
1239            } else {
1240                1 + map.values().map(dag_depth).max().unwrap_or(0)
1241            }
1242        }
1243        _ => 1,
1244    }
1245}
1246
1247/// Find all leaf nodes in a DAG
1248///
1249/// Returns all nodes that have no children (i.e., not List or Map, or empty containers).
1250///
1251/// # Arguments
1252///
1253/// * `ipld` - The DAG to search
1254///
1255/// # Returns
1256///
1257/// A vector of all leaf nodes
1258///
1259/// # Example
1260///
1261/// ```rust
1262/// use ipfrs_core::Ipld;
1263/// use ipfrs_core::dag::find_leaves;
1264///
1265/// let ipld = Ipld::List(vec![
1266///     Ipld::Integer(1),
1267///     Ipld::String("leaf".to_string()),
1268///     Ipld::List(vec![Ipld::Integer(2)]),
1269/// ]);
1270///
1271/// let leaves = find_leaves(&ipld);
1272/// assert_eq!(leaves.len(), 3); // Two integers and one string
1273/// ```
1274pub fn find_leaves(ipld: &Ipld) -> Vec<Ipld> {
1275    match ipld {
1276        Ipld::List(items) => {
1277            if items.is_empty() {
1278                vec![ipld.clone()]
1279            } else {
1280                items.iter().flat_map(find_leaves).collect()
1281            }
1282        }
1283        Ipld::Map(map) => {
1284            if map.is_empty() {
1285                vec![ipld.clone()]
1286            } else {
1287                map.values().flat_map(find_leaves).collect()
1288            }
1289        }
1290        _ => vec![ipld.clone()],
1291    }
1292}
1293
1294#[cfg(test)]
1295mod tests {
1296    use super::*;
1297    use crate::cid::CidBuilder;
1298    use std::collections::BTreeMap;
1299
1300    #[test]
1301    fn test_extract_links() {
1302        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1303        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1304
1305        let mut map = BTreeMap::new();
1306        map.insert("link1".to_string(), Ipld::link(cid1));
1307        map.insert("link2".to_string(), Ipld::link(cid2));
1308        map.insert("data".to_string(), Ipld::String("hello".to_string()));
1309
1310        let ipld = Ipld::Map(map);
1311        let links = extract_links(&ipld);
1312
1313        assert_eq!(links.len(), 2);
1314        assert!(links.contains(&cid1));
1315        assert!(links.contains(&cid2));
1316    }
1317
1318    #[test]
1319    fn test_collect_all_links_nested() {
1320        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1321        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1322        let cid3 = CidBuilder::new().build(b"test3").unwrap();
1323
1324        let mut inner = BTreeMap::new();
1325        inner.insert("deep_link".to_string(), Ipld::link(cid3));
1326
1327        let mut outer = BTreeMap::new();
1328        outer.insert("link1".to_string(), Ipld::link(cid1));
1329        outer.insert("link2".to_string(), Ipld::link(cid2));
1330        outer.insert("nested".to_string(), Ipld::Map(inner));
1331
1332        let ipld = Ipld::Map(outer);
1333        let links = collect_all_links(&ipld);
1334
1335        assert_eq!(links.len(), 3);
1336        assert!(links.contains(&cid1));
1337        assert!(links.contains(&cid2));
1338        assert!(links.contains(&cid3));
1339    }
1340
1341    #[test]
1342    fn test_collect_unique_links() {
1343        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1344        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1345
1346        // Create structure with duplicate links
1347        let list = vec![
1348            Ipld::link(cid1),
1349            Ipld::link(cid2),
1350            Ipld::link(cid1), // Duplicate
1351        ];
1352
1353        let ipld = Ipld::List(list);
1354        let unique = collect_unique_links(&ipld);
1355
1356        assert_eq!(unique.len(), 2);
1357    }
1358
1359    #[test]
1360    fn test_dag_stats() {
1361        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1362        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1363
1364        let mut map = BTreeMap::new();
1365        map.insert("link1".to_string(), Ipld::link(cid1));
1366        map.insert("link2".to_string(), Ipld::link(cid2));
1367        map.insert("dup_link".to_string(), Ipld::link(cid1)); // Duplicate
1368
1369        let ipld = Ipld::Map(map);
1370        let stats = DagStats::from_ipld(&ipld);
1371
1372        assert_eq!(stats.unique_cids, 2);
1373        assert_eq!(stats.total_links, 3);
1374        assert!(stats.deduplication_ratio() > 0.0);
1375    }
1376
1377    #[test]
1378    fn test_dag_stats_nested() {
1379        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1380        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1381
1382        let mut inner = BTreeMap::new();
1383        inner.insert("deep".to_string(), Ipld::link(cid2));
1384
1385        let mut outer = BTreeMap::new();
1386        outer.insert("link".to_string(), Ipld::link(cid1));
1387        outer.insert("nested".to_string(), Ipld::Map(inner));
1388
1389        let ipld = Ipld::Map(outer);
1390        let stats = DagStats::from_ipld(&ipld);
1391
1392        assert_eq!(stats.unique_cids, 2);
1393        assert_eq!(stats.max_depth, 2);
1394    }
1395
1396    #[test]
1397    fn test_is_dag() {
1398        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1399
1400        let mut map = BTreeMap::new();
1401        map.insert("link".to_string(), Ipld::link(cid1));
1402
1403        let ipld = Ipld::Map(map);
1404        assert!(is_dag(&ipld));
1405    }
1406
1407    #[test]
1408    fn test_find_paths_to_cid() {
1409        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1410        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1411
1412        let mut inner = BTreeMap::new();
1413        inner.insert("target".to_string(), Ipld::link(cid1));
1414
1415        let mut outer = BTreeMap::new();
1416        outer.insert("other".to_string(), Ipld::link(cid2));
1417        outer.insert("nested".to_string(), Ipld::Map(inner));
1418
1419        let ipld = Ipld::Map(outer);
1420        let paths = find_paths_to_cid(&ipld, &cid1);
1421
1422        assert_eq!(paths.len(), 1);
1423        assert_eq!(paths[0], vec!["nested".to_string(), "target".to_string()]);
1424    }
1425
1426    #[test]
1427    fn test_traverse_bfs() {
1428        let list = vec![
1429            Ipld::Integer(1),
1430            Ipld::Integer(2),
1431            Ipld::List(vec![Ipld::Integer(3), Ipld::Integer(4)]),
1432        ];
1433        let ipld = Ipld::List(list);
1434
1435        let nodes = traverse_bfs(&ipld);
1436        assert_eq!(nodes.len(), 6); // Root + 3 direct children (1, 2, list) + 2 nested (3, 4)
1437    }
1438
1439    #[test]
1440    fn test_traverse_dfs() {
1441        let list = vec![
1442            Ipld::Integer(1),
1443            Ipld::List(vec![Ipld::Integer(2)]),
1444            Ipld::Integer(3),
1445        ];
1446        let ipld = Ipld::List(list);
1447
1448        let nodes = traverse_dfs(&ipld);
1449        assert_eq!(nodes.len(), 5); // Root + all children
1450    }
1451
1452    #[test]
1453    fn test_empty_ipld() {
1454        let ipld = Ipld::Null;
1455        let links = extract_links(&ipld);
1456        assert!(links.is_empty());
1457
1458        let stats = DagStats::from_ipld(&ipld);
1459        assert_eq!(stats.unique_cids, 0);
1460        assert_eq!(stats.total_links, 0);
1461    }
1462
1463    #[test]
1464    fn test_deduplication_ratio() {
1465        // No duplication
1466        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1467        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1468
1469        let list = vec![Ipld::link(cid1), Ipld::link(cid2)];
1470        let ipld = Ipld::List(list);
1471        let stats = DagStats::from_ipld(&ipld);
1472        assert_eq!(stats.deduplication_ratio(), 0.0);
1473
1474        // 50% duplication
1475        let list = vec![
1476            Ipld::link(cid1),
1477            Ipld::link(cid2),
1478            Ipld::link(cid1),
1479            Ipld::link(cid2),
1480        ];
1481        let ipld = Ipld::List(list);
1482        let stats = DagStats::from_ipld(&ipld);
1483        assert_eq!(stats.deduplication_ratio(), 0.5);
1484    }
1485
1486    #[test]
1487    fn test_dag_metrics() {
1488        let cid = CidBuilder::new().build(b"test").unwrap();
1489        let mut map = BTreeMap::new();
1490        map.insert("link1".to_string(), Ipld::link(cid));
1491        map.insert("link2".to_string(), Ipld::link(cid));
1492        let ipld = Ipld::Map(map);
1493
1494        let metrics = DagMetrics::from_ipld(&ipld);
1495        assert_eq!(metrics.max_branching_factor, 2);
1496        assert!(metrics.width >= 1);
1497        assert!(metrics.total_nodes > 0);
1498    }
1499
1500    #[test]
1501    fn test_dag_metrics_nested() {
1502        let cid = CidBuilder::new().build(b"test").unwrap();
1503
1504        let mut inner = BTreeMap::new();
1505        inner.insert("deep1".to_string(), Ipld::Integer(1));
1506        inner.insert("deep2".to_string(), Ipld::Integer(2));
1507        inner.insert("deep3".to_string(), Ipld::link(cid));
1508
1509        let mut outer = BTreeMap::new();
1510        outer.insert("nested".to_string(), Ipld::Map(inner));
1511        outer.insert("value".to_string(), Ipld::String("test".to_string()));
1512
1513        let ipld = Ipld::Map(outer);
1514        let metrics = DagMetrics::from_ipld(&ipld);
1515
1516        assert_eq!(metrics.max_branching_factor, 3);
1517        assert!(metrics.avg_branching_factor > 0.0);
1518        assert!(metrics.avg_leaf_depth > 0.0);
1519    }
1520
1521    #[test]
1522    fn test_topological_sort() {
1523        let cid1 = CidBuilder::new().build(b"leaf1").unwrap();
1524        let cid2 = CidBuilder::new().build(b"leaf2").unwrap();
1525
1526        let mut map = BTreeMap::new();
1527        map.insert("child1".to_string(), Ipld::link(cid1));
1528        map.insert("child2".to_string(), Ipld::link(cid2));
1529        let ipld = Ipld::Map(map);
1530
1531        let sorted = topological_sort(&ipld);
1532        assert_eq!(sorted.len(), 2);
1533        assert!(sorted.contains(&cid1));
1534        assert!(sorted.contains(&cid2));
1535    }
1536
1537    #[test]
1538    fn test_topological_sort_with_duplicates() {
1539        let cid1 = CidBuilder::new().build(b"test").unwrap();
1540
1541        let list = vec![Ipld::link(cid1), Ipld::link(cid1), Ipld::link(cid1)];
1542        let ipld = Ipld::List(list);
1543
1544        let sorted = topological_sort(&ipld);
1545        // Should deduplicate
1546        assert_eq!(sorted.len(), 1);
1547        assert_eq!(sorted[0], cid1);
1548    }
1549
1550    #[test]
1551    fn test_subgraph_size() {
1552        let ipld = Ipld::List(vec![
1553            Ipld::Integer(1),
1554            Ipld::Integer(2),
1555            Ipld::List(vec![Ipld::Integer(3)]),
1556        ]);
1557
1558        let size = subgraph_size(&ipld);
1559        assert_eq!(size, 5); // Root list + 2 integers + nested list + nested integer
1560    }
1561
1562    #[test]
1563    fn test_subgraph_size_single_node() {
1564        let ipld = Ipld::Integer(42);
1565        let size = subgraph_size(&ipld);
1566        assert_eq!(size, 1);
1567    }
1568
1569    #[test]
1570    fn test_dag_fanout_by_level() {
1571        let ipld = Ipld::List(vec![
1572            Ipld::Integer(1),
1573            Ipld::List(vec![Ipld::Integer(2), Ipld::Integer(3)]),
1574        ]);
1575
1576        let fanout = dag_fanout_by_level(&ipld);
1577        assert!(fanout.len() >= 2);
1578        assert_eq!(fanout[0], 2); // Root has 2 children
1579    }
1580
1581    #[test]
1582    fn test_dag_fanout_empty() {
1583        let ipld = Ipld::Integer(42);
1584        let fanout = dag_fanout_by_level(&ipld);
1585        // Scalar value has no children
1586        assert!(fanout.is_empty() || fanout.iter().all(|&f| f == 0));
1587    }
1588
1589    #[test]
1590    fn test_count_links_by_depth() {
1591        let cid = CidBuilder::new().build(b"test").unwrap();
1592        let mut map = BTreeMap::new();
1593        map.insert("link".to_string(), Ipld::link(cid));
1594        let ipld = Ipld::Map(map);
1595
1596        let counts = count_links_by_depth(&ipld);
1597        assert!(!counts.is_empty());
1598        // Links inside map values are at depth 1
1599        assert_eq!(counts[1], 1);
1600    }
1601
1602    #[test]
1603    fn test_count_links_by_depth_nested() {
1604        let cid1 = CidBuilder::new().build(b"test1").unwrap();
1605        let cid2 = CidBuilder::new().build(b"test2").unwrap();
1606
1607        let mut inner = BTreeMap::new();
1608        inner.insert("deep".to_string(), Ipld::link(cid2));
1609
1610        let mut outer = BTreeMap::new();
1611        outer.insert("shallow".to_string(), Ipld::link(cid1));
1612        outer.insert("nested".to_string(), Ipld::Map(inner));
1613
1614        let ipld = Ipld::Map(outer);
1615        let counts = count_links_by_depth(&ipld);
1616
1617        assert!(counts.len() >= 2);
1618        // Links in outer map are at depth 1
1619        assert_eq!(counts[1], 1); // One link at depth 1 (shallow)
1620                                  // Links in inner map are at depth 2
1621        assert_eq!(counts[2], 1); // One link at depth 2 (deep)
1622    }
1623
1624    #[test]
1625    fn test_filter_dag() {
1626        let ipld = Ipld::List(vec![
1627            Ipld::Integer(1),
1628            Ipld::Integer(2),
1629            Ipld::String("hello".to_string()),
1630        ]);
1631
1632        // Keep only integers and lists
1633        let filtered = filter_dag(&ipld, &|node| {
1634            matches!(node, Ipld::Integer(_) | Ipld::List(_))
1635        });
1636        assert!(filtered.is_some());
1637
1638        if let Some(Ipld::List(items)) = filtered {
1639            assert_eq!(items.len(), 2); // Only the two integers
1640        } else {
1641            panic!("Expected filtered list");
1642        }
1643    }
1644
1645    #[test]
1646    fn test_filter_dag_all_filtered() {
1647        let ipld = Ipld::Integer(42);
1648
1649        // Filter out everything
1650        let filtered = filter_dag(&ipld, &|_| false);
1651        assert!(filtered.is_none());
1652    }
1653
1654    #[test]
1655    fn test_map_dag() {
1656        let ipld = Ipld::Integer(42);
1657
1658        // Double all integers
1659        let transformed = map_dag(&ipld, &|node| match node {
1660            Ipld::Integer(n) => Ipld::Integer(n * 2),
1661            other => other.clone(),
1662        });
1663
1664        assert_eq!(transformed, Ipld::Integer(84));
1665    }
1666
1667    #[test]
1668    fn test_map_dag_nested() {
1669        let ipld = Ipld::List(vec![Ipld::Integer(1), Ipld::Integer(2)]);
1670
1671        // Double all integers
1672        let transformed = map_dag(&ipld, &|node| match node {
1673            Ipld::Integer(n) => Ipld::Integer(n * 2),
1674            other => other.clone(),
1675        });
1676
1677        if let Ipld::List(items) = transformed {
1678            assert_eq!(items[0], Ipld::Integer(2));
1679            assert_eq!(items[1], Ipld::Integer(4));
1680        } else {
1681            panic!("Expected list");
1682        }
1683    }
1684
1685    #[test]
1686    fn test_map_dag_preserve_structure() {
1687        let mut map = BTreeMap::new();
1688        map.insert("a".to_string(), Ipld::Integer(1));
1689        map.insert("b".to_string(), Ipld::Integer(2));
1690        let ipld = Ipld::Map(map);
1691
1692        // Transform preserves map structure
1693        let transformed = map_dag(&ipld, &|node| match node {
1694            Ipld::Integer(n) => Ipld::Integer(n + 10),
1695            other => other.clone(),
1696        });
1697
1698        if let Ipld::Map(result_map) = transformed {
1699            assert_eq!(result_map.get("a"), Some(&Ipld::Integer(11)));
1700            assert_eq!(result_map.get("b"), Some(&Ipld::Integer(12)));
1701        } else {
1702            panic!("Expected map");
1703        }
1704    }
1705}