Skip to main content

clayers_spec/
connectivity.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use petgraph::Direction;
5use petgraph::graph::{DiGraph, NodeIndex};
6use petgraph::visit::EdgeRef;
7
8use crate::namespace;
9
10/// Graph metrics from connectivity analysis.
11#[derive(Debug)]
12pub struct ConnectivityReport {
13    pub spec_name: String,
14    pub node_count: usize,
15    pub edge_count: usize,
16    pub density: f64,
17    pub components: Vec<Vec<String>>,
18    pub isolated_nodes: Vec<String>,
19    pub hub_nodes: Vec<HubNode>,
20    pub bridge_nodes: Vec<BridgeNode>,
21    pub cycles: Vec<Cycle>,
22    pub acyclic_violations: usize,
23    pub relation_type_counts: HashMap<String, usize>,
24}
25
26#[derive(Debug)]
27pub struct HubNode {
28    pub id: String,
29    pub in_degree: usize,
30    pub out_degree: usize,
31    pub total_degree: usize,
32}
33
34#[derive(Debug)]
35pub struct BridgeNode {
36    pub id: String,
37    pub centrality: f64,
38}
39
40#[derive(Debug)]
41pub struct Cycle {
42    pub nodes: Vec<String>,
43    pub edge_types: HashSet<String>,
44    pub has_acyclic_violation: bool,
45}
46
47/// Analyze the connectivity of a spec's nodes and relations.
48///
49/// Builds a directed graph from all elements with `@id` attributes and
50/// all `rel:relation` elements. Reports components, isolated nodes,
51/// hub nodes, bridge nodes, and cycles.
52///
53/// # Errors
54///
55/// Returns an error if spec files cannot be read.
56pub fn analyze_connectivity(spec_dir: &Path) -> Result<ConnectivityReport, crate::Error> {
57    let schema_dir = crate::discovery::find_schema_dir(spec_dir);
58    let acyclic_types = if let Some(ref sd) = schema_dir {
59        crate::schema::discover_acyclic_types(sd)?
60    } else {
61        HashSet::new()
62    };
63
64    let index_files = crate::discovery::find_index_files(spec_dir)?;
65    let spec_name = spec_dir
66        .file_name()
67        .map_or_else(|| "unknown".into(), |n| n.to_string_lossy().into_owned());
68
69    if index_files.is_empty() {
70        return Ok(ConnectivityReport {
71            spec_name,
72            node_count: 0,
73            edge_count: 0,
74            density: 0.0,
75            components: vec![],
76            isolated_nodes: vec![],
77            hub_nodes: vec![],
78            bridge_nodes: vec![],
79            cycles: vec![],
80            acyclic_violations: 0,
81            relation_type_counts: HashMap::new(),
82        });
83    }
84
85    let mut all_nodes: HashMap<String, String> = HashMap::new(); // id -> tag
86    let mut all_relations: Vec<Relation> = Vec::new();
87
88    for index_path in &index_files {
89        let file_paths = crate::discovery::discover_spec_files(index_path)?;
90        collect_nodes_and_relations(&file_paths, &mut all_nodes, &mut all_relations)?;
91    }
92
93    // Build petgraph
94    let (graph, _id_to_idx, idx_to_id) = build_graph(&all_nodes, &all_relations);
95
96    let node_count = graph.node_count();
97    let edge_count = graph.edge_count();
98    #[allow(clippy::cast_precision_loss)]
99    let density = if node_count > 1 {
100        edge_count as f64 / (node_count as f64 * (node_count as f64 - 1.0))
101    } else {
102        0.0
103    };
104
105    // Connected components (weakly connected)
106    let components = weakly_connected_components(&graph, &idx_to_id);
107
108    // Isolated nodes
109    let isolated_nodes = find_isolated_nodes(&graph, &idx_to_id);
110
111    // Hub nodes (top 5 by total degree)
112    let hub_nodes = find_hub_nodes(&graph, &idx_to_id, 5);
113
114    // Bridge nodes (betweenness centrality)
115    let bridge_nodes = find_bridge_nodes(&graph, &idx_to_id, 5);
116
117    // Cycles
118    let (cycles, acyclic_violations) = find_cycles(&graph, &idx_to_id, &acyclic_types);
119
120    // Relation type distribution
121    let mut relation_type_counts: HashMap<String, usize> = HashMap::new();
122    for rel in &all_relations {
123        if rel.to_spec.is_none() {
124            *relation_type_counts
125                .entry(rel.rel_type.clone())
126                .or_insert(0) += 1;
127        }
128    }
129
130    Ok(ConnectivityReport {
131        spec_name,
132        node_count,
133        edge_count,
134        density,
135        components,
136        isolated_nodes,
137        hub_nodes,
138        bridge_nodes,
139        cycles,
140        acyclic_violations,
141        relation_type_counts,
142    })
143}
144
145
146struct Relation {
147    rel_type: String,
148    from: String,
149    to: String,
150    to_spec: Option<String>,
151}
152
153fn collect_nodes_and_relations(
154    file_paths: &[impl AsRef<Path>],
155    nodes: &mut HashMap<String, String>,
156    relations: &mut Vec<Relation>,
157) -> Result<(), crate::Error> {
158    for file_path in file_paths {
159        let content = std::fs::read_to_string(file_path.as_ref())?;
160        let mut xot = xot::Xot::new();
161        let doc = xot.parse(&content).map_err(xot::Error::from)?;
162        let root = xot.document_element(doc)?;
163
164        let id_attr = xot.add_name("id");
165        let xml_ns = xot.add_namespace(namespace::XML);
166        let xml_id_attr = xot.add_name_ns("id", xml_ns);
167        let relation_ns = xot.add_namespace(namespace::RELATION);
168        let relation_tag = xot.add_name_ns("relation", relation_ns);
169        let art_ns = xot.add_namespace(namespace::ARTIFACT);
170        let mapping_tag = xot.add_name_ns("mapping", art_ns);
171        let revision_ns = xot.add_namespace(namespace::REVISION);
172        let revision_tag = xot.add_name_ns("revision", revision_ns);
173        let type_attr = xot.add_name("type");
174        let from_attr = xot.add_name("from");
175        let to_attr = xot.add_name("to");
176        let to_spec_attr = xot.add_name("to-spec");
177
178        collect_from_tree(
179            &xot,
180            root,
181            id_attr,
182            xml_id_attr,
183            relation_tag,
184            mapping_tag,
185            revision_tag,
186            type_attr,
187            from_attr,
188            to_attr,
189            to_spec_attr,
190            nodes,
191            relations,
192        );
193    }
194    Ok(())
195}
196
197#[allow(clippy::too_many_arguments)]
198fn collect_from_tree(
199    xot: &xot::Xot,
200    node: xot::Node,
201    id_attr: xot::NameId,
202    xml_id_attr: xot::NameId,
203    relation_tag: xot::NameId,
204    mapping_tag: xot::NameId,
205    revision_tag: xot::NameId,
206    type_attr: xot::NameId,
207    from_attr: xot::NameId,
208    to_attr: xot::NameId,
209    to_spec_attr: xot::NameId,
210    nodes: &mut HashMap<String, String>,
211    relations: &mut Vec<Relation>,
212) {
213    if xot.is_element(node) {
214        let name = xot.element(node).map(xot::Element::name);
215
216        // Skip art:mapping and rev:revision from graph nodes
217        if name != Some(mapping_tag) && name != Some(revision_tag) {
218            // Check bare @id
219            if let Some(id) = xot.get_attribute(node, id_attr) {
220                let tag = name
221                    .map(|n| xot.name_ns_str(n).0.to_string())
222                    .unwrap_or_default();
223                nodes.insert(id.to_string(), tag.clone());
224            }
225            // Check xml:id (W3C standard, used by XMI/UML elements)
226            if let Some(xml_id) = xot.get_attribute(node, xml_id_attr) {
227                let tag = name
228                    .map(|n| xot.name_ns_str(n).0.to_string())
229                    .unwrap_or_default();
230                nodes.insert(xml_id.to_string(), tag);
231            }
232        }
233
234        if name == Some(relation_tag) {
235            let rel = Relation {
236                rel_type: xot
237                    .get_attribute(node, type_attr)
238                    .unwrap_or("")
239                    .to_string(),
240                from: xot
241                    .get_attribute(node, from_attr)
242                    .unwrap_or("")
243                    .to_string(),
244                to: xot
245                    .get_attribute(node, to_attr)
246                    .unwrap_or("")
247                    .to_string(),
248                to_spec: xot
249                    .get_attribute(node, to_spec_attr)
250                    .map(String::from),
251            };
252            relations.push(rel);
253        }
254    }
255    for child in xot.children(node) {
256        collect_from_tree(
257            xot,
258            child,
259            id_attr,
260            xml_id_attr,
261            relation_tag,
262            mapping_tag,
263            revision_tag,
264            type_attr,
265            from_attr,
266            to_attr,
267            to_spec_attr,
268            nodes,
269            relations,
270        );
271    }
272}
273
274fn build_graph(
275    nodes: &HashMap<String, String>,
276    relations: &[Relation],
277) -> (
278    DiGraph<String, String>,
279    HashMap<String, NodeIndex>,
280    HashMap<NodeIndex, String>,
281) {
282    let mut graph = DiGraph::new();
283    let mut id_to_idx: HashMap<String, NodeIndex> = HashMap::new();
284    let mut idx_to_id: HashMap<NodeIndex, String> = HashMap::new();
285
286    for id in nodes.keys() {
287        let idx = graph.add_node(id.clone());
288        id_to_idx.insert(id.clone(), idx);
289        idx_to_id.insert(idx, id.clone());
290    }
291
292    for rel in relations {
293        if rel.to_spec.is_some() {
294            continue;
295        }
296        if let (Some(&from_idx), Some(&to_idx)) = (id_to_idx.get(&rel.from), id_to_idx.get(&rel.to))
297        {
298            graph.add_edge(from_idx, to_idx, rel.rel_type.clone());
299        }
300    }
301
302    (graph, id_to_idx, idx_to_id)
303}
304
305fn weakly_connected_components(
306    graph: &DiGraph<String, String>,
307    idx_to_id: &HashMap<NodeIndex, String>,
308) -> Vec<Vec<String>> {
309    let undirected = petgraph::algo::condensation(graph.clone(), false);
310    // Use Tarjan-based approach: convert to undirected and find components
311    let mut visited = HashSet::new();
312    let mut components = Vec::new();
313
314    for node_idx in graph.node_indices() {
315        if visited.contains(&node_idx) {
316            continue;
317        }
318        let mut component = Vec::new();
319        let mut stack = vec![node_idx];
320        while let Some(current) = stack.pop() {
321            if visited.contains(&current) {
322                continue;
323            }
324            visited.insert(current);
325            if let Some(id) = idx_to_id.get(&current) {
326                component.push(id.clone());
327            }
328            // Follow both outgoing and incoming edges (weakly connected)
329            for neighbor in graph.neighbors_directed(current, Direction::Outgoing) {
330                if !visited.contains(&neighbor) {
331                    stack.push(neighbor);
332                }
333            }
334            for neighbor in graph.neighbors_directed(current, Direction::Incoming) {
335                if !visited.contains(&neighbor) {
336                    stack.push(neighbor);
337                }
338            }
339        }
340        component.sort();
341        components.push(component);
342    }
343
344    let _ = undirected; // suppress unused warning
345    components.sort_by_key(|c| std::cmp::Reverse(c.len()));
346    components
347}
348
349fn find_isolated_nodes(
350    graph: &DiGraph<String, String>,
351    idx_to_id: &HashMap<NodeIndex, String>,
352) -> Vec<String> {
353    let mut isolated = Vec::new();
354    for node_idx in graph.node_indices() {
355        let in_d = graph
356            .neighbors_directed(node_idx, Direction::Incoming)
357            .count();
358        let out_d = graph
359            .neighbors_directed(node_idx, Direction::Outgoing)
360            .count();
361        if in_d == 0
362            && out_d == 0
363            && let Some(id) = idx_to_id.get(&node_idx)
364        {
365            isolated.push(id.clone());
366        }
367    }
368    isolated.sort();
369    isolated
370}
371
372fn find_hub_nodes(
373    graph: &DiGraph<String, String>,
374    idx_to_id: &HashMap<NodeIndex, String>,
375    top_n: usize,
376) -> Vec<HubNode> {
377    let mut degrees: Vec<_> = graph
378        .node_indices()
379        .filter_map(|idx| {
380            let id = idx_to_id.get(&idx)?;
381            let in_d = graph.neighbors_directed(idx, Direction::Incoming).count();
382            let out_d = graph.neighbors_directed(idx, Direction::Outgoing).count();
383            Some(HubNode {
384                id: id.clone(),
385                in_degree: in_d,
386                out_degree: out_d,
387                total_degree: in_d + out_d,
388            })
389        })
390        .collect();
391
392    degrees.sort_by(|a, b| b.total_degree.cmp(&a.total_degree));
393    degrees.truncate(top_n);
394    degrees
395}
396
397fn find_bridge_nodes(
398    graph: &DiGraph<String, String>,
399    idx_to_id: &HashMap<NodeIndex, String>,
400    top_n: usize,
401) -> Vec<BridgeNode> {
402    // Simple betweenness centrality approximation
403    let nodes: Vec<NodeIndex> = graph.node_indices().collect();
404    let n = nodes.len();
405    let mut centrality: HashMap<NodeIndex, f64> = HashMap::new();
406
407    for &source in &nodes {
408        // BFS from source
409        let mut stack = Vec::new();
410        let mut predecessors: HashMap<NodeIndex, Vec<NodeIndex>> = HashMap::new();
411        let mut sigma: HashMap<NodeIndex, f64> = HashMap::new();
412        let mut dist: HashMap<NodeIndex, i64> = HashMap::new();
413
414        sigma.insert(source, 1.0);
415        dist.insert(source, 0);
416        let mut queue = std::collections::VecDeque::new();
417        queue.push_back(source);
418
419        while let Some(v) = queue.pop_front() {
420            stack.push(v);
421            let d_v = dist[&v];
422            for neighbor in graph.neighbors_directed(v, Direction::Outgoing) {
423                if let std::collections::hash_map::Entry::Vacant(entry) = dist.entry(neighbor) {
424                    queue.push_back(neighbor);
425                    entry.insert(d_v + 1);
426                }
427                if dist[&neighbor] == d_v + 1 {
428                    *sigma.entry(neighbor).or_insert(0.0) += sigma[&v];
429                    predecessors.entry(neighbor).or_default().push(v);
430                }
431            }
432        }
433
434        let mut delta: HashMap<NodeIndex, f64> = HashMap::new();
435        while let Some(w) = stack.pop() {
436            if let Some(preds) = predecessors.get(&w) {
437                for &v in preds {
438                    let d = sigma.get(&v).copied().unwrap_or(0.0)
439                        / sigma.get(&w).copied().unwrap_or(1.0)
440                        * (1.0 + delta.get(&w).copied().unwrap_or(0.0));
441                    *delta.entry(v).or_insert(0.0) += d;
442                }
443            }
444            if w != source {
445                *centrality.entry(w).or_insert(0.0) += delta.get(&w).copied().unwrap_or(0.0);
446            }
447        }
448    }
449
450    // Normalize
451    #[allow(clippy::cast_precision_loss)]
452    let norm = if n > 2 {
453        1.0 / ((n - 1) as f64 * (n - 2) as f64)
454    } else {
455        1.0
456    };
457
458    let mut bridges: Vec<BridgeNode> = centrality
459        .into_iter()
460        .filter(|(_, c)| *c > 0.0)
461        .filter_map(|(idx, c)| {
462            let id = idx_to_id.get(&idx)?;
463            Some(BridgeNode {
464                id: id.clone(),
465                centrality: c * norm,
466            })
467        })
468        .collect();
469
470    bridges.sort_by(|a, b| {
471        b.centrality
472            .partial_cmp(&a.centrality)
473            .unwrap_or(std::cmp::Ordering::Equal)
474    });
475    bridges.truncate(top_n);
476    bridges
477}
478
479fn find_cycles(
480    graph: &DiGraph<String, String>,
481    idx_to_id: &HashMap<NodeIndex, String>,
482    acyclic_types: &HashSet<String>,
483) -> (Vec<Cycle>, usize) {
484    // Use petgraph's SCC to find cycles
485    let sccs = petgraph::algo::tarjan_scc(graph);
486    let mut cycles = Vec::new();
487    let mut violations = 0;
488
489    for scc in &sccs {
490        if scc.len() <= 1 {
491            // Check self-loop
492            if scc.len() == 1 {
493                let idx = scc[0];
494                if graph.contains_edge(idx, idx) {
495                    let node_ids: Vec<String> = scc
496                        .iter()
497                        .filter_map(|i| idx_to_id.get(i).cloned())
498                        .collect();
499                    let mut edge_types = HashSet::new();
500                    for e in graph.edges(idx) {
501                        if e.target() == idx {
502                            edge_types.insert(e.weight().clone());
503                        }
504                    }
505                    let has_violation = !edge_types.is_disjoint(acyclic_types);
506                    if has_violation {
507                        violations += 1;
508                    }
509                    cycles.push(Cycle {
510                        nodes: node_ids,
511                        edge_types,
512                        has_acyclic_violation: has_violation,
513                    });
514                }
515            }
516            continue;
517        }
518
519        // SCC with multiple nodes implies cycles
520        let node_ids: Vec<String> = scc
521            .iter()
522            .filter_map(|i| idx_to_id.get(i).cloned())
523            .collect();
524
525        let mut edge_types = HashSet::new();
526        let scc_set: HashSet<_> = scc.iter().copied().collect();
527        for &idx in scc {
528            for e in graph.edges(idx) {
529                if scc_set.contains(&e.target()) {
530                    edge_types.insert(e.weight().clone());
531                }
532            }
533        }
534
535        let has_violation = !edge_types.is_disjoint(acyclic_types);
536        if has_violation {
537            violations += 1;
538        }
539        cycles.push(Cycle {
540            nodes: node_ids,
541            edge_types,
542            has_acyclic_violation: has_violation,
543        });
544    }
545
546    (cycles, violations)
547}
548
549#[cfg(test)]
550mod tests {
551    use super::*;
552    use std::path::PathBuf;
553
554    fn spec_dir() -> PathBuf {
555        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
556            .join("../../clayers/clayers")
557            .canonicalize()
558            .expect("clayers/clayers/ not found")
559    }
560
561    #[test]
562    fn shipped_spec_has_sufficient_nodes_and_edges() {
563        let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
564        assert!(
565            report.node_count >= 50,
566            "expected 50+ nodes, got {}",
567            report.node_count
568        );
569        assert!(
570            report.edge_count >= 30,
571            "expected 30+ edges, got {}",
572            report.edge_count
573        );
574    }
575
576    #[test]
577    fn shipped_spec_reports_components() {
578        let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
579        assert!(
580            !report.components.is_empty(),
581            "should have at least one component"
582        );
583    }
584
585    #[test]
586    fn shipped_spec_identifies_isolated_nodes() {
587        let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
588        // The spec has some isolated nodes (plan items, criteria, etc.)
589        // Just verify we detect some
590        assert!(
591            report.isolated_nodes.len() >= 2,
592            "expected some isolated nodes, got {}",
593            report.isolated_nodes.len()
594        );
595    }
596
597    #[test]
598    fn shipped_spec_has_hub_nodes() {
599        let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
600        assert!(!report.hub_nodes.is_empty(), "should identify hub nodes");
601        // layered-architecture should be among the top hubs
602        let hub_ids: Vec<&str> = report.hub_nodes.iter().map(|h| h.id.as_str()).collect();
603        assert!(
604            hub_ids.contains(&"layered-architecture"),
605            "layered-architecture should be a hub, top hubs: {hub_ids:?}"
606        );
607    }
608
609    #[test]
610    fn shipped_spec_no_acyclic_violations() {
611        let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
612        assert_eq!(
613            report.acyclic_violations, 0,
614            "shipped spec should have no acyclic violations"
615        );
616    }
617
618    #[test]
619    fn synthetic_cycle_detected() {
620        let dir = tempfile::tempdir().expect("tempdir");
621        let index_xml = r#"<?xml version="1.0"?>
622<spec:clayers xmlns:spec="urn:clayers:spec"
623              xmlns:idx="urn:clayers:index"
624              xmlns:pr="urn:clayers:prose"
625              xmlns:rel="urn:clayers:relation">
626  <idx:file href="content.xml"/>
627</spec:clayers>"#;
628        std::fs::write(dir.path().join("index.xml"), index_xml).expect("write");
629
630        let content_xml = r#"<?xml version="1.0"?>
631<spec:clayers xmlns:spec="urn:clayers:spec"
632              xmlns:pr="urn:clayers:prose"
633              xmlns:rel="urn:clayers:relation"
634              spec:index="index.xml">
635  <pr:section id="a"><pr:title>A</pr:title></pr:section>
636  <pr:section id="b"><pr:title>B</pr:title></pr:section>
637  <rel:relation type="references" from="a" to="b"/>
638  <rel:relation type="references" from="b" to="a"/>
639</spec:clayers>"#;
640        std::fs::write(dir.path().join("content.xml"), content_xml).expect("write");
641
642        let report = analyze_connectivity(dir.path()).expect("analysis failed");
643        assert!(
644            !report.cycles.is_empty(),
645            "should detect the cycle between a and b"
646        );
647    }
648}