1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use petgraph::Direction;
5use petgraph::graph::{DiGraph, NodeIndex};
6use petgraph::visit::EdgeRef;
7
8use crate::namespace;
9
10#[derive(Debug)]
12pub struct ConnectivityReport {
13 pub spec_name: String,
14 pub node_count: usize,
15 pub edge_count: usize,
16 pub density: f64,
17 pub components: Vec<Vec<String>>,
18 pub isolated_nodes: Vec<String>,
19 pub hub_nodes: Vec<HubNode>,
20 pub bridge_nodes: Vec<BridgeNode>,
21 pub cycles: Vec<Cycle>,
22 pub acyclic_violations: usize,
23 pub relation_type_counts: HashMap<String, usize>,
24}
25
26#[derive(Debug)]
27pub struct HubNode {
28 pub id: String,
29 pub in_degree: usize,
30 pub out_degree: usize,
31 pub total_degree: usize,
32}
33
34#[derive(Debug)]
35pub struct BridgeNode {
36 pub id: String,
37 pub centrality: f64,
38}
39
40#[derive(Debug)]
41pub struct Cycle {
42 pub nodes: Vec<String>,
43 pub edge_types: HashSet<String>,
44 pub has_acyclic_violation: bool,
45}
46
47pub fn analyze_connectivity(spec_dir: &Path) -> Result<ConnectivityReport, crate::Error> {
57 let schema_dir = crate::discovery::find_schema_dir(spec_dir);
58 let acyclic_types = if let Some(ref sd) = schema_dir {
59 crate::schema::discover_acyclic_types(sd)?
60 } else {
61 HashSet::new()
62 };
63
64 let index_files = crate::discovery::find_index_files(spec_dir)?;
65 let spec_name = spec_dir
66 .file_name()
67 .map_or_else(|| "unknown".into(), |n| n.to_string_lossy().into_owned());
68
69 if index_files.is_empty() {
70 return Ok(ConnectivityReport {
71 spec_name,
72 node_count: 0,
73 edge_count: 0,
74 density: 0.0,
75 components: vec![],
76 isolated_nodes: vec![],
77 hub_nodes: vec![],
78 bridge_nodes: vec![],
79 cycles: vec![],
80 acyclic_violations: 0,
81 relation_type_counts: HashMap::new(),
82 });
83 }
84
85 let mut all_nodes: HashMap<String, String> = HashMap::new(); let mut all_relations: Vec<Relation> = Vec::new();
87
88 for index_path in &index_files {
89 let file_paths = crate::discovery::discover_spec_files(index_path)?;
90 collect_nodes_and_relations(&file_paths, &mut all_nodes, &mut all_relations)?;
91 }
92
93 let (graph, _id_to_idx, idx_to_id) = build_graph(&all_nodes, &all_relations);
95
96 let node_count = graph.node_count();
97 let edge_count = graph.edge_count();
98 #[allow(clippy::cast_precision_loss)]
99 let density = if node_count > 1 {
100 edge_count as f64 / (node_count as f64 * (node_count as f64 - 1.0))
101 } else {
102 0.0
103 };
104
105 let components = weakly_connected_components(&graph, &idx_to_id);
107
108 let isolated_nodes = find_isolated_nodes(&graph, &idx_to_id);
110
111 let hub_nodes = find_hub_nodes(&graph, &idx_to_id, 5);
113
114 let bridge_nodes = find_bridge_nodes(&graph, &idx_to_id, 5);
116
117 let (cycles, acyclic_violations) = find_cycles(&graph, &idx_to_id, &acyclic_types);
119
120 let mut relation_type_counts: HashMap<String, usize> = HashMap::new();
122 for rel in &all_relations {
123 if rel.to_spec.is_none() {
124 *relation_type_counts
125 .entry(rel.rel_type.clone())
126 .or_insert(0) += 1;
127 }
128 }
129
130 Ok(ConnectivityReport {
131 spec_name,
132 node_count,
133 edge_count,
134 density,
135 components,
136 isolated_nodes,
137 hub_nodes,
138 bridge_nodes,
139 cycles,
140 acyclic_violations,
141 relation_type_counts,
142 })
143}
144
145
146struct Relation {
147 rel_type: String,
148 from: String,
149 to: String,
150 to_spec: Option<String>,
151}
152
153fn collect_nodes_and_relations(
154 file_paths: &[impl AsRef<Path>],
155 nodes: &mut HashMap<String, String>,
156 relations: &mut Vec<Relation>,
157) -> Result<(), crate::Error> {
158 for file_path in file_paths {
159 let content = std::fs::read_to_string(file_path.as_ref())?;
160 let mut xot = xot::Xot::new();
161 let doc = xot.parse(&content).map_err(xot::Error::from)?;
162 let root = xot.document_element(doc)?;
163
164 let id_attr = xot.add_name("id");
165 let xml_ns = xot.add_namespace(namespace::XML);
166 let xml_id_attr = xot.add_name_ns("id", xml_ns);
167 let relation_ns = xot.add_namespace(namespace::RELATION);
168 let relation_tag = xot.add_name_ns("relation", relation_ns);
169 let art_ns = xot.add_namespace(namespace::ARTIFACT);
170 let mapping_tag = xot.add_name_ns("mapping", art_ns);
171 let revision_ns = xot.add_namespace(namespace::REVISION);
172 let revision_tag = xot.add_name_ns("revision", revision_ns);
173 let type_attr = xot.add_name("type");
174 let from_attr = xot.add_name("from");
175 let to_attr = xot.add_name("to");
176 let to_spec_attr = xot.add_name("to-spec");
177
178 collect_from_tree(
179 &xot,
180 root,
181 id_attr,
182 xml_id_attr,
183 relation_tag,
184 mapping_tag,
185 revision_tag,
186 type_attr,
187 from_attr,
188 to_attr,
189 to_spec_attr,
190 nodes,
191 relations,
192 );
193 }
194 Ok(())
195}
196
197#[allow(clippy::too_many_arguments)]
198fn collect_from_tree(
199 xot: &xot::Xot,
200 node: xot::Node,
201 id_attr: xot::NameId,
202 xml_id_attr: xot::NameId,
203 relation_tag: xot::NameId,
204 mapping_tag: xot::NameId,
205 revision_tag: xot::NameId,
206 type_attr: xot::NameId,
207 from_attr: xot::NameId,
208 to_attr: xot::NameId,
209 to_spec_attr: xot::NameId,
210 nodes: &mut HashMap<String, String>,
211 relations: &mut Vec<Relation>,
212) {
213 if xot.is_element(node) {
214 let name = xot.element(node).map(xot::Element::name);
215
216 if name != Some(mapping_tag) && name != Some(revision_tag) {
218 if let Some(id) = xot.get_attribute(node, id_attr) {
220 let tag = name
221 .map(|n| xot.name_ns_str(n).0.to_string())
222 .unwrap_or_default();
223 nodes.insert(id.to_string(), tag.clone());
224 }
225 if let Some(xml_id) = xot.get_attribute(node, xml_id_attr) {
227 let tag = name
228 .map(|n| xot.name_ns_str(n).0.to_string())
229 .unwrap_or_default();
230 nodes.insert(xml_id.to_string(), tag);
231 }
232 }
233
234 if name == Some(relation_tag) {
235 let rel = Relation {
236 rel_type: xot
237 .get_attribute(node, type_attr)
238 .unwrap_or("")
239 .to_string(),
240 from: xot
241 .get_attribute(node, from_attr)
242 .unwrap_or("")
243 .to_string(),
244 to: xot
245 .get_attribute(node, to_attr)
246 .unwrap_or("")
247 .to_string(),
248 to_spec: xot
249 .get_attribute(node, to_spec_attr)
250 .map(String::from),
251 };
252 relations.push(rel);
253 }
254 }
255 for child in xot.children(node) {
256 collect_from_tree(
257 xot,
258 child,
259 id_attr,
260 xml_id_attr,
261 relation_tag,
262 mapping_tag,
263 revision_tag,
264 type_attr,
265 from_attr,
266 to_attr,
267 to_spec_attr,
268 nodes,
269 relations,
270 );
271 }
272}
273
274fn build_graph(
275 nodes: &HashMap<String, String>,
276 relations: &[Relation],
277) -> (
278 DiGraph<String, String>,
279 HashMap<String, NodeIndex>,
280 HashMap<NodeIndex, String>,
281) {
282 let mut graph = DiGraph::new();
283 let mut id_to_idx: HashMap<String, NodeIndex> = HashMap::new();
284 let mut idx_to_id: HashMap<NodeIndex, String> = HashMap::new();
285
286 for id in nodes.keys() {
287 let idx = graph.add_node(id.clone());
288 id_to_idx.insert(id.clone(), idx);
289 idx_to_id.insert(idx, id.clone());
290 }
291
292 for rel in relations {
293 if rel.to_spec.is_some() {
294 continue;
295 }
296 if let (Some(&from_idx), Some(&to_idx)) = (id_to_idx.get(&rel.from), id_to_idx.get(&rel.to))
297 {
298 graph.add_edge(from_idx, to_idx, rel.rel_type.clone());
299 }
300 }
301
302 (graph, id_to_idx, idx_to_id)
303}
304
305fn weakly_connected_components(
306 graph: &DiGraph<String, String>,
307 idx_to_id: &HashMap<NodeIndex, String>,
308) -> Vec<Vec<String>> {
309 let undirected = petgraph::algo::condensation(graph.clone(), false);
310 let mut visited = HashSet::new();
312 let mut components = Vec::new();
313
314 for node_idx in graph.node_indices() {
315 if visited.contains(&node_idx) {
316 continue;
317 }
318 let mut component = Vec::new();
319 let mut stack = vec![node_idx];
320 while let Some(current) = stack.pop() {
321 if visited.contains(¤t) {
322 continue;
323 }
324 visited.insert(current);
325 if let Some(id) = idx_to_id.get(¤t) {
326 component.push(id.clone());
327 }
328 for neighbor in graph.neighbors_directed(current, Direction::Outgoing) {
330 if !visited.contains(&neighbor) {
331 stack.push(neighbor);
332 }
333 }
334 for neighbor in graph.neighbors_directed(current, Direction::Incoming) {
335 if !visited.contains(&neighbor) {
336 stack.push(neighbor);
337 }
338 }
339 }
340 component.sort();
341 components.push(component);
342 }
343
344 let _ = undirected; components.sort_by_key(|c| std::cmp::Reverse(c.len()));
346 components
347}
348
349fn find_isolated_nodes(
350 graph: &DiGraph<String, String>,
351 idx_to_id: &HashMap<NodeIndex, String>,
352) -> Vec<String> {
353 let mut isolated = Vec::new();
354 for node_idx in graph.node_indices() {
355 let in_d = graph
356 .neighbors_directed(node_idx, Direction::Incoming)
357 .count();
358 let out_d = graph
359 .neighbors_directed(node_idx, Direction::Outgoing)
360 .count();
361 if in_d == 0
362 && out_d == 0
363 && let Some(id) = idx_to_id.get(&node_idx)
364 {
365 isolated.push(id.clone());
366 }
367 }
368 isolated.sort();
369 isolated
370}
371
372fn find_hub_nodes(
373 graph: &DiGraph<String, String>,
374 idx_to_id: &HashMap<NodeIndex, String>,
375 top_n: usize,
376) -> Vec<HubNode> {
377 let mut degrees: Vec<_> = graph
378 .node_indices()
379 .filter_map(|idx| {
380 let id = idx_to_id.get(&idx)?;
381 let in_d = graph.neighbors_directed(idx, Direction::Incoming).count();
382 let out_d = graph.neighbors_directed(idx, Direction::Outgoing).count();
383 Some(HubNode {
384 id: id.clone(),
385 in_degree: in_d,
386 out_degree: out_d,
387 total_degree: in_d + out_d,
388 })
389 })
390 .collect();
391
392 degrees.sort_by(|a, b| b.total_degree.cmp(&a.total_degree));
393 degrees.truncate(top_n);
394 degrees
395}
396
397fn find_bridge_nodes(
398 graph: &DiGraph<String, String>,
399 idx_to_id: &HashMap<NodeIndex, String>,
400 top_n: usize,
401) -> Vec<BridgeNode> {
402 let nodes: Vec<NodeIndex> = graph.node_indices().collect();
404 let n = nodes.len();
405 let mut centrality: HashMap<NodeIndex, f64> = HashMap::new();
406
407 for &source in &nodes {
408 let mut stack = Vec::new();
410 let mut predecessors: HashMap<NodeIndex, Vec<NodeIndex>> = HashMap::new();
411 let mut sigma: HashMap<NodeIndex, f64> = HashMap::new();
412 let mut dist: HashMap<NodeIndex, i64> = HashMap::new();
413
414 sigma.insert(source, 1.0);
415 dist.insert(source, 0);
416 let mut queue = std::collections::VecDeque::new();
417 queue.push_back(source);
418
419 while let Some(v) = queue.pop_front() {
420 stack.push(v);
421 let d_v = dist[&v];
422 for neighbor in graph.neighbors_directed(v, Direction::Outgoing) {
423 if let std::collections::hash_map::Entry::Vacant(entry) = dist.entry(neighbor) {
424 queue.push_back(neighbor);
425 entry.insert(d_v + 1);
426 }
427 if dist[&neighbor] == d_v + 1 {
428 *sigma.entry(neighbor).or_insert(0.0) += sigma[&v];
429 predecessors.entry(neighbor).or_default().push(v);
430 }
431 }
432 }
433
434 let mut delta: HashMap<NodeIndex, f64> = HashMap::new();
435 while let Some(w) = stack.pop() {
436 if let Some(preds) = predecessors.get(&w) {
437 for &v in preds {
438 let d = sigma.get(&v).copied().unwrap_or(0.0)
439 / sigma.get(&w).copied().unwrap_or(1.0)
440 * (1.0 + delta.get(&w).copied().unwrap_or(0.0));
441 *delta.entry(v).or_insert(0.0) += d;
442 }
443 }
444 if w != source {
445 *centrality.entry(w).or_insert(0.0) += delta.get(&w).copied().unwrap_or(0.0);
446 }
447 }
448 }
449
450 #[allow(clippy::cast_precision_loss)]
452 let norm = if n > 2 {
453 1.0 / ((n - 1) as f64 * (n - 2) as f64)
454 } else {
455 1.0
456 };
457
458 let mut bridges: Vec<BridgeNode> = centrality
459 .into_iter()
460 .filter(|(_, c)| *c > 0.0)
461 .filter_map(|(idx, c)| {
462 let id = idx_to_id.get(&idx)?;
463 Some(BridgeNode {
464 id: id.clone(),
465 centrality: c * norm,
466 })
467 })
468 .collect();
469
470 bridges.sort_by(|a, b| {
471 b.centrality
472 .partial_cmp(&a.centrality)
473 .unwrap_or(std::cmp::Ordering::Equal)
474 });
475 bridges.truncate(top_n);
476 bridges
477}
478
479fn find_cycles(
480 graph: &DiGraph<String, String>,
481 idx_to_id: &HashMap<NodeIndex, String>,
482 acyclic_types: &HashSet<String>,
483) -> (Vec<Cycle>, usize) {
484 let sccs = petgraph::algo::tarjan_scc(graph);
486 let mut cycles = Vec::new();
487 let mut violations = 0;
488
489 for scc in &sccs {
490 if scc.len() <= 1 {
491 if scc.len() == 1 {
493 let idx = scc[0];
494 if graph.contains_edge(idx, idx) {
495 let node_ids: Vec<String> = scc
496 .iter()
497 .filter_map(|i| idx_to_id.get(i).cloned())
498 .collect();
499 let mut edge_types = HashSet::new();
500 for e in graph.edges(idx) {
501 if e.target() == idx {
502 edge_types.insert(e.weight().clone());
503 }
504 }
505 let has_violation = !edge_types.is_disjoint(acyclic_types);
506 if has_violation {
507 violations += 1;
508 }
509 cycles.push(Cycle {
510 nodes: node_ids,
511 edge_types,
512 has_acyclic_violation: has_violation,
513 });
514 }
515 }
516 continue;
517 }
518
519 let node_ids: Vec<String> = scc
521 .iter()
522 .filter_map(|i| idx_to_id.get(i).cloned())
523 .collect();
524
525 let mut edge_types = HashSet::new();
526 let scc_set: HashSet<_> = scc.iter().copied().collect();
527 for &idx in scc {
528 for e in graph.edges(idx) {
529 if scc_set.contains(&e.target()) {
530 edge_types.insert(e.weight().clone());
531 }
532 }
533 }
534
535 let has_violation = !edge_types.is_disjoint(acyclic_types);
536 if has_violation {
537 violations += 1;
538 }
539 cycles.push(Cycle {
540 nodes: node_ids,
541 edge_types,
542 has_acyclic_violation: has_violation,
543 });
544 }
545
546 (cycles, violations)
547}
548
549#[cfg(test)]
550mod tests {
551 use super::*;
552 use std::path::PathBuf;
553
554 fn spec_dir() -> PathBuf {
555 PathBuf::from(env!("CARGO_MANIFEST_DIR"))
556 .join("../../clayers/clayers")
557 .canonicalize()
558 .expect("clayers/clayers/ not found")
559 }
560
561 #[test]
562 fn shipped_spec_has_sufficient_nodes_and_edges() {
563 let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
564 assert!(
565 report.node_count >= 50,
566 "expected 50+ nodes, got {}",
567 report.node_count
568 );
569 assert!(
570 report.edge_count >= 30,
571 "expected 30+ edges, got {}",
572 report.edge_count
573 );
574 }
575
576 #[test]
577 fn shipped_spec_reports_components() {
578 let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
579 assert!(
580 !report.components.is_empty(),
581 "should have at least one component"
582 );
583 }
584
585 #[test]
586 fn shipped_spec_identifies_isolated_nodes() {
587 let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
588 assert!(
591 report.isolated_nodes.len() >= 2,
592 "expected some isolated nodes, got {}",
593 report.isolated_nodes.len()
594 );
595 }
596
597 #[test]
598 fn shipped_spec_has_hub_nodes() {
599 let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
600 assert!(!report.hub_nodes.is_empty(), "should identify hub nodes");
601 let hub_ids: Vec<&str> = report.hub_nodes.iter().map(|h| h.id.as_str()).collect();
603 assert!(
604 hub_ids.contains(&"layered-architecture"),
605 "layered-architecture should be a hub, top hubs: {hub_ids:?}"
606 );
607 }
608
609 #[test]
610 fn shipped_spec_no_acyclic_violations() {
611 let report = analyze_connectivity(&spec_dir()).expect("analysis failed");
612 assert_eq!(
613 report.acyclic_violations, 0,
614 "shipped spec should have no acyclic violations"
615 );
616 }
617
618 #[test]
619 fn synthetic_cycle_detected() {
620 let dir = tempfile::tempdir().expect("tempdir");
621 let index_xml = r#"<?xml version="1.0"?>
622<spec:clayers xmlns:spec="urn:clayers:spec"
623 xmlns:idx="urn:clayers:index"
624 xmlns:pr="urn:clayers:prose"
625 xmlns:rel="urn:clayers:relation">
626 <idx:file href="content.xml"/>
627</spec:clayers>"#;
628 std::fs::write(dir.path().join("index.xml"), index_xml).expect("write");
629
630 let content_xml = r#"<?xml version="1.0"?>
631<spec:clayers xmlns:spec="urn:clayers:spec"
632 xmlns:pr="urn:clayers:prose"
633 xmlns:rel="urn:clayers:relation"
634 spec:index="index.xml">
635 <pr:section id="a"><pr:title>A</pr:title></pr:section>
636 <pr:section id="b"><pr:title>B</pr:title></pr:section>
637 <rel:relation type="references" from="a" to="b"/>
638 <rel:relation type="references" from="b" to="a"/>
639</spec:clayers>"#;
640 std::fs::write(dir.path().join("content.xml"), content_xml).expect("write");
641
642 let report = analyze_connectivity(dir.path()).expect("analysis failed");
643 assert!(
644 !report.cycles.is_empty(),
645 "should detect the cycle between a and b"
646 );
647 }
648}