Skip to main content

cargo_crosscut/
lib.rs

1//! Workspace layout analysis for Rust projects.
2//!
3//! Parses `Cargo.toml` workspace members, counts lines of Rust code per crate,
4//! and groups crates into compartments that fit within model context limits.
5//! Useful for partitioning large workspaces into bounded analysis units.
6
7pub mod config;
8mod compartments;
9pub mod connectivity;
10pub mod overlap;
11pub mod types;
12pub mod workspace;
13
14#[cfg(test)]
15mod tests;
16
17use std::collections::HashMap;
18use std::path::{Path, PathBuf};
19
20pub use config::LayoutConfig;
21pub use connectivity::{compute_dry_tiles, compute_dry_tiles_with, connectivity_tiles};
22pub use overlap::{compute_priority, crate_compartment_idx, generate_overlap_tiles};
23pub use types::{Compartment, ConnectomeEdge, OverlapPriority, OverlapTile, ProjectLayout};
24pub use workspace::{
25    count_rs_lines, expand_member_pattern, parse_workspace_deps, parse_workspace_members,
26    resolve_crate_infos,
27};
28
/// A workspace member crate together with its estimated size.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged,
/// duplicated, and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CrateInfo {
    /// Relative path from project root (e.g. `crates/yh-core`).
    pub path: PathBuf,
    /// Crate name derived from directory name.
    pub name: String,
    /// Lines of `.rs` source code.
    pub loc: usize,
}
38
39/// Minimal struct to extract dependency names from a crate's Cargo.toml.
40#[derive(serde::Deserialize, Default)]
41pub(crate) struct CrateCargoToml {
42    #[serde(default)]
43    pub(crate) dependencies: HashMap<String, toml::Value>,
44    #[serde(default, rename = "dev-dependencies")]
45    pub(crate) dev_dependencies: HashMap<String, toml::Value>,
46}
47
48/// Minimal struct to extract workspace members from Cargo.toml.
49#[derive(serde::Deserialize)]
50pub(crate) struct CargoToml {
51    pub(crate) workspace: Option<WorkspaceSection>,
52}
53
54#[derive(serde::Deserialize)]
55pub(crate) struct WorkspaceSection {
56    #[serde(default)]
57    pub(crate) members: Vec<String>,
58}
59
60/// Analyze a Rust workspace and return a [`ProjectLayout`].
61pub fn analyze_rust_layout(project_dir: &Path) -> ProjectLayout {
62    analyze_rust_layout_with(project_dir, &LayoutConfig::default())
63}
64
65/// Analyze a Rust workspace with custom thresholds.
66pub fn analyze_rust_layout_with(project_dir: &Path, config: &LayoutConfig) -> ProjectLayout {
67    let cargo_toml_path = project_dir.join("Cargo.toml");
68    let content = match std::fs::read_to_string(&cargo_toml_path) {
69        Ok(c) => c,
70        Err(_) => return ProjectLayout::Whole,
71    };
72
73    let cargo: CargoToml = match toml::from_str(&content) {
74        Ok(v) => v,
75        Err(_) => return ProjectLayout::Whole,
76    };
77
78    let members = match cargo.workspace {
79        Some(ws) if !ws.members.is_empty() => ws.members,
80        _ => return ProjectLayout::Whole,
81    };
82
83    let crates = workspace::resolve_crate_infos(project_dir, &members);
84
85    if crates.len() <= config.member_count_threshold {
86        return ProjectLayout::Whole;
87    }
88
89    let total_loc: usize = crates.iter().map(|c| c.loc).sum();
90    if total_loc < config.loc_threshold {
91        return ProjectLayout::Whole;
92    }
93
94    // Parse the workspace dependency graph before grouping consumes crate info.
95    let dep_edges = workspace::parse_workspace_deps(project_dir, &crates);
96    let crate_names: Vec<String> = crates.iter().map(|c| c.name.clone()).collect();
97
98    // Build connectome (named edge list) for prompt injection.
99    let connectome: Vec<ConnectomeEdge> = dep_edges
100        .iter()
101        .map(|&(from, to)| ConnectomeEdge {
102            from: crate_names[from].clone(),
103            to: crate_names[to].clone(),
104        })
105        .collect();
106
107    let compartments = compartments::group_into_compartments(crates, config);
108    let overlap_tiles =
109        overlap::generate_overlap_tiles(&compartments, &crate_names, &dep_edges, config);
110
111    ProjectLayout::Compartmentalized {
112        compartments,
113        overlap_tiles,
114        connectome,
115        total_loc,
116    }
117}
118
119/// Partition tiles into batches where tiles within a batch have disjoint file
120/// paths and can safely run in parallel.
121///
122/// Tiles with overlapping paths are placed in separate batches to avoid
123/// concurrent modifications to the same files.
124pub fn batch_by_disjointness(tiles: &[Compartment]) -> Vec<Vec<&Compartment>> {
125    let mut batches: Vec<Vec<&Compartment>> = Vec::new();
126
127    for tile in tiles {
128        // Find the first batch where no existing tile shares a path.
129        let slot = batches.iter().position(|batch| {
130            !batch
131                .iter()
132                .any(|existing| existing.paths.iter().any(|p| tile.paths.contains(p)))
133        });
134
135        match slot {
136            Some(idx) => batches[idx].push(tile),
137            None => batches.push(vec![tile]),
138        }
139    }
140
141    batches
142}