Skip to main content

fallow_graph/graph/
partition_order.rs

1//! Partition + order engine: from a changed-file set, split the change into
2//! coherent, independently-reviewable UNITS and suggest a dependency-sensible
3//! review ORDER.
4//!
5//! v1 partitioning is BY-MODULE only (the load-bearing panel decision): a "unit"
6//! is the parent directory of a changed file, root-relative. This is the only
7//! clustering definition that is byte-identical-deterministic straight from the
8//! graph with zero heuristics; feature-cluster and concern partitioning are
9//! explicitly DEFERRED (they need scoring heuristics whose tie-breaks are a fresh
10//! nondeterminism + false-positive surface).
11//!
12//! The ORDER is a dependency-sensible topological sequence over the unit DAG: a
13//! unit that DEFINES what another CONSUMES comes first (review the load-bearing
14//! definition before its consumers), mechanical/leaf units last, ties broken by
15//! the path sort. Inter-unit edges come from the graph's forward edges
16//! ([`super::ModuleGraph::edges_for`], `mod.rs` L255; the inverse is
17//! `reverse_deps`, L75).
18//!
19//! Determinism (the roadmap done-condition "Same PR run twice -> byte-identical
20//! unit assignment and order"): the engine is a pure function of
21//! `(graph, changed_file_ids)`. No timestamps, no randomness. No `FxHashMap`
22//! iteration order ever reaches output; every collection is materialized into a
23//! `Vec` and explicitly sorted before use. FileIds are path-sorted and stable
24//! cross-run (ADR-004), so sorting by FileId == sorting by path. The only choice
25//! point in the topological sort is a min-pick over sorted `module_dir` strings.
26
27use std::path::{Path, PathBuf};
28
29use fallow_types::discover::FileId;
30use rustc_hash::{FxHashMap, FxHashSet};
31
32use super::ModuleGraph;
33
34/// A single review unit: a coherent by-module cluster of the changed set. The
35/// `module_dir` is the root-relative parent directory shared by `files`; the
36/// changed root file (one with no parent directory) clusters under the
37/// repository-root key (the empty string).
38#[derive(Debug, Clone, PartialEq, Eq)]
39pub struct ReviewUnit {
40    /// The module directory the unit covers (root-relative, forward-slashed).
41    /// The empty string is the repository-root group for changed files with no
42    /// parent directory.
43    pub module_dir: String,
44    /// The changed files in this unit, `FileId`-sorted (== path-sorted, ADR-004).
45    pub files: Vec<FileId>,
46}
47
48/// Result of a partition + order computation, keyed by `FileId` / `module_dir`.
49/// The caller relativizes via [`ModuleGraph::partition_order_with_paths`] for
50/// serialization, mirroring the `impact_closure` / `closure_with_paths` pair.
51#[derive(Debug, Clone, Default, PartialEq, Eq)]
52pub struct PartitionOrder {
53    /// The by-module units, sorted by `module_dir` string.
54    pub units: Vec<ReviewUnit>,
55    /// The dependency-sensible review order: `module_dir` strings, definitions
56    /// before consumers, mechanical/leaf units last, ties broken by the path
57    /// sort. One entry per unit; a permutation of the `units` `module_dir` set.
58    pub order: Vec<String>,
59}
60
61/// The same partition + order with each unit's `FileId`s resolved to
62/// root-relative, forward-slashed path strings, sorted for deterministic output.
63#[derive(Debug, Clone, Default, PartialEq, Eq)]
64pub struct PartitionOrderPaths {
65    /// The by-module units with file paths resolved, sorted by `module_dir`.
66    pub units: Vec<ReviewUnitPaths>,
67    /// The dependency-sensible review order of `module_dir` strings.
68    pub order: Vec<String>,
69}
70
71/// A [`ReviewUnit`] with `FileId`s resolved to root-relative paths.
72#[derive(Debug, Clone, PartialEq, Eq)]
73pub struct ReviewUnitPaths {
74    /// The module directory the unit covers (root-relative, forward-slashed).
75    pub module_dir: String,
76    /// The changed files in this unit, path-sorted.
77    pub files: Vec<String>,
78}
79
80impl ModuleGraph {
81    /// Compute the by-module partition and dependency-sensible order for a
82    /// changed-file seed set.
83    ///
84    /// Out-of-range or duplicate ids in `changed` are tolerated (dropped /
85    /// deduped). The partition groups each changed file by its parent directory;
86    /// the order is a deterministic topological sort over the inter-unit DAG
87    /// (definitions before consumers, ties broken by the `module_dir` sort).
88    #[must_use]
89    pub fn partition_order(&self, changed: &[FileId]) -> PartitionOrder {
90        // Dedup + drop out-of-range ids, keeping a path-stable working set.
91        let mut seen = FxHashSet::default();
92        let mut changed_ids: Vec<FileId> = Vec::with_capacity(changed.len());
93        for &id in changed {
94            if (id.0 as usize) < self.modules.len() && seen.insert(id) {
95                changed_ids.push(id);
96            }
97        }
98        changed_ids.sort_unstable_by_key(|f| f.0);
99
100        let units = self.build_units(&changed_ids);
101        let order = self.order_units(&units, &changed_ids);
102        PartitionOrder { units, order }
103    }
104
105    /// Group changed files by their parent directory (the module). Returns the
106    /// units sorted by `module_dir`, each unit's files `FileId`-sorted.
107    fn build_units(&self, changed_ids: &[FileId]) -> Vec<ReviewUnit> {
108        // module_dir -> files. FxHashMap iteration order never reaches output:
109        // the keys are pulled into a Vec and sorted below.
110        let mut by_dir: FxHashMap<String, Vec<FileId>> = FxHashMap::default();
111        for &id in changed_ids {
112            let Some(module) = self.modules.get(id.0 as usize) else {
113                continue;
114            };
115            let dir = module_dir_key(&module.path);
116            by_dir.entry(dir).or_default().push(id);
117        }
118
119        let mut units: Vec<ReviewUnit> = by_dir
120            .into_iter()
121            .map(|(module_dir, mut files)| {
122                files.sort_unstable_by_key(|f| f.0);
123                ReviewUnit { module_dir, files }
124            })
125            .collect();
126        units.sort_by(|a, b| a.module_dir.cmp(&b.module_dir));
127        units
128    }
129
130    /// Order the units so a unit defining what another consumes comes first, ties
131    /// broken by the `module_dir` sort. Deterministic Kahn topological sort with
132    /// a min-pick ready set.
133    fn order_units(&self, units: &[ReviewUnit], changed_ids: &[FileId]) -> Vec<String> {
134        if units.is_empty() {
135            return Vec::new();
136        }
137
138        // FileId -> owning unit index, for resolving inter-unit edges.
139        let unit_of: FxHashMap<FileId, usize> = units
140            .iter()
141            .enumerate()
142            .flat_map(|(i, unit)| unit.files.iter().map(move |&f| (f, i)))
143            .collect();
144
145        let unit_count = units.len();
146        // `dep_count[c]` = number of distinct units `c` depends on (consumes from)
147        // that are still unemitted. A unit emerges ready once all its deps are
148        // emitted, so a pure definition (depends on nothing in the changed set)
149        // is ready first.
150        let mut deps: Vec<FxHashSet<usize>> = vec![FxHashSet::default(); unit_count];
151        for &id in changed_ids {
152            let Some(&consumer_unit) = unit_of.get(&id) else {
153                continue;
154            };
155            for dep_target in self.edges_for(id) {
156                let Some(&dep_unit) = unit_of.get(&dep_target) else {
157                    continue;
158                };
159                if dep_unit != consumer_unit {
160                    deps[consumer_unit].insert(dep_unit);
161                }
162            }
163        }
164
165        kahn_min_pick(units, &deps)
166    }
167
168    /// Resolve a partition + order's `FileId`s to root-relative, forward-slashed
169    /// paths, sorted for deterministic output. Files whose module is missing are
170    /// dropped; a unit left empty after that drop is omitted. The `module_dir`
171    /// keys (and the `order` entries, which are `module_dir` strings) are
172    /// root-relativized too so the whole shape is root-relative.
173    #[must_use]
174    pub fn partition_order_with_paths(
175        &self,
176        partition: &PartitionOrder,
177        root: &Path,
178    ) -> PartitionOrderPaths {
179        let resolve = |id: FileId| -> Option<String> {
180            self.modules
181                .get(id.0 as usize)
182                .map(|m| relativize(&m.path, root))
183        };
184
185        let units: Vec<ReviewUnitPaths> = partition
186            .units
187            .iter()
188            .filter_map(|unit| {
189                let mut files: Vec<String> =
190                    unit.files.iter().filter_map(|&id| resolve(id)).collect();
191                if files.is_empty() {
192                    return None;
193                }
194                files.sort();
195                Some(ReviewUnitPaths {
196                    module_dir: relativize_dir(&unit.module_dir, root),
197                    files,
198                })
199            })
200            .collect();
201
202        let order: Vec<String> = partition
203            .order
204            .iter()
205            .map(|dir| relativize_dir(dir, root))
206            .collect();
207
208        PartitionOrderPaths { units, order }
209    }
210}
211
212/// Deterministic Kahn topological sort: emit a unit only once every unit it
213/// depends on (consumes from) has been emitted, so definitions precede
214/// consumers. The ready set is resolved by a min-pick over the `module_dir`
215/// strings, so ties (independent units) and any cycle break resolve by the path
216/// sort. A residual cycle's units are appended in `module_dir`-sorted order.
217fn kahn_min_pick(units: &[ReviewUnit], deps: &[FxHashSet<usize>]) -> Vec<String> {
218    let unit_count = units.len();
219    let mut remaining: FxHashSet<usize> = (0..unit_count).collect();
220    let mut emitted: FxHashSet<usize> = FxHashSet::default();
221    let mut order: Vec<String> = Vec::with_capacity(unit_count);
222
223    while !remaining.is_empty() {
224        // Find the lexicographically smallest module_dir among ready units (all
225        // deps emitted). Iterating `remaining` (an FxHashSet) is fine: we pick
226        // the min by module_dir, not by iteration order.
227        let mut ready: Option<usize> = None;
228        for &idx in &remaining {
229            let all_deps_emitted = deps[idx].iter().all(|d| emitted.contains(d));
230            if !all_deps_emitted {
231                continue;
232            }
233            ready = Some(match ready {
234                Some(cur) if units[cur].module_dir <= units[idx].module_dir => cur,
235                _ => idx,
236            });
237        }
238
239        match ready {
240            Some(idx) => {
241                order.push(units[idx].module_dir.clone());
242                emitted.insert(idx);
243                remaining.remove(&idx);
244            }
245            None => {
246                // Cycle: no ready unit but units remain. Append the rest in
247                // module_dir-sorted order (deterministic fallback).
248                let mut rest: Vec<usize> = remaining.iter().copied().collect();
249                rest.sort_by(|&a, &b| units[a].module_dir.cmp(&units[b].module_dir));
250                for idx in rest {
251                    order.push(units[idx].module_dir.clone());
252                }
253                break;
254            }
255        }
256    }
257
258    order
259}
260
261/// The root-relative parent-directory key for a module path. The repository-root
262/// file (no parent component) maps to the empty string (the root group).
263fn module_dir_key(path: &Path) -> String {
264    path.parent()
265        .map(|p| p.to_string_lossy().replace('\\', "/"))
266        .unwrap_or_default()
267}
268
269/// Strip `root` and forward-slash-normalize a module path for cross-platform
270/// JSON parity (mirrors `impact_closure::relativize`).
271fn relativize(path: &Path, root: &Path) -> String {
272    let rel: PathBuf = path.strip_prefix(root).unwrap_or(path).to_path_buf();
273    rel.to_string_lossy().replace('\\', "/")
274}
275
276/// Root-relativize a `module_dir` key (a forward-slashed directory string).
277/// The empty root-group key stays empty; otherwise the `root` prefix is stripped
278/// via the same `Path`-based logic so the output matches the file path-space.
279fn relativize_dir(dir: &str, root: &Path) -> String {
280    if dir.is_empty() {
281        return String::new();
282    }
283    relativize(Path::new(dir), root)
284}
285
286#[cfg(test)]
287mod tests {
288    use super::*;
289    use crate::resolve::{ResolveResult, ResolvedImport, ResolvedModule};
290    use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource};
291    use fallow_types::extract::{ExportInfo, ExportName, ImportInfo, ImportedName, VisibilityTag};
292    use std::path::PathBuf;
293
294    fn file(id: u32, path: &str) -> DiscoveredFile {
295        DiscoveredFile {
296            id: FileId(id),
297            path: PathBuf::from(path),
298            size_bytes: 10,
299        }
300    }
301
302    fn named_import(source: &str, name: &str, target: FileId) -> ResolvedImport {
303        ResolvedImport {
304            info: ImportInfo {
305                source: source.to_string(),
306                imported_name: ImportedName::Named(name.to_string()),
307                local_name: name.to_string(),
308                is_type_only: false,
309                from_style: false,
310                span: oxc_span::Span::new(0, 10),
311                source_span: oxc_span::Span::default(),
312            },
313            target: ResolveResult::InternalModule(target),
314        }
315    }
316
317    fn named_export(name: &str) -> ExportInfo {
318        ExportInfo {
319            name: ExportName::Named(name.to_string()),
320            local_name: Some(name.to_string()),
321            is_type_only: false,
322            visibility: VisibilityTag::None,
323            expected_unused_reason: None,
324            span: oxc_span::Span::new(0, 20),
325            members: vec![],
326            is_side_effect_used: false,
327            super_class: None,
328        }
329    }
330
331    /// Three directories: `core/` defines, `mid/` consumes core, `app/` consumes
332    /// mid. Files: core/a.ts, core/b.ts, mid/m.ts, app/x.ts. entry is app/x.ts.
333    fn build_three_dir_graph() -> ModuleGraph {
334        let files = vec![
335            file(0, "/p/src/app/x.ts"),
336            file(1, "/p/src/core/a.ts"),
337            file(2, "/p/src/core/b.ts"),
338            file(3, "/p/src/mid/m.ts"),
339        ];
340        let entry_points = vec![EntryPoint {
341            path: PathBuf::from("/p/src/app/x.ts"),
342            source: EntryPointSource::PackageJsonMain,
343        }];
344        let resolved = vec![
345            ResolvedModule {
346                file_id: FileId(0),
347                path: PathBuf::from("/p/src/app/x.ts"),
348                resolved_imports: vec![named_import("../mid/m", "midFn", FileId(3))],
349                ..Default::default()
350            },
351            ResolvedModule {
352                file_id: FileId(1),
353                path: PathBuf::from("/p/src/core/a.ts"),
354                exports: vec![named_export("alpha")],
355                ..Default::default()
356            },
357            ResolvedModule {
358                file_id: FileId(2),
359                path: PathBuf::from("/p/src/core/b.ts"),
360                exports: vec![named_export("beta")],
361                ..Default::default()
362            },
363            ResolvedModule {
364                file_id: FileId(3),
365                path: PathBuf::from("/p/src/mid/m.ts"),
366                resolved_imports: vec![named_import("../core/a", "alpha", FileId(1))],
367                exports: vec![named_export("midFn")],
368                ..Default::default()
369            },
370        ];
371        ModuleGraph::build(&resolved, &entry_points, &files)
372    }
373
374    #[test]
375    fn partition_groups_changed_files_by_module_directory() {
376        let graph = build_three_dir_graph();
377        // Change all four files.
378        let partition = graph.partition_order(&[FileId(0), FileId(1), FileId(2), FileId(3)]);
379        let paths = graph.partition_order_with_paths(&partition, Path::new("/p"));
380        // Three units, sorted by module_dir.
381        let dirs: Vec<&str> = paths.units.iter().map(|u| u.module_dir.as_str()).collect();
382        assert_eq!(dirs, vec!["src/app", "src/core", "src/mid"]);
383        // core/ groups its two files, path-sorted.
384        let core = paths
385            .units
386            .iter()
387            .find(|u| u.module_dir == "src/core")
388            .expect("core unit");
389        assert_eq!(core.files, vec!["src/core/a.ts", "src/core/b.ts"]);
390    }
391
392    #[test]
393    fn order_places_definitions_before_consumers() {
394        let graph = build_three_dir_graph();
395        // app consumes mid consumes core, so order = core, mid, app.
396        let partition = graph.partition_order(&[FileId(0), FileId(1), FileId(2), FileId(3)]);
397        assert_eq!(
398            partition.order,
399            vec![
400                "/p/src/core".to_string(),
401                "/p/src/mid".to_string(),
402                "/p/src/app".to_string(),
403            ]
404        );
405    }
406
407    #[test]
408    fn independent_units_order_by_path_sort() {
409        // Two unrelated directories (no inter-unit edge): order is the path sort.
410        let files = vec![file(0, "/p/src/billing/b.ts"), file(1, "/p/src/auth/a.ts")];
411        let entry_points = vec![EntryPoint {
412            path: PathBuf::from("/p/src/auth/a.ts"),
413            source: EntryPointSource::PackageJsonMain,
414        }];
415        let resolved = vec![
416            ResolvedModule {
417                file_id: FileId(0),
418                path: PathBuf::from("/p/src/billing/b.ts"),
419                ..Default::default()
420            },
421            ResolvedModule {
422                file_id: FileId(1),
423                path: PathBuf::from("/p/src/auth/a.ts"),
424                ..Default::default()
425            },
426        ];
427        let graph = ModuleGraph::build(&resolved, &entry_points, &files);
428        let partition = graph.partition_order(&[FileId(0), FileId(1)]);
429        assert_eq!(
430            partition.order,
431            vec!["/p/src/auth".to_string(), "/p/src/billing".to_string()]
432        );
433    }
434
435    #[test]
436    fn partition_order_is_byte_identical_across_runs() {
437        let graph = build_three_dir_graph();
438        let changed = [FileId(0), FileId(1), FileId(2), FileId(3)];
439        let first = graph.partition_order(&changed);
440        let second = graph.partition_order(&changed);
441        // FileId-keyed shape is structurally identical.
442        assert_eq!(first, second);
443        // Path-resolved shape is byte-identical when debug-rendered (a proxy for
444        // serialization; the audit_brief layer serializes the same data).
445        let p1 = graph.partition_order_with_paths(&first, Path::new("/p"));
446        let p2 = graph.partition_order_with_paths(&second, Path::new("/p"));
447        assert_eq!(format!("{p1:?}"), format!("{p2:?}"));
448    }
449
450    #[test]
451    fn changed_set_order_does_not_affect_result() {
452        // Feeding the changed ids in a different input order yields the same
453        // partition + order (the engine sorts internally).
454        let graph = build_three_dir_graph();
455        let a = graph.partition_order(&[FileId(3), FileId(0), FileId(2), FileId(1)]);
456        let b = graph.partition_order(&[FileId(0), FileId(1), FileId(2), FileId(3)]);
457        assert_eq!(a, b);
458    }
459
460    #[test]
461    fn root_file_clusters_under_root_group() {
462        let files = vec![file(0, "index.ts")];
463        let entry_points = vec![EntryPoint {
464            path: PathBuf::from("index.ts"),
465            source: EntryPointSource::PackageJsonMain,
466        }];
467        let resolved = vec![ResolvedModule {
468            file_id: FileId(0),
469            path: PathBuf::from("index.ts"),
470            ..Default::default()
471        }];
472        let graph = ModuleGraph::build(&resolved, &entry_points, &files);
473        let partition = graph.partition_order(&[FileId(0)]);
474        assert_eq!(partition.units.len(), 1);
475        assert_eq!(partition.units[0].module_dir, "");
476    }
477
478    #[test]
479    fn empty_changed_set_yields_empty_partition() {
480        let graph = build_three_dir_graph();
481        let partition = graph.partition_order(&[]);
482        assert!(partition.units.is_empty());
483        assert!(partition.order.is_empty());
484    }
485
486    #[test]
487    fn scale_300_file_multi_module_graph_is_stable() {
488        // 30 directories x 10 files = 300 files. Each dir's first file imports the
489        // previous dir's first file (a chain across modules), so the order is a
490        // real topological sequence, not just the path sort.
491        const DIRS: u32 = 30;
492        const PER_DIR: u32 = 10;
493        let mut files = Vec::new();
494        let mut resolved = Vec::new();
495        for d in 0..DIRS {
496            for f in 0..PER_DIR {
497                let id = d * PER_DIR + f;
498                let path = format!("/p/src/mod{d:02}/file{f:02}.ts");
499                files.push(file(id, &path));
500            }
501        }
502        for d in 0..DIRS {
503            for f in 0..PER_DIR {
504                let id = d * PER_DIR + f;
505                let mut module = ResolvedModule {
506                    file_id: FileId(id),
507                    path: PathBuf::from(format!("/p/src/mod{d:02}/file{f:02}.ts")),
508                    exports: vec![named_export(&format!("e{id}"))],
509                    ..Default::default()
510                };
511                // The first file of each dir (except dir 0) imports the first file
512                // of the previous dir: mod01 consumes mod00, mod02 consumes mod01.
513                if f == 0 && d > 0 {
514                    let dep = (d - 1) * PER_DIR;
515                    module.resolved_imports =
516                        vec![named_import("../prev", &format!("e{dep}"), FileId(dep))];
517                }
518                resolved.push(module);
519            }
520        }
521        let entry_points = vec![EntryPoint {
522            path: PathBuf::from("/p/src/mod00/file00.ts"),
523            source: EntryPointSource::PackageJsonMain,
524        }];
525        let graph = ModuleGraph::build(&resolved, &entry_points, &files);
526
527        let changed: Vec<FileId> = (0..DIRS * PER_DIR).map(FileId).collect();
528        let first = graph.partition_order(&changed);
529        let second = graph.partition_order(&changed);
530        assert_eq!(first, second, "300-file partition must be stable");
531        assert_eq!(first.units.len(), DIRS as usize, "one unit per directory");
532        // The dependency chain forces mod00 before mod01 before ... before mod29.
533        let expected: Vec<String> = (0..DIRS).map(|d| format!("/p/src/mod{d:02}")).collect();
534        assert_eq!(
535            first.order, expected,
536            "definitions precede consumers at scale"
537        );
538        // Path-resolved serialization proxy is byte-identical across runs.
539        let p1 = graph.partition_order_with_paths(&first, Path::new("/p"));
540        let p2 = graph.partition_order_with_paths(&second, Path::new("/p"));
541        assert_eq!(format!("{p1:?}"), format!("{p2:?}"));
542    }
543}