car-ast 0.25.0

Tree-sitter AST parsing for code-aware inference
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
//! Advisory symbol-footprint scheduler for the Foreman pattern (B4).
//!
//! **This is NOT a soundness mechanism.** Correctness is owned entirely by the
//! merge-verify gate (see `car-multi`'s foreman gate). This module only schedules
//! subtasks for *better parallelism* — it decides which farmed-out subtasks may
//! run concurrently versus serially. Because it is only a hint, it is allowed to
//! be wrong in the *conservative* direction (serialize things that could have
//! run in parallel) but never in the dangerous direction.
//!
//! That bias is enforced by being **fail-closed on uncertainty** — but the
//! *kind* of uncertainty determines how conservative we must be ([`Scheduling`]):
//!
//! - **[`Precise`](Scheduling::Precise)** — the blast radius fully resolved.
//!   Schedule on exact symbol-overlap.
//! - **[`FileLevel`](Scheduling::FileLevel)** — a declared symbol isn't in the
//!   index *because it's new* (greenfield "create `foo`"). A symbol that does
//!   not exist yet has no existing callers, so there is no blast radius to miss;
//!   the declared write *files* are still trustworthy. Fall back to file-level
//!   disjointness — the **same basis as the no-footprint partitioner**, which
//!   the system already trusts. This inherits the partitioner's one assumption:
//!   the agent edits only the files it declared (callers are semantic impact,
//!   not files it touches). A worktree collision on an *undeclared* file, or any
//!   cross-file semantic break the lost blast radius would have flagged, is
//!   caught by the gate's containment + union merge — not the scheduler.
//! - **[`Serialize`](Scheduling::Serialize)** — the unresolved blast radius
//!   cannot be safely downgraded. Under a truncated [`ProjectIndex`] an unknown
//!   symbol is *ambiguous*: it could be genuinely new (no callers) OR an existing
//!   symbol whose callers are merely **hidden** by the truncation — a real blast
//!   radius wrongly read as "no callers" (the Phase-0 fail-open finding). The
//!   declared file set is still known, but we cannot tell the two cases apart, so
//!   we collapse the ambiguity safely and conflict with everything. (A
//!   planner-declared uncertain footprint is likewise treated as `Serialize`.)
//!
//! Conflict model over footprints:
//! - both `Precise`:    `write(A) ∩ write(B) ≠ ∅` (symbol blast radius)  → conflict.
//! - either `FileLevel` (neither `Serialize`): declared write *files* overlap → conflict.
//! - either `Serialize`: always conflict (serialized).
//! - `write(A) ∩ read(B) ≠ ∅` (A's expanded writes vs. B's declared reads) → **edge** A→B: B runs after A.
//! - otherwise                                      → **independent**: may run in parallel.

use std::collections::{BTreeSet, HashMap, HashSet};

use crate::index::ProjectIndex;

/// Identifies a symbol by the file that holds it plus the symbol's name.
///
/// `file` is a repo-relative path. The derived ordering compares `file` first
/// and falls back to `symbol`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SymbolRef {
    /// Repo-relative path of the file containing the symbol.
    pub file: String,
    /// Name of the symbol inside `file`.
    pub symbol: String,
}

impl SymbolRef {
    /// Builds a reference from anything that converts into owned strings.
    pub fn new(file: impl Into<String>, symbol: impl Into<String>) -> Self {
        let file = file.into();
        let symbol = symbol.into();
        Self { file, symbol }
    }
}

/// The write set (symbols defined/modified) and read set of a single subtask.
#[derive(Debug, Clone, Default)]
pub struct SymbolFootprint {
    /// Symbols the subtask is expected to define or modify.
    pub writes: HashSet<SymbolRef>,
    /// Symbols the subtask is expected to read.
    pub reads: HashSet<SymbolRef>,
    /// Meaning depends on which side of expansion the footprint is on.
    ///
    /// *Declared* footprint: an input flag the planner raises when it could not
    /// confidently determine the footprint; expansion turns it into
    /// [`Scheduling::Serialize`].
    ///
    /// *Expanded* footprint: a derived, inspection-only view that is `true`
    /// whenever the blast radius was not fully resolved (that is, for both
    /// `Scheduling::FileLevel` and `Scheduling::Serialize`). The scheduler
    /// branches on [`Scheduling`] and **never** on this flag, because those two
    /// cases share `uncertain == true` yet schedule very differently.
    pub uncertain: bool,
}

impl SymbolFootprint {
    /// Footprint that writes the given symbols, reads nothing, and is not
    /// marked uncertain.
    pub fn writing(writes: impl IntoIterator<Item = SymbolRef>) -> Self {
        let mut footprint = Self::default();
        footprint.writes.extend(writes);
        footprint
    }

    /// Returns the footprint with its read set *replaced* by `reads`
    /// (any previously stored reads are dropped, not merged).
    pub fn with_reads(self, reads: impl IntoIterator<Item = SymbolRef>) -> Self {
        Self {
            reads: reads.into_iter().collect(),
            ..self
        }
    }
}

/// How [`analyze`] must treat a footprint, depending on how well its blast
/// radius could be resolved.
///
/// The variants are declared in order of increasing conservatism, and the
/// derived `Ord` follows that declaration order, so
/// `Precise < FileLevel < Serialize` holds in code (e.g. `a.max(b)` picks the
/// more conservative of two levels). See the module docs for the soundness
/// argument behind each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Scheduling {
    /// Blast radius fully resolved — schedule on exact symbol-overlap.
    Precise,
    /// Blast radius unresolved (a declared symbol is new/unknown, so it has no
    /// existing callers to bound) but the declared write *files* are trustworthy.
    /// Schedule on file-level disjointness.
    FileLevel,
    /// The unresolved blast radius cannot be safely downgraded to file-level
    /// scheduling (truncated index, where an unknown symbol may merely have
    /// hidden callers, or a planner-declared uncertain footprint). Conflict
    /// with everything — fail-closed.
    Serialize,
}

/// A footprint that went through [`expand_footprint`], or that a caller has
/// explicitly vouched for via [`ExpandedFootprint::assume_expanded`].
///
/// [`analyze`] only accepts this type, so a raw declared footprint cannot be
/// scheduled by accident. Such a footprint would look `Scheduling::Precise`
/// while carrying no blast radius, silently dropping the fail-closed guarantee.
#[derive(Debug, Clone)]
pub struct ExpandedFootprint {
    /// The footprint after expansion (writes include the blast radius).
    expanded: SymbolFootprint,
    /// Repo-relative files of the *declared* writes — the basis for `FileLevel`
    /// scheduling. Deliberately never derived from the expanded blast radius:
    /// the agent edits only what it declared, so only these files can produce a
    /// worktree merge conflict. Callers in the blast radius are semantic impact
    /// that the gate checks, not files the subtask touches.
    declared_files: BTreeSet<String>,
    /// How conservatively [`analyze`] must treat this footprint.
    scheduling: Scheduling,
}

impl ExpandedFootprint {
    /// Wraps `footprint` without consulting any index.
    ///
    /// This is the escape hatch for callers that genuinely have no
    /// [`ProjectIndex`] (and for tests): the caller asserts the footprint is
    /// already as expanded as it will get. A footprint with `uncertain` set
    /// becomes [`Scheduling::Serialize`] — with no index there is no basis for
    /// a safe downgrade to file-level — and anything else becomes
    /// [`Scheduling::Precise`]. Prefer [`expand_footprint`] whenever an index
    /// is available.
    pub fn assume_expanded(footprint: SymbolFootprint) -> Self {
        let scheduling = match footprint.uncertain {
            true => Scheduling::Serialize,
            false => Scheduling::Precise,
        };
        let mut declared_files = BTreeSet::new();
        for write in &footprint.writes {
            declared_files.insert(write.file.clone());
        }
        Self {
            expanded: footprint,
            declared_files,
            scheduling,
        }
    }

    /// The wrapped (expanded) footprint.
    pub fn inner(&self) -> &SymbolFootprint {
        &self.expanded
    }

    /// The scheduling level assigned to this footprint.
    pub fn scheduling(&self) -> Scheduling {
        self.scheduling
    }

    /// Repo-relative files of the declared writes — what `FileLevel`
    /// scheduling compares.
    pub fn declared_files(&self) -> &BTreeSet<String> {
        &self.declared_files
    }
}

/// One unit of work handed to [`analyze`]: an identifier plus the expanded
/// footprint used to schedule it.
#[derive(Clone, Debug)]
pub struct FootprintSubtask {
    /// Identifier copied verbatim into the resulting plan's levels, edges and
    /// conflicts.
    pub id: String,
    /// The subtask's footprint, already expanded (or asserted as such).
    pub footprint: ExpandedFootprint,
}

/// The scheduling plan: parallel levels plus the dependency/conflict structure
/// that produced them (kept for inspection and the audit trail).
///
/// `Clone` is derived so a plan can be stored for the audit trail while the
/// original is consumed by whoever executes it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DecompositionPlan {
    /// Subtask ids grouped into levels; ids within a level may run concurrently.
    /// Levels are listed in the order they are meant to run.
    pub levels: Vec<Vec<String>>,
    /// `(a, b)`: b reads something a writes, so a must run before b.
    pub edges: Vec<(String, String)>,
    /// `(a, b)`: a and b were judged to conflict, so they never share a level.
    /// The basis depends on the pair's [`Scheduling`]: overlapping write symbols
    /// when both are `Precise`, overlapping declared write *files* when a
    /// `FileLevel` footprint is involved (and neither is `Serialize`), and
    /// unconditionally when either side is `Serialize`.
    pub conflicts: Vec<(String, String)>,
}

/// Expand a declared footprint to its blast radius: every transitive caller of a
/// written symbol is also (potentially) affected and joins the write-set, up to
/// `max_depth` hops.
///
/// The resulting [`Scheduling`] is decided fail-closed:
/// - a truncated index or a planner-declared `uncertain` footprint →
///   [`Scheduling::Serialize`];
/// - otherwise, any *declared* write symbol the index does not know →
///   [`Scheduling::FileLevel`]. This is checked before the expansion loop, so it
///   holds for every `max_depth`, including `0` (where no expansion happens and
///   an in-loop check would never run, leaving a greenfield symbol wrongly
///   `Precise`);
/// - otherwise [`Scheduling::Precise`], which may still ratchet up to
///   `FileLevel` if a symbol reached during expansion is unknown to the index.
///
/// The returned footprint's `uncertain` flag is `true` exactly when the
/// scheduling is not `Precise`. `declared_files` always holds the files of the
/// *declared* writes, never of the expanded callers.
///
/// NOTE(review): callers more than `max_depth` hops away are not added to the
/// write-set, and hitting the depth cap does not by itself raise the scheduling
/// level — confirm this is the intended trade-off.
pub fn expand_footprint(
    index: &ProjectIndex,
    declared: &SymbolFootprint,
    max_depth: usize,
) -> ExpandedFootprint {
    let declared_files: BTreeSet<String> =
        declared.writes.iter().map(|w| w.file.clone()).collect();
    let mut writes = declared.writes.clone();

    // Scheduling conservatism only ever ratchets UP (Precise → FileLevel →
    // Serialize), never down. Serialize is decided ONCE here: a truncated index
    // can't tell a new symbol from one with hidden callers, and a
    // planner-declared uncertain footprint is untrusted outright (the Phase-0
    // fail-open fix). The greenfield check on the *declared* writes also happens
    // here rather than only inside the loop, so `max_depth == 0` cannot skip it.
    let mut scheduling = if declared.uncertain || index.truncated {
        Scheduling::Serialize
    } else if declared
        .writes
        .iter()
        .any(|w| index.find(&w.symbol).is_empty())
    {
        Scheduling::FileLevel
    } else {
        Scheduling::Precise
    };

    let mut frontier: Vec<SymbolRef> = declared.writes.iter().cloned().collect();
    for _ in 0..max_depth {
        let mut next = Vec::new();
        for w in &frontier {
            // The index has never heard of this symbol — it's new (greenfield).
            // A symbol that doesn't exist yet has no existing callers, so there is
            // no blast radius to miss. For declared writes this repeats the
            // pre-loop check (a no-op); for symbols reached as callers it is the
            // only place the downgrade can happen. Never touches Serialize.
            if index.find(&w.symbol).is_empty() {
                if scheduling == Scheduling::Precise {
                    scheduling = Scheduling::FileLevel;
                }
                continue;
            }
            for cref in index.callers_of(&w.symbol) {
                let caller = SymbolRef::new(cref.from_file.clone(), cref.from_symbol.clone());
                // Only symbols seen for the first time are expanded further,
                // which also keeps call cycles from looping.
                if writes.insert(caller.clone()) {
                    next.push(caller);
                }
            }
        }
        if next.is_empty() {
            break;
        }
        frontier = next;
    }

    ExpandedFootprint {
        expanded: SymbolFootprint {
            writes,
            reads: declared.reads.clone(),
            // `uncertain` stays the inspection/back-compat view: true whenever the
            // blast radius wasn't fully resolved (FileLevel or Serialize).
            uncertain: scheduling != Scheduling::Precise,
        },
        declared_files,
        scheduling,
    }
}

/// Groups subtasks into levels that may run in parallel, based on their
/// footprints.
///
/// For every pair of subtasks a *conflict* is recorded according to their
/// [`Scheduling`] (see the match below), and a *dependency edge* a→b is recorded
/// whenever b reads a symbol that a writes. Levels are then built greedily: each
/// round takes the subtasks whose dependencies are all placed and admits them,
/// in input order, unless they conflict with a subtask already admitted to the
/// same round.
///
/// The result is deterministic: ties are resolved by position in `subtasks`.
/// Read/write dependency cycles cannot be satisfied; when no subtask is ready,
/// the earliest unplaced subtask is emitted alone in its own level.
pub fn analyze(subtasks: &[FootprintSubtask]) -> DecompositionPlan {
    let count = subtasks.len();

    // Symmetric conflict relation over indices, plus "must run after" sets.
    let mut conflict_pairs: HashSet<(usize, usize)> = HashSet::new();
    let mut deps: HashMap<usize, BTreeSet<usize>> =
        (0..count).map(|idx| (idx, BTreeSet::new())).collect();
    let mut conflicts: Vec<(String, String)> = Vec::new();
    let mut edges: Vec<(String, String)> = Vec::new();

    for (i, first) in subtasks.iter().enumerate() {
        for (j, second) in subtasks.iter().enumerate().skip(i + 1) {
            let (fa, fb) = (&first.footprint, &second.footprint);
            let (a, b) = (fa.inner(), fb.inner());

            // The comparison basis is the weakest one either side can support.
            // The match is deliberately exhaustive (no `_` arm): adding a
            // Scheduling variant must force an explicit decision here instead of
            // silently inheriting file-level scheduling, which for a
            // more-conservative variant would be exactly the fail-open hole this
            // module exists to prevent (CLAUDE.md rule #2).
            let conflict = match (fa.scheduling(), fb.scheduling()) {
                // A side that is untrusted even at file level conflicts always.
                (Scheduling::Serialize, _) | (_, Scheduling::Serialize) => true,
                // Both resolved: compare the symbol blast radii.
                (Scheduling::Precise, Scheduling::Precise) => {
                    a.writes.intersection(&b.writes).next().is_some()
                }
                // At least one FileLevel, no Serialize: compare declared files.
                (Scheduling::FileLevel, Scheduling::FileLevel)
                | (Scheduling::FileLevel, Scheduling::Precise)
                | (Scheduling::Precise, Scheduling::FileLevel) => fa
                    .declared_files()
                    .intersection(fb.declared_files())
                    .next()
                    .is_some(),
            };
            if conflict {
                conflict_pairs.extend([(i, j), (j, i)]);
                conflicts.push((first.id.clone(), second.id.clone()));
            }

            // second reads something first writes → first runs before second.
            if a.writes.iter().any(|sym| b.reads.contains(sym)) {
                deps.entry(j).or_default().insert(i);
                edges.push((first.id.clone(), second.id.clone()));
            }
            // first reads something second writes → second runs before first.
            if b.writes.iter().any(|sym| a.reads.contains(sym)) {
                deps.entry(i).or_default().insert(j);
                edges.push((second.id.clone(), first.id.clone()));
            }
        }
    }

    let mut placed = vec![false; count];
    let mut levels: Vec<Vec<String>> = Vec::new();

    // Each round places at least one subtask, so the loop terminates. The
    // position found here doubles as the cycle-breaking pick.
    while let Some(first_unplaced) = placed.iter().position(|&done| !done) {
        // Unplaced subtasks whose every dependency has already been placed.
        let ready: Vec<usize> = (0..count)
            .filter(|&idx| !placed[idx])
            .filter(|idx| deps[idx].iter().all(|&dep| placed[dep]))
            .collect();

        let chosen: Vec<usize> = if ready.is_empty() {
            // Dependency cycle: nothing is ready, so run the earliest unplaced
            // subtask on its own.
            vec![first_unplaced]
        } else {
            // `ready` is ascending and candidates are admitted in that order, so
            // the level is already sorted by index.
            let mut level: Vec<usize> = Vec::new();
            for &candidate in &ready {
                let clashes = level
                    .iter()
                    .any(|&member| conflict_pairs.contains(&(candidate, member)));
                if !clashes {
                    level.push(candidate);
                }
            }
            level
        };

        let mut ids = Vec::with_capacity(chosen.len());
        for idx in chosen {
            placed[idx] = true;
            ids.push(subtasks[idx].id.clone());
        }
        levels.push(ids);
    }

    DecompositionPlan {
        levels,
        edges,
        conflicts,
    }
}

// Unit tests for the scheduler: `analyze` over hand-built footprints, and
// `expand_footprint` against small on-disk indexes built in temp directories.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a subtask via `assume_expanded`: `Precise` unless the footprint is
    /// flagged `uncertain`, in which case `Serialize`.
    fn sub(id: &str, fp: SymbolFootprint) -> FootprintSubtask {
        FootprintSubtask {
            id: id.to_string(),
            footprint: ExpandedFootprint::assume_expanded(fp),
        }
    }

    /// Build a subtask with an explicit [`Scheduling`] (tests can reach the
    /// private fields because they are a child module).
    fn sub_sched(id: &str, fp: SymbolFootprint, scheduling: Scheduling) -> FootprintSubtask {
        let declared_files = fp.writes.iter().map(|w| w.file.clone()).collect();
        FootprintSubtask {
            id: id.to_string(),
            footprint: ExpandedFootprint {
                expanded: fp,
                declared_files,
                scheduling,
            },
        }
    }

    /// Shorthand for `SymbolRef::new(file, sym)`.
    fn w(file: &str, sym: &str) -> SymbolRef {
        SymbolRef::new(file, sym)
    }

    /// Two Precise subtasks writing different symbols share a single level.
    #[test]
    fn disjoint_writes_run_in_one_level() {
        let plan = analyze(&[
            sub("a", SymbolFootprint::writing([w("a.rs", "fa")])),
            sub("b", SymbolFootprint::writing([w("b.rs", "fb")])),
        ]);
        assert_eq!(plan.levels, vec![vec!["a".to_string(), "b".to_string()]]);
        assert!(plan.conflicts.is_empty());
    }

    /// Two Precise subtasks writing the same symbol conflict and are split
    /// across two levels.
    #[test]
    fn overlapping_writes_serialize() {
        let plan = analyze(&[
            sub("a", SymbolFootprint::writing([w("lib.rs", "shared")])),
            sub("b", SymbolFootprint::writing([w("lib.rs", "shared")])),
        ]);
        assert_eq!(plan.levels.len(), 2, "{plan:?}");
        assert_eq!(plan.conflicts.len(), 1);
    }

    /// A write→read dependency yields an edge and orders the levels.
    #[test]
    fn write_read_dependency_orders_levels() {
        // b reads what a writes → a before b.
        let a = SymbolFootprint::writing([w("lib.rs", "api")]);
        let b = SymbolFootprint::default().with_reads([w("lib.rs", "api")]);
        let plan = analyze(&[sub("a", a), sub("b", b)]);
        assert_eq!(plan.levels, vec![vec!["a".to_string()], vec!["b".to_string()]]);
        assert_eq!(plan.edges, vec![("a".to_string(), "b".to_string())]);
    }

    /// A declared-uncertain footprint (→ Serialize via `assume_expanded`) is
    /// isolated in its own level and conflicts with every other subtask.
    #[test]
    fn uncertain_subtask_conflicts_with_everything() {
        let mut uncertain = SymbolFootprint::writing([w("x.rs", "fx")]);
        uncertain.uncertain = true;
        let plan = analyze(&[
            sub("u", uncertain),
            sub("a", SymbolFootprint::writing([w("a.rs", "fa")])),
            sub("b", SymbolFootprint::writing([w("b.rs", "fb")])),
        ]);
        // u must not share a level with a or b.
        for level in &plan.levels {
            if level.contains(&"u".to_string()) {
                assert_eq!(level.len(), 1, "uncertain subtask is isolated: {plan:?}");
            }
        }
        assert_eq!(plan.conflicts.len(), 2, "u conflicts with both");
    }

    #[test]
    fn file_level_disjoint_files_run_in_parallel() {
        // The greenfield fix: two subtasks creating NEW symbols (unknown to the
        // index → FileLevel) in DIFFERENT files run concurrently instead of
        // serializing. This is what unblocks the parallel-wins regime for
        // create-new-symbol work.
        let plan = analyze(&[
            sub_sched("a", SymbolFootprint::writing([w("a.rs", "new_a")]), Scheduling::FileLevel),
            sub_sched("b", SymbolFootprint::writing([w("b.rs", "new_b")]), Scheduling::FileLevel),
        ]);
        assert_eq!(plan.levels, vec![vec!["a".to_string(), "b".to_string()]], "{plan:?}");
        assert!(plan.conflicts.is_empty(), "disjoint files do not conflict: {plan:?}");
    }

    #[test]
    fn file_level_same_file_serializes() {
        // FileLevel still serializes same-file work — the agents would otherwise
        // race the same file and merge-conflict at integration.
        let plan = analyze(&[
            sub_sched("a", SymbolFootprint::writing([w("lib.rs", "new_a")]), Scheduling::FileLevel),
            sub_sched("b", SymbolFootprint::writing([w("lib.rs", "new_b")]), Scheduling::FileLevel),
        ]);
        assert_eq!(plan.levels.len(), 2, "same file → separate levels: {plan:?}");
        assert_eq!(plan.conflicts.len(), 1);
    }

    #[test]
    fn mixed_precise_and_file_level_compares_at_file_level() {
        // A is Precise, B is FileLevel. The weaker (file) basis governs the pair.
        // Disjoint files → parallel.
        let parallel = analyze(&[
            sub_sched("a", SymbolFootprint::writing([w("a.rs", "fa")]), Scheduling::Precise),
            sub_sched("b", SymbolFootprint::writing([w("b.rs", "new_b")]), Scheduling::FileLevel),
        ]);
        assert_eq!(parallel.levels, vec![vec!["a".to_string(), "b".to_string()]], "{parallel:?}");

        // Same file → serialize even though the symbols differ (file basis).
        let serial = analyze(&[
            sub_sched("a", SymbolFootprint::writing([w("lib.rs", "fa")]), Scheduling::Precise),
            sub_sched("b", SymbolFootprint::writing([w("lib.rs", "new_b")]), Scheduling::FileLevel),
        ]);
        assert_eq!(serial.levels.len(), 2, "{serial:?}");
    }

    #[test]
    fn serialize_conflicts_with_everything_even_disjoint_files() {
        // A truncated-index / planner-uncertain subtask (Serialize) still
        // conflicts with everyone, even file-disjoint work — the Phase-0 fix.
        let plan = analyze(&[
            sub_sched("s", SymbolFootprint::writing([w("s.rs", "fs")]), Scheduling::Serialize),
            sub_sched("a", SymbolFootprint::writing([w("a.rs", "fa")]), Scheduling::FileLevel),
            sub_sched("b", SymbolFootprint::writing([w("b.rs", "fb")]), Scheduling::Precise),
        ]);
        for level in &plan.levels {
            if level.contains(&"s".to_string()) {
                assert_eq!(level.len(), 1, "Serialize subtask is isolated: {plan:?}");
            }
        }
        assert_eq!(plan.conflicts.len(), 2, "s conflicts with both a and b");
    }

    #[test]
    fn file_level_still_honors_read_write_edges() {
        // Even when FileLevel, a declared read/write dependency still orders the
        // levels — the declared symbols remain meaningful for edges.
        let writer = SymbolFootprint::writing([w("a.rs", "api")]);
        let reader = SymbolFootprint::default().with_reads([w("a.rs", "api")]);
        let plan = analyze(&[
            sub_sched("writer", writer, Scheduling::FileLevel),
            sub_sched("reader", reader, Scheduling::FileLevel),
        ]);
        assert_eq!(
            plan.levels,
            vec![vec!["writer".to_string()], vec!["reader".to_string()]],
            "reader runs after writer: {plan:?}"
        );
    }

    #[test]
    fn cyclic_dependency_is_broken_by_serializing() {
        // a writes X reads Y; b writes Y reads X → mutual dependency cycle.
        let a = SymbolFootprint {
            writes: [w("lib.rs", "X")].into_iter().collect(),
            reads: [w("lib.rs", "Y")].into_iter().collect(),
            uncertain: false,
        };
        let b = SymbolFootprint {
            writes: [w("lib.rs", "Y")].into_iter().collect(),
            reads: [w("lib.rs", "X")].into_iter().collect(),
            uncertain: false,
        };
        let plan = analyze(&[sub("a", a), sub("b", b)]);
        // Cannot satisfy both orders — must serialize, not deadlock.
        assert_eq!(plan.levels.len(), 2, "{plan:?}");
    }

    #[test]
    fn expand_marks_unknown_symbol_file_level() {
        // A new/unknown symbol downgrades to FileLevel (not Serialize): no
        // existing callers to miss, declared file still trustworthy.
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("lib.rs"), "pub fn known() {}\n").unwrap();
        let index = ProjectIndex::build(dir.path());

        let declared = SymbolFootprint::writing([w("lib.rs", "does_not_exist")]);
        let expanded = expand_footprint(&index, &declared, 3);
        assert_eq!(expanded.scheduling(), Scheduling::FileLevel);
        assert!(expanded.inner().uncertain, "unknown symbol is still 'uncertain' for inspection");
        assert!(expanded.declared_files().contains("lib.rs"));
    }

    /// A declared symbol the index knows expands to `Precise` with
    /// `uncertain == false`.
    #[test]
    fn expand_known_symbol_is_precise() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("lib.rs"), "pub fn known() {}\n").unwrap();
        let index = ProjectIndex::build(dir.path());

        let declared = SymbolFootprint::writing([w("lib.rs", "known")]);
        let expanded = expand_footprint(&index, &declared, 3);
        assert_eq!(expanded.scheduling(), Scheduling::Precise);
        assert!(!expanded.inner().uncertain);
    }

    #[test]
    fn truncated_index_forces_serialize_even_for_known_symbol() {
        // The Phase-0 fail-open fix, preserved EXACTLY: a truncated index has
        // incomplete references, so expansion from it is Serialize (conflicts with
        // everything) — NOT downgraded to file-level. Callers exist but are hidden.
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("lib.rs"), "pub fn known() {}\n").unwrap();
        let mut index = ProjectIndex::build(dir.path());
        assert!(!index.truncated, "small build is not truncated");
        index.truncated = true; // simulate a budget-truncated build

        let declared = SymbolFootprint::writing([w("lib.rs", "known")]);
        let expanded = expand_footprint(&index, &declared, 3);
        assert_eq!(
            expanded.scheduling(),
            Scheduling::Serialize,
            "truncated index must stay fail-closed, not relax to file-level"
        );
    }

    #[test]
    fn greenfield_disjoint_files_parallelize_end_to_end() {
        // The full fix, through expand + analyze: two subtasks each creating a new
        // function in its own file, against a real index that doesn't know either
        // symbol yet, schedule into ONE parallel level.
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("a.rs"), "// implement alpha\n").unwrap();
        std::fs::write(dir.path().join("b.rs"), "// implement beta\n").unwrap();
        let index = ProjectIndex::build(dir.path());

        let fa = expand_footprint(&index, &SymbolFootprint::writing([w("a.rs", "alpha")]), 3);
        let fb = expand_footprint(&index, &SymbolFootprint::writing([w("b.rs", "beta")]), 3);
        assert_eq!(fa.scheduling(), Scheduling::FileLevel);
        assert_eq!(fb.scheduling(), Scheduling::FileLevel);

        let plan = analyze(&[
            FootprintSubtask { id: "a".into(), footprint: fa },
            FootprintSubtask { id: "b".into(), footprint: fb },
        ]);
        assert_eq!(
            plan.levels,
            vec![vec!["a".to_string(), "b".to_string()]],
            "greenfield disjoint-file subtasks now run in parallel: {plan:?}"
        );
    }
}